Single-Agent Speaking Blocks Available

This commit is contained in:
马一丁
2025-12-04 17:15:44 +08:00
parent 25abc25c9d
commit af8c8815de
6 changed files with 324 additions and 45 deletions

View File

@@ -34,6 +34,7 @@ ALLOWED_BLOCK_TYPES: List[str] = [
"list",
"table",
"blockquote",
"engineQuote",
"hr",
"code",
"math",
@@ -177,6 +178,22 @@ blockquote_block: Dict[str, Any] = {
"additionalProperties": True,
}
# JSON Schema for the "engineQuote" block: a quote attributed to one engine.
# "type", "engine" and "blocks" are required; "title" is optional.
# NOTE(review): "blocks" accepts any block definition and may be empty here,
# while IRValidator._validate_engineQuote_block only accepts a non-empty list
# of paragraph blocks — confirm the looser schema is intentional.
engine_quote_block: Dict[str, Any] = {
"title": "EngineQuoteBlock",
"type": "object",
"properties": {
"type": {"const": "engineQuote"},
# The speaking engine; restricted to these three identifiers.
"engine": {"type": "string", "enum": ["insight", "media", "query"]},
# Optional heading text for the quote.
"title": {"type": "string"},
# Child blocks, each resolved through the shared block definition.
"blocks": {
"type": "array",
"items": {"$ref": "#/definitions/block"},
},
},
"required": ["type", "engine", "blocks"],
# Keys not listed under "properties" are tolerated.
"additionalProperties": True,
}
hr_block: Dict[str, Any] = {
"title": "HorizontalRuleBlock",
"type": "object",
@@ -315,6 +332,7 @@ block_variants: List[Dict[str, Any]] = [
list_block,
table_block,
blockquote_block,
engine_quote_block,
hr_block,
code_block,
math_block,

View File

@@ -138,6 +138,45 @@ class IRValidator:
for idx, sub_block in enumerate(inner):
self._validate_block(sub_block, f"{path}.blocks[{idx}]", errors)
def _validate_engineQuote_block(
self, block: Dict[str, Any], path: str, errors: List[str]
):
"""单引擎发言块需标注engine并包含子blocks"""
engine = block.get("engine")
if engine not in {"insight", "media", "query"}:
errors.append(f"{path}.engine 取值非法: {engine}")
inner = block.get("blocks")
if not isinstance(inner, list) or not inner:
errors.append(f"{path}.blocks 必须是非空数组")
return
for idx, sub_block in enumerate(inner):
sub_path = f"{path}.blocks[{idx}]"
if not isinstance(sub_block, dict):
errors.append(f"{sub_path} 必须是对象")
continue
if sub_block.get("type") != "paragraph":
errors.append(f"{sub_path}.type 仅允许 paragraph")
continue
# 复用 paragraph 结构校验,但限制 marks
inlines = sub_block.get("inlines")
if not isinstance(inlines, list) or not inlines:
errors.append(f"{sub_path}.inlines 必须是非空数组")
continue
for ridx, run in enumerate(inlines):
self._validate_inline_run(run, f"{sub_path}.inlines[{ridx}]", errors)
if not isinstance(run, dict):
continue
marks = run.get("marks") or []
if not isinstance(marks, list):
errors.append(f"{sub_path}.inlines[{ridx}].marks 必须是数组")
continue
for midx, mark in enumerate(marks):
mark_type = mark.get("type") if isinstance(mark, dict) else None
if mark_type not in {"bold", "italic"}:
errors.append(
f"{sub_path}.inlines[{ridx}].marks[{midx}].type 仅允许 bold/italic"
)
def _validate_callout_block(self, block: Dict[str, Any], path: str, errors: List[str]):
"""callout需声明tone并至少有一个子block"""
tone = block.get("tone")

View File

@@ -889,7 +889,7 @@ class ChapterGenerationNode(BaseNode):
block["items"] = normalized
for entry in block.get("items", []):
walk(entry)
elif block_type in {"callout", "blockquote"}:
elif block_type in {"callout", "blockquote", "engineQuote"}:
walk(block.get("blocks"))
elif block_type == "table":
for row in block.get("rows", []):
@@ -994,7 +994,7 @@ class ChapterGenerationNode(BaseNode):
total += walk(item)
return total
if block_type in {"blockquote", "callout"}:
if block_type in {"blockquote", "callout", "engineQuote"}:
return walk(node.get("blocks"))
if block_type == "table":
@@ -1015,7 +1015,7 @@ class ChapterGenerationNode(BaseNode):
def _count_narrative_characters(self, blocks: Any) -> int:
"""
统计paragraph/callout/list/blockquote等叙述性结构的字符数避免被表格/图表“刷长”。
统计paragraph/callout/list/blockquote/engineQuote等叙述性结构的字符数,避免被表格/图表“刷长”。
"""
def walk(node: Any) -> int:
@@ -1037,7 +1037,7 @@ class ChapterGenerationNode(BaseNode):
for item in node.get("items", []):
total += walk(item)
return total
if block_type in {"callout", "blockquote"}:
if block_type in {"callout", "blockquote", "engineQuote"}:
return walk(node.get("blocks"))
# list项可能是匿名dict兼容性遍历
@@ -1072,12 +1072,60 @@ class ChapterGenerationNode(BaseNode):
self._normalize_paragraph_block(block)
elif block_type == "table":
self._sanitize_table_block(block)
elif block_type == "engineQuote":
self._sanitize_engine_quote_block(block)
def _sanitize_table_block(self, block: Dict[str, Any]):
    """Replace the table's ``rows`` with their normalised form, in place.

    The normalisation itself is delegated to ``_normalize_table_rows``.
    """
    block["rows"] = self._normalize_table_rows(block.get("rows"))
def _sanitize_engine_quote_block(self, block: Dict[str, Any]):
"""engineQuote内部仅允许paragraph且仅保留bold/italic样式"""
allowed_marks = {"bold", "italic"}
raw_blocks = block.get("blocks")
candidates = raw_blocks if isinstance(raw_blocks, list) else ([raw_blocks] if raw_blocks else [])
sanitized_blocks: List[Dict[str, Any]] = []
for item in candidates:
if isinstance(item, dict) and item.get("type") == "paragraph":
para = dict(item)
else:
text = self._extract_block_text(item) if isinstance(item, dict) else (item or "")
para = self._as_paragraph_block(str(text))
inlines = para.get("inlines")
if not isinstance(inlines, list) or not inlines:
inlines = [self._as_inline_run(self._extract_block_text(para))]
cleaned_inlines: List[Dict[str, Any]] = []
for run in inlines:
if isinstance(run, dict):
text_val = run.get("text")
text_str = text_val if isinstance(text_val, str) else ("" if text_val is None else str(text_val))
marks_raw = run.get("marks") if isinstance(run.get("marks"), list) else []
marks_filtered: List[Dict[str, Any]] = []
for mark in marks_raw:
if not isinstance(mark, dict):
continue
mark_type = mark.get("type")
if mark_type in allowed_marks:
marks_filtered.append({"type": mark_type})
cleaned_inlines.append({"text": text_str, "marks": marks_filtered})
else:
cleaned_inlines.append(self._as_inline_run(str(run)))
if not cleaned_inlines:
cleaned_inlines.append(self._as_inline_run(""))
para["inlines"] = cleaned_inlines
para["type"] = "paragraph"
para.pop("blocks", None)
sanitized_blocks.append(para)
if not sanitized_blocks:
sanitized_blocks.append(self._as_paragraph_block(""))
block["blocks"] = sanitized_blocks
def _normalize_table_rows(self, rows: Any) -> List[Dict[str, Any]]:
"""确保rows始终是由row对象组成的列表"""
if rows is None:
@@ -1250,9 +1298,9 @@ class ChapterGenerationNode(BaseNode):
return merged
def _merge_nested_fragments(self, block: Dict[str, Any]) -> Dict[str, Any]:
"""对嵌套结构callout/list/table递归处理片段合并"""
"""对嵌套结构callout/blockquote/engineQuote/list/table递归处理片段合并"""
block_type = block.get("type")
if block_type in {"callout", "blockquote"}:
if block_type in {"callout", "blockquote", "engineQuote"}:
nested = block.get("blocks")
if isinstance(nested, list):
block["blocks"] = self._merge_fragment_sequences(nested)

View File

@@ -306,16 +306,17 @@ SYSTEM_PROMPT_CHAPTER_JSON = f"""
5. 表格需给出rows/cells/alignKPI卡请使用kpiGrid分割线用hr。
6. 如需引用图表/交互组件统一用widgetType表示例如chart.js/line、chart.js/doughnut
7. 鼓励结合outline中列出的子标题生成多层heading与细粒度内容同时可补充callout、blockquote等。
8. 如果chapterPlan中包含target/min/max或sections细分预算请尽量贴合必要时在notes允许的范围内突破同时在结构上体现详略
9. 一级标题需使用中文数字“一、二、三”二级标题使用阿拉伯数字“1.1、1.2”heading.text中直接写好编号与outline顺序对应
10. 严禁输出外部图片/AI生图链接仅可使用Chart.js图表、表格、色块、callout等HTML原生组件如需视觉辅助请改为文字描述或数据表
11. 段落混排需通过marks表达粗体、斜体、下划线、颜色等样式禁止残留Markdown语法如**text**
12. 行间公式用block.type="math"并填入math.latex行内公式在paragraph.inlines里将文本设为Latex并加上marks.type="math"渲染层会用MathJax处理
13. widget配色需与CSS变量兼容不要硬编码背景色或文字色legend/ticks由渲染层控制
14. 善用callout、kpiGrid、表格、widget等提升版面丰富度但必须遵守模板章节范围。
15. 输出前务必自检JSON语法禁止出现`{{}}{{`或`][`相连缺少逗号、列表项嵌套超过一层、未闭合的括号或未转义换行,`list` block的items必须是`[[block,...], ...]`结构若无法满足则返回错误提示而不是输出不合法JSON
16. 所有widget块必须在顶层提供`data`或`dataRef`可将props中的`data`上移确保Chart.js能够直接渲染缺失数据时宁可输出表格或段落绝不留空
17. 任何block都必须声明合法`type`heading/paragraph/list/...);若需要普通文本请使用`paragraph`并给出`inlines`,禁止返回`type:null`或未知值
8. 如需标注某个引擎的原话,请用 block.type="engineQuote"engine 取值 insight/media/query仅限这三种内部 blocks 只允许 paragraphparagraph.inlines 的 marks 仅可使用 bold/italic可留空禁止在 engineQuote 中放表格/图表/引用/公式等。
9. 如果chapterPlan中包含target/min/max或sections细分预算请尽量贴合必要时在notes允许的范围内突破同时在结构上体现详略
10. 一级标题需使用中文数字“一、二、三”二级标题使用阿拉伯数字“1.1、1.2”heading.text中直接写好编号与outline顺序对应
11. 严禁输出外部图片/AI生图链接仅可使用Chart.js图表、表格、色块、callout等HTML原生组件如需视觉辅助请改为文字描述或数据表
12. 段落混排需通过marks表达粗体、斜体、下划线、颜色等样式禁止残留Markdown语法如**text**
13. 行间公式用block.type="math"并填入math.latex行内公式在paragraph.inlines里将文本设为Latex并加上marks.type="math"渲染层会用MathJax处理
14. widget配色需与CSS变量兼容不要硬编码背景色或文字色legend/ticks由渲染层控制
15. 善用callout、kpiGrid、表格、widget等提升版面丰富度但必须遵守模板章节范围
16. 输出前务必自检JSON语法禁止出现`{{}}{{`或`][`相连缺少逗号、列表项嵌套超过一层、未闭合的括号或未转义换行,`list` block的items必须是`[[block,...], ...]`结构若无法满足则返回错误提示而不是输出不合法JSON
17. 所有widget块必须在顶层提供`data`或`dataRef`可将props中的`data`上移确保Chart.js能够直接渲染缺失数据时宁可输出表格或段落绝不留空
18. 任何block都必须声明合法`type`heading/paragraph/list/...);若需要普通文本请使用`paragraph`并给出`inlines`,禁止返回`type:null`或未知值。
<CHAPTER JSON SCHEMA>
{CHAPTER_JSON_SCHEMA_TEXT}

View File

@@ -47,6 +47,7 @@ class HTMLRenderer:
"math",
"figure",
"kpiGrid",
"engineQuote",
}
INLINE_ARTIFACT_KEYS = {
"props",
@@ -1020,6 +1021,7 @@ class HTMLRenderer:
"list": self._render_list,
"table": self._render_table,
"blockquote": self._render_blockquote,
"engineQuote": self._render_engine_quote,
"hr": lambda b: "<hr />",
"code": self._render_code,
"math": self._render_math,
@@ -1282,6 +1284,29 @@ class HTMLRenderer:
inner = self._render_blocks(block.get("blocks", []))
return f"<blockquote>{inner}</blockquote>"
def _render_engine_quote(self, block: Dict[str, Any]) -> str:
    """Render an ``engineQuote`` block as an ``engine-quote`` HTML card.

    The engine name is lower-cased and must be one of ``insight`` /
    ``media`` / ``query``; anything else (including a missing or non-string
    value) falls back to ``insight``. The header shows ``block["title"]``
    when it is truthy, otherwise the engine's default title.

    Args:
        block: The engineQuote block to render.

    Returns:
        The HTML fragment for the card.
    """
    default_titles = {
        "insight": "Insight Engine 发言",
        "media": "Media Engine 发言",
        "query": "Query Engine 发言",
    }
    engine_raw = block.get("engine")
    # Only strings have ``.lower()``; a truthy non-string engine used to raise
    # AttributeError instead of taking the fallback.
    engine_key = engine_raw.lower() if isinstance(engine_raw, str) else ""
    engine = engine_key if engine_key in default_titles else "insight"
    title = block.get("title") or default_titles[engine]
    inner = self._render_blocks(block.get("blocks", []))
    return (
        f'<div class="engine-quote engine-{self._escape_attr(engine)}">'
        f' <div class="engine-quote__header">'
        f' <span class="engine-quote__dot"></span>'
        f' <span class="engine-quote__title">{self._escape_html(title)}</span>'
        f' </div>'
        f' <div class="engine-quote__body">{inner}</div>'
        f'</div>'
    )
def _render_code(self, block: Dict[str, Any]) -> str:
"""渲染代码块,附带语言信息"""
lang = block.get("lang") or ""
@@ -2392,6 +2417,16 @@ class HTMLRenderer:
--card-bg: {card};
--border-color: {border};
--shadow-color: {shadow};
--engine-insight-bg: #f4f7ff;
--engine-insight-border: #dce7ff;
--engine-insight-text: #1f4b99;
--engine-media-bg: #fff6ec;
--engine-media-border: #ffd9b3;
--engine-media-text: #b65a1a;
--engine-query-bg: #f1fbf5;
--engine-query-border: #c7ebd6;
--engine-query-text: #1d6b3f;
--engine-quote-shadow: 0 12px 30px rgba(0,0,0,0.04);
}}
.dark-mode {{
--bg-color: #121212;
@@ -2405,6 +2440,16 @@ class HTMLRenderer:
--card-bg: #1f1f1f;
--border-color: #2c2c2c;
--shadow-color: rgba(0, 0, 0, 0.4);
--engine-insight-bg: rgba(145, 202, 255, 0.08);
--engine-insight-border: rgba(145, 202, 255, 0.45);
--engine-insight-text: #9dc2ff;
--engine-media-bg: rgba(255, 196, 138, 0.08);
--engine-media-border: rgba(255, 196, 138, 0.45);
--engine-media-text: #ffcb9b;
--engine-query-bg: rgba(141, 215, 165, 0.08);
--engine-query-border: rgba(141, 215, 165, 0.45);
--engine-query-text: #a7e2ba;
--engine-quote-shadow: 0 12px 28px rgba(0, 0, 0, 0.35);
}}
* {{ box-sizing: border-box; }}
body {{
@@ -2416,7 +2461,7 @@ body {{
min-height: 100vh;
transition: background-color 0.45s ease, color 0.45s ease;
}}
.report-header, main, .hero-section, .chapter, .chart-card, .callout, .kpi-card, .toc, .table-wrap {{
.report-header, main, .hero-section, .chapter, .chart-card, .callout, .engine-quote, .kpi-card, .toc, .table-wrap {{
transition: background-color 0.45s ease, color 0.45s ease, border-color 0.45s ease, box-shadow 0.45s ease;
}}
.report-header {{
@@ -2785,6 +2830,49 @@ blockquote {{
background: rgba(0,0,0,0.04);
border-radius: 0 8px 8px 0;
}}
.engine-quote {{
--engine-quote-bg: var(--engine-insight-bg);
--engine-quote-border: var(--engine-insight-border);
--engine-quote-text: var(--engine-insight-text);
margin: 22px 0;
padding: 16px 18px;
border-radius: 14px;
border: 1px solid var(--engine-quote-border);
background: var(--engine-quote-bg);
box-shadow: var(--engine-quote-shadow);
line-height: 1.65;
}}
.engine-quote__header {{
display: flex;
align-items: center;
gap: 10px;
font-weight: 650;
color: var(--engine-quote-text);
margin-bottom: 8px;
letter-spacing: 0.02em;
}}
.engine-quote__dot {{
width: 10px;
height: 10px;
border-radius: 50%;
background: var(--engine-quote-text);
box-shadow: 0 0 0 8px rgba(0,0,0,0.02);
}}
.engine-quote__title {{
font-size: 0.98rem;
}}
.engine-quote__body > *:first-child {{ margin-top: 0; }}
.engine-quote__body > *:last-child {{ margin-bottom: 0; }}
.engine-quote.engine-media {{
--engine-quote-bg: var(--engine-media-bg);
--engine-quote-border: var(--engine-media-border);
--engine-quote-text: var(--engine-media-text);
}}
.engine-quote.engine-query {{
--engine-quote-bg: var(--engine-query-bg);
--engine-quote-border: var(--engine-query-border);
--engine-quote-text: var(--engine-query-text);
}}
.table-wrap {{
overflow-x: auto;
margin: 20px 0;
@@ -3020,34 +3108,35 @@ pre.code-block {{
}}
.chapter > *,
.hero-section,
.callout,
.chart-card,
.kpi-grid,
.table-wrap,
figure,
blockquote {{
break-inside: avoid;
page-break-inside: avoid;
max-width: 100%;
}}
.chapter h2,
.chapter h3,
.chapter h4 {{
break-after: avoid;
page-break-after: avoid;
break-inside: avoid;
}}
.chart-card,
.table-wrap {{
overflow: visible !important;
max-width: 100% !important;
box-sizing: border-box;
}}
.chart-card canvas {{
width: 100% !important;
height: auto !important;
max-width: 100% !important;
}}
.callout,
.engine-quote,
.chart-card,
.kpi-grid,
.table-wrap,
figure,
blockquote {{
break-inside: avoid;
page-break-inside: avoid;
max-width: 100%;
}}
.chapter h2,
.chapter h3,
.chapter h4 {{
break-after: avoid;
page-break-after: avoid;
break-inside: avoid;
}}
.chart-card,
.table-wrap {{
overflow: visible !important;
max-width: 100% !important;
box-sizing: border-box;
}}
.chart-card canvas {{
width: 100% !important;
height: auto !important;
max-width: 100% !important;
}}
.table-wrap {{
overflow-x: auto;
max-width: 100%;

View File

@@ -52,6 +52,90 @@ class ChapterSanitizationTestCase(unittest.TestCase):
"全国趋势",
)
def test_engine_quote_validation(self):
    """A well-formed engineQuote with one plain paragraph validates cleanly."""
    quote = {
        "type": "engineQuote",
        "engine": "insight",
        "blocks": [
            {
                "type": "paragraph",
                "inlines": [{"text": "来自 Insight Engine 的观点"}],
            }
        ],
    }
    chapter = {
        "chapterId": "S1",
        "title": "Engine 引用校验",
        "anchor": "section-1",
        "order": 1,
        "blocks": [quote],
    }
    valid, errors = IRValidator().validate_chapter(chapter)
    self.assertTrue(valid, errors)
    self.assertFalse(errors)
def test_engine_quote_rejects_disallowed_marks_and_blocks(self):
    """A non-paragraph child and a non bold/italic mark are both reported."""
    math_child = {"type": "math", "latex": "x=y"}
    colored_paragraph = {
        "type": "paragraph",
        "inlines": [{"text": "test", "marks": [{"type": "color"}]}],
    }
    chapter = {
        "chapterId": "S1",
        "title": "Engine 引用校验",
        "anchor": "section-1",
        "order": 1,
        "blocks": [
            {
                "type": "engineQuote",
                "engine": "media",
                "blocks": [math_child, colored_paragraph],
            }
        ],
    }
    valid, errors = IRValidator().validate_chapter(chapter)
    self.assertFalse(valid)
    for expected in ("仅允许 paragraph", "仅允许 bold/italic"):
        self.assertTrue(any(expected in err for err in errors))
def test_engine_quote_sanitization_strips_disallowed(self):
    """Sanitising turns every child into a paragraph and keeps only allowed marks."""
    list_child = {"type": "list", "items": [["非法"]]}
    marked_paragraph = {
        "type": "paragraph",
        "inlines": [
            {
                "text": "abc",
                "marks": [{"type": "bold"}, {"type": "highlight"}],
            }
        ],
    }
    chapter = {
        "blocks": [
            {
                "type": "engineQuote",
                "engine": "query",
                "blocks": [list_child, marked_paragraph],
            }
        ]
    }
    self.node._sanitize_chapter_blocks(chapter)

    eq_block = chapter["blocks"][0]
    self.assertEqual(eq_block["type"], "engineQuote")
    inner_blocks = eq_block.get("blocks")
    self.assertTrue(all(child.get("type") == "paragraph" for child in inner_blocks))
    # The converted list child carries no marks at all.
    self.assertEqual(inner_blocks[0]["inlines"][0].get("marks"), [])
    # The original paragraph keeps bold and loses the unsupported highlight.
    self.assertEqual(inner_blocks[1]["inlines"][0].get("marks"), [{"type": "bold"}])
if __name__ == "__main__":
unittest.main()