Single-Agent Speaking Blocks Available

2025-12-04 17:15:44 +08:00
parent 25abc25c9d
commit af8c8815de
6 changed files with 324 additions and 45 deletions
--- a/ReportEngine/ir/schema.py
+++ b/ReportEngine/ir/schema.py
@@ -34,6 +34,7 @@ ALLOWED_BLOCK_TYPES: List[str] = [
    "list",
    "table",
    "blockquote",
+    "engineQuote",
    "hr",
    "code",
    "math",
@@ -177,6 +178,22 @@ blockquote_block: Dict[str, Any] = {
    "additionalProperties": True,
 }

+engine_quote_block: Dict[str, Any] = {  # JSON Schema for a block quoting one engine
+    "title": "EngineQuoteBlock",
+    "type": "object",
+    "properties": {
+        "type": {"const": "engineQuote"},
+        "engine": {"type": "string", "enum": ["insight", "media", "query"]},  # the only accepted engines
+        "title": {"type": "string"},  # optional: not listed under "required"
+        "blocks": {
+            "type": "array",
+            "items": {"$ref": "#/definitions/block"},  # shared block definition; no paragraph-only limit is expressed here
+        },
+    },
+    "required": ["type", "engine", "blocks"],
+    "additionalProperties": True,  # keys not listed above are tolerated
+}
+
 hr_block: Dict[str, Any] = {
    "title": "HorizontalRuleBlock",
    "type": "object",
@@ -315,6 +332,7 @@ block_variants: List[Dict[str, Any]] = [
    list_block,
    table_block,
    blockquote_block,
+    engine_quote_block,
    hr_block,
    code_block,
    math_block,
--- a/ReportEngine/ir/validator.py
+++ b/ReportEngine/ir/validator.py
@@ -138,6 +138,45 @@ class IRValidator:
        for idx, sub_block in enumerate(inner):
            self._validate_block(sub_block, f"{path}.blocks[{idx}]", errors)

+    def _validate_engineQuote_block(self, block: Dict[str, Any], path: str, errors: List[str]):
+        """Validate an engineQuote block: engine in insight/media/query, non-empty paragraph-only
+        `blocks`, bold/italic marks only. Problems are appended to `errors`, prefixed with `path`."""
+        # The isinstance(str) checks below keep unhashable values (list/dict) from raising TypeError in the set tests.
+        engine = block.get("engine")
+        if not isinstance(engine, str) or engine not in {"insight", "media", "query"}:
+            errors.append(f"{path}.engine 取值非法: {engine}")
+        inner = block.get("blocks")
+        if not isinstance(inner, list) or not inner:
+            errors.append(f"{path}.blocks 必须是非空数组")
+            return
+        for idx, sub_block in enumerate(inner):
+            sub_path = f"{path}.blocks[{idx}]"
+            if not isinstance(sub_block, dict):
+                errors.append(f"{sub_path} 必须是对象")
+                continue
+            if sub_block.get("type") != "paragraph":
+                errors.append(f"{sub_path}.type 仅允许 paragraph")
+                continue
+            # Paragraph structure is checked inline here so that the allowed marks can be restricted
+            inlines = sub_block.get("inlines")
+            if not isinstance(inlines, list) or not inlines:
+                errors.append(f"{sub_path}.inlines 必须是非空数组")
+                continue
+            for ridx, run in enumerate(inlines):
+                self._validate_inline_run(run, f"{sub_path}.inlines[{ridx}]", errors)
+                if not isinstance(run, dict):
+                    continue
+                marks = run.get("marks") or []
+                if not isinstance(marks, list):
+                    errors.append(f"{sub_path}.inlines[{ridx}].marks 必须是数组")
+                    continue
+                for midx, mark in enumerate(marks):
+                    mark_type = mark.get("type") if isinstance(mark, dict) else None
+                    if not isinstance(mark_type, str) or mark_type not in {"bold", "italic"}:
+                        errors.append(
+                            f"{sub_path}.inlines[{ridx}].marks[{midx}].type 仅允许 bold/italic"
+                        )
+
    def _validate_callout_block(self, block: Dict[str, Any], path: str, errors: List[str]):
        """callout需声明tone，并至少有一个子block"""
        tone = block.get("tone")
--- a/ReportEngine/nodes/chapter_generation_node.py
+++ b/ReportEngine/nodes/chapter_generation_node.py
@@ -889,7 +889,7 @@ class ChapterGenerationNode(BaseNode):
                        block["items"] = normalized
                    for entry in block.get("items", []):
                        walk(entry)
-                elif block_type in {"callout", "blockquote"}:
+                elif block_type in {"callout", "blockquote", "engineQuote"}:
                    walk(block.get("blocks"))
                elif block_type == "table":
                    for row in block.get("rows", []):
@@ -994,7 +994,7 @@ class ChapterGenerationNode(BaseNode):
                    total += walk(item)
                return total

-            if block_type in {"blockquote", "callout"}:
+            if block_type in {"blockquote", "callout", "engineQuote"}:
                return walk(node.get("blocks"))

            if block_type == "table":
@@ -1015,7 +1015,7 @@ class ChapterGenerationNode(BaseNode):

    def _count_narrative_characters(self, blocks: Any) -> int:
        """
-        统计paragraph/callout/list/blockquote等叙述性结构的字符数，避免被表格/图表“刷长”。
+        统计paragraph/callout/list/blockquote/engineQuote等叙述性结构的字符数，避免被表格/图表“刷长”。
        """

        def walk(node: Any) -> int:
@@ -1037,7 +1037,7 @@ class ChapterGenerationNode(BaseNode):
                for item in node.get("items", []):
                    total += walk(item)
                return total
-            if block_type in {"callout", "blockquote"}:
+            if block_type in {"callout", "blockquote", "engineQuote"}:
                return walk(node.get("blocks"))

            # list项可能是匿名dict，兼容性遍历
@@ -1072,12 +1072,60 @@ class ChapterGenerationNode(BaseNode):
            self._normalize_paragraph_block(block)
        elif block_type == "table":
            self._sanitize_table_block(block)
+        elif block_type == "engineQuote":
+            self._sanitize_engine_quote_block(block)

    def _sanitize_table_block(self, block: Dict[str, Any]):
        """保证表格的rows/cells结构合法且每个单元格包含至少一个block"""
        rows = self._normalize_table_rows(block.get("rows"))
        block["rows"] = rows

+    def _sanitize_engine_quote_block(self, block: Dict[str, Any]):
+        """Coerce an engineQuote block in place so that `blocks` holds only paragraph blocks.
+        Non-paragraph children are replaced by a paragraph built from their extracted text, each inline
+        run is rebuilt as {"text", "marks"} keeping only bold/italic; empty `blocks` gets one empty paragraph."""
+        allowed_marks = {"bold", "italic"}
+        raw_blocks = block.get("blocks")
+        candidates = raw_blocks if isinstance(raw_blocks, list) else ([raw_blocks] if raw_blocks else [])
+        sanitized_blocks: List[Dict[str, Any]] = []
+
+        for item in candidates:
+            if isinstance(item, dict) and item.get("type") == "paragraph":
+                para = dict(item)
+            else:
+                text = self._extract_block_text(item) if isinstance(item, dict) else (item or "")
+                para = self._as_paragraph_block(str(text))
+
+            inlines = para.get("inlines")
+            if not isinstance(inlines, list) or not inlines:
+                inlines = [self._as_inline_run(self._extract_block_text(para))]
+
+            cleaned_inlines: List[Dict[str, Any]] = []
+            for run in inlines:
+                if not isinstance(run, dict):
+                    cleaned_inlines.append(self._as_inline_run(str(run)))
+                    continue
+                text_val = run.get("text")
+                text_str = text_val if isinstance(text_val, str) else ("" if text_val is None else str(text_val))
+                marks_raw = run.get("marks") if isinstance(run.get("marks"), list) else []
+                # isinstance(str) guard: an unhashable `type` (list/dict) would make the set
+                # membership test raise TypeError instead of simply dropping the mark.
+                marks_filtered: List[Dict[str, Any]] = [
+                    {"type": mark["type"]}
+                    for mark in marks_raw
+                    if isinstance(mark, dict) and isinstance(mark.get("type"), str) and mark["type"] in allowed_marks
+                ]
+                cleaned_inlines.append({"text": text_str, "marks": marks_filtered})
+
+            para["inlines"] = cleaned_inlines
+            para["type"] = "paragraph"
+            para.pop("blocks", None)
+            sanitized_blocks.append(para)
+
+        if not sanitized_blocks:
+            sanitized_blocks.append(self._as_paragraph_block(""))
+        block["blocks"] = sanitized_blocks
+
    def _normalize_table_rows(self, rows: Any) -> List[Dict[str, Any]]:
        """确保rows始终是由row对象组成的列表"""
        if rows is None:
@@ -1250,9 +1298,9 @@ class ChapterGenerationNode(BaseNode):
        return merged

    def _merge_nested_fragments(self, block: Dict[str, Any]) -> Dict[str, Any]:
-        """对嵌套结构（callout/list/table）递归处理片段合并"""
+        """对嵌套结构（callout/blockquote/engineQuote/list/table）递归处理片段合并"""
        block_type = block.get("type")
-        if block_type in {"callout", "blockquote"}:
+        if block_type in {"callout", "blockquote", "engineQuote"}:
            nested = block.get("blocks")
            if isinstance(nested, list):
                block["blocks"] = self._merge_fragment_sequences(nested)
--- a/ReportEngine/prompts/prompts.py
+++ b/ReportEngine/prompts/prompts.py
@@ -306,16 +306,17 @@ SYSTEM_PROMPT_CHAPTER_JSON = f"""
 5. 表格需给出rows/cells/align，KPI卡请使用kpiGrid，分割线用hr。
 6. 如需引用图表/交互组件，统一用widgetType表示（例如chart.js/line、chart.js/doughnut）。
 7. 鼓励结合outline中列出的子标题，生成多层heading与细粒度内容，同时可补充callout、blockquote等。
-8. 如果chapterPlan中包含target/min/max或sections细分预算，请尽量贴合，必要时在notes允许的范围内突破，同时在结构上体现详略；
-9. 一级标题需使用中文数字（“一、二、三”），二级标题使用阿拉伯数字（“1.1、1.2”），heading.text中直接写好编号，与outline顺序对应；
-10. 严禁输出外部图片/AI生图链接，仅可使用Chart.js图表、表格、色块、callout等HTML原生组件；如需视觉辅助请改为文字描述或数据表；
-11. 段落混排需通过marks表达粗体、斜体、下划线、颜色等样式，禁止残留Markdown语法（如**text**）；
-12. 行间公式用block.type="math"并填入math.latex，行内公式在paragraph.inlines里将文本设为Latex并加上marks.type="math"，渲染层会用MathJax处理；
-13. widget配色需与CSS变量兼容，不要硬编码背景色或文字色，legend/ticks由渲染层控制；
-14. 善用callout、kpiGrid、表格、widget等提升版面丰富度，但必须遵守模板章节范围。
-15. 输出前务必自检JSON语法：禁止出现`{{}}{{`或`][`相连缺少逗号、列表项嵌套超过一层、未闭合的括号或未转义换行，`list` block的items必须是`[[block,...], ...]`结构，若无法满足则返回错误提示而不是输出不合法JSON。
-16. 所有widget块必须在顶层提供`data`或`dataRef`（可将props中的`data`上移），确保Chart.js能够直接渲染；缺失数据时宁可输出表格或段落，绝不留空。
-17. 任何block都必须声明合法`type`（heading/paragraph/list/...）；若需要普通文本请使用`paragraph`并给出`inlines`，禁止返回`type:null`或未知值。
+8. 如需标注某个引擎的原话，请用 block.type="engineQuote"，engine 取值 insight/media/query（仅限这三种），内部 blocks 只允许 paragraph，paragraph.inlines 的 marks 仅可使用 bold/italic（可留空），禁止在 engineQuote 中放表格/图表/引用/公式等。
+9. 如果chapterPlan中包含target/min/max或sections细分预算，请尽量贴合，必要时在notes允许的范围内突破，同时在结构上体现详略；
+10. 一级标题需使用中文数字（“一、二、三”），二级标题使用阿拉伯数字（“1.1、1.2”），heading.text中直接写好编号，与outline顺序对应；
+11. 严禁输出外部图片/AI生图链接，仅可使用Chart.js图表、表格、色块、callout等HTML原生组件；如需视觉辅助请改为文字描述或数据表；
+12. 段落混排需通过marks表达粗体、斜体、下划线、颜色等样式，禁止残留Markdown语法（如**text**）；
+13. 行间公式用block.type="math"并填入math.latex，行内公式在paragraph.inlines里将文本设为Latex并加上marks.type="math"，渲染层会用MathJax处理；
+14. widget配色需与CSS变量兼容，不要硬编码背景色或文字色，legend/ticks由渲染层控制；
+15. 善用callout、kpiGrid、表格、widget等提升版面丰富度，但必须遵守模板章节范围。
+16. 输出前务必自检JSON语法：禁止出现`{{}}{{`或`][`相连缺少逗号、列表项嵌套超过一层、未闭合的括号或未转义换行，`list` block的items必须是`[[block,...], ...]`结构，若无法满足则返回错误提示而不是输出不合法JSON。
+17. 所有widget块必须在顶层提供`data`或`dataRef`（可将props中的`data`上移），确保Chart.js能够直接渲染；缺失数据时宁可输出表格或段落，绝不留空。
+18. 任何block都必须声明合法`type`（heading/paragraph/list/...）；若需要普通文本请使用`paragraph`并给出`inlines`，禁止返回`type:null`或未知值。

 <CHAPTER JSON SCHEMA>
 {CHAPTER_JSON_SCHEMA_TEXT}
--- a/ReportEngine/renderers/html_renderer.py
+++ b/ReportEngine/renderers/html_renderer.py
@@ -47,6 +47,7 @@ class HTMLRenderer:
        "math",
        "figure",
        "kpiGrid",
+        "engineQuote",
    }
    INLINE_ARTIFACT_KEYS = {
        "props",
@@ -1020,6 +1021,7 @@ class HTMLRenderer:
            "list": self._render_list,
            "table": self._render_table,
            "blockquote": self._render_blockquote,
+            "engineQuote": self._render_engine_quote,
            "hr": lambda b: "<hr />",
            "code": self._render_code,
            "math": self._render_math,
@@ -1282,6 +1284,29 @@ class HTMLRenderer:
        inner = self._render_blocks(block.get("blocks", []))
        return f"<blockquote>{inner}</blockquote>"

+    def _render_engine_quote(self, block: Dict[str, Any]) -> str:
+        """Render a single-engine "speaking" block as a titled <div> with an engine-specific CSS class.
+        `engine` is matched case-insensitively against insight/media/query; any other value falls back
+        to "insight". A truthy `title` overrides the per-engine default heading. Returns the HTML string."""
+        default_titles = {
+            "insight": "Insight Engine 发言",
+            "media": "Media Engine 发言",
+            "query": "Query Engine 发言",
+        }
+        engine = str(block.get("engine") or "").lower()  # str(): a non-string engine must not crash on .lower()
+        engine = engine if engine in default_titles else "insight"
+        title = block.get("title") or default_titles[engine]
+        inner = self._render_blocks(block.get("blocks", []))
+        return (
+            f'<div class="engine-quote engine-{self._escape_attr(engine)}">'
+            f'  <div class="engine-quote__header">'
+            f'    <span class="engine-quote__dot"></span>'
+            f'    <span class="engine-quote__title">{self._escape_html(title)}</span>'
+            f'  </div>'
+            f'  <div class="engine-quote__body">{inner}</div>'
+            f'</div>'
+        )
+
    def _render_code(self, block: Dict[str, Any]) -> str:
        """渲染代码块，附带语言信息"""
        lang = block.get("lang") or ""
@@ -2392,6 +2417,16 @@ class HTMLRenderer:
  --card-bg: {card};
  --border-color: {border};
  --shadow-color: {shadow};
+  --engine-insight-bg: #f4f7ff;
+  --engine-insight-border: #dce7ff;
+  --engine-insight-text: #1f4b99;
+  --engine-media-bg: #fff6ec;
+  --engine-media-border: #ffd9b3;
+  --engine-media-text: #b65a1a;
+  --engine-query-bg: #f1fbf5;
+  --engine-query-border: #c7ebd6;
+  --engine-query-text: #1d6b3f;
+  --engine-quote-shadow: 0 12px 30px rgba(0,0,0,0.04);
 }}
 .dark-mode {{
  --bg-color: #121212;
@@ -2405,6 +2440,16 @@ class HTMLRenderer:
  --card-bg: #1f1f1f;
  --border-color: #2c2c2c;
  --shadow-color: rgba(0, 0, 0, 0.4);
+  --engine-insight-bg: rgba(145, 202, 255, 0.08);
+  --engine-insight-border: rgba(145, 202, 255, 0.45);
+  --engine-insight-text: #9dc2ff;
+  --engine-media-bg: rgba(255, 196, 138, 0.08);
+  --engine-media-border: rgba(255, 196, 138, 0.45);
+  --engine-media-text: #ffcb9b;
+  --engine-query-bg: rgba(141, 215, 165, 0.08);
+  --engine-query-border: rgba(141, 215, 165, 0.45);
+  --engine-query-text: #a7e2ba;
+  --engine-quote-shadow: 0 12px 28px rgba(0, 0, 0, 0.35);
 }}
 * {{ box-sizing: border-box; }}
 body {{
@@ -2416,7 +2461,7 @@ body {{
  min-height: 100vh;
  transition: background-color 0.45s ease, color 0.45s ease;
 }}
-.report-header, main, .hero-section, .chapter, .chart-card, .callout, .kpi-card, .toc, .table-wrap {{
+.report-header, main, .hero-section, .chapter, .chart-card, .callout, .engine-quote, .kpi-card, .toc, .table-wrap {{
  transition: background-color 0.45s ease, color 0.45s ease, border-color 0.45s ease, box-shadow 0.45s ease;
 }}
 .report-header {{
@@ -2785,6 +2830,49 @@ blockquote {{
  background: rgba(0,0,0,0.04);
  border-radius: 0 8px 8px 0;
 }}
+.engine-quote {{
+  --engine-quote-bg: var(--engine-insight-bg);
+  --engine-quote-border: var(--engine-insight-border);
+  --engine-quote-text: var(--engine-insight-text);
+  margin: 22px 0;
+  padding: 16px 18px;
+  border-radius: 14px;
+  border: 1px solid var(--engine-quote-border);
+  background: var(--engine-quote-bg);
+  box-shadow: var(--engine-quote-shadow);
+  line-height: 1.65;
+}}
+.engine-quote__header {{
+  display: flex;
+  align-items: center;
+  gap: 10px;
+  font-weight: 650;
+  color: var(--engine-quote-text);
+  margin-bottom: 8px;
+  letter-spacing: 0.02em;
+}}
+.engine-quote__dot {{
+  width: 10px;
+  height: 10px;
+  border-radius: 50%;
+  background: var(--engine-quote-text);
+  box-shadow: 0 0 0 8px rgba(0,0,0,0.02);
+}}
+.engine-quote__title {{
+  font-size: 0.98rem;
+}}
+.engine-quote__body > *:first-child {{ margin-top: 0; }}
+.engine-quote__body > *:last-child {{ margin-bottom: 0; }}
+.engine-quote.engine-media {{
+  --engine-quote-bg: var(--engine-media-bg);
+  --engine-quote-border: var(--engine-media-border);
+  --engine-quote-text: var(--engine-media-text);
+}}
+.engine-quote.engine-query {{
+  --engine-quote-bg: var(--engine-query-bg);
+  --engine-quote-border: var(--engine-query-border);
+  --engine-quote-text: var(--engine-query-text);
+}}
 .table-wrap {{
  overflow-x: auto;
  margin: 20px 0;
@@ -3020,34 +3108,35 @@ pre.code-block {{
  }}
  .chapter > *,
  .hero-section,
-.callout,
-.chart-card,
-.kpi-grid,
-.table-wrap,
-figure,
-blockquote {{
-  break-inside: avoid;
-  page-break-inside: avoid;
-  max-width: 100%;
-}}
-.chapter h2,
-.chapter h3,
-.chapter h4 {{
-  break-after: avoid;
-  page-break-after: avoid;
-  break-inside: avoid;
-}}
-.chart-card,
-.table-wrap {{
-  overflow: visible !important;
-  max-width: 100% !important;
-  box-sizing: border-box;
-}}
-.chart-card canvas {{
-  width: 100% !important;
-  height: auto !important;
-  max-width: 100% !important;
-}}
+  .callout,
+  .engine-quote,
+  .chart-card,
+  .kpi-grid,
+  .table-wrap,
+  figure,
+  blockquote {{
+    break-inside: avoid;
+    page-break-inside: avoid;
+    max-width: 100%;
+  }}
+  .chapter h2,
+  .chapter h3,
+  .chapter h4 {{
+    break-after: avoid;
+    page-break-after: avoid;
+    break-inside: avoid;
+  }}
+  .chart-card,
+  .table-wrap {{
+    overflow: visible !important;
+    max-width: 100% !important;
+    box-sizing: border-box;
+  }}
+  .chart-card canvas {{
+    width: 100% !important;
+    height: auto !important;
+    max-width: 100% !important;
+  }}
 .table-wrap {{
  overflow-x: auto;
  max-width: 100%;
--- a/tests/test_report_engine_sanitization.py
+++ b/tests/test_report_engine_sanitization.py
@@ -52,6 +52,90 @@ class ChapterSanitizationTestCase(unittest.TestCase):
            "全国趋势",
        )

+    def test_engine_quote_validation(self):
+        """A minimal engineQuote (known engine, one paragraph) passes chapter validation."""
+        # Smallest accepted shape: required keys only, no title and no marks.
+        quote_block = {
+            "type": "engineQuote",
+            "engine": "insight",
+            "blocks": [
+                {"type": "paragraph", "inlines": [{"text": "来自 Insight Engine 的观点"}]},
+            ],
+        }
+        # Wrap the quote in a bare chapter envelope.
+        chapter = {
+            "chapterId": "S1",
+            "title": "Engine 引用校验",
+            "anchor": "section-1",
+            "order": 1,
+            "blocks": [quote_block],
+        }
+
+        # validate_chapter yields (ok, error_list); both must signal success.
+        is_valid, problems = IRValidator().validate_chapter(chapter)
+        self.assertTrue(is_valid, problems)
+        self.assertFalse(problems)
+
+    def test_engine_quote_rejects_disallowed_marks_and_blocks(self):
+        """engineQuote children must be paragraphs and their marks limited to bold/italic."""
+        math_child = {"type": "math", "latex": "x=y"}  # non-paragraph child
+        colored_child = {
+            "type": "paragraph",
+            "inlines": [{"text": "test", "marks": [{"type": "color"}]}],  # disallowed mark
+        }
+
+        chapter = {
+            "chapterId": "S1",
+            "title": "Engine 引用校验",
+            "anchor": "section-1",
+            "order": 1,
+            "blocks": [
+                {
+                    "type": "engineQuote",
+                    "engine": "media",
+                    "blocks": [math_child, colored_child],
+                }
+            ],
+        }
+
+        is_valid, problems = IRValidator().validate_chapter(chapter)
+        self.assertFalse(is_valid)
+        # Each violation must surface, matched by the message fragment the validator emits.
+        self.assertTrue(any("仅允许 paragraph" in msg for msg in problems))
+        self.assertTrue(any("仅允许 bold/italic" in msg for msg in problems))
+
+    def test_engine_quote_sanitization_strips_disallowed(self):
+        """Sanitizing an engineQuote makes every child a paragraph and drops non bold/italic marks."""
+        # A non-paragraph child that the sanitizer has to convert.
+        list_child = {"type": "list", "items": [["非法"]]}
+        marked_child = {
+            "type": "paragraph",
+            "inlines": [
+                {"text": "abc", "marks": [{"type": "bold"}, {"type": "highlight"}]},
+            ],
+        }
+
+        chapter = {
+            "blocks": [
+                {
+                    "type": "engineQuote",
+                    "engine": "query",
+                    "blocks": [list_child, marked_child],
+                }
+            ]
+        }
+
+        # The chapter dict is sanitized in place; the return value is not used.
+        self.node._sanitize_chapter_blocks(chapter)
+
+        quote = chapter["blocks"][0]
+        self.assertEqual(quote["type"], "engineQuote")
+        children = quote.get("blocks")
+        self.assertTrue(all(child.get("type") == "paragraph" for child in children))
+        # The converted list child carries no marks; the paragraph keeps only its bold mark.
+        self.assertEqual(children[0]["inlines"][0].get("marks"), [])
+        self.assertEqual(children[1]["inlines"][0].get("marks"), [{"type": "bold"}])
+

 if __name__ == "__main__":
    unittest.main()