Fixed the Issue of Charts being Repeatedly Repaired

This commit is contained in:
马一丁
2025-11-24 19:27:02 +08:00
parent 4be9cc8a19
commit 107a58a08a
3 changed files with 200 additions and 12 deletions

View File

@@ -90,6 +90,9 @@ class HTMLRenderer:
validator=self.chart_validator,
llm_repair_fns=llm_repair_fns
)
# 记录修复失败的图表避免多次触发LLM循环修复
self._chart_failure_notes: Dict[str, str] = {}
self._chart_failure_recorded: set[str] = set()
# 统计信息
self.chart_validation_stats = {
@@ -260,6 +263,8 @@ class HTMLRenderer:
'repaired_api': 0,
'failed': 0
}
# 每次渲染重新统计失败计数但保留失败原因避免重复LLM调用
self._chart_failure_recorded = set()
metadata = self.metadata
theme_tokens = metadata.get("themeTokens") or self.document.get("themeTokens", {})
@@ -1441,6 +1446,82 @@ class HTMLRenderer:
return True
def _chart_cache_key(self, block: Dict[str, Any]) -> str:
"""使用修复器的缓存算法生成稳定的key便于跨阶段共享结果"""
if hasattr(self, "chart_repairer") and block:
try:
return self.chart_repairer.build_cache_key(block)
except Exception:
pass
return str(id(block))
def _note_chart_failure(self, cache_key: str, reason: str) -> None:
"""记录修复失败原因,后续渲染直接使用占位提示"""
if not cache_key:
return
if not reason:
reason = "LLM返回的图表信息格式有误无法正常显示"
self._chart_failure_notes[cache_key] = reason
def _record_chart_failure_stat(self, cache_key: str | None = None) -> None:
"""确保失败计数只统计一次"""
if cache_key and cache_key in self._chart_failure_recorded:
return
self.chart_validation_stats['failed'] += 1
if cache_key:
self._chart_failure_recorded.add(cache_key)
def _format_chart_error_reason(
self,
validation_result: ValidationResult | None = None,
fallback_reason: str | None = None
) -> str:
"""拼接友好的失败提示"""
base = "LLM返回的图表信息格式有误已尝试本地与多模型修复但仍无法正常显示。"
detail = None
if validation_result:
if validation_result.errors:
detail = validation_result.errors[0]
elif validation_result.warnings:
detail = validation_result.warnings[0]
if not detail and fallback_reason:
detail = fallback_reason
if detail:
text = f"{base} 提示:{detail}"
return text[:180] + ("..." if len(text) > 180 else "")
return base
def _render_chart_error_placeholder(
self,
title: str | None,
reason: str,
widget_id: str | None = None
) -> str:
"""输出图表失败时的简洁占位提示避免破坏HTML/PDF布局"""
safe_title = self._escape_html(title or "图表未能展示")
safe_reason = self._escape_html(reason)
widget_attr = f' data-widget-id="{self._escape_attr(widget_id)}"' if widget_id else ""
return f"""
<div class="chart-card chart-card--error"{widget_attr}>
<div class="chart-error">
<div class="chart-error__icon">!</div>
<div class="chart-error__body">
<div class="chart-error__title">{safe_title}</div>
<p class="chart-error__desc">{safe_reason}</p>
</div>
</div>
</div>
"""
def _has_chart_failure(self, block: Dict[str, Any]) -> tuple[bool, str | None]:
"""检查是否已有修复失败记录"""
cache_key = self._chart_cache_key(block)
if block.get("_chart_renderable") is False:
return True, block.get("_chart_error_reason")
if cache_key in self._chart_failure_notes:
return True, self._chart_failure_notes.get(cache_key)
return False, None
def _normalize_chart_block(
self,
block: Dict[str, Any],
@@ -1522,7 +1603,7 @@ class HTMLRenderer:
1. 验证图表数据格式
2. 如果无效,尝试本地修复
3. 如果本地修复失败尝试API修复
4. 如果所有修复都失败,使用原始数据(前端会降级处理)
4. 如果所有修复都失败,输出提示占位并跳过再次修复
参数:
block: widget类型的block包含widgetId/props/data。
@@ -1537,10 +1618,21 @@ class HTMLRenderer:
widget_type = block.get('widgetType', '')
is_chart = isinstance(widget_type, str) and widget_type.startswith('chart.js')
is_wordcloud = isinstance(widget_type, str) and 'wordcloud' in widget_type.lower()
widget_id = block.get('widgetId')
cache_key = self._chart_cache_key(block) if is_chart else ""
props_snapshot = block.get("props") if isinstance(block.get("props"), dict) else {}
display_title = props_snapshot.get("title") or block.get("title") or widget_id or "图表"
if is_chart:
self.chart_validation_stats['total'] += 1
# 如果此前已记录失败,直接使用占位提示,避免重复修复
has_failed, cached_reason = self._has_chart_failure(block)
if has_failed:
self._record_chart_failure_stat(cache_key)
reason = cached_reason or "LLM返回的图表信息格式有误无法正常显示"
return self._render_chart_error_placeholder(display_title, reason, widget_id)
# 验证图表数据
validation_result = self.chart_validator.validate(block)
@@ -1566,12 +1658,16 @@ class HTMLRenderer:
elif repair_result.method == 'api':
self.chart_validation_stats['repaired_api'] += 1
else:
# 修复失败,使用原始数据,前端会尝试降级渲染
# 修复失败,记录失败并输出占位提示
fail_reason = self._format_chart_error_reason(validation_result)
block["_chart_renderable"] = False
block["_chart_error_reason"] = fail_reason
self._note_chart_failure(cache_key, fail_reason)
self._record_chart_failure_stat(cache_key)
logger.warning(
f"图表 {block.get('widgetId', 'unknown')} 修复失败,"
f"将使用原始数据前端会尝试降级渲染或显示fallback"
f"图表 {block.get('widgetId', 'unknown')} 修复失败,已跳过渲染: {fail_reason}"
)
self.chart_validation_stats['failed'] += 1
return self._render_chart_error_placeholder(display_title, fail_reason, widget_id)
else:
# 验证通过
self.chart_validation_stats['valid'] += 1
@@ -1725,7 +1821,7 @@ class HTMLRenderer:
logger.warning(
f" ✗ 修复失败: {stats['failed']} "
f"({stats['failed']/stats['total']*100:.1f}%) - "
f"这些图表将使用降级渲染或显示fallback表格"
f"这些图表将展示简洁占位提示"
)
logger.info("=" * 60)
@@ -2558,6 +2654,41 @@ table th {{
border-radius: 12px;
background: rgba(0,0,0,0.01);
}}
.chart-card.chart-card--error {{
border-style: dashed;
background: linear-gradient(135deg, rgba(0,0,0,0.015), rgba(0,0,0,0.04));
}}
.chart-error {{
display: flex;
gap: 12px;
padding: 14px 12px;
border-radius: 10px;
align-items: flex-start;
background: rgba(0,0,0,0.03);
color: var(--secondary-color);
}}
.chart-error__icon {{
width: 28px;
height: 28px;
flex-shrink: 0;
border-radius: 50%;
display: inline-flex;
align-items: center;
justify-content: center;
font-weight: 700;
color: var(--secondary-color-dark);
background: rgba(0,0,0,0.06);
font-size: 0.9rem;
}}
.chart-error__title {{
font-weight: 600;
color: var(--text-color);
}}
.chart-error__desc {{
margin: 4px 0 0;
color: var(--secondary-color);
line-height: 1.6;
}}
.chart-card.wordcloud-card .chart-container {{
min-height: 260px;
}}

View File

@@ -211,8 +211,15 @@ class PDFRenderer:
)
else:
repair_stats['failed'] += 1
reason = self.html_renderer._format_chart_error_reason(validation)
block["_chart_renderable"] = False
block["_chart_error_reason"] = reason
self.html_renderer._note_chart_failure(
self.html_renderer._chart_cache_key(block),
reason
)
logger.warning(
f"图表 {block.get('widgetId')} 修复失败,将使用原始数据"
f"图表 {block.get('widgetId')} 修复失败,将使用占位提示: {reason}"
)
# 递归处理嵌套的blocks
@@ -324,6 +331,13 @@ class PDFRenderer:
# 只处理chart.js类型的widget
if widget_id and widget_type.startswith('chart.js'):
failed, fail_reason = self.html_renderer._has_chart_failure(block)
if block.get("_chart_renderable") is False or failed:
logger.debug(
f"跳过转换失败的图表 {widget_id}"
f"{f',原因: {fail_reason}' if fail_reason else ''}"
)
continue
try:
svg_content = self.chart_converter.convert_widget_to_svg(
block,

View File

@@ -21,6 +21,7 @@ from __future__ import annotations
import copy
import json
import hashlib
from typing import Any, Dict, List, Optional, Tuple, Callable
from dataclasses import dataclass
from loguru import logger
@@ -383,6 +384,30 @@ class ChartRepairer:
"""
self.validator = validator
self.llm_repair_fns = llm_repair_fns or []
# 缓存修复结果避免同一个图表在多处被重复调用LLM
self._result_cache: Dict[str, RepairResult] = {}
def build_cache_key(self, widget_block: Dict[str, Any]) -> str:
"""
为图表生成稳定的缓存key保证同样的数据不会重复触发修复。
- 优先使用widgetId
- 结合数据内容的哈希避免同ID但内容变化时误用旧结果。
"""
widget_id = ""
if isinstance(widget_block, dict):
widget_id = widget_block.get('widgetId') or widget_block.get('id') or ""
try:
serialized = json.dumps(
widget_block,
ensure_ascii=False,
sort_keys=True,
default=str
)
except Exception:
serialized = repr(widget_block)
digest = hashlib.md5(serialized.encode('utf-8', errors='ignore')).hexdigest()
return f"{widget_id}:{digest}"
def repair(
self,
@@ -399,6 +424,20 @@ class ChartRepairer:
Returns:
RepairResult: 修复结果
"""
cache_key = self.build_cache_key(widget_block)
cached = self._result_cache.get(cache_key)
if cached:
# 返回缓存的深拷贝,避免外部修改影响缓存
return copy.deepcopy(cached)
def _cache_and_return(res: RepairResult) -> RepairResult:
try:
self._result_cache[cache_key] = copy.deepcopy(res)
except Exception:
self._result_cache[cache_key] = res
return res
# 1. 如果没有验证结果,先验证
if validation_result is None:
validation_result = self.validator.validate(widget_block)
@@ -412,7 +451,9 @@ class ChartRepairer:
repaired_validation = self.validator.validate(local_result.repaired_block)
if repaired_validation.is_valid:
logger.info(f"本地修复成功: {local_result.changes}")
return RepairResult(True, local_result.repaired_block, 'local', local_result.changes)
return _cache_and_return(
RepairResult(True, local_result.repaired_block, 'local', local_result.changes)
)
else:
logger.warning(f"本地修复后仍然无效: {repaired_validation.errors}")
@@ -426,20 +467,22 @@ class ChartRepairer:
repaired_validation = self.validator.validate(api_result.repaired_block)
if repaired_validation.is_valid:
logger.info(f"API修复成功: {api_result.changes}")
return api_result
return _cache_and_return(api_result)
else:
logger.warning(f"API修复后仍然无效: {repaired_validation.errors}")
# 5. 如果验证通过,返回原始或修复后的数据
if validation_result.is_valid:
if local_result.has_changes():
return RepairResult(True, local_result.repaired_block, 'local', local_result.changes)
return _cache_and_return(
RepairResult(True, local_result.repaired_block, 'local', local_result.changes)
)
else:
return RepairResult(True, widget_block, 'none', [])
return _cache_and_return(RepairResult(True, widget_block, 'none', []))
# 6. 所有修复都失败,返回原始数据
logger.warning("所有修复尝试失败,保持原始数据")
return RepairResult(False, widget_block, 'none', [])
return _cache_and_return(RepairResult(False, widget_block, 'none', []))
def repair_locally(
self,