Repair and Optimize the Chart Rendering

This commit is contained in:
马一丁
2025-11-17 16:34:44 +08:00
parent 50b6ab403e
commit c20cc24c78
4 changed files with 1579 additions and 1 deletions

View File

@@ -11,6 +11,15 @@ import json
import os
from pathlib import Path
from typing import Any, Dict, List
from loguru import logger
from ReportEngine.utils.chart_validator import (
ChartValidator,
ChartRepairer,
create_chart_validator,
create_chart_repairer
)
from ReportEngine.utils.chart_repair_api import create_llm_repair_functions
class HTMLRenderer:
@@ -65,6 +74,23 @@ class HTMLRenderer:
self.hero_kpi_signature: tuple | None = None
self._lib_cache: Dict[str, str] = {}
# 初始化图表验证和修复器
self.chart_validator = create_chart_validator()
llm_repair_fns = create_llm_repair_functions()
self.chart_repairer = create_chart_repairer(
validator=self.chart_validator,
llm_repair_fns=llm_repair_fns
)
# 统计信息
self.chart_validation_stats = {
'total': 0,
'valid': 0,
'repaired_locally': 0,
'repaired_api': 0,
'failed': 0
}
@staticmethod
def _get_lib_path() -> Path:
"""获取第三方库文件的目录路径"""
@@ -124,6 +150,15 @@ class HTMLRenderer:
self.heading_label_map = self._compute_heading_labels(self.chapters)
self.toc_entries = self._collect_toc_entries(self.chapters)
# 重置图表验证统计
self.chart_validation_stats = {
'total': 0,
'valid': 0,
'repaired_locally': 0,
'repaired_api': 0,
'failed': 0
}
metadata = self.metadata
theme_tokens = metadata.get("themeTokens") or self.document.get("themeTokens", {})
title = metadata.get("title") or metadata.get("query") or "智能舆情报告"
@@ -132,6 +167,10 @@ class HTMLRenderer:
head = self._render_head(title, theme_tokens)
body = self._render_body()
# 输出图表验证统计
self._log_chart_validation_stats()
return f"<!DOCTYPE html>\n<html lang=\"zh-CN\" class=\"no-js\">\n{head}\n{body}\n</html>"
# ====== 头部 / 正文 ======
@@ -1150,12 +1189,66 @@ class HTMLRenderer:
"""
渲染Chart.js等交互组件的占位容器并记录配置JSON。
在渲染前进行图表验证和修复:
1. 验证图表数据格式
2. 如果无效,尝试本地修复
3. 如果本地修复失败尝试API修复
4. 如果所有修复都失败,使用原始数据(前端会降级处理)
参数:
block: widget类型的block包含widgetId/props/data。
返回:
str: 含canvas与配置脚本的HTML。
"""
# 统计
widget_type = block.get('widgetType', '')
is_chart = isinstance(widget_type, str) and widget_type.startswith('chart.js')
if is_chart:
self.chart_validation_stats['total'] += 1
# 验证图表数据
validation_result = self.chart_validator.validate(block)
if not validation_result.is_valid:
logger.warning(
f"图表 {block.get('widgetId', 'unknown')} 验证失败: {validation_result.errors}"
)
# 尝试修复
repair_result = self.chart_repairer.repair(block, validation_result)
if repair_result.success and repair_result.repaired_block:
# 修复成功,使用修复后的数据
block = repair_result.repaired_block
logger.info(
f"图表 {block.get('widgetId', 'unknown')} 修复成功 "
f"(方法: {repair_result.method}): {repair_result.changes}"
)
# 更新统计
if repair_result.method == 'local':
self.chart_validation_stats['repaired_locally'] += 1
elif repair_result.method == 'api':
self.chart_validation_stats['repaired_api'] += 1
else:
# 修复失败,使用原始数据,前端会尝试降级渲染
logger.warning(
f"图表 {block.get('widgetId', 'unknown')} 修复失败,"
f"将使用原始数据前端会尝试降级渲染或显示fallback"
)
self.chart_validation_stats['failed'] += 1
else:
# 验证通过
self.chart_validation_stats['valid'] += 1
if validation_result.warnings:
logger.info(
f"图表 {block.get('widgetId', 'unknown')} 验证通过,"
f"但有警告: {validation_result.warnings}"
)
# 渲染图表HTML
self.chart_counter += 1
canvas_id = f"chart-{self.chart_counter}"
config_id = f"chart-config-{self.chart_counter}"
@@ -1220,6 +1313,39 @@ class HTMLRenderer:
"""
return table_html
def _log_chart_validation_stats(self):
"""输出图表验证统计信息"""
stats = self.chart_validation_stats
if stats['total'] == 0:
return
logger.info("=" * 60)
logger.info("图表验证统计")
logger.info("=" * 60)
logger.info(f"总图表数量: {stats['total']}")
logger.info(f" ✓ 验证通过: {stats['valid']} ({stats['valid']/stats['total']*100:.1f}%)")
if stats['repaired_locally'] > 0:
logger.info(
f" ⚠ 本地修复: {stats['repaired_locally']} "
f"({stats['repaired_locally']/stats['total']*100:.1f}%)"
)
if stats['repaired_api'] > 0:
logger.info(
f" ⚠ API修复: {stats['repaired_api']} "
f"({stats['repaired_api']/stats['total']*100:.1f}%)"
)
if stats['failed'] > 0:
logger.warning(
f" ✗ 修复失败: {stats['failed']} "
f"({stats['failed']/stats['total']*100:.1f}%) - "
f"这些图表将使用降级渲染或显示fallback表格"
)
logger.info("=" * 60)
# ====== 前置信息防护 ======
def _kpi_signature_from_items(self, items: Any) -> tuple | None:
@@ -2317,6 +2443,80 @@ function buildChartOptions(payload) {
return mergeOptions(baseOptions, overrideOptions);
}
function validateChartData(payload, type) {
  /**
   * Client-side sanity check of a chart payload before it is handed to Chart.js.
   *
   * @param {object} payload - Widget payload; only `payload.data` is inspected.
   * @param {string} type    - Chart type that is about to be rendered.
   * @returns {{valid: boolean, errors: string[]}} `valid` is true when no
   *          error was collected.
   */
  const errors = [];

  if (!payload || typeof payload !== 'object') {
    errors.push('无效的payload');
    return { valid: false, errors };
  }

  const data = payload.data;
  if (!data || typeof data !== 'object') {
    errors.push('缺少data字段');
    return { valid: false, errors };
  }

  // Sets are used instead of object literals so that inherited property names
  // such as "constructor" or "toString" are never mistaken for a known type.
  const specialTypes = new Set(['scatter', 'bubble']);
  if (specialTypes.has(type)) {
    // These types use {x, y} / {x, y, r} points; the standard checks below
    // do not apply, so they are skipped.
    return { valid: true, errors };
  }

  // Standard chart types: datasets must be a non-empty array.
  const datasets = data.datasets;
  if (!Array.isArray(datasets)) {
    errors.push('datasets必须是数组');
    return { valid: false, errors };
  }
  if (datasets.length === 0) {
    errors.push('datasets数组为空');
    return { valid: false, errors };
  }

  // Every dataset must be an object carrying a non-empty data array.
  for (let i = 0; i < datasets.length; i++) {
    const dataset = datasets[i];
    if (!dataset || typeof dataset !== 'object') {
      errors.push(`datasets[${i}]不是对象`);
      continue;
    }
    if (!Array.isArray(dataset.data)) {
      errors.push(`datasets[${i}].data不是数组`);
    } else if (dataset.data.length === 0) {
      errors.push(`datasets[${i}].data为空`);
    }
  }

  // Chart types that cannot be drawn without a labels array.
  const labelRequiredTypes = new Set([
    'line', 'bar', 'radar', 'polarArea', 'pie', 'doughnut'
  ]);
  if (labelRequiredTypes.has(type)) {
    const labels = data.labels;
    if (!Array.isArray(labels)) {
      errors.push('缺少labels数组');
    } else if (labels.length === 0) {
      errors.push('labels数组为空');
    }
  }

  return {
    valid: errors.length === 0,
    errors
  };
}
function instantiateChart(ctx, payload, optionsTemplate, type) {
if (!ctx) {
return null;
@@ -2358,9 +2558,17 @@ function hydrateCharts() {
renderChartFallback(canvas, payload, 'Canvas 初始化失败');
return;
}
// 前端数据验证
const desiredType = chartTypes[0];
const validation = validateChartData(payload, desiredType);
if (!validation.valid) {
console.warn('图表数据验证失败:', validation.errors);
// 验证失败但仍然尝试渲染,因为可能会降级成功
}
const card = canvas.closest('.chart-card') || canvas.parentElement;
const optionsTemplate = buildChartOptions(payload);
const desiredType = chartTypes[0];
let chartInstance = null;
let selectedType = null;
let lastError;

View File

@@ -0,0 +1,283 @@
"""
图表API修复模块。
提供调用4个Engine(ReportEngine, ForumEngine, InsightEngine, MediaEngine)的LLM API
来修复图表数据的功能。
"""
from __future__ import annotations
import json
from typing import Any, Dict, List, Optional
from loguru import logger
from ReportEngine.utils.config import settings
# System prompt sent with every chart-repair request. It shows the model the
# expected Chart.js widget layout (standard and scatter/bubble), the repair
# principles ("prefer no change over a wrong change") and the common fixes.
# The text is consumed by the LLM at runtime, so it is kept verbatim.
CHART_REPAIR_SYSTEM_PROMPT = """你是一个专业的图表数据修复助手。你的任务是修复Chart.js图表数据中的格式错误确保图表能够正常渲染。
**Chart.js标准数据格式**
1. 标准图表line, bar, pie, doughnut, radar, polarArea
```json
{
"type": "widget",
"widgetType": "chart.js/bar",
"widgetId": "chart-001",
"props": {
"type": "bar",
"title": "图表标题",
"options": {
"responsive": true,
"plugins": {
"legend": {
"display": true
}
}
}
},
"data": {
"labels": ["A", "B", "C"],
"datasets": [
{
"label": "系列1",
"data": [10, 20, 30]
}
]
}
}
```
2. 特殊图表scatter, bubble
```json
{
"data": {
"datasets": [
{
"label": "系列1",
"data": [
{"x": 10, "y": 20},
{"x": 15, "y": 25}
]
}
]
}
}
```
**修复原则:**
1. **宁愿不改,也不要改错** - 如果不确定如何修复,保持原始数据
2. **最小改动** - 只修复明确的错误,不要过度修改
3. **保持数据完整性** - 不要丢失原始数据
4. **验证修复结果** - 确保修复后符合Chart.js格式
**常见错误及修复方法:**
1. 缺少labels字段 → 根据数据生成默认labels
2. datasets不是数组 → 转换为数组格式
3. 数据长度不匹配 → 截断或补null
4. 非数值数据 → 尝试转换或设为null
5. 缺少必需字段 → 添加默认值
请根据错误信息修复图表数据并返回修复后的完整widget blockJSON格式
"""
def build_chart_repair_prompt(
    widget_block: Dict[str, Any],
    validation_errors: List[str]
) -> str:
    """
    Build the user prompt that asks the LLM to repair one widget block.

    Args:
        widget_block: The original widget block; embedded as pretty-printed JSON.
        validation_errors: Validator messages; rendered as a "- item" bullet list.

    Returns:
        str: The complete prompt text.
    """
    bullet_lines = ["- " + str(message) for message in validation_errors]
    rendered_errors = "\n".join(bullet_lines)
    rendered_block = json.dumps(widget_block, indent=2, ensure_ascii=False)
    return f"""请修复以下图表数据中的错误:
**原始数据:**
```json
{rendered_block}
```
**检测到的错误:**
{rendered_errors}
**要求:**
1. 返回修复后的完整widget blockJSON格式
2. 只修复明确的错误,保持其他数据不变
3. 确保修复后的数据符合Chart.js格式要求
4. 如果无法确定如何修复,保持原始数据
**重要的输出格式要求:**
1. 只返回纯JSON对象不要添加任何说明文字
2. 不要使用```json```标记包裹
3. 确保JSON语法完全正确
4. 所有字符串使用双引号
"""
def create_llm_repair_functions() -> List:
    """
    Build the list of LLM-backed chart repair functions.

    One function is created for every engine whose ``*_API_KEY`` and
    ``*_BASE_URL`` settings are both set, in this order:
        1. ReportEngine
        2. ForumEngine (configured through the FORUM_HOST_* settings)
        3. InsightEngine
        4. MediaEngine

    Each function has the signature ``(widget_block, errors) -> dict | None``
    and never raises: any failure is logged and reported as ``None``.

    Returns:
        List[Callable]: The repair functions; empty when no engine is configured.
    """
    # (label used in log messages, settings attribute prefix, function-name suffix)
    engines = [
        ("ReportEngine", "REPORT_ENGINE", "report_engine"),
        ("ForumEngine", "FORUM_HOST", "forum_engine"),
        ("InsightEngine", "INSIGHT_ENGINE", "insight_engine"),
        ("MediaEngine", "MEDIA_ENGINE", "media_engine"),
    ]

    def parse_response(response: Any) -> Optional[Dict[str, Any]]:
        """Extract the repaired block from the raw model output."""
        if isinstance(response, (bytes, bytearray)):
            response = response.decode("utf-8", errors="replace")
        text = str(response).strip()
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            # Models often wrap the object in a Markdown fence or add a sentence
            # around it despite the prompt; retry on the outermost {...} span.
            start, end = text.find("{"), text.rfind("}")
            if start == -1 or end <= start:
                raise
            parsed = json.loads(text[start:end + 1])
        # Only a JSON object can be a widget block.
        return parsed if isinstance(parsed, dict) else None

    def make_repair_fn(engine_label: str, prefix: str, suffix: str):
        """Create the repair function bound to one engine's settings."""

        def repair(widget_block: Dict[str, Any], errors: List[str]) -> Optional[Dict[str, Any]]:
            try:
                # Imported lazily, as in the original per-engine functions.
                from llm_client import LLMClient

                # Settings are read at call time, not at creation time.
                client = LLMClient(
                    api_key=getattr(settings, f"{prefix}_API_KEY"),
                    base_url=getattr(settings, f"{prefix}_BASE_URL"),
                    model_name=getattr(settings, f"{prefix}_MODEL_NAME") or "gpt-4",
                    provider="openai"
                )
                prompt = build_chart_repair_prompt(widget_block, errors)
                response = client.invoke(
                    CHART_REPAIR_SYSTEM_PROMPT,
                    prompt,
                    temperature=0.0,
                    top_p=0.05
                )
                if not response:
                    return None
                return parse_response(response)
            except Exception as e:
                # Best-effort boundary: a failing engine must not abort rendering.
                logger.error(f"{engine_label}图表修复失败: {e}")
                return None

        repair.__name__ = f"repair_with_{suffix}"
        repair.__doc__ = f"Repair a chart widget block with the {engine_label} LLM."
        return repair

    repair_functions = [
        make_repair_fn(label, prefix, suffix)
        for label, prefix, suffix in engines
        if getattr(settings, f"{prefix}_API_KEY") and getattr(settings, f"{prefix}_BASE_URL")
    ]

    if not repair_functions:
        logger.warning("未配置任何Engine API图表API修复功能将不可用")
    return repair_functions

View File

@@ -0,0 +1,631 @@
"""
图表验证和修复工具。
提供对Chart.js图表数据的验证和修复能力
1. 验证图表数据格式是否符合Chart.js要求
2. 本地规则修复常见问题
3. LLM API辅助修复复杂问题
4. 遵循"宁愿不改,也不要改错"的原则
支持的图表类型:
- line (折线图)
- bar (柱状图)
- pie (饼图)
- doughnut (圆环图)
- radar (雷达图)
- polarArea (极地区域图)
- scatter (散点图)
- bubble (气泡图)
- horizontalBar (横向柱状图)
"""
from __future__ import annotations
import copy
import json
from typing import Any, Dict, List, Optional, Tuple, Callable
from dataclasses import dataclass
from loguru import logger
@dataclass
class ValidationResult:
    """Outcome of validating one widget block."""
    # True when no error was recorded.
    is_valid: bool
    # Problems that make the block invalid.
    errors: List[str]
    # Non-fatal observations.
    warnings: List[str]

    def has_critical_errors(self) -> bool:
        """Return True when the block is invalid and at least one error is recorded."""
        if self.is_valid:
            return False
        return len(self.errors) > 0
@dataclass
class RepairResult:
    """Outcome of one repair attempt."""
    # Whether the attempt produced a usable block.
    success: bool
    # The block to use afterwards (may be None when nothing was produced).
    repaired_block: Optional[Dict[str, Any]]
    # One of 'none', 'local', 'api'.
    method: str
    # Human-readable description of every modification made.
    changes: List[str]

    def has_changes(self) -> bool:
        """Return True when at least one modification was recorded."""
        return len(self.changes) != 0
class ChartValidator:
    """
    Validates that a widget block holds well-formed Chart.js data.

    Checks performed:
    1. Basic structure: widgetType, props and data fields.
    2. Chart type: whether it is one of the supported types.
    3. Data layout: labels and datasets.
    4. Consistency: dataset length versus labels length (warning only).
    5. Value types: data values must be numbers (or None).
    """

    # Chart types the renderer is known to handle (canonical Chart.js spelling).
    SUPPORTED_CHART_TYPES = {
        'line', 'bar', 'pie', 'doughnut', 'radar', 'polarArea', 'scatter',
        'bubble', 'horizontalBar'
    }

    # Chart types that need a labels array.
    LABEL_REQUIRED_TYPES = {
        'line', 'bar', 'radar', 'polarArea', 'pie', 'doughnut'
    }

    # Chart types whose dataset values must be plain numbers.
    NUMERIC_DATA_TYPES = {
        'line', 'bar', 'radar', 'polarArea', 'pie', 'doughnut'
    }

    # Chart types whose data points are objects, with the keys each point needs.
    SPECIAL_DATA_TYPES = {
        'scatter': {'x', 'y'},
        'bubble': {'x', 'y', 'r'}
    }

    # Lower-cased name -> canonical name. The sets above are case-sensitive
    # ('polarArea', 'horizontalBar'), so a lower-cased type would never match.
    _CANONICAL_CHART_TYPES = {name.lower(): name for name in SUPPORTED_CHART_TYPES}

    def __init__(self):
        pass

    def validate(self, widget_block: Dict[str, Any]) -> ValidationResult:
        """
        Validate a widget block.

        Args:
            widget_block: Widget block with widgetId/widgetType/props/data.

        Returns:
            ValidationResult: ``is_valid`` is True when no error was recorded.
            Blocks whose widgetType does not start with ``chart.js`` are not
            inspected and are reported as valid.
        """
        errors: List[str] = []
        warnings: List[str] = []

        # 1. Basic structure.
        if not isinstance(widget_block, dict):
            errors.append("widget_block必须是字典类型")
            return ValidationResult(False, errors, warnings)

        # 2. widgetType must be a non-empty string.
        widget_type = widget_block.get('widgetType', '')
        if not widget_type or not isinstance(widget_type, str):
            errors.append("缺少widgetType字段或类型不正确")
            return ValidationResult(False, errors, warnings)

        # Not a chart: nothing to validate.
        if not widget_type.startswith('chart.js'):
            return ValidationResult(True, errors, warnings)

        # 3. Determine the chart type.
        chart_type = self._extract_chart_type(widget_block)
        if not chart_type:
            errors.append("无法确定图表类型")
            return ValidationResult(False, errors, warnings)

        # 4. Unknown types only produce a warning.
        if chart_type not in self.SUPPORTED_CHART_TYPES:
            warnings.append(f"图表类型 '{chart_type}' 可能不被支持,将尝试降级渲染")

        # 5. data must be a dict.
        data = widget_block.get('data')
        if not isinstance(data, dict):
            errors.append("data字段必须是字典类型")
            return ValidationResult(False, errors, warnings)

        # 6. Type-specific data checks.
        if chart_type in self.SPECIAL_DATA_TYPES:
            self._validate_special_data(data, chart_type, errors, warnings)
        else:
            self._validate_standard_data(data, chart_type, errors, warnings)

        # 7. props, when present, should be a dict (warning only).
        props = widget_block.get('props')
        if props is not None and not isinstance(props, dict):
            warnings.append("props字段应该是字典类型")

        return ValidationResult(len(errors) == 0, errors, warnings)

    @classmethod
    def _normalize_chart_type(cls, raw: str) -> str:
        """Map a chart type to its canonical spelling, case-insensitively.

        Known types come back as written in ``SUPPORTED_CHART_TYPES``
        (``'POLARAREA'`` -> ``'polarArea'``); unknown types are lower-cased.
        """
        lowered = raw.lower()
        return cls._CANONICAL_CHART_TYPES.get(lowered, lowered)

    def _extract_chart_type(self, widget_block: Dict[str, Any]) -> Optional[str]:
        """
        Determine the chart type of a widget block.

        Lookup order:
        1. props.type
        2. the segment after the last '/' in widgetType (chart.js/bar -> bar)
        3. data.type

        Returns:
            The normalized chart type, or None when none of the sources has one.
        """
        # 1. props.type
        props = widget_block.get('props') or {}
        if isinstance(props, dict):
            chart_type = props.get('type')
            if chart_type and isinstance(chart_type, str):
                return self._normalize_chart_type(chart_type)

        # 2. widgetType suffix
        widget_type = widget_block.get('widgetType', '')
        if isinstance(widget_type, str) and '/' in widget_type:
            chart_type = widget_type.split('/')[-1]
            if chart_type:
                return self._normalize_chart_type(chart_type)

        # 3. data.type
        data = widget_block.get('data') or {}
        if isinstance(data, dict):
            chart_type = data.get('type')
            if chart_type and isinstance(chart_type, str):
                return self._normalize_chart_type(chart_type)

        return None

    def _validate_standard_data(
        self,
        data: Dict[str, Any],
        chart_type: str,
        errors: List[str],
        warnings: List[str]
    ):
        """Validate the standard layout (labels + datasets); appends to errors/warnings."""
        labels = data.get('labels')
        datasets = data.get('datasets')

        # labels: a missing or empty value is reported as "missing".
        if chart_type in self.LABEL_REQUIRED_TYPES:
            if not labels:
                errors.append(f"{chart_type}类型图表必须包含labels字段")
            elif not isinstance(labels, list):
                errors.append("labels必须是数组类型")

        # datasets must be a non-empty list.
        if datasets is None:
            errors.append("缺少datasets字段")
            return
        if not isinstance(datasets, list):
            errors.append("datasets必须是数组类型")
            return
        if len(datasets) == 0:
            errors.append("datasets数组为空")
            return

        has_label_list = bool(labels) and isinstance(labels, list)
        for idx, dataset in enumerate(datasets):
            if not isinstance(dataset, dict):
                errors.append(f"datasets[{idx}]必须是对象类型")
                continue

            ds_data = dataset.get('data')
            if ds_data is None:
                errors.append(f"datasets[{idx}]缺少data字段")
                continue
            if not isinstance(ds_data, list):
                errors.append(f"datasets[{idx}].data必须是数组类型")
                continue
            if len(ds_data) == 0:
                warnings.append(f"datasets[{idx}].data数组为空")
                continue

            # A length mismatch is only a warning.
            if has_label_list and len(ds_data) != len(labels):
                warnings.append(
                    f"datasets[{idx}].data长度({len(ds_data)})与labels长度({len(labels)})不匹配"
                )

            # Only the first non-numeric value of each dataset is reported.
            if chart_type in self.NUMERIC_DATA_TYPES:
                for data_idx, value in enumerate(ds_data):
                    if value is not None and not isinstance(value, (int, float)):
                        errors.append(
                            f"datasets[{idx}].data[{data_idx}]的值'{value}'不是有效的数值类型"
                        )
                        break

    def _validate_special_data(
        self,
        data: Dict[str, Any],
        chart_type: str,
        errors: List[str],
        warnings: List[str]
    ):
        """Validate the point-object layout (scatter, bubble); appends to errors/warnings."""
        datasets = data.get('datasets')

        # Same three-step check as the standard layout.
        if datasets is None:
            errors.append("缺少datasets字段")
            return
        if not isinstance(datasets, list):
            errors.append("datasets必须是数组类型")
            return
        if len(datasets) == 0:
            errors.append("datasets数组为空")
            return

        required_keys = self.SPECIAL_DATA_TYPES.get(chart_type, set())

        for idx, dataset in enumerate(datasets):
            if not isinstance(dataset, dict):
                errors.append(f"datasets[{idx}]必须是对象类型")
                continue

            ds_data = dataset.get('data')
            if ds_data is None:
                errors.append(f"datasets[{idx}]缺少data字段")
                continue
            if not isinstance(ds_data, list):
                errors.append(f"datasets[{idx}].data必须是数组类型")
                continue
            if len(ds_data) == 0:
                warnings.append(f"datasets[{idx}].data数组为空")
                continue

            # Only the first faulty point of each dataset is reported.
            point_error = self._first_point_error(idx, ds_data, required_keys)
            if point_error is not None:
                errors.append(point_error)

    @staticmethod
    def _first_point_error(idx: int, ds_data: List[Any], required_keys: set) -> Optional[str]:
        """Return the message for the first malformed point of a dataset, or None."""
        for data_idx, point in enumerate(ds_data):
            if not isinstance(point, dict):
                return f"datasets[{idx}].data[{data_idx}]必须是对象类型(包含{required_keys}字段)"

            missing_keys = required_keys - set(point.keys())
            if missing_keys:
                return f"datasets[{idx}].data[{data_idx}]缺少必需字段: {missing_keys}"

            # Sorted so the reported key does not depend on set iteration order.
            for key in sorted(required_keys):
                value = point.get(key)
                if value is not None and not isinstance(value, (int, float)):
                    return f"datasets[{idx}].data[{data_idx}].{key}的值'{value}'不是有效的数值类型"
        return None

    def can_render(self, widget_block: Dict[str, Any]) -> bool:
        """
        Quick check: return True when ``validate`` reports the block as valid.

        Args:
            widget_block: Widget block to check.
        """
        return self.validate(widget_block).is_valid
class ChartRepairer:
    """
    Attempts to repair chart widget blocks.

    Strategy:
    1. Local rules fix the common problems.
    2. LLM repair functions handle what the local rules cannot.
    3. Every candidate is re-validated before it is accepted.
    """

    def __init__(
        self,
        validator: ChartValidator,
        llm_repair_fns: Optional[List[Callable]] = None
    ):
        """
        Args:
            validator: Validator used to check blocks before and after repair.
            llm_repair_fns: Callables ``(widget_block, errors) -> dict | None``,
                tried in order by ``repair_with_api``. Defaults to none.
        """
        self.validator = validator
        self.llm_repair_fns = llm_repair_fns or []

    def repair(
        self,
        widget_block: Dict[str, Any],
        validation_result: Optional[ValidationResult] = None
    ) -> RepairResult:
        """
        Try to repair a widget block.

        Args:
            widget_block: Widget block to repair; it is never mutated.
            validation_result: Result of a previous validation of
                ``widget_block``; computed here when omitted.

        Returns:
            RepairResult:
            - method 'local' / 'api' with the repaired block when a repair
              passed validation;
            - method 'none', success True and the original block when the
              input was already valid and no validated change was made;
            - method 'none', success False and the original block when
              nothing worked.
        """
        # 1. Validate first when the caller did not.
        if validation_result is None:
            validation_result = self.validator.validate(widget_block)

        # 2. Local repair runs even for valid blocks (it also tidies warnings).
        logger.info("尝试本地修复图表")
        local_result = self.repair_locally(widget_block, validation_result)

        # 3. A local repair is accepted only if the result validates.
        if local_result.has_changes():
            repaired_validation = self.validator.validate(local_result.repaired_block)
            if repaired_validation.is_valid:
                logger.info(f"本地修复成功: {local_result.changes}")
                return RepairResult(True, local_result.repaired_block, 'local', local_result.changes)
            logger.warning(f"本地修复后仍然无效: {repaired_validation.errors}")

        # 4. Escalate to the LLM engines for blocks with real errors.
        if validation_result.has_critical_errors() and len(self.llm_repair_fns) > 0:
            logger.info("本地修复失败尝试API修复")
            api_result = self.repair_with_api(widget_block, validation_result)
            # repair_with_api reports success only for blocks that validated.
            if api_result.success:
                logger.info(f"API修复成功: {api_result.changes}")
                return api_result

        # 5. The input was already valid. Any local change has just failed
        #    validation above, so the untouched original is returned.
        if validation_result.is_valid:
            return RepairResult(True, widget_block, 'none', [])

        # 6. Nothing worked: hand back the original block.
        logger.warning("所有修复尝试失败,保持原始数据")
        return RepairResult(False, widget_block, 'none', [])

    def repair_locally(
        self,
        widget_block: Dict[str, Any],
        validation_result: ValidationResult
    ) -> RepairResult:
        """
        Repair a deep copy of the block with local rules.

        Rules applied, in order:
        1. Add missing props/data containers.
        2. Fill in the chart type.
        3. Build datasets (from values/series/labels) and default labels.
        4. Per dataset: add data/label, align the length to labels, coerce
           numeric strings.

        Returns:
            RepairResult: method 'local'; ``success`` is True when at least one
            change was made. The result is NOT validated here.
        """
        repaired = copy.deepcopy(widget_block)
        changes: List[str] = []

        # 1. Containers.
        if not isinstance(repaired.get('props'), dict):
            repaired['props'] = {}
            changes.append("添加缺失的props字段")
        if not isinstance(repaired.get('data'), dict):
            repaired['data'] = {}
            changes.append("添加缺失的data字段")

        # 2. Chart type.
        chart_type = self.validator._extract_chart_type(repaired)
        props = repaired['props']
        if not chart_type:
            widget_type = repaired.get('widgetType', '')
            if '/' in widget_type:
                chart_type = widget_type.split('/')[-1].lower()
                props['type'] = chart_type
                changes.append(f"从widgetType推断图表类型: {chart_type}")
            else:
                # No hint at all: fall back to a bar chart.
                chart_type = 'bar'
                props['type'] = chart_type
                changes.append("设置默认图表类型: bar")
        elif 'type' not in props or not props['type']:
            # The type was found elsewhere; record it in props as well.
            props['type'] = chart_type
            changes.append(f"将推断的图表类型添加到props: {chart_type}")

        # 3. datasets.
        data = repaired['data']
        datasets = data.get('datasets')
        if isinstance(datasets, dict) and isinstance(datasets.get('data'), list):
            # A single dataset object was given: wrap it instead of dropping it.
            data['datasets'] = [datasets]
            changes.append("将单个dataset对象转换为datasets数组")
        elif not isinstance(datasets, list):
            data['datasets'] = []
            changes.append("添加缺失的datasets字段")

        if len(data['datasets']) == 0:
            constructed = self._try_construct_datasets(data, chart_type)
            if constructed:
                data['datasets'] = constructed
                changes.append("从data中构造datasets")
            elif isinstance(data.get('labels'), list) and len(data['labels']) > 0:
                # Labels without any data: create a zero-filled placeholder.
                data['datasets'] = [{
                    'label': '数据',
                    'data': [0] * len(data['labels'])
                }]
                changes.append("根据labels创建默认dataset使用零值")

        # Default labels, sized after the first dataset.
        if chart_type in ChartValidator.LABEL_REQUIRED_TYPES:
            if not isinstance(data.get('labels'), list) and data['datasets']:
                first_ds = data['datasets'][0]
                if isinstance(first_ds, dict) and isinstance(first_ds.get('data'), list):
                    data_len = len(first_ds['data'])
                    data['labels'] = [f"项目 {i+1}" for i in range(data_len)]
                    changes.append(f"生成{data_len}个默认labels")

        # 4. Per-dataset fixes.
        labels = data.get('labels')
        # Lengths are aligned only against a real, non-empty labels list and
        # never for scatter/bubble. Otherwise "truncate to len(labels)" would
        # cut every dataset down to nothing.
        align_to_labels = (
            chart_type not in ChartValidator.SPECIAL_DATA_TYPES
            and isinstance(labels, list)
            and len(labels) > 0
        )
        for idx, dataset in enumerate(data.get('datasets', [])):
            if not isinstance(dataset, dict):
                continue

            if not isinstance(dataset.get('data'), list):
                dataset['data'] = []
                changes.append(f"为datasets[{idx}]添加空data数组")

            if 'label' not in dataset:
                dataset['label'] = f"系列 {idx + 1}"
                changes.append(f"为datasets[{idx}]添加默认label")

            ds_data = dataset['data']
            if align_to_labels:
                if len(ds_data) < len(labels):
                    # Too short: pad with nulls.
                    dataset['data'] = ds_data + [None] * (len(labels) - len(ds_data))
                    changes.append(f"datasets[{idx}]数据长度不足补充null")
                elif len(ds_data) > len(labels):
                    # Too long: truncate.
                    dataset['data'] = ds_data[:len(labels)]
                    changes.append(f"datasets[{idx}]数据长度过长,截断")

            if chart_type in ChartValidator.NUMERIC_DATA_TYPES:
                if self._coerce_numeric_values(dataset['data']):
                    changes.append(f"datasets[{idx}]包含非数值数据,已尝试转换")

        return RepairResult(len(changes) > 0, repaired, 'local', changes)

    @staticmethod
    def _coerce_numeric_values(values: List[Any]) -> bool:
        """Convert numeric strings in ``values`` to floats, in place.

        Strings that cannot be parsed become None. Non-finite numbers (NaN,
        +/-inf), parsed or already present, also become None because they
        cannot be written as valid JSON. Values of other types (dict, list, ...)
        are left untouched so that validation still reports them.

        Returns:
            bool: True when at least one element was replaced.
        """
        import math  # local import: only this helper needs it

        changed = False
        for i, value in enumerate(values):
            if value is None:
                continue
            if isinstance(value, float) and not math.isfinite(value):
                values[i] = None
                changed = True
            elif isinstance(value, str):
                try:
                    number = float(value)
                except ValueError:
                    number = None
                if number is not None and not math.isfinite(number):
                    number = None
                values[i] = number
                changed = True
        return changed

    def _try_construct_datasets(
        self,
        data: Dict[str, Any],
        chart_type: str
    ) -> Optional[List[Dict[str, Any]]]:
        """Build datasets from the alternative ``values`` or ``series`` fields.

        Returns:
            The constructed datasets, or None when neither field is usable.
        """
        # A flat list of values becomes a single dataset.
        if 'values' in data and isinstance(data['values'], list):
            return [{
                'label': '数据',
                'data': data['values']
            }]

        # A series list becomes one dataset per dict/list entry.
        if 'series' in data and isinstance(data['series'], list):
            datasets = []
            for idx, series in enumerate(data['series']):
                if isinstance(series, dict):
                    datasets.append({
                        'label': series.get('name', f'系列 {idx + 1}'),
                        'data': series.get('data', [])
                    })
                elif isinstance(series, list):
                    datasets.append({
                        'label': f'系列 {idx + 1}',
                        'data': series
                    })
            if datasets:
                return datasets

        return None

    def repair_with_api(
        self,
        widget_block: Dict[str, Any],
        validation_result: ValidationResult
    ) -> RepairResult:
        """
        Repair through the configured LLM repair functions.

        The functions are tried in order; the first one whose output is a dict
        that passes validation wins.

        Returns:
            RepairResult: method 'api'; on failure ``success`` is False and
            ``repaired_block`` is None.
        """
        if not self.llm_repair_fns:
            return RepairResult(False, None, 'api', [])

        for idx, repair_fn in enumerate(self.llm_repair_fns):
            try:
                logger.info(f"尝试使用Engine {idx + 1}修复图表")
                repaired = repair_fn(widget_block, validation_result.errors)

                if repaired and isinstance(repaired, dict):
                    # Accept the engine's answer only if it validates.
                    repaired_validation = self.validator.validate(repaired)
                    if repaired_validation.is_valid:
                        return RepairResult(
                            True,
                            repaired,
                            'api',
                            [f"使用Engine {idx + 1}修复成功"]
                        )
            except Exception as e:
                # Best-effort: a failing engine must not stop the next one.
                logger.error(f"Engine {idx + 1}修复失败: {e}")
                continue

        return RepairResult(False, None, 'api', [])
def create_chart_validator() -> ChartValidator:
    """Factory: return a fresh ChartValidator."""
    validator = ChartValidator()
    return validator
def create_chart_repairer(
    validator: Optional[ChartValidator] = None,
    llm_repair_fns: Optional[List[Callable]] = None
) -> ChartRepairer:
    """Factory: return a ChartRepairer, creating a validator when none is given."""
    effective_validator = create_chart_validator() if validator is None else validator
    return ChartRepairer(effective_validator, llm_repair_fns)

View File

@@ -0,0 +1,456 @@
"""
图表验证器和修复器的测试用例。
运行测试:
python -m pytest ReportEngine/utils/test_chart_validator.py -v
"""
import pytest
from ReportEngine.utils.chart_validator import (
ChartValidator,
ChartRepairer,
ValidationResult,
RepairResult,
create_chart_validator,
create_chart_repairer
)
class TestChartValidator:
    """Tests for ChartValidator."""

    def setup_method(self):
        """Create a fresh validator before every test."""
        self.validator = create_chart_validator()

    def test_valid_bar_chart(self):
        """A well-formed bar chart is valid and has no errors."""
        widget_block = {
            "type": "widget",
            "widgetType": "chart.js/bar",
            "widgetId": "chart-001",
            "props": {
                "type": "bar",
                "title": "销售数据"
            },
            "data": {
                "labels": ["一月", "二月", "三月"],
                "datasets": [
                    {
                        "label": "销售额",
                        "data": [100, 200, 150]
                    }
                ]
            }
        }
        result = self.validator.validate(widget_block)
        assert result.is_valid
        assert len(result.errors) == 0

    def test_valid_line_chart(self):
        """A well-formed line chart is valid."""
        widget_block = {
            "type": "widget",
            "widgetType": "chart.js/line",
            "widgetId": "chart-002",
            "props": {
                "type": "line"
            },
            "data": {
                "labels": ["周一", "周二", "周三"],
                "datasets": [
                    {
                        "label": "访问量",
                        "data": [50, 75, 60]
                    }
                ]
            }
        }
        result = self.validator.validate(widget_block)
        assert result.is_valid

    def test_valid_pie_chart(self):
        """A well-formed pie chart is valid."""
        widget_block = {
            "widgetType": "chart.js/pie",
            "props": {"type": "pie"},
            "data": {
                "labels": ["A", "B", "C"],
                "datasets": [
                    {
                        "data": [30, 40, 30]
                    }
                ]
            }
        }
        result = self.validator.validate(widget_block)
        assert result.is_valid

    def test_missing_widgetType(self):
        """A block without widgetType is invalid."""
        widget_block = {
            "props": {},
            "data": {}
        }
        result = self.validator.validate(widget_block)
        assert not result.is_valid
        assert "widgetType" in result.errors[0]

    def test_missing_data_field(self):
        """A chart without a data field is invalid."""
        widget_block = {
            "widgetType": "chart.js/bar",
            "props": {"type": "bar"}
        }
        result = self.validator.validate(widget_block)
        assert not result.is_valid
        assert "data" in result.errors[0]

    def test_missing_datasets(self):
        """A chart without datasets is invalid."""
        widget_block = {
            "widgetType": "chart.js/bar",
            "props": {"type": "bar"},
            "data": {
                "labels": ["A", "B"]
            }
        }
        result = self.validator.validate(widget_block)
        assert not result.is_valid
        assert "datasets" in result.errors[0]

    def test_empty_datasets(self):
        """An empty datasets list is invalid and reported as empty."""
        widget_block = {
            "widgetType": "chart.js/bar",
            "props": {"type": "bar"},
            "data": {
                "labels": ["A", "B"],
                "datasets": []
            }
        }
        result = self.validator.validate(widget_block)
        assert not result.is_valid
        # The previous check (`"" in ...`) was always true; assert on the
        # actual "is empty" wording of the message instead.
        assert "datasets" in result.errors[0]
        assert "为空" in result.errors[0]

    def test_missing_labels_for_bar_chart(self):
        """A bar chart without labels is invalid."""
        widget_block = {
            "widgetType": "chart.js/bar",
            "props": {"type": "bar"},
            "data": {
                "datasets": [
                    {
                        "label": "系列1",
                        "data": [10, 20, 30]
                    }
                ]
            }
        }
        result = self.validator.validate(widget_block)
        assert not result.is_valid
        assert "labels" in result.errors[0]

    def test_invalid_data_type(self):
        """Non-numeric dataset values are an error."""
        widget_block = {
            "widgetType": "chart.js/bar",
            "props": {"type": "bar"},
            "data": {
                "labels": ["A", "B"],
                "datasets": [
                    {
                        "label": "系列1",
                        "data": ["abc", "def"]  # should be numbers
                    }
                ]
            }
        }
        result = self.validator.validate(widget_block)
        assert not result.is_valid
        assert "数值类型" in result.errors[0]

    def test_data_length_mismatch_warning(self):
        """A data/labels length mismatch produces a warning, not an error."""
        widget_block = {
            "widgetType": "chart.js/bar",
            "props": {"type": "bar"},
            "data": {
                "labels": ["A", "B", "C"],
                "datasets": [
                    {
                        "label": "系列1",
                        "data": [10, 20]  # one value short
                    }
                ]
            }
        }
        result = self.validator.validate(widget_block)
        assert len(result.warnings) > 0
        assert "不匹配" in result.warnings[0]

    def test_scatter_chart(self):
        """A scatter chart with {x, y} points is valid."""
        widget_block = {
            "widgetType": "chart.js/scatter",
            "props": {"type": "scatter"},
            "data": {
                "datasets": [
                    {
                        "label": "数据点",
                        "data": [
                            {"x": 10, "y": 20},
                            {"x": 15, "y": 25}
                        ]
                    }
                ]
            }
        }
        result = self.validator.validate(widget_block)
        assert result.is_valid

    def test_non_chart_widget(self):
        """Widgets that are not chart.js charts are skipped and reported valid."""
        widget_block = {
            "widgetType": "custom/widget",
            "props": {},
            "data": {}
        }
        result = self.validator.validate(widget_block)
        assert result.is_valid
class TestChartRepairer:
    """Tests for ChartRepairer (local repair only; no LLM functions configured)."""

    def setup_method(self):
        """Create a fresh validator/repairer pair before every test."""
        self.validator = create_chart_validator()
        self.repairer = create_chart_repairer(validator=self.validator)

    @staticmethod
    def _bar(data, props=None):
        """Build a chart.js/bar widget block; ``props`` is omitted when None."""
        block = {"widgetType": "chart.js/bar"}
        if props is not None:
            block["props"] = props
        block["data"] = data
        return block

    @staticmethod
    def _series(values, label="系列1"):
        """Build one dataset; ``label`` is omitted when None."""
        dataset = {"data": values}
        if label is not None:
            dataset["label"] = label
        return dataset

    def test_repair_missing_props(self):
        """A missing props field is added by local repair."""
        block = self._bar({"labels": ["A", "B"], "datasets": [self._series([10, 20])]})
        outcome = self.repairer.repair(block)
        assert outcome.success
        assert "props" in outcome.repaired_block
        assert outcome.method == "local"

    def test_repair_missing_chart_type(self):
        """An empty props gets the chart type inferred from widgetType."""
        block = self._bar(
            {"labels": ["A", "B"], "datasets": [self._series([10, 20])]},
            props={},
        )
        outcome = self.repairer.repair(block)
        assert outcome.success
        assert outcome.repaired_block["props"]["type"] == "bar"
        assert "图表类型" in str(outcome.changes)

    def test_repair_missing_datasets(self):
        """A missing datasets field is created as a list."""
        block = self._bar({"labels": ["A", "B"]}, props={"type": "bar"})
        outcome = self.repairer.repair(block)
        assert outcome.success
        repaired_data = outcome.repaired_block["data"]
        assert "datasets" in repaired_data
        assert isinstance(repaired_data["datasets"], list)

    def test_repair_missing_labels(self):
        """Missing labels are generated, one per data value."""
        block = self._bar(
            {"datasets": [self._series([10, 20, 30])]},
            props={"type": "bar"},
        )
        outcome = self.repairer.repair(block)
        assert outcome.success
        repaired_data = outcome.repaired_block["data"]
        assert "labels" in repaired_data
        assert len(repaired_data["labels"]) == 3

    def test_repair_data_length_mismatch(self):
        """A dataset shorter than labels is padded to the labels length."""
        block = self._bar(
            {"labels": ["A", "B", "C", "D"], "datasets": [self._series([10, 20])]},
            props={"type": "bar"},
        )
        outcome = self.repairer.repair(block)
        assert outcome.success
        # Padded up to the four labels.
        assert len(outcome.repaired_block["data"]["datasets"][0]["data"]) == 4

    def test_repair_string_to_number(self):
        """Numeric strings are converted to floats."""
        block = self._bar(
            {"labels": ["A", "B"], "datasets": [self._series(["10", "20"])]},
            props={"type": "bar"},
        )
        outcome = self.repairer.repair(block)
        assert outcome.success
        first_value = outcome.repaired_block["data"]["datasets"][0]["data"][0]
        assert isinstance(first_value, float)

    def test_repair_construct_datasets_from_values(self):
        """datasets are constructed from a ``values`` field."""
        block = self._bar(
            {"labels": ["A", "B"], "values": [10, 20]},
            props={"type": "bar"},
        )
        outcome = self.repairer.repair(block)
        assert outcome.success
        repaired_data = outcome.repaired_block["data"]
        assert "datasets" in repaired_data
        assert len(repaired_data["datasets"]) > 0

    def test_no_repair_needed(self):
        """A fully valid block is returned unchanged with method 'none'."""
        block = self._bar(
            {"labels": ["A", "B"], "datasets": [self._series([10, 20])]},
            props={"type": "bar"},
        )
        outcome = self.repairer.repair(block)
        assert outcome.success
        assert outcome.method == "none"
        assert len(outcome.changes) == 0

    def test_repair_adds_default_label(self):
        """A dataset without a label receives a default one."""
        block = self._bar(
            {"labels": ["A", "B"], "datasets": [self._series([10, 20], label=None)]},
            props={"type": "bar"},
        )
        outcome = self.repairer.repair(block)
        assert outcome.success
        assert "label" in outcome.repaired_block["data"]["datasets"][0]
class TestValidatorIntegration:
    """End-to-end test of validator and repairer working together."""

    def test_full_validation_and_repair_workflow(self):
        """Validate -> repair -> validate again on a chart with several problems."""
        checker = create_chart_validator()
        fixer = create_chart_repairer(validator=checker)

        # No props, no labels, no dataset label, and values given as strings.
        broken_chart = {
            "widgetType": "chart.js/bar",
            "data": {"datasets": [{"data": ["10", "20", "30"]}]},
        }

        # 1. The raw block must fail validation.
        first_pass = checker.validate(broken_chart)
        assert not first_pass.is_valid

        # 2. Repair must succeed.
        outcome = fixer.repair(broken_chart, first_pass)
        assert outcome.success

        # 3. The repaired block must validate.
        second_pass = checker.validate(outcome.repaired_block)
        assert second_pass.is_valid
if __name__ == "__main__":
    # Run this file's tests through pytest (verbose, short tracebacks) when
    # the module is executed directly.
    pytest.main([__file__, "-v", "--tb=short"])