Improved Support for Word Clouds

This commit is contained in:
马一丁
2025-11-27 03:10:30 +08:00
parent e362ef6bdc
commit 2a5d984aeb
3 changed files with 354 additions and 142 deletions

View File

@@ -167,6 +167,11 @@ class ChartToSVGConverter:
return None
# 根据图表类型调用相应的渲染方法
if 'wordcloud' in str(chart_type).lower():
# 词云由专用渲染逻辑处理这里跳过SVG转换以避免告警
logger.debug("检测到词云图表跳过chart_to_svg转换")
return None
render_method = getattr(self, f'_render_{chart_type}', None)
if not render_method:
logger.warning(f"不支持的图表类型: {chart_type}")

View File

@@ -1628,57 +1628,61 @@ class HTMLRenderer:
if is_chart:
self.chart_validation_stats['total'] += 1
# 如果此前已记录失败,直接使用占位提示,避免重复修复
has_failed, cached_reason = self._has_chart_failure(block)
if has_failed:
self._record_chart_failure_stat(cache_key)
reason = cached_reason or "LLM返回的图表信息格式有误无法正常显示"
return self._render_chart_error_placeholder(display_title, reason, widget_id)
# 验证图表数据
validation_result = self.chart_validator.validate(block)
if not validation_result.is_valid:
logger.warning(
f"图表 {block.get('widgetId', 'unknown')} 验证失败: {validation_result.errors}"
)
# 尝试修复
repair_result = self.chart_repairer.repair(block, validation_result)
if repair_result.success and repair_result.repaired_block:
# 修复成功,使用修复后的数据
block = repair_result.repaired_block
logger.info(
f"图表 {block.get('widgetId', 'unknown')} 修复成功 "
f"(方法: {repair_result.method}): {repair_result.changes}"
)
# 更新统计
if repair_result.method == 'local':
self.chart_validation_stats['repaired_locally'] += 1
elif repair_result.method == 'api':
self.chart_validation_stats['repaired_api'] += 1
else:
# 修复失败,记录失败并输出占位提示
fail_reason = self._format_chart_error_reason(validation_result)
block["_chart_renderable"] = False
block["_chart_error_reason"] = fail_reason
self._note_chart_failure(cache_key, fail_reason)
self._record_chart_failure_stat(cache_key)
logger.warning(
f"图表 {block.get('widgetId', 'unknown')} 修复失败,已跳过渲染: {fail_reason}"
)
return self._render_chart_error_placeholder(display_title, fail_reason, widget_id)
else:
# 验证通过
# 词云使用专用渲染逻辑不按Chart.js规则验证直接跳过防止误判
if is_wordcloud:
self.chart_validation_stats['valid'] += 1
if validation_result.warnings:
logger.info(
f"图表 {block.get('widgetId', 'unknown')} 验证通过,"
f"但有警告: {validation_result.warnings}"
else:
# 如果此前已记录失败,直接使用占位提示,避免重复修复
has_failed, cached_reason = self._has_chart_failure(block)
if has_failed:
self._record_chart_failure_stat(cache_key)
reason = cached_reason or "LLM返回的图表信息格式有误无法正常显示"
return self._render_chart_error_placeholder(display_title, reason, widget_id)
# 验证图表数据
validation_result = self.chart_validator.validate(block)
if not validation_result.is_valid:
logger.warning(
f"图表 {block.get('widgetId', 'unknown')} 验证失败: {validation_result.errors}"
)
# 尝试修复
repair_result = self.chart_repairer.repair(block, validation_result)
if repair_result.success and repair_result.repaired_block:
# 修复成功,使用修复后的数据
block = repair_result.repaired_block
logger.info(
f"图表 {block.get('widgetId', 'unknown')} 修复成功 "
f"(方法: {repair_result.method}): {repair_result.changes}"
)
# 更新统计
if repair_result.method == 'local':
self.chart_validation_stats['repaired_locally'] += 1
elif repair_result.method == 'api':
self.chart_validation_stats['repaired_api'] += 1
else:
# 修复失败,记录失败并输出占位提示
fail_reason = self._format_chart_error_reason(validation_result)
block["_chart_renderable"] = False
block["_chart_error_reason"] = fail_reason
self._note_chart_failure(cache_key, fail_reason)
self._record_chart_failure_stat(cache_key)
logger.warning(
f"图表 {block.get('widgetId', 'unknown')} 修复失败,已跳过渲染: {fail_reason}"
)
return self._render_chart_error_placeholder(display_title, fail_reason, widget_id)
else:
# 验证通过
self.chart_validation_stats['valid'] += 1
if validation_result.warnings:
logger.info(
f"图表 {block.get('widgetId', 'unknown')} 验证通过,"
f"但有警告: {validation_result.warnings}"
)
# 渲染图表HTML
self.chart_counter += 1
canvas_id = f"chart-{self.chart_counter}"
@@ -1700,7 +1704,7 @@ class HTMLRenderer:
title = props.get("title")
title_html = f'<div class="chart-title">{self._escape_html(title)}</div>' if title else ""
fallback_html = (
self._render_wordcloud_fallback(props, block.get("widgetId"))
self._render_wordcloud_fallback(props, block.get("widgetId"), block.get("data"))
if is_wordcloud
else self._render_widget_fallback(normalized_data, block.get("widgetId"))
)
@@ -1750,20 +1754,57 @@ class HTMLRenderer:
"""
return table_html
def _render_wordcloud_fallback(self, props: Dict[str, Any] | None, widget_id: str | None = None) -> str:
def _render_wordcloud_fallback(
self,
props: Dict[str, Any] | None,
widget_id: str | None = None,
block_data: Any | None = None,
) -> str:
"""为词云提供表格兜底避免WordCloud渲染失败后页面空白"""
words = []
if isinstance(props, dict):
raw = props.get("data")
def _collect_items(raw: Any) -> list[dict]:
collected: list[dict] = []
if isinstance(raw, list):
for item in raw:
if not isinstance(item, dict):
continue
text = item.get("word") or item.get("text") or item.get("label")
weight = item.get("weight")
category = item.get("category") or ""
if text:
words.append({"word": str(text), "weight": weight, "category": str(category)})
if isinstance(item, dict):
text = item.get("word") or item.get("text") or item.get("label")
weight = item.get("weight")
category = item.get("category") or ""
if text:
collected.append({"word": str(text), "weight": weight, "category": str(category)})
elif isinstance(item, (list, tuple)) and item:
text = item[0]
weight = item[1] if len(item) > 1 else None
category = item[2] if len(item) > 2 else ""
if text:
collected.append({"word": str(text), "weight": weight, "category": str(category)})
elif isinstance(item, str):
collected.append({"word": item, "weight": 1.0, "category": ""})
elif isinstance(raw, dict):
if not {"labels", "datasets"}.intersection(raw.keys()):
for text, weight in raw.items():
collected.append({"word": str(text), "weight": weight, "category": ""})
return collected
words: list[dict] = []
seen: set[str] = set()
candidates = []
if isinstance(props, dict):
for key in ("data", "items", "words"):
if key in props:
candidates.append(props[key])
candidates.append((props or {}).get("sourceData"))
# 允许使用block.data兜底避免缺失props时出现空白
if block_data is not None:
candidates.append(block_data)
for raw in candidates:
for item in _collect_items(raw):
key = f"{item['word']}::{item.get('category','')}"
if key in seen:
continue
seen.add(key)
words.append(item)
if not words:
return ""
@@ -2712,11 +2753,11 @@ table th {{
line-height: 1.6;
}}
.chart-card.wordcloud-card .chart-container {{
min-height: 260px;
min-height: 180px;
}}
.chart-container {{
position: relative;
min-height: 320px;
min-height: 220px;
}}
.chart-fallback {{
display: none;
@@ -2743,6 +2784,31 @@ table th {{
.chart-fallback th {{
background: rgba(0,0,0,0.04);
}}
.wordcloud-fallback .wordcloud-badges {{
display: flex;
flex-wrap: wrap;
gap: 6px;
margin-top: 6px;
}}
.wordcloud-badge {{
display: inline-flex;
align-items: center;
gap: 4px;
padding: 4px 8px;
border-radius: 999px;
border: 1px solid rgba(74, 144, 226, 0.35);
color: var(--text-color);
background: linear-gradient(135deg, rgba(74, 144, 226, 0.14) 0%, rgba(74, 144, 226, 0.24) 100%);
box-shadow: 0 4px 10px rgba(15, 23, 42, 0.06);
}}
.dark-mode .wordcloud-badge {{
box-shadow: 0 6px 16px rgba(0, 0, 0, 0.35);
}}
.wordcloud-badge small {{
color: var(--secondary-color);
font-weight: 600;
font-size: 0.75rem;
}}
.chart-note {{
margin-top: 8px;
font-size: 0.85rem;
@@ -3039,6 +3105,45 @@ function liftDarkColor(color) {
return normalized;
}
function mixColors(colorA, colorB, amount) {
const rgbA = rgbFromColor(colorA);
const rgbB = rgbFromColor(colorB);
if (!rgbA && !rgbB) return colorA || colorB;
if (!rgbA) return colorB;
if (!rgbB) return colorA;
const t = Math.min(1, Math.max(0, amount || 0));
const mixed = rgbA.map((v, idx) => Math.round(v * (1 - t) + rgbB[idx] * t));
return `rgb(${mixed[0]}, ${mixed[1]}, ${mixed[2]})`;
}
function pickComputedColor(keys, fallback, styles) {
const styleRef = styles || getComputedStyle(document.body);
for (const key of keys) {
const val = styleRef.getPropertyValue(key);
if (val && val.trim()) {
const normalized = normalizeColorToken(val.trim());
if (normalized) return normalized;
}
}
return fallback;
}
function resolveWordcloudTheme() {
const styles = getComputedStyle(document.body);
const isDark = document.body.classList.contains('dark-mode');
const text = pickComputedColor(['--text-color'], isDark ? '#e5e7eb' : '#111827', styles);
const secondary = pickComputedColor(['--secondary-color', '--color-text-secondary'], isDark ? '#cbd5e1' : '#475569', styles);
const accent = liftDarkColor(
pickComputedColor(['--primary-color', '--color-accent', '--re-accent-color'], '#4A90E2', styles)
);
const cardBg = pickComputedColor(
['--card-bg', '--paper-bg', '--bg', '--bg-color', '--background', '--page-bg'],
isDark ? '#0f172a' : '#ffffff',
styles
);
return { text, secondary, accent, cardBg, isDark };
}
function normalizeDatasetColors(payload, chartType) {
const changes = [];
const data = payload && payload.data;
@@ -3246,29 +3351,79 @@ function isWordCloudWidget(payload) {
return typeof type === 'string' && type.toLowerCase().includes('wordcloud');
}
function hashString(str) {
let h = 0;
if (!str) return h;
for (let i = 0; i < str.length; i++) {
h = (h << 5) - h + str.charCodeAt(i);
h |= 0;
}
return h;
}
function normalizeWordcloudItems(payload) {
const source = payload && payload.props && payload.props.data;
if (!Array.isArray(source)) return [];
return source.map(item => {
if (!item || typeof item !== 'object') return null;
const word = item.word || item.text || item.label;
if (!word) return null;
const rawWeight = item.weight;
let weight = 0;
if (typeof rawWeight === 'number' && !Number.isNaN(rawWeight)) {
weight = rawWeight;
} else if (typeof rawWeight === 'string') {
const parsed = parseFloat(rawWeight);
weight = Number.isNaN(parsed) ? 0 : parsed;
const sources = [];
const props = payload && payload.props;
const dataField = payload && payload.data;
if (props) {
['data', 'items', 'words', 'sourceData'].forEach(key => {
if (props[key]) sources.push(props[key]);
});
}
if (dataField) {
sources.push(dataField);
}
const seen = new Map();
const pushItem = (word, weight, category) => {
if (!word) return;
let numeric = 1;
if (typeof weight === 'number' && Number.isFinite(weight)) {
numeric = weight;
} else if (typeof weight === 'string') {
const parsed = parseFloat(weight);
numeric = Number.isFinite(parsed) ? parsed : 1;
}
const category = (item.category || '').toString().toLowerCase();
return { word: String(word), weight, category };
}).filter(Boolean);
if (!(numeric > 0)) numeric = 1;
const cat = (category || '').toString().toLowerCase();
const key = `${word}__${cat}`;
const existing = seen.get(key);
const payloadItem = { word: String(word), weight: numeric, category: cat };
if (!existing || numeric > existing.weight) {
seen.set(key, payloadItem);
}
};
const consume = (raw) => {
if (!raw) return;
if (Array.isArray(raw)) {
raw.forEach(item => {
if (!item) return;
if (Array.isArray(item)) {
pushItem(item[0], item[1], item[2]);
} else if (typeof item === 'object') {
pushItem(item.word || item.text || item.label, item.weight, item.category);
} else if (typeof item === 'string') {
pushItem(item, 1, '');
}
});
} else if (typeof raw === 'object') {
Object.entries(raw).forEach(([word, weight]) => pushItem(word, weight, ''));
}
};
sources.forEach(consume);
const items = Array.from(seen.values());
items.sort((a, b) => (b.weight || 0) - (a.weight || 0));
return items.slice(0, 150);
}
function wordcloudColor(category) {
const key = typeof category === 'string' ? category.toLowerCase() : '';
return WORDCLOUD_CATEGORY_COLORS[key] || '#334155';
const palette = resolveWordcloudTheme();
const base = WORDCLOUD_CATEGORY_COLORS[key] || palette.accent || palette.secondary || '#334155';
return liftDarkColor(base);
}
function renderWordCloudFallback(canvas, items, reason) {
@@ -3282,26 +3437,44 @@ function renderWordCloudFallback(canvas, items, reason) {
} else {
canvas.style.display = 'none';
}
let fallback = card.querySelector('.chart-fallback');
let fallback = card.querySelector('.chart-fallback[data-dynamic="true"]');
if (!fallback) {
fallback = card.querySelector('.chart-fallback');
}
if (!fallback) {
fallback = document.createElement('div');
fallback.className = 'chart-fallback wordcloud-fallback';
fallback.setAttribute('data-dynamic', 'true');
card.appendChild(fallback);
}
fallback.className = 'chart-fallback wordcloud-fallback';
fallback.setAttribute('data-dynamic', 'true');
fallback.style.display = 'block';
fallback.innerHTML = '';
card.setAttribute('data-chart-state', 'fallback');
if (reason) {
let notice = fallback.querySelector('.chart-fallback__notice');
if (!notice) {
notice = document.createElement('p');
notice.className = 'chart-fallback__notice';
fallback.insertBefore(notice, fallback.firstChild || null);
const buildBadge = (item, maxWeight) => {
const badge = document.createElement('span');
badge.className = 'wordcloud-badge';
const clampedWeight = Math.max(0.5, (item.weight || 1));
const normalized = Math.min(1, clampedWeight / (maxWeight || 1));
const fontSize = 0.85 + normalized * 0.9;
badge.style.fontSize = `${fontSize}rem`;
badge.style.background = `linear-gradient(135deg, ${lightenColor(wordcloudColor(item.category), 0.05)} 0%, ${lightenColor(wordcloudColor(item.category), 0.15)} 100%)`;
badge.style.borderColor = lightenColor(wordcloudColor(item.category), 0.25);
badge.textContent = item.word;
if (item.weight !== undefined && item.weight !== null) {
const meta = document.createElement('small');
meta.textContent = item.weight >= 0 && item.weight <= 1.5
? `${(item.weight * 100).toFixed(0)}%`
: item.weight.toFixed(1).replace(/\.0+$/, '').replace(/0+$/, '').replace(/\.$/, '');
badge.appendChild(meta);
}
notice.textContent = `词云未能渲染${reason ? `${reason}` : ''},已展示数据表。`;
}
if (fallback.querySelector('table')) {
return;
return badge;
};
if (reason) {
const notice = document.createElement('p');
notice.className = 'chart-fallback__notice';
notice.textContent = `词云未能渲染${reason ? `${reason}` : ''},已展示关键词列表。`;
fallback.appendChild(notice);
}
if (!items || !items.length) {
const empty = document.createElement('p');
@@ -3309,39 +3482,13 @@ function renderWordCloudFallback(canvas, items, reason) {
fallback.appendChild(empty);
return;
}
const table = document.createElement('table');
const thead = document.createElement('thead');
const headRow = document.createElement('tr');
['关键词', '权重', '类别'].forEach(text => {
const th = document.createElement('th');
th.textContent = text;
headRow.appendChild(th);
});
thead.appendChild(headRow);
table.appendChild(thead);
const tbody = document.createElement('tbody');
const badges = document.createElement('div');
badges.className = 'wordcloud-badges';
const maxWeight = items.reduce((max, item) => Math.max(max, item.weight || 0), 1);
items.forEach(item => {
const row = document.createElement('tr');
const wordCell = document.createElement('td');
wordCell.textContent = item.word;
const weightCell = document.createElement('td');
if (typeof item.weight === 'number' && !Number.isNaN(item.weight)) {
weightCell.textContent = item.weight >= 0 && item.weight <= 1.5
? `${(item.weight * 100).toFixed(1)}%`
: item.weight.toFixed(2).replace(/\.0+$/, '').replace(/0+$/, '').replace(/\.$/, '');
} else {
weightCell.textContent = item.weight !== undefined && item.weight !== null ? String(item.weight) : '';
}
const categoryCell = document.createElement('td');
categoryCell.textContent = item.category || '';
categoryCell.style.color = wordcloudColor(item.category);
row.appendChild(wordCell);
row.appendChild(weightCell);
row.appendChild(categoryCell);
tbody.appendChild(row);
badges.appendChild(buildBadge(item, maxWeight));
});
table.appendChild(tbody);
fallback.appendChild(table);
fallback.appendChild(badges);
}
function renderWordCloud(canvas, payload, skipRegistry) {
@@ -3358,38 +3505,82 @@ function renderWordCloud(canvas, payload, skipRegistry) {
renderWordCloudFallback(canvas, items, '词云依赖未加载');
return;
}
const theme = resolveWordcloudTheme();
const dpr = Math.max(1, window.devicePixelRatio || 1);
const width = Math.max(240, (container ? container.clientWidth : canvas.clientWidth || canvas.width || 320));
const height = Math.max(180, Math.round(width * 0.62));
const width = Math.max(260, (container ? container.clientWidth : canvas.clientWidth || canvas.width || 320));
const height = Math.max(120, Math.round(width / 5)); // 5:1 宽高比
canvas.width = Math.round(width * dpr);
canvas.height = Math.round(height * dpr);
canvas.style.width = `${width}px`;
canvas.style.height = `${height}px`;
canvas.style.backgroundColor = 'transparent';
const resolveBgColor = () => {
const cardEl = card || container || document.body;
const style = getComputedStyle(cardEl);
const tokens = ['--card-bg', '--panel-bg', '--paper-bg', '--bg', '--background', '--page-bg'];
for (const key of tokens) {
const val = style.getPropertyValue(key);
if (val && val.trim() && val.trim() !== 'transparent') return val.trim();
}
if (style.backgroundColor && style.backgroundColor !== 'rgba(0, 0, 0, 0)') return style.backgroundColor;
const bodyStyle = getComputedStyle(document.body);
for (const key of tokens) {
const val = bodyStyle.getPropertyValue(key);
if (val && val.trim() && val.trim() !== 'transparent') return val.trim();
}
if (bodyStyle.backgroundColor && bodyStyle.backgroundColor !== 'rgba(0, 0, 0, 0)') {
return bodyStyle.backgroundColor;
}
return 'transparent';
};
const bgColor = resolveBgColor() || theme.cardBg || 'transparent';
const maxWeight = items.reduce((max, item) => Math.max(max, item.weight || 0), 0) || 1;
const weightLookup = new Map();
const categoryLookup = new Map();
items.forEach(it => {
weightLookup.set(it.word, it.weight || 1);
categoryLookup.set(it.word, it.category || '');
});
const list = items.map(item => [item.word, item.weight && item.weight > 0 ? item.weight : 1]);
try {
WordCloud(canvas, {
list,
gridSize: Math.max(8, Math.floor(Math.sqrt(canvas.width * canvas.height) / 80)),
gridSize: Math.max(3, Math.floor(Math.sqrt(canvas.width * canvas.height) / 170)),
weightFactor: (val) => {
const normalized = Math.max(0, val) / maxWeight;
const size = 16 + normalized * 32;
const cap = Math.min(width, height);
const base = Math.max(9, cap / 5.5);
const size = base * (0.8 + normalized * 1.3);
return size * dpr;
},
color: (word) => {
const found = items.find(entry => entry.word === word);
return lightenColor(wordcloudColor(found && found.category), 0.05);
const w = weightLookup.get(word) || 1;
const ratio = Math.max(0, Math.min(1, w / (maxWeight || 1)));
const category = categoryLookup.get(word) || '';
const base = wordcloudColor(category);
const target = theme.isDark ? '#ffffff' : (theme.text || '#111827');
const mixAmount = theme.isDark
? 0.28 + (1 - ratio) * 0.22
: 0.12 + (1 - ratio) * 0.35;
const mixed = mixColors(base, target, mixAmount);
return ensureAlpha(mixed || base, theme.isDark ? 0.95 : 1);
},
rotateRatio: 0.15,
rotateRatio: 0,
rotationSteps: 0,
shuffle: false,
shrinkToFit: true,
drawOutOfBound: false,
backgroundColor: getComputedStyle(document.body).getPropertyValue('--card-bg').trim() || '#fff'
shape: 'square',
ellipticity: 0.45,
clearCanvas: true,
backgroundColor: bgColor
});
if (container) {
container.style.display = '';
container.style.minHeight = `${height}px`;
container.style.background = 'transparent';
}
const fallback = card && card.querySelector('.chart-fallback');
if (fallback) {
@@ -3870,11 +4061,19 @@ function exportPdf() {
}
document.addEventListener('DOMContentLoaded', () => {
const rerenderWordclouds = debounce(() => {
wordCloudRegistry.forEach(fn => {
if (typeof fn === 'function') {
fn();
}
});
}, 260);
const themeBtn = document.getElementById('theme-toggle');
if (themeBtn) {
themeBtn.addEventListener('click', () => {
document.body.classList.toggle('dark-mode');
chartRegistry.forEach(applyChartTheme);
rerenderWordclouds();
});
}
const printBtn = document.getElementById('print-btn');
@@ -3885,13 +4084,6 @@ document.addEventListener('DOMContentLoaded', () => {
if (exportBtn) {
exportBtn.addEventListener('click', exportPdf);
}
const rerenderWordclouds = debounce(() => {
wordCloudRegistry.forEach(fn => {
if (typeof fn === 'function') {
fn();
}
});
}, 260);
window.addEventListener('resize', rerenderWordclouds);
hydrateCharts();
});

View File

@@ -331,6 +331,13 @@ class PDFRenderer:
# 只处理chart.js类型的widget
if widget_id and widget_type.startswith('chart.js'):
widget_type_lower = widget_type.lower()
props = block.get('props')
props_type = str(props.get('type') or '').lower() if isinstance(props, dict) else ''
if 'wordcloud' in widget_type_lower or 'wordcloud' in props_type:
logger.debug(f"检测到词云 {widget_id}跳过SVG转换并使用图片注入流程")
continue
failed, fail_reason = self.html_renderer._has_chart_failure(block)
if block.get("_chart_renderable") is False or failed:
logger.debug(
@@ -392,7 +399,13 @@ class PDFRenderer:
widget_id = block.get('widgetId')
widget_type = block.get('widgetType', '')
if widget_id and isinstance(widget_type, str) and 'wordcloud' in widget_type.lower():
props = block.get('props')
props_type = str(props.get('type') or '') if isinstance(props, dict) else ''
is_wordcloud = (
isinstance(widget_type, str) and 'wordcloud' in widget_type.lower()
) or ('wordcloud' in props_type.lower())
if widget_id and is_wordcloud:
try:
data_uri = self._generate_wordcloud_image(block)
if data_uri:
@@ -464,12 +477,14 @@ class PDFRenderer:
font_path = str(self._get_font_path())
wc = WordCloud(
width=900,
height=520,
width=1000,
height=360,
background_color="white",
font_path=font_path,
prefer_horizontal=0.9,
prefer_horizontal=0.98,
random_state=42,
max_words=180,
collocations=False,
)
wc.generate_from_frequencies(frequencies)