Modify the Logic for "Export as PDF"

2025-11-18 20:10:11 +08:00
parent acfe77a326
commit 5e82185bee
7 changed files with 1087 additions and 62 deletions
--- a/ReportEngine/flask_interface.py
+++ b/ReportEngine/flask_interface.py
@@ -1008,3 +1008,140 @@ def clear_log():
            'success': False,
            'error': f'清空日志失败: {str(e)}'
        }), 500
+
+
+@report_bp.route('/export/pdf/<task_id>', methods=['GET'])
+def export_pdf(task_id: str):
+    """
+    导出报告为PDF格式。
+
+    从IR JSON文件生成优化的PDF，支持自动布局调整。
+
+    参数:
+        task_id: 任务ID
+
+    查询参数:
+        optimize: 是否启用布局优化（默认true）
+
+    返回:
+        Response: PDF文件流或错误信息
+    """
+    try:
+        # 获取任务信息
+        task = tasks_registry.get(task_id)
+        if not task:
+            return jsonify({
+                'success': False,
+                'error': '任务不存在'
+            }), 404
+
+        # 检查任务是否完成
+        if task.status != 'completed':
+            return jsonify({
+                'success': False,
+                'error': f'任务未完成，当前状态: {task.status}'
+            }), 400
+
+        # 获取IR文件路径
+        if not task.ir_file_path or not os.path.exists(task.ir_file_path):
+            return jsonify({
+                'success': False,
+                'error': 'IR文件不存在'
+            }), 404
+
+        # 读取IR数据
+        with open(task.ir_file_path, 'r', encoding='utf-8') as f:
+            document_ir = json.load(f)
+
+        # 检查是否启用布局优化
+        optimize = request.args.get('optimize', 'true').lower() == 'true'
+
+        # 创建PDF渲染器并生成PDF
+        from .renderers import PDFRenderer
+        renderer = PDFRenderer()
+
+        logger.info(f"开始导出PDF，任务ID: {task_id}，布局优化: {optimize}")
+
+        # 生成PDF字节流
+        pdf_bytes = renderer.render_to_bytes(document_ir, optimize_layout=optimize)
+
+        # 确定下载文件名
+        topic = document_ir.get('metadata', {}).get('topic', 'report')
+        pdf_filename = f"report_{topic}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
+
+        # 返回PDF文件
+        return Response(
+            pdf_bytes,
+            mimetype='application/pdf',
+            headers={
+                'Content-Disposition': f'attachment; filename="{pdf_filename}"',
+                'Content-Type': 'application/pdf'
+            }
+        )
+
+    except Exception as e:
+        logger.exception(f"导出PDF失败: {str(e)}")
+        return jsonify({
+            'success': False,
+            'error': f'导出PDF失败: {str(e)}'
+        }), 500
+
+
+@report_bp.route('/export/pdf-from-ir', methods=['POST'])
+def export_pdf_from_ir():
+    """
+    从IR JSON直接导出PDF（不需要任务ID）。
+
+    适用于前端直接传递IR数据的场景。
+
+    请求体:
+        {
+            "document_ir": {...},  // Document IR JSON
+            "optimize": true       // 是否启用布局优化（可选）
+        }
+
+    返回:
+        Response: PDF文件流或错误信息
+    """
+    try:
+        data = request.get_json()
+
+        if not data or 'document_ir' not in data:
+            return jsonify({
+                'success': False,
+                'error': '缺少document_ir参数'
+            }), 400
+
+        document_ir = data['document_ir']
+        optimize = data.get('optimize', True)
+
+        # 创建PDF渲染器并生成PDF
+        from .renderers import PDFRenderer
+        renderer = PDFRenderer()
+
+        logger.info(f"从IR直接导出PDF，布局优化: {optimize}")
+
+        # 生成PDF字节流
+        pdf_bytes = renderer.render_to_bytes(document_ir, optimize_layout=optimize)
+
+        # 确定下载文件名
+        topic = document_ir.get('metadata', {}).get('topic', 'report')
+        pdf_filename = f"report_{topic}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
+
+        # 返回PDF文件
+        return Response(
+            pdf_bytes,
+            mimetype='application/pdf',
+            headers={
+                'Content-Disposition': f'attachment; filename="{pdf_filename}"',
+                'Content-Type': 'application/pdf'
+            }
+        )
+
+    except Exception as e:
+        logger.exception(f"从IR导出PDF失败: {str(e)}")
+        return jsonify({
+            'success': False,
+            'error': f'导出PDF失败: {str(e)}'
+        }), 500
+
--- a/ReportEngine/renderers/__init__.py
+++ b/ReportEngine/renderers/__init__.py
@@ -1,9 +1,31 @@
 """
 Report Engine渲染器集合。

-目前仅提供 HTMLRenderer，未来可扩展为PDF/Markdown等输出。
+提供 HTMLRenderer 和 PDFRenderer，支持HTML和PDF输出。
 """

 from .html_renderer import HTMLRenderer
+from .pdf_renderer import PDFRenderer
+from .pdf_layout_optimizer import (
+    PDFLayoutOptimizer,
+    PDFLayoutConfig,
+    PageLayout,
+    KPICardLayout,
+    CalloutLayout,
+    TableLayout,
+    ChartLayout,
+    GridLayout,
+)

-__all__ = ["HTMLRenderer"]
+__all__ = [
+    "HTMLRenderer",
+    "PDFRenderer",
+    "PDFLayoutOptimizer",
+    "PDFLayoutConfig",
+    "PageLayout",
+    "KPICardLayout",
+    "CalloutLayout",
+    "TableLayout",
+    "ChartLayout",
+    "GridLayout",
+]
--- a/ReportEngine/renderers/html_renderer.py
+++ b/ReportEngine/renderers/html_renderer.py
@@ -102,7 +102,7 @@ class HTMLRenderer:
    @staticmethod
    def _get_font_path() -> Path:
        """返回PDF导出所需字体的路径（使用优化后的子集字体）"""
-        return Path(__file__).parent / "assets" / "fonts" / "SourceHanSerifSC-Medium-Subset.otf"
+        return Path(__file__).parent / "assets" / "fonts" / "SourceHanSerifSC-Medium-Subset.ttf"

    def _load_lib(self, filename: str) -> str:
        """
@@ -2881,8 +2881,8 @@ function exportPdf() {
  const pdf = new jspdf.jsPDF('p', 'mm', 'a4');
  try {
    if (window.pdfFontData) {
-      pdf.addFileToVFS('SourceHanSerifSC-Medium.otf', window.pdfFontData);
-      pdf.addFont('SourceHanSerifSC-Medium.otf', 'SourceHanSerif', 'normal');
+      pdf.addFileToVFS('SourceHanSerifSC-Medium.ttf', window.pdfFontData);
+      pdf.addFont('SourceHanSerifSC-Medium.ttf', 'SourceHanSerif', 'normal');
      pdf.setFont('SourceHanSerif');
      console.log('PDF字体已成功加载');
    } else {
--- a/ReportEngine/renderers/pdf_layout_optimizer.py
+++ b/ReportEngine/renderers/pdf_layout_optimizer.py
@@ -0,0 +1,554 @@
+"""
+PDF布局优化器
+
+自动分析和优化PDF布局，确保内容不溢出、排版美观。
+支持：
+- 自动调整字号
+- 优化行间距
+- 调整色块大小
+- 智能排列信息块
+- 保存和加载优化方案
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+from dataclasses import dataclass, asdict
+from datetime import datetime
+from loguru import logger
+
+
+@dataclass
+class KPICardLayout:
+    """Layout settings for KPI cards (sizes are emitted as CSS px values)."""
+    font_size_value: int = 32  # font size of the KPI value
+    font_size_label: int = 14  # font size of the KPI label
+    font_size_change: int = 13  # font size of the change indicator
+    padding: int = 20  # inner padding of the card
+    min_height: int = 120  # minimum card height
+    value_max_length: int = 10  # intended max value length before the font shrinks; NOTE(review): not read by the code visible here
+
+
+@dataclass
+class CalloutLayout:
+    """Layout settings for callout boxes."""
+    font_size_title: int = 16  # font size of the callout title
+    font_size_content: int = 14  # font size of the callout body
+    padding: int = 20  # inner padding
+    line_height: float = 1.6  # line-height multiplier
+    max_width: str = "100%"  # maximum width; NOTE(review): not emitted by the CSS generator visible here
+
+
+@dataclass
+class TableLayout:
+    """Layout settings for tables."""
+    font_size_header: int = 13  # font size of header cells
+    font_size_body: int = 12  # font size of body cells
+    cell_padding: int = 12  # padding inside each cell
+    max_cell_width: int = 200  # maximum cell width in pixels
+    overflow_strategy: str = "wrap"  # overflow handling: wrap / ellipsis; NOTE(review): not read by the code visible here
+
+
+@dataclass
+class ChartLayout:
+    """Layout settings for chart cards."""
+    font_size_title: int = 16  # font size of the chart title
+    font_size_label: int = 12  # font size of labels; NOTE(review): not emitted by the CSS generator visible here
+    min_height: int = 300  # minimum card height
+    max_height: int = 600  # maximum card height
+    padding: int = 20  # inner padding
+
+
+@dataclass
+class GridLayout:
+    """Layout settings for the KPI grid."""
+    columns: int = 2  # number of columns per row
+    gap: int = 20  # gap between grid items
+    responsive_breakpoint: int = 768  # responsive breakpoint (width); NOTE(review): not read by the code visible here
+
+
+@dataclass
+class PageLayout:
+    """Page-wide layout settings (sizes are emitted as CSS px values)."""
+    font_size_base: int = 14  # base body font size
+    font_size_h1: int = 28  # level-1 heading
+    font_size_h2: int = 24  # level-2 heading
+    font_size_h3: int = 20  # level-3 heading
+    font_size_h4: int = 16  # level-4 heading
+    line_height: float = 1.6  # line-height multiplier
+    paragraph_spacing: int = 16  # bottom margin of paragraphs
+    section_spacing: int = 32  # bottom margin of chapters
+    page_padding: int = 40  # padding of the main content area
+    max_content_width: int = 800  # maximum width of the main content area
+
+
+@dataclass
+class PDFLayoutConfig:
+    """完整的PDF布局配置"""
+    page: PageLayout
+    kpi_card: KPICardLayout
+    callout: CalloutLayout
+    table: TableLayout
+    chart: ChartLayout
+    grid: GridLayout
+
+    # 优化策略配置
+    auto_adjust_font_size: bool = True  # 自动调整字号
+    auto_adjust_grid_columns: bool = True  # 自动调整网格列数
+    prevent_orphan_headers: bool = True  # 防止标题孤行
+    optimize_for_print: bool = True  # 打印优化
+
+    def to_dict(self) -> Dict[str, Any]:
+        """转换为字典"""
+        return {
+            'page': asdict(self.page),
+            'kpi_card': asdict(self.kpi_card),
+            'callout': asdict(self.callout),
+            'table': asdict(self.table),
+            'chart': asdict(self.chart),
+            'grid': asdict(self.grid),
+            'auto_adjust_font_size': self.auto_adjust_font_size,
+            'auto_adjust_grid_columns': self.auto_adjust_grid_columns,
+            'prevent_orphan_headers': self.prevent_orphan_headers,
+            'optimize_for_print': self.optimize_for_print,
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> PDFLayoutConfig:
+        """从字典创建配置"""
+        return cls(
+            page=PageLayout(**data['page']),
+            kpi_card=KPICardLayout(**data['kpi_card']),
+            callout=CalloutLayout(**data['callout']),
+            table=TableLayout(**data['table']),
+            chart=ChartLayout(**data['chart']),
+            grid=GridLayout(**data['grid']),
+            auto_adjust_font_size=data.get('auto_adjust_font_size', True),
+            auto_adjust_grid_columns=data.get('auto_adjust_grid_columns', True),
+            prevent_orphan_headers=data.get('prevent_orphan_headers', True),
+            optimize_for_print=data.get('optimize_for_print', True),
+        )
+
+
+class PDFLayoutOptimizer:
+    """
+    PDF布局优化器
+
+    根据内容特征自动优化PDF布局，防止溢出和排版问题。
+    """
+
+    def __init__(self, config: Optional[PDFLayoutConfig] = None):
+        """
+        初始化优化器
+
+        参数:
+            config: 布局配置，如果为None则使用默认配置
+        """
+        self.config = config or self._create_default_config()
+        self.optimization_log = []
+
+    @staticmethod
+    def _create_default_config() -> PDFLayoutConfig:
+        """创建默认配置"""
+        return PDFLayoutConfig(
+            page=PageLayout(),
+            kpi_card=KPICardLayout(),
+            callout=CalloutLayout(),
+            table=TableLayout(),
+            chart=ChartLayout(),
+            grid=GridLayout(),
+        )
+
+    def optimize_for_document(self, document_ir: Dict[str, Any]) -> PDFLayoutConfig:
+        """
+        根据文档IR内容优化布局配置
+
+        参数:
+            document_ir: Document IR数据
+
+        返回:
+            PDFLayoutConfig: 优化后的布局配置
+        """
+        logger.info("开始分析文档并优化布局...")
+
+        # 分析文档结构
+        stats = self._analyze_document(document_ir)
+
+        # 根据分析结果调整配置
+        optimized_config = self._adjust_config_based_on_stats(stats)
+
+        # 记录优化日志
+        self._log_optimization(stats, optimized_config)
+
+        return optimized_config
+
+    def _analyze_document(self, document_ir: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        分析文档内容特征
+
+        返回统计信息：
+        - kpi_count: KPI卡片数量
+        - table_count: 表格数量
+        - chart_count: 图表数量
+        - max_kpi_value_length: 最长KPI数值长度
+        - max_table_columns: 最多表格列数
+        - total_content_length: 总内容长度
+        """
+        stats = {
+            'kpi_count': 0,
+            'table_count': 0,
+            'chart_count': 0,
+            'callout_count': 0,
+            'max_kpi_value_length': 0,
+            'max_table_columns': 0,
+            'max_table_rows': 0,
+            'total_content_length': 0,
+            'has_long_text': False,
+        }
+
+        # 遍历章节
+        sections = document_ir.get('sections', [])
+        for section in sections:
+            self._analyze_section(section, stats)
+
+        logger.info(f"文档分析完成: {stats}")
+        return stats
+
+    def _analyze_section(self, section: Dict[str, Any], stats: Dict[str, Any]):
+        """递归分析章节"""
+        children = section.get('children', [])
+
+        for child in children:
+            node_type = child.get('type')
+
+            if node_type == 'kpi_grid':
+                kpis = child.get('kpis', [])
+                stats['kpi_count'] += len(kpis)
+
+                # 检查KPI数值长度
+                for kpi in kpis:
+                    value = str(kpi.get('value', ''))
+                    stats['max_kpi_value_length'] = max(
+                        stats['max_kpi_value_length'],
+                        len(value)
+                    )
+
+            elif node_type == 'table':
+                stats['table_count'] += 1
+
+                # 分析表格结构
+                headers = child.get('headers', [])
+                rows = child.get('rows', [])
+                stats['max_table_columns'] = max(
+                    stats['max_table_columns'],
+                    len(headers)
+                )
+                stats['max_table_rows'] = max(
+                    stats['max_table_rows'],
+                    len(rows)
+                )
+
+            elif node_type == 'chart':
+                stats['chart_count'] += 1
+
+            elif node_type == 'callout':
+                stats['callout_count'] += 1
+                content = child.get('content', '')
+                if len(content) > 200:
+                    stats['has_long_text'] = True
+
+            elif node_type == 'paragraph':
+                text = child.get('text', '')
+                stats['total_content_length'] += len(text)
+                if len(text) > 500:
+                    stats['has_long_text'] = True
+
+            # 递归处理子章节
+            if node_type == 'section':
+                self._analyze_section(child, stats)
+
+    def _adjust_config_based_on_stats(
+        self,
+        stats: Dict[str, Any]
+    ) -> PDFLayoutConfig:
+        """根据统计信息调整配置"""
+        config = PDFLayoutConfig(
+            page=PageLayout(**asdict(self.config.page)),
+            kpi_card=KPICardLayout(**asdict(self.config.kpi_card)),
+            callout=CalloutLayout(**asdict(self.config.callout)),
+            table=TableLayout(**asdict(self.config.table)),
+            chart=ChartLayout(**asdict(self.config.chart)),
+            grid=GridLayout(**asdict(self.config.grid)),
+            auto_adjust_font_size=self.config.auto_adjust_font_size,
+            auto_adjust_grid_columns=self.config.auto_adjust_grid_columns,
+            prevent_orphan_headers=self.config.prevent_orphan_headers,
+            optimize_for_print=self.config.optimize_for_print,
+        )
+
+        # 根据KPI数值长度调整字号
+        if stats['max_kpi_value_length'] > 10:
+            config.kpi_card.font_size_value = 28
+            self.optimization_log.append(
+                f"KPI数值过长({stats['max_kpi_value_length']}字符)，"
+                f"字号从32调整为28"
+            )
+        elif stats['max_kpi_value_length'] > 15:
+            config.kpi_card.font_size_value = 24
+            self.optimization_log.append(
+                f"KPI数值很长({stats['max_kpi_value_length']}字符)，"
+                f"字号从32调整为24"
+            )
+
+        # 根据KPI数量调整网格列数
+        if stats['kpi_count'] > 6:
+            config.grid.columns = 3
+            config.kpi_card.min_height = 100
+            self.optimization_log.append(
+                f"KPI卡片较多({stats['kpi_count']}个)，"
+                f"每行列数从2调整为3"
+            )
+        elif stats['kpi_count'] <= 2:
+            config.grid.columns = 1
+            self.optimization_log.append(
+                f"KPI卡片较少({stats['kpi_count']}个)，"
+                f"每行列数从2调整为1"
+            )
+
+        # 根据表格列数调整字号
+        if stats['max_table_columns'] > 6:
+            config.table.font_size_header = 11
+            config.table.font_size_body = 10
+            config.table.cell_padding = 8
+            self.optimization_log.append(
+                f"表格列数较多({stats['max_table_columns']}列)，"
+                f"缩小字号和内边距"
+            )
+
+        # 如果有长文本，增加行高
+        if stats['has_long_text']:
+            config.page.line_height = 1.8
+            config.callout.line_height = 1.8
+            self.optimization_log.append(
+                "检测到长文本，增加行高至1.8提高可读性"
+            )
+
+        return config
+
+    def _log_optimization(
+        self,
+        stats: Dict[str, Any],
+        config: PDFLayoutConfig
+    ):
+        """记录优化过程"""
+        log_entry = {
+            'timestamp': datetime.now().isoformat(),
+            'document_stats': stats,
+            'optimizations': self.optimization_log.copy(),
+            'final_config': config.to_dict(),
+        }
+
+        logger.info(f"布局优化完成，应用了{len(self.optimization_log)}项优化")
+        for opt in self.optimization_log:
+            logger.info(f"  - {opt}")
+
+        # 清空日志供下次使用
+        self.optimization_log.clear()
+
+        return log_entry
+
+    def save_config(self, path: str | Path, log_entry: Optional[Dict] = None):
+        """
+        保存配置到文件
+
+        参数:
+            path: 保存路径
+            log_entry: 优化日志条目（可选）
+        """
+        path = Path(path)
+        path.parent.mkdir(parents=True, exist_ok=True)
+
+        data = {
+            'config': self.config.to_dict(),
+        }
+
+        if log_entry:
+            data['optimization_log'] = log_entry
+
+        with open(path, 'w', encoding='utf-8') as f:
+            json.dump(data, f, ensure_ascii=False, indent=2)
+
+        logger.info(f"布局配置已保存: {path}")
+
+    @classmethod
+    def load_config(cls, path: str | Path) -> PDFLayoutOptimizer:
+        """
+        从文件加载配置
+
+        参数:
+            path: 配置文件路径
+
+        返回:
+            PDFLayoutOptimizer: 加载了配置的优化器实例
+        """
+        path = Path(path)
+
+        if not path.exists():
+            logger.warning(f"配置文件不存在: {path}，使用默认配置")
+            return cls()
+
+        with open(path, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+
+        config = PDFLayoutConfig.from_dict(data['config'])
+        optimizer = cls(config)
+
+        logger.info(f"布局配置已加载: {path}")
+        return optimizer
+
+    def generate_pdf_css(self) -> str:
+        """
+        Generate PDF-specific CSS from the current configuration.
+
+        Every size in ``self.config`` is interpolated into a fixed stylesheet
+        template; the strategy flags are not consulted here.
+
+        Returns:
+            str: The CSS text (without a surrounding ``<style>`` tag).
+        """
+        cfg = self.config
+
+        css = f"""
+/* PDF布局优化样式 - 由PDFLayoutOptimizer自动生成 */
+
+/* 页面基础样式 */
+body {{
+    font-size: {cfg.page.font_size_base}px;
+    line-height: {cfg.page.line_height};
+}}
+
+main {{
+    padding: {cfg.page.page_padding}px !important;
+    max-width: {cfg.page.max_content_width}px;
+    margin: 0 auto;
+}}
+
+/* 标题样式 */
+h1 {{ font-size: {cfg.page.font_size_h1}px !important; }}
+h2 {{ font-size: {cfg.page.font_size_h2}px !important; }}
+h3 {{ font-size: {cfg.page.font_size_h3}px !important; }}
+h4 {{ font-size: {cfg.page.font_size_h4}px !important; }}
+
+/* 段落间距 */
+p {{
+    margin-bottom: {cfg.page.paragraph_spacing}px;
+}}
+
+.chapter {{
+    margin-bottom: {cfg.page.section_spacing}px;
+}}
+
+/* KPI卡片优化 */
+.kpi-grid {{
+    display: grid;
+    grid-template-columns: repeat({cfg.grid.columns}, 1fr);
+    gap: {cfg.grid.gap}px;
+    margin: 20px 0;
+}}
+
+.kpi-card {{
+    padding: {cfg.kpi_card.padding}px !important;
+    min-height: {cfg.kpi_card.min_height}px;
+    break-inside: avoid;
+    page-break-inside: avoid;
+}}
+
+.kpi-card .value {{
+    font-size: {cfg.kpi_card.font_size_value}px !important;
+    line-height: 1.2;
+    word-break: break-word;
+}}
+
+.kpi-card .label {{
+    font-size: {cfg.kpi_card.font_size_label}px !important;
+}}
+
+.kpi-card .change {{
+    font-size: {cfg.kpi_card.font_size_change}px !important;
+}}
+
+/* 提示框优化 */
+.callout {{
+    padding: {cfg.callout.padding}px !important;
+    margin: 20px 0;
+    line-height: {cfg.callout.line_height};
+    break-inside: avoid;
+    page-break-inside: avoid;
+}}
+
+.callout-title {{
+    font-size: {cfg.callout.font_size_title}px !important;
+    margin-bottom: 10px;
+}}
+
+.callout-content {{
+    font-size: {cfg.callout.font_size_content}px !important;
+}}
+
+/* 表格优化 */
+table {{
+    width: 100%;
+    break-inside: avoid;
+    page-break-inside: avoid;
+}}
+
+th {{
+    font-size: {cfg.table.font_size_header}px !important;
+    padding: {cfg.table.cell_padding}px !important;
+}}
+
+td {{
+    font-size: {cfg.table.font_size_body}px !important;
+    padding: {cfg.table.cell_padding}px !important;
+    max-width: {cfg.table.max_cell_width}px;
+    word-wrap: break-word;
+    overflow-wrap: break-word;
+}}
+
+/* 图表优化 */
+.chart-card {{
+    min-height: {cfg.chart.min_height}px;
+    max-height: {cfg.chart.max_height}px;
+    padding: {cfg.chart.padding}px;
+    break-inside: avoid;
+    page-break-inside: avoid;
+}}
+
+.chart-title {{
+    font-size: {cfg.chart.font_size_title}px !important;
+}}
+
+/* 防止标题孤行 */
+h1, h2, h3, h4, h5, h6 {{
+    break-after: avoid;
+    page-break-after: avoid;
+}}
+
+/* 确保内容块不被分页 */
+.content-block {{
+    break-inside: avoid;
+    page-break-inside: avoid;
+}}
+"""
+
+        return css
+
+
+__all__ = [
+    'PDFLayoutOptimizer',
+    'PDFLayoutConfig',
+    'PageLayout',
+    'KPICardLayout',
+    'CalloutLayout',
+    'TableLayout',
+    'ChartLayout',
+    'GridLayout',
+]
--- a/ReportEngine/renderers/pdf_renderer.py
+++ b/ReportEngine/renderers/pdf_renderer.py
@@ -0,0 +1,250 @@
+"""
+PDF渲染器 - 使用WeasyPrint从HTML生成PDF
+支持完整的CSS样式和中文字体
+"""
+
+from __future__ import annotations
+
+import base64
+from pathlib import Path
+from typing import Any, Dict
+from datetime import datetime
+from loguru import logger
+
+try:
+    from weasyprint import HTML, CSS
+    from weasyprint.text.fonts import FontConfiguration
+    WEASYPRINT_AVAILABLE = True
+except ImportError:
+    WEASYPRINT_AVAILABLE = False
+    logger.warning("WeasyPrint未安装，PDF导出功能将不可用")
+
+from .html_renderer import HTMLRenderer
+from .pdf_layout_optimizer import PDFLayoutOptimizer, PDFLayoutConfig
+
+
+class PDFRenderer:
+    """
+    基于WeasyPrint的PDF渲染器
+
+    - 直接从HTML生成PDF，保留所有CSS样式
+    - 完美支持中文字体
+    - 自动处理分页和布局
+    """
+
+    def __init__(
+        self,
+        config: Dict[str, Any] | None = None,
+        layout_optimizer: PDFLayoutOptimizer | None = None
+    ):
+        """
+        初始化PDF渲染器
+
+        参数:
+            config: 渲染器配置
+            layout_optimizer: PDF布局优化器（可选）
+        """
+        self.config = config or {}
+        self.html_renderer = HTMLRenderer(config)
+        self.layout_optimizer = layout_optimizer or PDFLayoutOptimizer()
+
+        if not WEASYPRINT_AVAILABLE:
+            raise RuntimeError("WeasyPrint未安装，请运行: pip install weasyprint")
+
+    @staticmethod
+    def _get_font_path() -> Path:
+        """获取字体文件路径"""
+        # 优先使用完整字体以确保字符覆盖
+        fonts_dir = Path(__file__).parent / "assets" / "fonts"
+
+        # 检查完整字体
+        full_font = fonts_dir / "SourceHanSerifSC-Medium.otf"
+        if full_font.exists():
+            logger.info(f"使用完整字体: {full_font}")
+            return full_font
+
+        # 检查TTF子集字体
+        subset_ttf = fonts_dir / "SourceHanSerifSC-Medium-Subset.ttf"
+        if subset_ttf.exists():
+            logger.info(f"使用TTF子集字体: {subset_ttf}")
+            return subset_ttf
+
+        # 检查OTF子集字体
+        subset_otf = fonts_dir / "SourceHanSerifSC-Medium-Subset.otf"
+        if subset_otf.exists():
+            logger.info(f"使用OTF子集字体: {subset_otf}")
+            return subset_otf
+
+        raise FileNotFoundError(f"未找到字体文件，请检查 {fonts_dir} 目录")
+
+    def _get_pdf_html(
+        self,
+        document_ir: Dict[str, Any],
+        optimize_layout: bool = True
+    ) -> str:
+        """
+        生成适用于PDF的HTML内容
+
+        - 移除交互式元素（按钮、导航等）
+        - 添加PDF专用样式
+        - 嵌入字体文件
+        - 应用布局优化
+
+        参数:
+            document_ir: Document IR数据
+            optimize_layout: 是否启用布局优化
+
+        返回:
+            str: 优化后的HTML内容
+        """
+        # 如果启用布局优化，先分析文档并生成优化配置
+        if optimize_layout:
+            logger.info("启用PDF布局优化...")
+            layout_config = self.layout_optimizer.optimize_for_document(document_ir)
+
+            # 保存优化日志
+            log_dir = Path('logs/pdf_layouts')
+            log_dir.mkdir(parents=True, exist_ok=True)
+            log_file = log_dir / f"layout_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
+
+            # 保存配置和优化日志
+            optimization_log = self.layout_optimizer._log_optimization(
+                self.layout_optimizer._analyze_document(document_ir),
+                layout_config
+            )
+            self.layout_optimizer.config = layout_config
+            self.layout_optimizer.save_config(log_file, optimization_log)
+        else:
+            layout_config = self.layout_optimizer.config
+
+        # 使用HTML渲染器生成基础HTML
+        html = self.html_renderer.render(document_ir)
+
+        # 获取字体路径并转换为base64（用于嵌入）
+        font_path = self._get_font_path()
+        font_data = font_path.read_bytes()
+        font_base64 = base64.b64encode(font_data).decode('ascii')
+
+        # 判断字体格式
+        font_format = 'opentype' if font_path.suffix == '.otf' else 'truetype'
+
+        # 生成优化后的CSS
+        optimized_css = self.layout_optimizer.generate_pdf_css()
+
+        # 添加PDF专用CSS
+        pdf_css = f"""
+<style>
+/* PDF专用字体嵌入 */
+@font-face {{
+    font-family: 'SourceHanSerif';
+    src: url(data:font/{font_format};base64,{font_base64}) format('{font_format}');
+    font-weight: normal;
+    font-style: normal;
+}}
+
+/* 强制所有文本使用思源宋体 */
+body, h1, h2, h3, h4, h5, h6, p, li, td, th, div, span {{
+    font-family: 'SourceHanSerif', serif !important;
+}}
+
+/* PDF专用样式调整 */
+.report-header {{
+    display: none !important;
+}}
+
+.no-print {{
+    display: none !important;
+}}
+
+body {{
+    background: white !important;
+}}
+
+/* 隐藏图表canvas，显示fallback表格 */
+.chart-container {{
+    display: none !important;
+}}
+
+.chart-fallback {{
+    display: block !important;
+}}
+
+{optimized_css}
+</style>
+"""
+
+        # 在</head>前插入PDF专用CSS
+        html = html.replace('</head>', f'{pdf_css}\n</head>')
+
+        return html
+
+    def render_to_pdf(
+        self,
+        document_ir: Dict[str, Any],
+        output_path: str | Path,
+        optimize_layout: bool = True
+    ) -> Path:
+        """
+        将Document IR渲染为PDF文件
+
+        参数:
+            document_ir: Document IR数据
+            output_path: PDF输出路径
+            optimize_layout: 是否启用布局优化（默认True）
+
+        返回:
+            Path: 生成的PDF文件路径
+        """
+        output_path = Path(output_path)
+
+        logger.info(f"开始生成PDF: {output_path}")
+
+        # 生成HTML内容
+        html_content = self._get_pdf_html(document_ir, optimize_layout)
+
+        # 配置字体
+        font_config = FontConfiguration()
+
+        # 从HTML字符串创建WeasyPrint HTML对象
+        html_doc = HTML(string=html_content, base_url=str(Path.cwd()))
+
+        # 生成PDF
+        try:
+            html_doc.write_pdf(
+                output_path,
+                font_config=font_config,
+                presentational_hints=True  # 保留HTML的呈现提示
+            )
+            logger.info(f"✓ PDF生成成功: {output_path}")
+            return output_path
+
+        except Exception as e:
+            logger.error(f"PDF生成失败: {e}")
+            raise
+
+    def render_to_bytes(
+        self,
+        document_ir: Dict[str, Any],
+        optimize_layout: bool = True
+    ) -> bytes:
+        """
+        将Document IR渲染为PDF字节流
+
+        参数:
+            document_ir: Document IR数据
+            optimize_layout: 是否启用布局优化（默认True）
+
+        返回:
+            bytes: PDF文件的字节内容
+        """
+        html_content = self._get_pdf_html(document_ir, optimize_layout)
+        font_config = FontConfiguration()
+        html_doc = HTML(string=html_content, base_url=str(Path.cwd()))
+
+        return html_doc.write_pdf(
+            font_config=font_config,
+            presentational_hints=True
+        )
+
+
+__all__ = ["PDFRenderer"]
--- a/ReportEngine/scripts/export_to_pdf.py
+++ b/ReportEngine/scripts/export_to_pdf.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+"""
+PDF导出工具 - 使用Python直接生成PDF，无乱码
+
+用法:
+    python ReportEngine/scripts/export_to_pdf.py <报告IR JSON文件> [输出PDF路径]
+
+示例:
+    python ReportEngine/scripts/export_to_pdf.py final_reports/ir/report_ir_xxx.json output.pdf
+    python ReportEngine/scripts/export_to_pdf.py final_reports/ir/report_ir_xxx.json
+"""
+
+import sys
+import json
+from pathlib import Path
+from loguru import logger
+
+from ReportEngine.renderers import PDFRenderer
+
+
+def export_to_pdf(ir_json_path: str, output_pdf_path: str = None):
+    """
+    从IR JSON文件生成PDF
+
+    参数:
+        ir_json_path: Document IR JSON文件路径
+        output_pdf_path: 输出PDF路径（可选，默认为同名.pdf）
+    """
+    ir_path = Path(ir_json_path)
+
+    if not ir_path.exists():
+        logger.error(f"文件不存在: {ir_path}")
+        return False
+
+    # 读取IR数据
+    logger.info(f"读取报告: {ir_path}")
+    with open(ir_path, 'r', encoding='utf-8') as f:
+        document_ir = json.load(f)
+
+    # 确定输出路径
+    if output_pdf_path is None:
+        output_pdf_path = ir_path.parent / f"{ir_path.stem}.pdf"
+    else:
+        output_pdf_path = Path(output_pdf_path)
+
+    # 生成PDF
+    logger.info(f"开始生成PDF...")
+    renderer = PDFRenderer()
+
+    try:
+        renderer.render_to_pdf(document_ir, output_pdf_path)
+        logger.success(f"✓ PDF已生成: {output_pdf_path}")
+        return True
+    except Exception as e:
+        logger.error(f"✗ PDF生成失败: {e}")
+        logger.exception("详细错误信息:")
+        return False
+
+
+def main():
+    """主函数"""
+    if len(sys.argv) < 2:
+        print(__doc__)
+        sys.exit(1)
+
+    ir_json_path = sys.argv[1]
+    output_pdf_path = sys.argv[2] if len(sys.argv) > 2 else None
+
+    # 检查环境变量
+    import os
+    if 'DYLD_LIBRARY_PATH' not in os.environ:
+        logger.warning("未设置DYLD_LIBRARY_PATH，尝试自动设置...")
+        os.environ['DYLD_LIBRARY_PATH'] = '/opt/homebrew/lib'
+
+    success = export_to_pdf(ir_json_path, output_pdf_path)
+    sys.exit(0 if success else 1)
+
+
+if __name__ == "__main__":
+    main()
--- a/templates/index.html
+++ b/templates/index.html
@@ -3702,71 +3702,53 @@
        }

        async function downloadPdfFromPreview() {
-            const iframe = document.getElementById('report-iframe');
            const btn = document.getElementById('downloadPdfButton');
-            if (!iframe || !iframe.contentDocument) {
-                showMessage('请先加载报告预览再下载PDF', 'error');
-                return;
-            }
-            const target = iframe.contentDocument.documentElement;
-            if (!target) {
-                showMessage('报告内容未就绪', 'error');
+            const taskId = btn?.dataset.taskId;
+
+            if (!taskId) {
+                showMessage('无可用的报告任务，请先生成报告', 'error');
                return;
            }
+
            if (btn) btn.disabled = true;
-            showMessage('正在生成PDF，请稍候...', 'info');
+            showMessage('正在生成优化的PDF，请稍候...', 'info');
+
            try {
-                const { jsPDF } = window.jspdf || {};
-                if (!jsPDF) {
-                    throw new Error('PDF依赖未加载');
-                }
-                const pdf = new jsPDF('p', 'mm', 'a4');
-
-                // 添加中文字体支持
-                try {
-                    const fontData = iframe.contentWindow.pdfFontData || window.pdfFontData;
-                    if (fontData) {
-                        pdf.addFileToVFS('SourceHanSerifSC-Medium.otf', fontData);
-                        pdf.addFont('SourceHanSerifSC-Medium.otf', 'SourceHanSerif', 'normal');
-                        pdf.setFont('SourceHanSerif');
-                        console.log('PDF字体已加载：SourceHanSerif');
-                    } else {
-                        console.warn('PDF字体数据未找到，将使用默认字体');
-                    }
-                } catch (fontErr) {
-                    console.warn('PDF字体加载失败:', fontErr);
-                }
-
-                const pageWidth = pdf.internal.pageSize.getWidth();
-                const pxWidth = Math.max(target.scrollWidth || 0, Math.round(pageWidth * 3.78));
-                const renderTask = pdf.html(target, {
-                    x: 10,
-                    y: 10,
-                    width: pageWidth - 20,
-                    windowWidth: pxWidth,
-                    margin: [10, 10, 16, 10],
-                    autoPaging: 'text',
-                    html2canvas: {
-                        scale: Math.min(1.5, Math.max(1.0, pageWidth / (target.clientWidth || pageWidth))),
-                        useCORS: true,
-                        scrollX: 0,
-                        scrollY: -iframe.contentWindow.scrollY,
-                        logging: false,
-                        allowTaint: true,
-                        backgroundColor: '#ffffff'
-                    },
-                    pagebreak: {
-                        mode: ['css', 'legacy'],
-                        avoid: ['.chapter', '.callout', '.chart-card', '.table-wrap', '.kpi-grid', '.hero-section'],
-                        before: '.chapter-divider'
-                    }
+                // 调用后端PDF导出API
+                const response = await fetch(`/api/report/export/pdf/${taskId}?optimize=true`, {
+                    method: 'GET'
                });
-                await (renderTask && typeof renderTask.then === 'function' ? renderTask : Promise.resolve());
-                pdf.save('report.pdf');
+
+                if (!response.ok) {
+                    const error = await response.json();
+                    throw new Error(error.error || 'PDF导出失败');
+                }
+
+                // 获取PDF文件名（从响应头）
+                const contentDisposition = response.headers.get('Content-Disposition');
+                let filename = 'report.pdf';
+                if (contentDisposition) {
+                    const matches = /filename="?([^"]+)"?/.exec(contentDisposition);
+                    if (matches && matches[1]) {
+                        filename = matches[1];
+                    }
+                }
+
+                // 下载PDF
+                const blob = await response.blob();
+                const url = window.URL.createObjectURL(blob);
+                const link = document.createElement('a');
+                link.href = url;
+                link.download = filename;
+                document.body.appendChild(link);
+                link.click();
+                document.body.removeChild(link);
+                window.URL.revokeObjectURL(url);
+
                showMessage('PDF生成完成，已开始下载', 'success');
            } catch (err) {
-                console.error('生成PDF失败:', err);
-                showMessage('生成PDF失败: ' + err.message, 'error');
+                console.error('导出PDF失败:', err);
+                showMessage('导出PDF失败: ' + err.message, 'error');
            } finally {
                if (btn) btn.disabled = false;
            }