Merge pull request #155 from DoiiarX/fix-agent-communication
日志解析修复、及日志解析修复测试用例
This commit is contained in:
@@ -35,13 +35,13 @@ class ForumHost:
|
|||||||
初始化论坛主持人
|
初始化论坛主持人
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
api_key: 硅基流动API密钥,如果不提供则从配置文件读取
|
api_key: 论坛主持人 LLM API 密钥,如果不提供则从配置文件读取
|
||||||
base_url: 接口基础地址,默认使用配置文件提供的SiliconFlow地址
|
base_url: 论坛主持人 LLM API 接口基础地址,默认使用配置文件提供的SiliconFlow地址
|
||||||
"""
|
"""
|
||||||
self.api_key = api_key or settings.FORUM_HOST_API_KEY
|
self.api_key = api_key or settings.FORUM_HOST_API_KEY
|
||||||
|
|
||||||
if not self.api_key:
|
if not self.api_key:
|
||||||
raise ValueError("未找到硅基流动API密钥,请在config.py中设置FORUM_HOST_API_KEY")
|
raise ValueError("未找到论坛主持人API密钥,请在环境变量文件中设置FORUM_HOST_API_KEY")
|
||||||
|
|
||||||
self.base_url = base_url or settings.FORUM_HOST_BASE_URL
|
self.base_url = base_url or settings.FORUM_HOST_BASE_URL
|
||||||
|
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ try:
|
|||||||
from .llm_host import generate_host_speech
|
from .llm_host import generate_host_speech
|
||||||
HOST_AVAILABLE = True
|
HOST_AVAILABLE = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
logger.warning("ForumEngine: 论坛主持人模块未找到,将以纯监控模式运行")
|
logger.exception("ForumEngine: 论坛主持人模块未找到,将以纯监控模式运行")
|
||||||
HOST_AVAILABLE = False
|
HOST_AVAILABLE = False
|
||||||
|
|
||||||
class LogMonitor:
|
class LogMonitor:
|
||||||
@@ -50,10 +50,20 @@ class LogMonitor:
|
|||||||
self.host_speech_threshold = 5 # 每5条agent发言触发一次主持人发言
|
self.host_speech_threshold = 5 # 每5条agent发言触发一次主持人发言
|
||||||
self.is_host_generating = False # 主持人是否正在生成发言
|
self.is_host_generating = False # 主持人是否正在生成发言
|
||||||
|
|
||||||
# 目标节点名称 - 直接匹配字符串
|
# 目标节点识别模式
|
||||||
self.target_nodes = [
|
# 1. 类名(旧格式可能包含)
|
||||||
'FirstSummaryNode',
|
# 2. 完整模块路径(实际日志格式,包含引擎前缀)
|
||||||
'ReflectionSummaryNode'
|
# 3. 部分模块路径(兼容性)
|
||||||
|
# 4. 关键标识文本
|
||||||
|
self.target_node_patterns = [
|
||||||
|
'FirstSummaryNode', # 类名
|
||||||
|
'ReflectionSummaryNode', # 类名
|
||||||
|
'InsightEngine.nodes.summary_node', # InsightEngine完整路径
|
||||||
|
'MediaEngine.nodes.summary_node', # MediaEngine完整路径
|
||||||
|
'QueryEngine.nodes.summary_node', # QueryEngine完整路径
|
||||||
|
'nodes.summary_node', # 模块路径(兼容性,用于部分匹配)
|
||||||
|
'正在生成首次段落总结', # FirstSummaryNode的标识
|
||||||
|
'正在生成反思总结', # ReflectionSummaryNode的标识
|
||||||
]
|
]
|
||||||
|
|
||||||
# 多行内容捕获状态
|
# 多行内容捕获状态
|
||||||
@@ -107,12 +117,33 @@ class LogMonitor:
|
|||||||
f.flush()
|
f.flush()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception(f"ForumEngine: 写入forum.log失败: {e}")
|
logger.exception(f"ForumEngine: 写入forum.log失败: {e}")
|
||||||
|
|
||||||
def is_target_log_line(self, line: str) -> bool:
|
def is_target_log_line(self, line: str) -> bool:
|
||||||
"""检查是否是目标日志行(SummaryNode)"""
|
"""检查是否是目标日志行(SummaryNode)
|
||||||
# 简单字符串包含检查,更可靠
|
|
||||||
for node_name in self.target_nodes:
|
支持多种识别方式:
|
||||||
if node_name in line:
|
1. 类名:FirstSummaryNode, ReflectionSummaryNode
|
||||||
|
2. 完整模块路径:InsightEngine.nodes.summary_node、MediaEngine.nodes.summary_node、QueryEngine.nodes.summary_node
|
||||||
|
3. 部分模块路径:nodes.summary_node(兼容性)
|
||||||
|
4. 关键标识文本:正在生成首次段落总结、正在生成反思总结
|
||||||
|
|
||||||
|
排除条件:
|
||||||
|
- ERROR 级别的日志(错误日志不应被识别为目标节点)
|
||||||
|
- 包含错误关键词的日志(JSON解析失败、JSON修复失败等)
|
||||||
|
"""
|
||||||
|
# 排除 ERROR 级别的日志
|
||||||
|
if "| ERROR" in line or "| ERROR |" in line:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# 排除包含错误关键词的日志
|
||||||
|
error_keywords = ["JSON解析失败", "JSON修复失败", "Traceback", "File \""]
|
||||||
|
for keyword in error_keywords:
|
||||||
|
if keyword in line:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# 检查是否包含目标节点模式
|
||||||
|
for pattern in self.target_node_patterns:
|
||||||
|
if pattern in line:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -145,7 +176,10 @@ class LogMonitor:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
# 如果行长度过短,也认为不是有价值的内容
|
# 如果行长度过短,也认为不是有价值的内容
|
||||||
clean_line = re.sub(r'\[\d{2}:\d{2}:\d{2}\]', '', line).strip()
|
# 移除时间戳:支持旧格式和新格式
|
||||||
|
clean_line = re.sub(r'\[\d{2}:\d{2}:\d{2}\]', '', line)
|
||||||
|
clean_line = re.sub(r'\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\.\d{3}\s*\|\s*[A-Z]+\s*\|\s*[^|]+?\s*-\s*', '', clean_line)
|
||||||
|
clean_line = clean_line.strip()
|
||||||
if len(clean_line) < 30: # 阈值可以调整
|
if len(clean_line) < 30: # 阈值可以调整
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -156,9 +190,25 @@ class LogMonitor:
|
|||||||
return "清理后的输出: {" in line
|
return "清理后的输出: {" in line
|
||||||
|
|
||||||
def is_json_end_line(self, line: str) -> bool:
|
def is_json_end_line(self, line: str) -> bool:
|
||||||
"""判断是否是JSON结束行"""
|
"""判断是否是JSON结束行
|
||||||
|
|
||||||
|
只判断纯粹的结束标记行,不包含任何日志格式信息(时间戳等)。
|
||||||
|
如果行包含时间戳,应该先清理再判断,但这里返回False表示需要进一步处理。
|
||||||
|
"""
|
||||||
stripped = line.strip()
|
stripped = line.strip()
|
||||||
return stripped == "}" or (stripped.startswith("[") and stripped.endswith("] }"))
|
|
||||||
|
# 如果行包含时间戳(旧格式或新格式),说明不是纯粹的结束行
|
||||||
|
# 旧格式:[HH:MM:SS]
|
||||||
|
if re.match(r'^\[\d{2}:\d{2}:\d{2}\]', stripped):
|
||||||
|
return False
|
||||||
|
# 新格式:YYYY-MM-DD HH:mm:ss.SSS
|
||||||
|
if re.match(r'^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\.\d{3}', stripped):
|
||||||
|
return False
|
||||||
|
|
||||||
|
# 不包含时间戳的行,检查是否是纯结束标记
|
||||||
|
if stripped == "}" or stripped == "] }":
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
def extract_json_content(self, json_lines: List[str]) -> Optional[str]:
|
def extract_json_content(self, json_lines: List[str]) -> Optional[str]:
|
||||||
"""从多行中提取并解析JSON内容"""
|
"""从多行中提取并解析JSON内容"""
|
||||||
@@ -200,8 +250,12 @@ class LogMonitor:
|
|||||||
# 处理多行JSON
|
# 处理多行JSON
|
||||||
json_text = json_part
|
json_text = json_part
|
||||||
for line in json_lines[json_start_idx + 1:]:
|
for line in json_lines[json_start_idx + 1:]:
|
||||||
# 移除时间戳
|
# 移除时间戳:支持旧格式 [HH:MM:SS] 和新格式 loguru (YYYY-MM-DD HH:mm:ss.SSS | LEVEL | ...)
|
||||||
|
# 旧格式:[HH:MM:SS]
|
||||||
clean_line = re.sub(r'^\[\d{2}:\d{2}:\d{2}\]\s*', '', line)
|
clean_line = re.sub(r'^\[\d{2}:\d{2}:\d{2}\]\s*', '', line)
|
||||||
|
# 新格式:移除 loguru 格式的时间戳和级别信息
|
||||||
|
# 格式: YYYY-MM-DD HH:mm:ss.SSS | LEVEL | module:function:line -
|
||||||
|
clean_line = re.sub(r'^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\.\d{3}\s*\|\s*[A-Z]+\s*\|\s*[^|]+?\s*-\s*', '', clean_line)
|
||||||
json_text += clean_line
|
json_text += clean_line
|
||||||
|
|
||||||
# 尝试解析JSON
|
# 尝试解析JSON
|
||||||
@@ -247,42 +301,51 @@ class LogMonitor:
|
|||||||
|
|
||||||
def extract_node_content(self, line: str) -> Optional[str]:
|
def extract_node_content(self, line: str) -> Optional[str]:
|
||||||
"""提取节点内容,去除时间戳、节点名称等前缀"""
|
"""提取节点内容,去除时间戳、节点名称等前缀"""
|
||||||
# 移除时间戳部分
|
content = line
|
||||||
# 格式: [HH:MM:SS] [NodeName] message
|
|
||||||
match = re.search(r'\[\d{2}:\d{2}:\d{2}\]\s*(.+)', line)
|
# 移除时间戳部分:支持旧格式和新格式
|
||||||
if match:
|
# 旧格式: [HH:MM:SS]
|
||||||
content = match.group(1).strip()
|
match_old = re.search(r'\[\d{2}:\d{2}:\d{2}\]\s*(.+)', content)
|
||||||
|
if match_old:
|
||||||
# 移除所有的方括号标签(包括节点名称和应用名称)
|
content = match_old.group(1).strip()
|
||||||
|
else:
|
||||||
|
# 新格式: YYYY-MM-DD HH:mm:ss.SSS | LEVEL | module:function:line -
|
||||||
|
match_new = re.search(r'\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\.\d{3}\s*\|\s*[A-Z]+\s*\|\s*[^|]+?\s*-\s*(.+)', content)
|
||||||
|
if match_new:
|
||||||
|
content = match_new.group(1).strip()
|
||||||
|
|
||||||
|
if not content:
|
||||||
|
return line.strip()
|
||||||
|
|
||||||
|
# 移除所有的方括号标签(包括节点名称和应用名称)
|
||||||
|
content = re.sub(r'^\[.*?\]\s*', '', content)
|
||||||
|
|
||||||
|
# 继续移除可能的多个连续标签
|
||||||
|
while re.match(r'^\[.*?\]\s*', content):
|
||||||
content = re.sub(r'^\[.*?\]\s*', '', content)
|
content = re.sub(r'^\[.*?\]\s*', '', content)
|
||||||
|
|
||||||
# 继续移除可能的多个连续标签
|
# 移除常见前缀(如"首次总结: "、"反思总结: "等)
|
||||||
while re.match(r'^\[.*?\]\s*', content):
|
prefixes_to_remove = [
|
||||||
content = re.sub(r'^\[.*?\]\s*', '', content)
|
"首次总结: ",
|
||||||
|
"反思总结: ",
|
||||||
# 移除常见前缀(如"首次总结: "、"反思总结: "等)
|
"清理后的输出: "
|
||||||
prefixes_to_remove = [
|
]
|
||||||
"首次总结: ",
|
|
||||||
"反思总结: ",
|
for prefix in prefixes_to_remove:
|
||||||
"清理后的输出: "
|
if content.startswith(prefix):
|
||||||
]
|
content = content[len(prefix):]
|
||||||
|
break
|
||||||
for prefix in prefixes_to_remove:
|
|
||||||
if content.startswith(prefix):
|
# 移除可能存在的应用名标签(不在方括号内的)
|
||||||
content = content[len(prefix):]
|
app_names = ['INSIGHT', 'MEDIA', 'QUERY']
|
||||||
break
|
for app_name in app_names:
|
||||||
|
# 移除单独的APP_NAME(在行首)
|
||||||
# 移除可能存在的应用名标签(不在方括号内的)
|
content = re.sub(rf'^{app_name}\s+', '', content, flags=re.IGNORECASE)
|
||||||
app_names = ['INSIGHT', 'MEDIA', 'QUERY']
|
|
||||||
for app_name in app_names:
|
# 清理多余的空格
|
||||||
# 移除单独的APP_NAME(在行首)
|
content = re.sub(r'\s+', ' ', content)
|
||||||
content = re.sub(rf'^{app_name}\s+', '', content, flags=re.IGNORECASE)
|
|
||||||
|
return content.strip()
|
||||||
# 清理多余的空格
|
|
||||||
content = re.sub(r'\s+', ' ', content)
|
|
||||||
|
|
||||||
return content.strip()
|
|
||||||
return line.strip()
|
|
||||||
|
|
||||||
def get_file_size(self, file_path: Path) -> int:
|
def get_file_size(self, file_path: Path) -> int:
|
||||||
"""获取文件大小"""
|
"""获取文件大小"""
|
||||||
@@ -349,36 +412,49 @@ class LogMonitor:
|
|||||||
if not line.strip():
|
if not line.strip():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 检查是否是目标节点行
|
# 检查是否是目标节点行和JSON开始标记
|
||||||
if self.is_target_log_line(line):
|
is_target = self.is_target_log_line(line)
|
||||||
if self.is_json_start_line(line):
|
is_json_start = self.is_json_start_line(line)
|
||||||
# 开始捕获JSON
|
|
||||||
self.capturing_json[app_name] = True
|
# 只有目标节点(SummaryNode)的JSON输出才应该被捕获
|
||||||
self.json_buffer[app_name] = [line]
|
# 过滤掉SearchNode等其他节点的输出(它们不是目标节点,即使有JSON也不会被捕获)
|
||||||
self.json_start_line[app_name] = line
|
if is_target and is_json_start:
|
||||||
|
# 开始捕获JSON(必须是目标节点且包含"清理后的输出: {")
|
||||||
|
self.capturing_json[app_name] = True
|
||||||
|
self.json_buffer[app_name] = [line]
|
||||||
|
self.json_start_line[app_name] = line
|
||||||
|
|
||||||
|
# 检查是否是单行JSON
|
||||||
|
if line.strip().endswith("}"):
|
||||||
|
# 单行JSON,立即处理
|
||||||
|
content = self.extract_json_content([line])
|
||||||
|
if content: # 只有成功解析的内容才会被记录
|
||||||
|
# 去除重复的标签和格式化
|
||||||
|
clean_content = self._clean_content_tags(content, app_name)
|
||||||
|
captured_contents.append(f"{clean_content}")
|
||||||
|
self.capturing_json[app_name] = False
|
||||||
|
self.json_buffer[app_name] = []
|
||||||
|
|
||||||
# 检查是否是单行JSON
|
elif is_target and self.is_valuable_content(line):
|
||||||
if line.strip().endswith("}"):
|
# 其他有价值的SummaryNode内容(必须是目标节点且有价值)
|
||||||
# 单行JSON,立即处理
|
clean_content = self._clean_content_tags(self.extract_node_content(line), app_name)
|
||||||
content = self.extract_json_content([line])
|
captured_contents.append(f"{clean_content}")
|
||||||
if content: # 只有成功解析的内容才会被记录
|
|
||||||
# 去除重复的标签和格式化
|
|
||||||
clean_content = self._clean_content_tags(content, app_name)
|
|
||||||
captured_contents.append(f"{clean_content}")
|
|
||||||
self.capturing_json[app_name] = False
|
|
||||||
self.json_buffer[app_name] = []
|
|
||||||
|
|
||||||
elif self.is_valuable_content(line):
|
|
||||||
# 其他有价值的SummaryNode内容
|
|
||||||
clean_content = self._clean_content_tags(self.extract_node_content(line), app_name)
|
|
||||||
captured_contents.append(f"{clean_content}")
|
|
||||||
|
|
||||||
elif self.capturing_json[app_name]:
|
elif self.capturing_json[app_name]:
|
||||||
# 正在捕获JSON的后续行
|
# 正在捕获JSON的后续行
|
||||||
self.json_buffer[app_name].append(line)
|
self.json_buffer[app_name].append(line)
|
||||||
|
|
||||||
# 检查是否是JSON结束
|
# 检查是否是JSON结束
|
||||||
if self.is_json_end_line(line):
|
# 先清理时间戳,然后判断清理后的行是否是结束标记
|
||||||
|
cleaned_line = line.strip()
|
||||||
|
# 清理旧格式时间戳:[HH:MM:SS]
|
||||||
|
cleaned_line = re.sub(r'^\[\d{2}:\d{2}:\d{2}\]\s*', '', cleaned_line)
|
||||||
|
# 清理新格式时间戳:YYYY-MM-DD HH:mm:ss.SSS | LEVEL | module:function:line -
|
||||||
|
cleaned_line = re.sub(r'^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\.\d{3}\s*\|\s*[A-Z]+\s*\|\s*[^|]+?\s*-\s*', '', cleaned_line)
|
||||||
|
cleaned_line = cleaned_line.strip()
|
||||||
|
|
||||||
|
# 清理后判断是否是结束标记
|
||||||
|
if cleaned_line == "}" or cleaned_line == "] }":
|
||||||
# JSON结束,处理完整的JSON
|
# JSON结束,处理完整的JSON
|
||||||
content = self.extract_json_content(self.json_buffer[app_name])
|
content = self.extract_json_content(self.json_buffer[app_name])
|
||||||
if content: # 只有成功解析的内容才会被记录
|
if content: # 只有成功解析的内容才会被记录
|
||||||
@@ -484,13 +560,16 @@ class LogMonitor:
|
|||||||
# 先检查是否需要触发搜索(只触发一次)
|
# 先检查是否需要触发搜索(只触发一次)
|
||||||
if not self.is_searching:
|
if not self.is_searching:
|
||||||
for line in new_lines:
|
for line in new_lines:
|
||||||
if line.strip() and 'FirstSummaryNode' in line:
|
# 检查是否包含目标节点模式(支持多种格式)
|
||||||
logger.info(f"ForumEngine: 在{app_name}中检测到第一次论坛发表内容")
|
if line.strip() and self.is_target_log_line(line):
|
||||||
self.is_searching = True
|
# 进一步确认是首次总结节点(FirstSummaryNode或包含"正在生成首次段落总结")
|
||||||
self.search_inactive_count = 0
|
if 'FirstSummaryNode' in line or '正在生成首次段落总结' in line:
|
||||||
# 清空forum.log开始新会话
|
logger.info(f"ForumEngine: 在{app_name}中检测到第一次论坛发表内容")
|
||||||
self.clear_forum_log()
|
self.is_searching = True
|
||||||
break # 找到一个就够了,跳出循环
|
self.search_inactive_count = 0
|
||||||
|
# 清空forum.log开始新会话
|
||||||
|
self.clear_forum_log()
|
||||||
|
break # 找到一个就够了,跳出循环
|
||||||
|
|
||||||
# 处理所有新增内容(如果正在搜索状态)
|
# 处理所有新增内容(如果正在搜索状态)
|
||||||
if self.is_searching:
|
if self.is_searching:
|
||||||
|
|||||||
@@ -161,6 +161,7 @@ class KeywordOptimizer:
|
|||||||
|
|
||||||
**重要提醒**:每个关键词都必须是一个不可分割的独立词条,严禁在词条内部包含空格。例如,应使用 "雷军班争议" 而不是错误的 "雷军班 争议"。
|
**重要提醒**:每个关键词都必须是一个不可分割的独立词条,严禁在词条内部包含空格。例如,应使用 "雷军班争议" 而不是错误的 "雷军班 争议"。
|
||||||
|
|
||||||
|
|
||||||
**输出格式**:
|
**输出格式**:
|
||||||
请以JSON格式返回结果:
|
请以JSON格式返回结果:
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -134,6 +134,7 @@ def run_report_generation(task: ReportTask, query: str, custom_template: str = "
|
|||||||
task.update_status("completed", 100)
|
task.update_status("completed", 100)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
logger.exception(f"报告生成过程中发生错误: {str(e)}")
|
||||||
task.update_status("error", 0, str(e))
|
task.update_status("error", 0, str(e))
|
||||||
# 只在出错时清理任务
|
# 只在出错时清理任务
|
||||||
with task_lock:
|
with task_lock:
|
||||||
@@ -156,6 +157,7 @@ def get_status():
|
|||||||
'current_task': current_task.to_dict() if current_task else None
|
'current_task': current_task.to_dict() if current_task else None
|
||||||
})
|
})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
logger.exception(f"获取Report Engine状态失败: {str(e)}")
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'success': False,
|
'success': False,
|
||||||
'error': str(e)
|
'error': str(e)
|
||||||
@@ -228,6 +230,7 @@ def generate_report():
|
|||||||
})
|
})
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
logger.exception(f"开始生成报告失败: {str(e)}")
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'success': False,
|
'success': False,
|
||||||
'error': str(e)
|
'error': str(e)
|
||||||
@@ -319,6 +322,7 @@ def get_result_json(task_id: str):
|
|||||||
})
|
})
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
logger.exception(f"获取报告生成结果失败: {str(e)}")
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'success': False,
|
'success': False,
|
||||||
'error': str(e)
|
'error': str(e)
|
||||||
@@ -348,6 +352,7 @@ def cancel_task(task_id: str):
|
|||||||
}), 404
|
}), 404
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
logger.exception(f"取消报告生成任务失败: {str(e)}")
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'success': False,
|
'success': False,
|
||||||
'error': str(e)
|
'error': str(e)
|
||||||
@@ -391,6 +396,7 @@ def get_templates():
|
|||||||
})
|
})
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
logger.exception(f"获取可用模板列表失败: {str(e)}")
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'success': False,
|
'success': False,
|
||||||
'error': str(e)
|
'error': str(e)
|
||||||
@@ -400,6 +406,7 @@ def get_templates():
|
|||||||
# 错误处理
|
# 错误处理
|
||||||
@report_bp.errorhandler(404)
|
@report_bp.errorhandler(404)
|
||||||
def not_found(error):
|
def not_found(error):
|
||||||
|
logger.exception(f"API端点不存在: {str(error)}")
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'success': False,
|
'success': False,
|
||||||
'error': 'API端点不存在'
|
'error': 'API端点不存在'
|
||||||
@@ -408,6 +415,7 @@ def not_found(error):
|
|||||||
|
|
||||||
@report_bp.errorhandler(500)
|
@report_bp.errorhandler(500)
|
||||||
def internal_error(error):
|
def internal_error(error):
|
||||||
|
logger.exception(f"服务器内部错误: {str(error)}")
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'success': False,
|
'success': False,
|
||||||
'error': '服务器内部错误'
|
'error': '服务器内部错误'
|
||||||
|
|||||||
16
app.py
16
app.py
@@ -94,20 +94,10 @@ def _load_config_module():
|
|||||||
def read_config_values():
|
def read_config_values():
|
||||||
"""Return the current configuration values that are exposed to the frontend."""
|
"""Return the current configuration values that are exposed to the frontend."""
|
||||||
try:
|
try:
|
||||||
# 重新导入 config 模块以获取最新的 Settings 实例
|
# 重新加载配置以获取最新的 Settings 实例
|
||||||
importlib.invalidate_caches()
|
from config import reload_settings, settings
|
||||||
if CONFIG_MODULE_NAME in sys.modules:
|
reload_settings()
|
||||||
importlib.reload(sys.modules[CONFIG_MODULE_NAME])
|
|
||||||
else:
|
|
||||||
importlib.import_module(CONFIG_MODULE_NAME)
|
|
||||||
|
|
||||||
# 从 config 模块获取 settings 实例
|
|
||||||
config_module = sys.modules[CONFIG_MODULE_NAME]
|
|
||||||
if not hasattr(config_module, 'settings'):
|
|
||||||
logger.error("config 模块中没有找到 settings 实例")
|
|
||||||
return {}
|
|
||||||
|
|
||||||
settings = config_module.settings
|
|
||||||
values = {}
|
values = {}
|
||||||
for key in CONFIG_KEYS:
|
for key in CONFIG_KEYS:
|
||||||
# 从 Pydantic Settings 实例读取值
|
# 从 Pydantic Settings 实例读取值
|
||||||
|
|||||||
30
config.py
30
config.py
@@ -9,8 +9,9 @@
|
|||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from pydantic_settings import BaseSettings
|
from pydantic_settings import BaseSettings
|
||||||
from pydantic import Field
|
from pydantic import Field, ConfigDict
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
|
||||||
# 计算 .env 优先级:优先当前工作目录,其次项目根目录
|
# 计算 .env 优先级:优先当前工作目录,其次项目根目录
|
||||||
@@ -86,12 +87,29 @@ class Settings(BaseSettings):
|
|||||||
SEARCH_TIMEOUT: int = Field(240, description="单次搜索请求超时")
|
SEARCH_TIMEOUT: int = Field(240, description="单次搜索请求超时")
|
||||||
MAX_CONTENT_LENGTH: int = Field(500000, description="搜索最大内容长度")
|
MAX_CONTENT_LENGTH: int = Field(500000, description="搜索最大内容长度")
|
||||||
|
|
||||||
class Config:
|
model_config = ConfigDict(
|
||||||
env_file = ENV_FILE
|
env_file=ENV_FILE,
|
||||||
env_prefix = ""
|
env_prefix="",
|
||||||
case_sensitive = False
|
case_sensitive=False,
|
||||||
extra = "allow"
|
extra="allow"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# 创建全局配置实例
|
# 创建全局配置实例
|
||||||
settings = Settings()
|
settings = Settings()
|
||||||
|
|
||||||
|
|
||||||
|
def reload_settings() -> Settings:
|
||||||
|
"""
|
||||||
|
重新加载配置
|
||||||
|
|
||||||
|
从 .env 文件和环境变量重新加载配置,更新全局 settings 实例。
|
||||||
|
用于在运行时动态更新配置。
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Settings: 新创建的配置实例
|
||||||
|
"""
|
||||||
|
|
||||||
|
global settings
|
||||||
|
settings = Settings()
|
||||||
|
return settings
|
||||||
|
|||||||
69
tests/README.md
Normal file
69
tests/README.md
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
# ForumEngine日志解析测试
|
||||||
|
|
||||||
|
本测试套件用于测试 `ForumEngine/monitor.py` 中的日志解析功能,验证其在不同日志格式下的正确性。
|
||||||
|
|
||||||
|
## 测试数据
|
||||||
|
|
||||||
|
`forum_log_test_data.py` 包含各种日志格式的最小示例(论坛日志测试数据):
|
||||||
|
|
||||||
|
### 旧格式([HH:MM:SS])
|
||||||
|
- `OLD_FORMAT_SINGLE_LINE_JSON`: 单行JSON
|
||||||
|
- `OLD_FORMAT_MULTILINE_JSON`: 多行JSON
|
||||||
|
- `OLD_FORMAT_FIRST_SUMMARY`: 包含FirstSummaryNode的日志
|
||||||
|
- `OLD_FORMAT_REFLECTION_SUMMARY`: 包含ReflectionSummaryNode的日志
|
||||||
|
|
||||||
|
### 新格式(loguru默认格式)
|
||||||
|
- `NEW_FORMAT_SINGLE_LINE_JSON`: 单行JSON
|
||||||
|
- `NEW_FORMAT_MULTILINE_JSON`: 多行JSON
|
||||||
|
- `NEW_FORMAT_FIRST_SUMMARY`: 包含FirstSummaryNode的日志
|
||||||
|
- `NEW_FORMAT_REFLECTION_SUMMARY`: 包含ReflectionSummaryNode的日志
|
||||||
|
|
||||||
|
### 复杂示例
|
||||||
|
- `COMPLEX_JSON_WITH_UPDATED`: 包含updated_paragraph_latest_state的JSON
|
||||||
|
- `COMPLEX_JSON_WITH_PARAGRAPH`: 只有paragraph_latest_state的JSON
|
||||||
|
- `MIXED_FORMAT_LINES`: 混合格式的日志行
|
||||||
|
|
||||||
|
## 运行测试
|
||||||
|
|
||||||
|
### 使用pytest(推荐)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 安装pytest(如果还没有安装)
|
||||||
|
pip install pytest
|
||||||
|
|
||||||
|
# 运行所有测试
|
||||||
|
pytest tests/test_monitor.py -v
|
||||||
|
|
||||||
|
# 运行特定测试
|
||||||
|
pytest tests/test_monitor.py::TestLogMonitor::test_extract_json_content_new_format_multiline -v
|
||||||
|
```
|
||||||
|
|
||||||
|
### 直接运行
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python tests/test_monitor.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## 测试覆盖
|
||||||
|
|
||||||
|
测试覆盖以下函数:
|
||||||
|
|
||||||
|
1. **is_target_log_line**: 识别目标节点日志行
|
||||||
|
2. **is_json_start_line**: 识别JSON开始行
|
||||||
|
3. **is_json_end_line**: 识别JSON结束行
|
||||||
|
4. **extract_json_content**: 提取JSON内容(单行和多行)
|
||||||
|
5. **format_json_content**: 格式化JSON内容(优先提取updated_paragraph_latest_state)
|
||||||
|
6. **extract_node_content**: 提取节点内容
|
||||||
|
7. **process_lines_for_json**: 完整处理流程
|
||||||
|
8. **is_valuable_content**: 判断内容是否有价值
|
||||||
|
|
||||||
|
## 预期问题
|
||||||
|
|
||||||
|
当前代码可能无法正确处理loguru新格式,主要问题在于:
|
||||||
|
|
||||||
|
1. **时间戳移除**:`extract_json_content()` 中的正则 `r'^\[\d{2}:\d{2}:\d{2}\]\s*'` 只能匹配 `[HH:MM:SS]` 格式,无法匹配loguru的 `YYYY-MM-DD HH:mm:ss.SSS` 格式
|
||||||
|
|
||||||
|
2. **时间戳匹配**:`extract_node_content()` 中的正则 `r'\[\d{2}:\d{2}:\d{2}\]\s*(.+)'` 同样只能匹配旧格式
|
||||||
|
|
||||||
|
这些测试会帮助识别这些问题,并指导后续的代码修复。
|
||||||
|
|
||||||
4
tests/__init__.py
Normal file
4
tests/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
"""
|
||||||
|
测试模块
|
||||||
|
"""
|
||||||
|
|
||||||
157
tests/forum_log_test_data.py
Normal file
157
tests/forum_log_test_data.py
Normal file
File diff suppressed because one or more lines are too long
61
tests/run_tests.py
Normal file
61
tests/run_tests.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
"""
|
||||||
|
简单的测试运行脚本
|
||||||
|
|
||||||
|
可以直接运行此脚本来执行测试
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# 添加项目根目录到路径
|
||||||
|
project_root = Path(__file__).parent.parent
|
||||||
|
sys.path.insert(0, str(project_root))
|
||||||
|
|
||||||
|
from test_monitor import TestLogMonitor
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
"""运行所有测试"""
|
||||||
|
print("=" * 60)
|
||||||
|
print("ForumEngine 日志解析测试")
|
||||||
|
print("=" * 60)
|
||||||
|
print()
|
||||||
|
|
||||||
|
test_instance = TestLogMonitor()
|
||||||
|
test_instance.setup_method()
|
||||||
|
|
||||||
|
# 获取所有测试方法
|
||||||
|
test_methods = [method for method in dir(test_instance) if method.startswith('test_')]
|
||||||
|
|
||||||
|
passed = 0
|
||||||
|
failed = 0
|
||||||
|
|
||||||
|
for test_method_name in test_methods:
|
||||||
|
test_method = getattr(test_instance, test_method_name)
|
||||||
|
print(f"运行测试: {test_method_name}...", end=" ")
|
||||||
|
|
||||||
|
try:
|
||||||
|
test_method()
|
||||||
|
print("✓ 通过")
|
||||||
|
passed += 1
|
||||||
|
except AssertionError as e:
|
||||||
|
print(f"✗ 失败: {e}")
|
||||||
|
failed += 1
|
||||||
|
except Exception as e:
|
||||||
|
print(f"✗ 错误: {e}")
|
||||||
|
failed += 1
|
||||||
|
|
||||||
|
print()
|
||||||
|
print("=" * 60)
|
||||||
|
print(f"测试结果: {passed} 通过, {failed} 失败")
|
||||||
|
print("=" * 60)
|
||||||
|
|
||||||
|
if failed > 0:
|
||||||
|
sys.exit(1)
|
||||||
|
else:
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
||||||
340
tests/test_monitor.py
Normal file
340
tests/test_monitor.py
Normal file
@@ -0,0 +1,340 @@
|
|||||||
|
"""
|
||||||
|
测试ForumEngine/monitor.py中的日志解析函数
|
||||||
|
|
||||||
|
测试各种日志格式下的解析能力,包括:
|
||||||
|
1. 旧格式:[HH:MM:SS]
|
||||||
|
2. 新格式:loguru默认格式 (YYYY-MM-DD HH:mm:ss.SSS | LEVEL | ...)
|
||||||
|
3. 只应当接收FirstSummaryNode、ReflectionSummaryNode等SummaryNode的输出,不应当接收SearchNode的输出
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# 添加项目根目录到路径
|
||||||
|
project_root = Path(__file__).parent.parent
|
||||||
|
sys.path.insert(0, str(project_root))
|
||||||
|
|
||||||
|
from ForumEngine.monitor import LogMonitor
|
||||||
|
from tests import forum_log_test_data as test_data
|
||||||
|
|
||||||
|
|
||||||
|
class TestLogMonitor:
|
||||||
|
"""测试LogMonitor的日志解析功能"""
|
||||||
|
|
||||||
|
def setup_method(self):
|
||||||
|
"""每个测试方法前的初始化"""
|
||||||
|
self.monitor = LogMonitor(log_dir="tests/test_logs")
|
||||||
|
|
||||||
|
def test_is_target_log_line_old_format(self):
|
||||||
|
"""测试旧格式的目标节点识别"""
|
||||||
|
# 应该识别包含FirstSummaryNode的行
|
||||||
|
assert self.monitor.is_target_log_line(test_data.OLD_FORMAT_FIRST_SUMMARY) == True
|
||||||
|
# 应该识别包含ReflectionSummaryNode的行
|
||||||
|
assert self.monitor.is_target_log_line(test_data.OLD_FORMAT_REFLECTION_SUMMARY) == True
|
||||||
|
# 不应该识别非目标节点
|
||||||
|
assert self.monitor.is_target_log_line(test_data.OLD_FORMAT_NON_TARGET) == False
|
||||||
|
|
||||||
|
def test_is_target_log_line_new_format(self):
|
||||||
|
"""测试新格式的目标节点识别"""
|
||||||
|
# 应该识别包含FirstSummaryNode的行
|
||||||
|
assert self.monitor.is_target_log_line(test_data.NEW_FORMAT_FIRST_SUMMARY) == True
|
||||||
|
# 应该识别包含ReflectionSummaryNode的行
|
||||||
|
assert self.monitor.is_target_log_line(test_data.NEW_FORMAT_REFLECTION_SUMMARY) == True
|
||||||
|
# 不应该识别非目标节点
|
||||||
|
assert self.monitor.is_target_log_line(test_data.NEW_FORMAT_NON_TARGET) == False
|
||||||
|
|
||||||
|
def test_is_json_start_line_old_format(self):
|
||||||
|
"""测试旧格式的JSON开始行识别"""
|
||||||
|
assert self.monitor.is_json_start_line(test_data.OLD_FORMAT_SINGLE_LINE_JSON) == True
|
||||||
|
assert self.monitor.is_json_start_line(test_data.OLD_FORMAT_MULTILINE_JSON[0]) == True
|
||||||
|
assert self.monitor.is_json_start_line(test_data.OLD_FORMAT_NON_TARGET) == False
|
||||||
|
|
||||||
|
def test_is_json_start_line_new_format(self):
|
||||||
|
"""测试新格式的JSON开始行识别"""
|
||||||
|
assert self.monitor.is_json_start_line(test_data.NEW_FORMAT_SINGLE_LINE_JSON) == True
|
||||||
|
assert self.monitor.is_json_start_line(test_data.NEW_FORMAT_MULTILINE_JSON[0]) == True
|
||||||
|
assert self.monitor.is_json_start_line(test_data.NEW_FORMAT_NON_TARGET) == False
|
||||||
|
|
||||||
|
def test_is_json_end_line(self):
|
||||||
|
"""测试JSON结束行识别"""
|
||||||
|
assert self.monitor.is_json_end_line("}") == True
|
||||||
|
assert self.monitor.is_json_end_line("] }") == True
|
||||||
|
assert self.monitor.is_json_end_line("[17:42:31] }") == False # 需要先清理时间戳
|
||||||
|
assert self.monitor.is_json_end_line("2025-11-05 17:42:31.289 | INFO | module:function:133 - }") == False # 需要先清理时间戳
|
||||||
|
|
||||||
|
def test_extract_json_content_old_format_single_line(self):
|
||||||
|
"""测试旧格式单行JSON提取"""
|
||||||
|
lines = [test_data.OLD_FORMAT_SINGLE_LINE_JSON]
|
||||||
|
result = self.monitor.extract_json_content(lines)
|
||||||
|
assert result is not None
|
||||||
|
assert "这是首次总结内容" in result
|
||||||
|
|
||||||
|
def test_extract_json_content_new_format_single_line(self):
|
||||||
|
"""测试新格式单行JSON提取"""
|
||||||
|
lines = [test_data.NEW_FORMAT_SINGLE_LINE_JSON]
|
||||||
|
result = self.monitor.extract_json_content(lines)
|
||||||
|
assert result is not None
|
||||||
|
assert "这是首次总结内容" in result
|
||||||
|
|
||||||
|
def test_extract_json_content_old_format_multiline(self):
|
||||||
|
"""测试旧格式多行JSON提取"""
|
||||||
|
result = self.monitor.extract_json_content(test_data.OLD_FORMAT_MULTILINE_JSON)
|
||||||
|
assert result is not None
|
||||||
|
assert "多行" in result
|
||||||
|
assert "JSON内容" in result
|
||||||
|
|
||||||
|
def test_extract_json_content_new_format_multiline(self):
|
||||||
|
"""测试新格式多行JSON提取(支持loguru格式的时间戳移除)"""
|
||||||
|
result = self.monitor.extract_json_content(test_data.NEW_FORMAT_MULTILINE_JSON)
|
||||||
|
assert result is not None
|
||||||
|
assert "多行" in result
|
||||||
|
assert "JSON内容" in result
|
||||||
|
|
||||||
|
def test_extract_json_content_updated_priority(self):
|
||||||
|
"""测试updated_paragraph_latest_state优先提取"""
|
||||||
|
result = self.monitor.extract_json_content(test_data.COMPLEX_JSON_WITH_UPDATED)
|
||||||
|
assert result is not None
|
||||||
|
assert "更新版" in result
|
||||||
|
assert "核心发现" in result
|
||||||
|
|
||||||
|
def test_extract_json_content_paragraph_only(self):
|
||||||
|
"""测试只有paragraph_latest_state的情况"""
|
||||||
|
result = self.monitor.extract_json_content(test_data.COMPLEX_JSON_WITH_PARAGRAPH)
|
||||||
|
assert result is not None
|
||||||
|
assert "首次总结" in result or "核心发现" in result
|
||||||
|
|
||||||
|
def test_format_json_content(self):
|
||||||
|
"""测试JSON内容格式化"""
|
||||||
|
# 测试updated_paragraph_latest_state优先
|
||||||
|
json_obj = {
|
||||||
|
"updated_paragraph_latest_state": "更新后的内容",
|
||||||
|
"paragraph_latest_state": "首次内容"
|
||||||
|
}
|
||||||
|
result = self.monitor.format_json_content(json_obj)
|
||||||
|
assert result == "更新后的内容"
|
||||||
|
|
||||||
|
# 测试只有paragraph_latest_state
|
||||||
|
json_obj = {
|
||||||
|
"paragraph_latest_state": "首次内容"
|
||||||
|
}
|
||||||
|
result = self.monitor.format_json_content(json_obj)
|
||||||
|
assert result == "首次内容"
|
||||||
|
|
||||||
|
# 测试都没有的情况
|
||||||
|
json_obj = {"other_field": "其他内容"}
|
||||||
|
result = self.monitor.format_json_content(json_obj)
|
||||||
|
assert "清理后的输出" in result
|
||||||
|
|
||||||
|
def test_extract_node_content_old_format(self):
|
||||||
|
"""测试旧格式的节点内容提取"""
|
||||||
|
line = "[17:42:31] [INSIGHT] [FirstSummaryNode] 清理后的输出: 这是测试内容"
|
||||||
|
result = self.monitor.extract_node_content(line)
|
||||||
|
assert result is not None
|
||||||
|
assert "测试内容" in result
|
||||||
|
|
||||||
|
def test_extract_node_content_new_format(self):
|
||||||
|
"""测试新格式的节点内容提取"""
|
||||||
|
line = "2025-11-05 17:42:31.287 | INFO | InsightEngine.nodes.summary_node:process_output:131 - FirstSummaryNode 清理后的输出: 这是测试内容"
|
||||||
|
result = self.monitor.extract_node_content(line)
|
||||||
|
assert result is not None
|
||||||
|
assert "测试内容" in result
|
||||||
|
|
||||||
|
def test_process_lines_for_json_old_format(self):
|
||||||
|
"""测试旧格式的完整处理流程"""
|
||||||
|
lines = [
|
||||||
|
test_data.OLD_FORMAT_NON_TARGET, # 应该被忽略
|
||||||
|
test_data.OLD_FORMAT_MULTILINE_JSON[0],
|
||||||
|
test_data.OLD_FORMAT_MULTILINE_JSON[1],
|
||||||
|
test_data.OLD_FORMAT_MULTILINE_JSON[2],
|
||||||
|
]
|
||||||
|
result = self.monitor.process_lines_for_json(lines, "insight")
|
||||||
|
assert len(result) > 0
|
||||||
|
assert any("多行" in content for content in result)
|
||||||
|
|
||||||
|
def test_process_lines_for_json_new_format(self):
|
||||||
|
"""测试新格式的完整处理流程"""
|
||||||
|
lines = [
|
||||||
|
test_data.NEW_FORMAT_NON_TARGET, # 应该被忽略
|
||||||
|
test_data.NEW_FORMAT_MULTILINE_JSON[0],
|
||||||
|
test_data.NEW_FORMAT_MULTILINE_JSON[1],
|
||||||
|
test_data.NEW_FORMAT_MULTILINE_JSON[2],
|
||||||
|
]
|
||||||
|
result = self.monitor.process_lines_for_json(lines, "insight")
|
||||||
|
assert len(result) > 0
|
||||||
|
assert any("多行" in content for content in result)
|
||||||
|
assert any("JSON内容" in content for content in result)
|
||||||
|
|
||||||
|
def test_process_lines_for_json_mixed_format(self):
|
||||||
|
"""测试混合格式的处理"""
|
||||||
|
result = self.monitor.process_lines_for_json(test_data.MIXED_FORMAT_LINES, "insight")
|
||||||
|
assert len(result) > 0
|
||||||
|
assert any("混合格式内容" in content for content in result)
|
||||||
|
|
||||||
|
def test_is_valuable_content(self):
|
||||||
|
"""测试有价值内容的判断"""
|
||||||
|
# 包含"清理后的输出"应该是有价值的
|
||||||
|
assert self.monitor.is_valuable_content(test_data.OLD_FORMAT_SINGLE_LINE_JSON) == True
|
||||||
|
|
||||||
|
# 排除短小提示信息
|
||||||
|
assert self.monitor.is_valuable_content("JSON解析成功") == False
|
||||||
|
assert self.monitor.is_valuable_content("成功生成") == False
|
||||||
|
|
||||||
|
# 空行应该被过滤
|
||||||
|
assert self.monitor.is_valuable_content("") == False
|
||||||
|
|
||||||
|
def test_extract_json_content_real_query_engine(self):
|
||||||
|
"""测试QueryEngine实际生产环境日志提取"""
|
||||||
|
result = self.monitor.extract_json_content(test_data.REAL_QUERY_ENGINE_REFLECTION)
|
||||||
|
assert result is not None
|
||||||
|
assert "洛阳栾川钼业集团" in result
|
||||||
|
assert "CMOC" in result
|
||||||
|
assert "updated_paragraph_latest_state" not in result # 应该已经提取内容,不包含字段名
|
||||||
|
|
||||||
|
def test_extract_json_content_real_insight_engine(self):
|
||||||
|
"""测试InsightEngine实际生产环境日志提取(包含标识行)"""
|
||||||
|
# 先测试能否识别标识行
|
||||||
|
assert self.monitor.is_target_log_line(test_data.REAL_INSIGHT_ENGINE_REFLECTION[0]) == True # 包含"正在生成反思总结"
|
||||||
|
assert self.monitor.is_target_log_line(test_data.REAL_INSIGHT_ENGINE_REFLECTION[1]) == True # 包含nodes.summary_node
|
||||||
|
|
||||||
|
# 测试JSON提取(从第二行开始,因为第一行是标识行)
|
||||||
|
json_lines = test_data.REAL_INSIGHT_ENGINE_REFLECTION[1:] # 跳过标识行
|
||||||
|
result = self.monitor.extract_json_content(json_lines)
|
||||||
|
assert result is not None
|
||||||
|
assert "核心发现" in result
|
||||||
|
assert "更新版" in result
|
||||||
|
assert "洛阳钼业2025年第三季度" in result
|
||||||
|
|
||||||
|
def test_extract_json_content_real_media_engine(self):
|
||||||
|
"""测试MediaEngine实际生产环境日志提取(单行JSON)"""
|
||||||
|
# MediaEngine是单行JSON格式,需要先分割成行
|
||||||
|
lines = test_data.REAL_MEDIA_ENGINE_REFLECTION.split('\n')
|
||||||
|
|
||||||
|
# 测试能否识别标识行
|
||||||
|
assert self.monitor.is_target_log_line(lines[0]) == True # 包含"正在生成反思总结"
|
||||||
|
assert self.monitor.is_target_log_line(lines[1]) == True # 包含nodes.summary_node和"清理后的输出"
|
||||||
|
|
||||||
|
# 测试JSON提取(从包含JSON的行开始)
|
||||||
|
json_line = lines[1] # 第二行包含完整的单行JSON
|
||||||
|
result = self.monitor.extract_json_content([json_line])
|
||||||
|
assert result is not None
|
||||||
|
assert "综合信息概览" in result
|
||||||
|
assert "洛阳钼业" in result
|
||||||
|
assert "updated_paragraph_latest_state" not in result # 应该已经提取内容
|
||||||
|
|
||||||
|
def test_process_lines_for_json_real_query_engine(self):
|
||||||
|
"""测试QueryEngine实际日志的完整处理流程"""
|
||||||
|
result = self.monitor.process_lines_for_json(test_data.REAL_QUERY_ENGINE_REFLECTION, "query")
|
||||||
|
assert len(result) > 0
|
||||||
|
assert any("洛阳栾川钼业集团" in content for content in result)
|
||||||
|
|
||||||
|
def test_process_lines_for_json_real_insight_engine(self):
|
||||||
|
"""测试InsightEngine实际日志的完整处理流程(包含标识行)"""
|
||||||
|
result = self.monitor.process_lines_for_json(test_data.REAL_INSIGHT_ENGINE_REFLECTION, "insight")
|
||||||
|
assert len(result) > 0
|
||||||
|
assert any("核心发现" in content for content in result)
|
||||||
|
assert any("更新版" in content for content in result)
|
||||||
|
|
||||||
|
def test_process_lines_for_json_real_media_engine(self):
|
||||||
|
"""测试MediaEngine实际日志的完整处理流程(单行JSON)"""
|
||||||
|
# 将单行字符串分割成多行
|
||||||
|
lines = test_data.REAL_MEDIA_ENGINE_REFLECTION.split('\n')
|
||||||
|
result = self.monitor.process_lines_for_json(lines, "media")
|
||||||
|
assert len(result) > 0
|
||||||
|
assert any("综合信息概览" in content for content in result)
|
||||||
|
assert any("洛阳钼业" in content for content in result)
|
||||||
|
|
||||||
|
def test_filter_search_node_output(self):
|
||||||
|
"""测试过滤SearchNode的输出(重要:SearchNode不应进入论坛)"""
|
||||||
|
# SearchNode的输出包含"清理后的输出: {",但不包含目标节点模式
|
||||||
|
search_lines = test_data.SEARCH_NODE_FIRST_SEARCH
|
||||||
|
result = self.monitor.process_lines_for_json(search_lines, "insight")
|
||||||
|
# SearchNode的输出应该被过滤,不应该被捕获
|
||||||
|
assert len(result) == 0
|
||||||
|
|
||||||
|
def test_filter_search_node_output_single_line(self):
|
||||||
|
"""测试过滤SearchNode的单行JSON输出"""
|
||||||
|
# SearchNode的单行JSON格式
|
||||||
|
search_line = test_data.SEARCH_NODE_REFLECTION_SEARCH
|
||||||
|
result = self.monitor.process_lines_for_json([search_line], "insight")
|
||||||
|
# SearchNode的输出应该被过滤
|
||||||
|
assert len(result) == 0
|
||||||
|
|
||||||
|
def test_search_node_vs_summary_node_mixed(self):
|
||||||
|
"""测试混合场景:SearchNode和SummaryNode同时存在,只捕获SummaryNode"""
|
||||||
|
lines = [
|
||||||
|
# SearchNode输出(应该被过滤)
|
||||||
|
"[11:16:35] 2025-11-06 11:16:35.567 | INFO | InsightEngine.nodes.search_node:process_output:97 - 清理后的输出: {",
|
||||||
|
"[11:16:35] \"search_query\": \"测试查询\"",
|
||||||
|
"[11:16:35] }",
|
||||||
|
# SummaryNode输出(应该被捕获)
|
||||||
|
"[11:17:05] 2025-11-06 11:17:05.547 | INFO | InsightEngine.nodes.summary_node:process_output:131 - 清理后的输出: {",
|
||||||
|
"[11:17:05] \"paragraph_latest_state\": \"这是总结内容\"",
|
||||||
|
"[11:17:05] }",
|
||||||
|
]
|
||||||
|
result = self.monitor.process_lines_for_json(lines, "insight")
|
||||||
|
# 应该只捕获SummaryNode的输出,不包含SearchNode的输出
|
||||||
|
assert len(result) > 0
|
||||||
|
assert any("总结内容" in content for content in result)
|
||||||
|
# 确保不包含搜索查询内容
|
||||||
|
assert not any("search_query" in content for content in result)
|
||||||
|
assert not any("测试查询" in content for content in result)
|
||||||
|
|
||||||
|
def test_filter_error_logs_from_summary_node(self):
|
||||||
|
"""测试过滤SummaryNode的错误日志(重要:错误日志不应进入论坛)"""
|
||||||
|
# JSON解析失败错误日志
|
||||||
|
assert self.monitor.is_target_log_line(test_data.SUMMARY_NODE_JSON_ERROR) == False
|
||||||
|
|
||||||
|
# JSON修复失败错误日志
|
||||||
|
assert self.monitor.is_target_log_line(test_data.SUMMARY_NODE_JSON_FIX_ERROR) == False
|
||||||
|
|
||||||
|
# ERROR级别日志
|
||||||
|
assert self.monitor.is_target_log_line(test_data.SUMMARY_NODE_ERROR_LOG) == False
|
||||||
|
|
||||||
|
# Traceback错误日志
|
||||||
|
for line in test_data.SUMMARY_NODE_TRACEBACK.split('\n'):
|
||||||
|
assert self.monitor.is_target_log_line(line) == False
|
||||||
|
|
||||||
|
def test_error_logs_not_captured(self):
|
||||||
|
"""测试错误日志不会被捕获到论坛"""
|
||||||
|
error_lines = [
|
||||||
|
test_data.SUMMARY_NODE_JSON_ERROR,
|
||||||
|
test_data.SUMMARY_NODE_JSON_FIX_ERROR,
|
||||||
|
test_data.SUMMARY_NODE_ERROR_LOG,
|
||||||
|
]
|
||||||
|
|
||||||
|
for line in error_lines:
|
||||||
|
result = self.monitor.process_lines_for_json([line], "media")
|
||||||
|
# 错误日志不应该被捕获
|
||||||
|
assert len(result) == 0
|
||||||
|
|
||||||
|
def test_mixed_valid_and_error_logs(self):
|
||||||
|
"""测试混合场景:有效日志和错误日志同时存在,只捕获有效日志"""
|
||||||
|
lines = [
|
||||||
|
# 错误日志(应该被过滤)
|
||||||
|
test_data.SUMMARY_NODE_JSON_ERROR,
|
||||||
|
test_data.SUMMARY_NODE_JSON_FIX_ERROR,
|
||||||
|
# 有效SummaryNode输出(应该被捕获)
|
||||||
|
"[11:55:31] 2025-11-06 11:55:31.762 | INFO | MediaEngine.nodes.summary_node:process_output:134 - 清理后的输出: {",
|
||||||
|
"[11:55:31] \"paragraph_latest_state\": \"这是有效的总结内容\"",
|
||||||
|
"[11:55:31] }",
|
||||||
|
]
|
||||||
|
result = self.monitor.process_lines_for_json(lines, "media")
|
||||||
|
# 应该只捕获有效日志,不包含错误日志
|
||||||
|
assert len(result) > 0
|
||||||
|
assert any("有效的总结内容" in content for content in result)
|
||||||
|
# 确保不包含错误信息
|
||||||
|
assert not any("JSON解析失败" in content for content in result)
|
||||||
|
assert not any("JSON修复失败" in content for content in result)
|
||||||
|
|
||||||
|
|
||||||
|
def run_tests() -> int:
    """Run this module's tests under pytest and return the exit code.

    Fix: the original discarded ``pytest.main``'s return value, so running
    this file as a script always exited 0 even when tests failed. The exit
    status is now returned and propagated by the ``__main__`` guard, so
    shells/CI see failures.

    Returns:
        pytest's integer exit status (0 on success).
    """
    import pytest

    # -v: verbose per-test output.
    return pytest.main([__file__, "-v"])


if __name__ == "__main__":
    # Propagate pytest's exit status as the process exit code.
    raise SystemExit(run_tests())
Reference in New Issue
Block a user