This commit is contained in:
hzywhite
2025-09-05 14:57:00 +08:00
parent 9872b86d13
commit de2824f816
2 changed files with 34 additions and 13 deletions

View File

@@ -34,10 +34,13 @@ T = TypeVar("T")
class MineruExecutionError(Exception):
    """Raised when a MinerU command fails.

    The exception message has the form
    ``Mineru command failed with return code <return_code>: <error_msg>``.

    Args:
        return_code: Return code reported for the failed command.
        error_msg: Error detail for the failure. It is stored exactly as
            given and interpolated into the exception message with normal
            f-string formatting (so a non-string value appears as its
            ``str()`` form).

    Attributes:
        return_code: The ``return_code`` passed to the constructor.
        error_msg: The ``error_msg`` passed to the constructor, unmodified.
    """

    def __init__(self, return_code, error_msg):
        # Keep the raw values on the instance so handlers can inspect them
        # without parsing the formatted message.
        self.return_code = return_code
        self.error_msg = error_msg
        # Initialize the base Exception exactly once with the formatted text.
        super().__init__(
            f"Mineru command failed with return code {return_code}: {error_msg}"
        )
class Parser:
@@ -739,7 +742,7 @@ class MineruParser(Parser):
else:
logging.info("[MinerU] Command executed successfully")
except MineruExecutionError as e:
except MineruExecutionError:
raise
except subprocess.CalledProcessError as e:
logging.error(f"Error running mineru subprocess command: {e}")
@@ -879,7 +882,7 @@ class MineruParser(Parser):
)
return content_list
except MineruExecutionError as e:
except MineruExecutionError:
raise
except Exception as e:
logging.error(f"Error in parse_pdf: {str(e)}")
@@ -1019,7 +1022,7 @@ class MineruParser(Parser):
)
return content_list
except MineruExecutionError as e:
except MineruExecutionError:
raise
finally:

View File

@@ -1423,9 +1423,7 @@ class ProcessorMixin:
if parser:
self.config.parser = parser
current_doc_status = await self.lightrag.doc_status.get_by_id(
doc_pre_id
)
current_doc_status = await self.lightrag.doc_status.get_by_id(doc_pre_id)
try:
# Ensure LightRAG is initialized
@@ -1489,14 +1487,20 @@ class ProcessorMixin:
pipeline_status["history_messages"].append("Now is not allowed to scan")
await self.lightrag.doc_status.upsert(
{doc_pre_id: {**current_doc_status, "status": DocStatus.HANDLING, "error_msg": ""}}
{
doc_pre_id: {
**current_doc_status,
"status": DocStatus.HANDLING,
"error_msg": "",
}
}
)
content_list = []
content_based_doc_id = ''
content_based_doc_id = ""
try:
# Step 1: Parse document
# Step 1: Parse document
content_list, content_based_doc_id = await self.parse_document(
file_path, output_dir, parse_method, display_stats, **kwargs
)
@@ -1505,13 +1509,27 @@ class ProcessorMixin:
if isinstance(e.error_msg, list):
error_message = "\n".join(e.error_msg)
await self.lightrag.doc_status.upsert(
{doc_pre_id: {**current_doc_status, "status": DocStatus.FAILED, "error_msg": error_message}}
{
doc_pre_id: {
**current_doc_status,
"status": DocStatus.FAILED,
"error_msg": error_message,
}
}
)
self.logger.info(
f"Error processing document {file_path}: MineruExecutionError"
)
self.logger.info(f"Error processing document {file_path}: MineruExecutionError")
return False
except Exception as e:
await self.lightrag.doc_status.upsert(
{doc_pre_id: {**current_doc_status, "status": DocStatus.FAILED, "error_msg": str(e)}}
{
doc_pre_id: {
**current_doc_status,
"status": DocStatus.FAILED,
"error_msg": str(e),
}
}
)
self.logger.info(f"Error processing document {file_path}: {str(e)}")
return False