summary
This commit is contained in:
@@ -34,10 +34,13 @@ T = TypeVar("T")
|
||||
|
||||
class MineruExecutionError(Exception):
    """Error raised when a MinerU command fails.

    Attributes:
        return_code: The return code reported for the failed command.
        error_msg: The error output, stored exactly as supplied. Handlers in
            this file treat it as either a string or a list of lines.
    """

    def __init__(self, return_code, error_msg):
        """Store the failure details and build the exception message.

        Args:
            return_code: Return code of the failed command.
            error_msg: Error text, or a list/tuple of error lines.
        """
        self.return_code = return_code
        # Keep the caller's value untouched: handlers check
        # ``isinstance(e.error_msg, list)`` before joining it themselves.
        self.error_msg = error_msg

        # Render a sequence of lines the same way handlers do (newline-joined)
        # so the message does not show a raw Python list repr.
        if isinstance(error_msg, (list, tuple)):
            rendered_msg = "\n".join(str(line) for line in error_msg)
        else:
            rendered_msg = error_msg

        super().__init__(
            f"Mineru command failed with return code {return_code}: {rendered_msg}"
        )
|
||||
|
||||
|
||||
class Parser:
|
||||
@@ -739,7 +742,7 @@ class MineruParser(Parser):
|
||||
else:
|
||||
logging.info("[MinerU] Command executed successfully")
|
||||
|
||||
except MineruExecutionError as e:
|
||||
except MineruExecutionError:
|
||||
raise
|
||||
except subprocess.CalledProcessError as e:
|
||||
logging.error(f"Error running mineru subprocess command: {e}")
|
||||
@@ -879,7 +882,7 @@ class MineruParser(Parser):
|
||||
)
|
||||
return content_list
|
||||
|
||||
except MineruExecutionError as e:
|
||||
except MineruExecutionError:
|
||||
raise
|
||||
except Exception as e:
|
||||
logging.error(f"Error in parse_pdf: {str(e)}")
|
||||
@@ -1019,7 +1022,7 @@ class MineruParser(Parser):
|
||||
)
|
||||
return content_list
|
||||
|
||||
except MineruExecutionError as e:
|
||||
except MineruExecutionError:
|
||||
raise
|
||||
|
||||
finally:
|
||||
|
||||
@@ -1423,9 +1423,7 @@ class ProcessorMixin:
|
||||
if parser:
|
||||
self.config.parser = parser
|
||||
|
||||
current_doc_status = await self.lightrag.doc_status.get_by_id(
|
||||
doc_pre_id
|
||||
)
|
||||
current_doc_status = await self.lightrag.doc_status.get_by_id(doc_pre_id)
|
||||
|
||||
try:
|
||||
# Ensure LightRAG is initialized
|
||||
@@ -1489,14 +1487,20 @@ class ProcessorMixin:
|
||||
pipeline_status["history_messages"].append("Now is not allowed to scan")
|
||||
|
||||
await self.lightrag.doc_status.upsert(
|
||||
{doc_pre_id: {**current_doc_status, "status": DocStatus.HANDLING, "error_msg": ""}}
|
||||
{
|
||||
doc_pre_id: {
|
||||
**current_doc_status,
|
||||
"status": DocStatus.HANDLING,
|
||||
"error_msg": "",
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
content_list = []
|
||||
content_based_doc_id = ''
|
||||
content_based_doc_id = ""
|
||||
|
||||
try:
|
||||
# Step 1: Parse document
|
||||
# Step 1: Parse document
|
||||
content_list, content_based_doc_id = await self.parse_document(
|
||||
file_path, output_dir, parse_method, display_stats, **kwargs
|
||||
)
|
||||
@@ -1505,13 +1509,27 @@ class ProcessorMixin:
|
||||
if isinstance(e.error_msg, list):
|
||||
error_message = "\n".join(e.error_msg)
|
||||
await self.lightrag.doc_status.upsert(
|
||||
{doc_pre_id: {**current_doc_status, "status": DocStatus.FAILED, "error_msg": error_message}}
|
||||
{
|
||||
doc_pre_id: {
|
||||
**current_doc_status,
|
||||
"status": DocStatus.FAILED,
|
||||
"error_msg": error_message,
|
||||
}
|
||||
}
|
||||
)
|
||||
self.logger.info(
|
||||
f"Error processing document {file_path}: MineruExecutionError"
|
||||
)
|
||||
self.logger.info(f"Error processing document {file_path}: MineruExecutionError")
|
||||
return False
|
||||
except Exception as e:
|
||||
await self.lightrag.doc_status.upsert(
|
||||
{doc_pre_id: {**current_doc_status, "status": DocStatus.FAILED, "error_msg": str(e)}}
|
||||
{
|
||||
doc_pre_id: {
|
||||
**current_doc_status,
|
||||
"status": DocStatus.FAILED,
|
||||
"error_msg": str(e),
|
||||
}
|
||||
}
|
||||
)
|
||||
self.logger.info(f"Error processing document {file_path}: {str(e)}")
|
||||
return False
|
||||
|
||||
Reference in New Issue
Block a user