From e70cf8d38ab693c30a9f668b7b3200db00ac6baa Mon Sep 17 00:00:00 2001 From: Yasiru Rangana Date: Sun, 19 Oct 2025 23:36:54 +1100 Subject: [PATCH] fix: use DocStatus.PROCESSED enum instead of hardcoded uppercase string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem Status comparisons used the hardcoded uppercase string "PROCESSED", which never matched the lowercase value "processed" stored by LightRAG's DocStatus enum. This caused text_processed to always evaluate to False even when documents were successfully processed. **Evidence:** - LightRAG's DocStatus enum (lightrag/base.py): PROCESSED = "processed" - RAGAnything's DocStatus enum (raganything/base.py:11): PROCESSED = "processed" - Current code checked: doc_status == "PROCESSED" (uppercase) ❌ - Actual value from LightRAG: "processed" (lowercase) ✓ **Impact:** - is_document_fully_processed() always returned False - get_document_processing_status() showed text_processed as False - Multimodal processing logic incorrectly detected status ## Solution Replace hardcoded string literals with DocStatus.PROCESSED enum constant (already imported at line 14). **Changes:** - Line 481: doc_status == "PROCESSED" → DocStatus.PROCESSED - Line 486: doc_status == "PROCESSED" → DocStatus.PROCESSED - Line 1355: doc_status.get("status") == "PROCESSED" → DocStatus.PROCESSED - Line 1387: doc_status.get("status") == "PROCESSED" → DocStatus.PROCESSED - Updated comments (lines 463, 478) for consistency **Benefits:** 1. ✅ Fixes case mismatch bug - the enum member's value is the lowercase string "processed", so the comparison now matches what LightRAG stores 2. ✅ Type-safe - IDE/linter catches errors 3. ✅ Maintainable - single source of truth (no magic strings) 4. ✅ Future-proof - if enum changes, code updates automatically 5. 
✅ Follows Python best practices **Compatibility:** - Works with LightRAG v1.4.9.2+ - Compatible with LightRAG v1.4.9.3 (which added PREPROCESSED status) - No breaking changes **References:** - LightRAG DocStatus: lightrag/base.py - RAGAnything DocStatus: raganything/base.py:11 - Related: LightRAG v1.4.9.3 added PREPROCESSED = "multimodal_processed" --- raganything/processor.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/raganything/processor.py b/raganything/processor.py index 9954c47..ac851d3 100644 --- a/raganything/processor.py +++ b/raganything/processor.py @@ -460,7 +460,7 @@ class ProcessorMixin: self.logger.debug("No multimodal content to process") return - # Check multimodal processing status - handle LightRAG's early "PROCESSED" marking + # Check multimodal processing status - handle LightRAG's early DocStatus.PROCESSED marking try: existing_doc_status = await self.lightrag.doc_status.get_by_id(doc_id) if existing_doc_status: @@ -475,15 +475,15 @@ class ProcessorMixin: ) return - # Even if status is "PROCESSED" (text processing done), + # Even if status is DocStatus.PROCESSED (text processing done), # we still need to process multimodal content if not yet done doc_status = existing_doc_status.get("status", "") - if doc_status == "PROCESSED" and not multimodal_processed: + if doc_status == DocStatus.PROCESSED and not multimodal_processed: self.logger.info( f"Document {doc_id} text processing is complete, but multimodal content still needs processing" ) # Continue with multimodal processing - elif doc_status == "PROCESSED" and multimodal_processed: + elif doc_status == DocStatus.PROCESSED and multimodal_processed: self.logger.info( f"Document {doc_id} is fully processed (text + multimodal)" ) @@ -1352,7 +1352,7 @@ class ProcessorMixin: if not doc_status: return False - text_processed = doc_status.get("status") == "PROCESSED" + text_processed = doc_status.get("status") == DocStatus.PROCESSED multimodal_processed = 
doc_status.get("multimodal_processed", False) return text_processed and multimodal_processed @@ -1384,7 +1384,7 @@ class ProcessorMixin: "chunks_count": 0, } - text_processed = doc_status.get("status") == "PROCESSED" + text_processed = doc_status.get("status") == DocStatus.PROCESSED multimodal_processed = doc_status.get("multimodal_processed", False) fully_processed = text_processed and multimodal_processed