From e70cf8d38ab693c30a9f668b7b3200db00ac6baa Mon Sep 17 00:00:00 2001
From: Yasiru Rangana <yasiru@formitize.com>
Date: Sun, 19 Oct 2025 23:36:54 +1100
Subject: [PATCH] fix: use DocStatus.PROCESSED enum instead of hardcoded
 uppercase string
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Problem

Status comparisons used the hardcoded uppercase string "PROCESSED", which
never matched the lowercase value "processed" stored by LightRAG's DocStatus
enum. As a result, text_processed always evaluated to False, even when
documents had been processed successfully.

**Evidence:**
- LightRAG's DocStatus enum (lightrag/base.py): PROCESSED = "processed"
- RAGAnything's DocStatus enum (raganything/base.py:11): PROCESSED = "processed"
- Current code checked: doc_status == "PROCESSED" (uppercase) ❌
- Actual value from LightRAG: "processed" (lowercase) ✓

**Impact:**
- is_document_fully_processed() always returned False
- get_document_processing_status() showed text_processed as False
- Multimodal processing logic incorrectly detected status

## Solution

Replace hardcoded string literals with DocStatus.PROCESSED enum constant
(already imported at line 14).

**Changes:**
- Line 481: doc_status == "PROCESSED" → doc_status == DocStatus.PROCESSED
- Line 486: doc_status == "PROCESSED" → doc_status == DocStatus.PROCESSED
- Line 1355: doc_status.get("status") == "PROCESSED" → doc_status.get("status") == DocStatus.PROCESSED
- Line 1387: doc_status.get("status") == "PROCESSED" → doc_status.get("status") == DocStatus.PROCESSED
- Updated comments (lines 463, 478) for consistency

**Benefits:**
1. ✅ Fixes case mismatch bug - the enum member's value is the lowercase "processed"
2. ✅ Type-safe - IDE/linter catches errors
3. ✅ Maintainable - single source of truth (no magic strings)
4. ✅ Future-proof - if enum changes, code updates automatically
5. ✅ Follows Python best practices

**Compatibility:**
- Works with LightRAG v1.4.9.2+
- Compatible with LightRAG v1.4.9.3 (which added PREPROCESSED status)
- No breaking changes

**References:**
- LightRAG DocStatus: lightrag/base.py
- RAGAnything DocStatus: raganything/base.py:11
- Related: LightRAG v1.4.9.3 added PREPROCESSED = "multimodal_processed"
---
 raganything/processor.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/raganything/processor.py b/raganything/processor.py
index 9954c47..ac851d3 100644
--- a/raganything/processor.py
+++ b/raganything/processor.py
@@ -460,7 +460,7 @@ class ProcessorMixin:
             self.logger.debug("No multimodal content to process")
             return
 
-        # Check multimodal processing status - handle LightRAG's early "PROCESSED" marking
+        # Check multimodal processing status - handle LightRAG's early DocStatus.PROCESSED marking
         try:
             existing_doc_status = await self.lightrag.doc_status.get_by_id(doc_id)
             if existing_doc_status:
@@ -475,15 +475,15 @@ class ProcessorMixin:
                     )
                     return
 
-                # Even if status is "PROCESSED" (text processing done),
+                # Even if status is DocStatus.PROCESSED (text processing done),
                 # we still need to process multimodal content if not yet done
                 doc_status = existing_doc_status.get("status", "")
-                if doc_status == "PROCESSED" and not multimodal_processed:
+                if doc_status == DocStatus.PROCESSED and not multimodal_processed:
                     self.logger.info(
                         f"Document {doc_id} text processing is complete, but multimodal content still needs processing"
                     )
                     # Continue with multimodal processing
-                elif doc_status == "PROCESSED" and multimodal_processed:
+                elif doc_status == DocStatus.PROCESSED and multimodal_processed:
                     self.logger.info(
                         f"Document {doc_id} is fully processed (text + multimodal)"
                     )
@@ -1352,7 +1352,7 @@ class ProcessorMixin:
             if not doc_status:
                 return False
 
-            text_processed = doc_status.get("status") == "PROCESSED"
+            text_processed = doc_status.get("status") == DocStatus.PROCESSED
             multimodal_processed = doc_status.get("multimodal_processed", False)
 
             return text_processed and multimodal_processed
@@ -1384,7 +1384,7 @@ class ProcessorMixin:
                     "chunks_count": 0,
                 }
 
-            text_processed = doc_status.get("status") == "PROCESSED"
+            text_processed = doc_status.get("status") == DocStatus.PROCESSED
             multimodal_processed = doc_status.get("multimodal_processed", False)
             fully_processed = text_processed and multimodal_processed