zrguo
2025-09-22 10:42:35 +08:00
parent 8e0e05d497
commit 1d48f24b4a
4 changed files with 104 additions and 4863 deletions

View File

@@ -36,13 +36,18 @@ from raganything import RAGAnything, RAGAnythingConfig
from lightrag.utils import EmbeddingFunc
from lightrag.llm.openai import openai_complete_if_cache

-LM_BASE_URL = os.getenv('LLM_BINDING_HOST', 'http://localhost:1234/v1')
-LM_API_KEY = os.getenv('LLM_BINDING_API_KEY', 'lm-studio')
-LM_MODEL_NAME = os.getenv('LLM_MODEL', 'openai/gpt-oss-20b')
-LM_EMBED_MODEL = os.getenv('EMBEDDING_MODEL', 'text-embedding-nomic-embed-text-v1.5')
+LM_BASE_URL = os.getenv("LLM_BINDING_HOST", "http://localhost:1234/v1")
+LM_API_KEY = os.getenv("LLM_BINDING_API_KEY", "lm-studio")
+LM_MODEL_NAME = os.getenv("LLM_MODEL", "openai/gpt-oss-20b")
+LM_EMBED_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-nomic-embed-text-v1.5")


-async def lmstudio_llm_model_func(prompt: str, system_prompt: Optional[str] = None,
-                                  history_messages: List[Dict] = None, **kwargs) -> str:
+async def lmstudio_llm_model_func(
+    prompt: str,
+    system_prompt: Optional[str] = None,
+    history_messages: List[Dict] = None,
+    **kwargs,
+) -> str:
    """Top-level LLM function for LightRAG (pickle-safe)."""
    return await openai_complete_if_cache(
        model=LM_MODEL_NAME,
@@ -58,6 +63,7 @@ async def lmstudio_llm_model_func(prompt: str, system_prompt: Optional[str] = No
async def lmstudio_embedding_async(texts: List[str]) -> List[List[float]]:
    """Top-level embedding function for LightRAG (pickle-safe)."""
    from lightrag.llm.openai import openai_embed

+
    embeddings = await openai_embed(
        texts=texts,
        model=LM_EMBED_MODEL,
@@ -66,17 +72,19 @@ async def lmstudio_embedding_async(texts: List[str]) -> List[List[float]]:
    )
    return embeddings.tolist()


class LMStudioRAGIntegration:
    """Integration class for LM Studio with RAG-Anything."""

    def __init__(self):
        # LM Studio configuration using standard LLM_BINDING variables
-        self.base_url = os.getenv('LLM_BINDING_HOST', 'http://localhost:1234/v1')
-        self.api_key = os.getenv('LLM_BINDING_API_KEY', 'lm-studio')
-        self.model_name = os.getenv('LLM_MODEL', 'openai/gpt-oss-20b')
-        self.embedding_model = os.getenv('EMBEDDING_MODEL', 'text-embedding-nomic-embed-text-v1.5')
+        self.base_url = os.getenv("LLM_BINDING_HOST", "http://localhost:1234/v1")
+        self.api_key = os.getenv("LLM_BINDING_API_KEY", "lm-studio")
+        self.model_name = os.getenv("LLM_MODEL", "openai/gpt-oss-20b")
+        self.embedding_model = os.getenv(
+            "EMBEDDING_MODEL", "text-embedding-nomic-embed-text-v1.5"
+        )

        # RAG-Anything configuration
        # Use a fresh working directory each run to avoid legacy doc_status schema conflicts
        self.config = RAGAnythingConfig(
@@ -88,7 +96,7 @@ class LMStudioRAGIntegration:
            enable_equation_processing=True,
        )

        print(f"📁 Using working_dir: {self.config.working_dir}")
        self.rag = None

    async def test_connection(self) -> bool:
@@ -98,16 +106,16 @@ class LMStudioRAGIntegration:
            client = AsyncOpenAI(base_url=self.base_url, api_key=self.api_key)
            models = await client.models.list()

            print(f"✅ Connected successfully! Found {len(models.data)} models")

            # Show available models
            print("📊 Available models:")
            for i, model in enumerate(models.data[:5]):
                marker = "🎯" if model.id == self.model_name else "  "
                print(f"{marker} {i+1}. {model.id}")

            if len(models.data) > 5:
                print(f"    ... and {len(models.data) - 5} more models")

            return True
        except Exception as e:
            print(f"❌ Connection failed: {str(e)}")
@@ -122,7 +130,7 @@ class LMStudioRAGIntegration:
                await client.close()
            except Exception:
                pass

    async def test_chat_completion(self) -> bool:
        """Test basic chat functionality."""
        try:
@@ -132,14 +140,17 @@ class LMStudioRAGIntegration:
                model=self.model_name,
                messages=[
                    {"role": "system", "content": "You are a helpful AI assistant."},
-                    {"role": "user", "content": "Hello! Please confirm you're working and tell me your capabilities."}
+                    {
+                        "role": "user",
+                        "content": "Hello! Please confirm you're working and tell me your capabilities.",
+                    },
                ],
                max_tokens=100,
-                temperature=0.7
+                temperature=0.7,
            )

            result = response.choices[0].message.content.strip()
-            print(f"✅ Chat test successful!")
+            print("✅ Chat test successful!")
            print(f"Response: {result}")

            return True
        except Exception as e:
@@ -150,9 +161,9 @@ class LMStudioRAGIntegration:
                await client.close()
            except Exception:
                pass

    # Deprecated factory helpers removed to reduce redundancy

    def embedding_func_factory(self):
        """Create a completely serializable embedding function."""
        return EmbeddingFunc(
@@ -160,11 +171,11 @@ class LMStudioRAGIntegration:
            max_token_size=8192,  # nomic-embed-text-v1.5 context length
            func=lmstudio_embedding_async,
        )

    async def initialize_rag(self):
        """Initialize RAG-Anything with LM Studio functions."""
        print("Initializing RAG-Anything with LM Studio...")

        try:
            self.rag = RAGAnything(
                config=self.config,
@@ -176,6 +187,7 @@ class LMStudioRAGIntegration:
            # Older LightRAG versions may not accept this extra field in DocProcessingStatus
            async def _noop_mark_multimodal(doc_id: str):
                return None
+
            self.rag._mark_multimodal_processing_complete = _noop_mark_multimodal

            print("✅ RAG-Anything initialized successfully!")
@@ -183,38 +195,38 @@ class LMStudioRAGIntegration:
        except Exception as e:
            print(f"❌ RAG initialization failed: {str(e)}")
            return False

    async def process_document_example(self, file_path: str):
        """Example: Process a document with LM Studio backend."""
        if not self.rag:
            print("❌ RAG not initialized. Call initialize_rag() first.")
            return

        try:
            print(f"📄 Processing document: {file_path}")
            await self.rag.process_document_complete(
                file_path=file_path,
                output_dir="./output_lmstudio",
                parse_method="auto",
-                display_stats=True
+                display_stats=True,
            )
            print("✅ Document processing completed!")
        except Exception as e:
            print(f"❌ Document processing failed: {str(e)}")

    async def query_examples(self):
        """Example queries with different modes."""
        if not self.rag:
            print("❌ RAG not initialized. Call initialize_rag() first.")
            return

        # Example queries
        queries = [
            ("What are the main topics in the processed documents?", "hybrid"),
            ("Summarize any tables or data found in the documents", "local"),
            ("What images or figures are mentioned?", "global"),
        ]

        print("\n🔍 Running example queries...")
        for query, mode in queries:
            try:
@@ -223,26 +235,26 @@ class LMStudioRAGIntegration:
                print(f"Answer: {result[:200]}...")
            except Exception as e:
                print(f"❌ Query failed: {str(e)}")

    async def simple_query_example(self):
        """Example basic text query with sample content."""
        if not self.rag:
            print("❌ RAG not initialized")
            return

        try:
            print("\nAdding sample content for testing...")

            # Create content list in the format expected by RAGAnything
            content_list = [
                {
                    "type": "text",
                    "text": """LM Studio Integration with RAG-Anything

This integration demonstrates how to connect LM Studio's local AI models with RAG-Anything's document processing capabilities. The system uses:

- LM Studio for local LLM inference
-- nomic-embed-text-v1.5 for embeddings (768 dimensions)
+- nomic-embed-text-v1.5 for embeddings (768 dimensions)
- RAG-Anything for document processing and retrieval

Key benefits include:
@@ -250,71 +262,73 @@ Key benefits include:
- Performance: Direct API access to local models
- Flexibility: Support for various document formats
- Cost-effective: No external API usage""",
-                    "page_idx": 0
+                    "page_idx": 0,
                }
            ]

            # Insert the content list using the correct method
            await self.rag.insert_content_list(
                content_list=content_list,
                file_path="lmstudio_integration_demo.txt",
                # Use a unique doc_id to avoid collisions and doc_status reuse across runs
                doc_id=f"demo-content-{uuid.uuid4()}",
-                display_stats=True
+                display_stats=True,
            )
            print("✅ Sample content added to knowledge base")

            print("\nTesting basic text query...")

            # Simple text query example
            result = await self.rag.aquery(
                "What are the key benefits of this LM Studio integration?",
-                mode="hybrid"
+                mode="hybrid",
            )
            print(f"✅ Query result: {result[:300]}...")

        except Exception as e:
            print(f"❌ Query failed: {str(e)}")


async def main():
    """Main example function."""
    print("=" * 70)
    print("LM Studio + RAG-Anything Integration Example")
    print("=" * 70)

    # Initialize integration
    integration = LMStudioRAGIntegration()

    # Test connection
    if not await integration.test_connection():
        return False

    print()
    if not await integration.test_chat_completion():
        return False

    # Initialize RAG
    print("\n" + "=" * 50)
    if not await integration.initialize_rag():
        return False

    # Example document processing (uncomment and provide a real file path)
    # await integration.process_document_example("path/to/your/document.pdf")

    # Example queries (uncomment after processing documents)
    # await integration.query_examples()

    # Example basic query
    await integration.simple_query_example()

    print("\n" + "=" * 70)
    print("Integration example completed successfully!")
    print("=" * 70)
    return True


if __name__ == "__main__":
    print("🚀 Starting LM Studio integration example...")
    success = asyncio.run(main())
    exit(0 if success else 1)
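For reference, the example above resolves every connection setting from the standard LLM_BINDING environment variables at import time, so overrides must be exported before the module loads. A minimal driver sketch; the module name lmstudio_example is assumed, while the variable names and default values come straight from the diff above:

import asyncio
import os

# Must be set before the example module is imported, because the module
# reads these variables at import time.
os.environ["LLM_BINDING_HOST"] = "http://localhost:1234/v1"
os.environ["LLM_BINDING_API_KEY"] = "lm-studio"
os.environ["LLM_MODEL"] = "openai/gpt-oss-20b"
os.environ["EMBEDDING_MODEL"] = "text-embedding-nomic-embed-text-v1.5"

from lmstudio_example import main  # hypothetical module name

success = asyncio.run(main())
raise SystemExit(0 if success else 1)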

pyproject.toml
View File

@@ -1,16 +1,17 @@
[build-system]
-requires = ["setuptools>=45", "wheel"]
+requires = ["setuptools>=64", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "raganything"
-version = "1.2.7"
+dynamic = ["version"]
-authors = [
-    {name = "Zirui Guo"}
-]
description = "RAGAnything: All-in-One RAG System"
readme = "README.md"
license = { text = "MIT" }
+authors = [
+    { name = "Zirui Guo" }
+]
+requires-python = ">=3.10"
classifiers = [
    "Development Status :: 4 - Beta",
    "Programming Language :: Python :: 3",
@@ -19,7 +20,6 @@ classifiers = [
    "Intended Audience :: Developers",
    "Topic :: Software Development :: Libraries :: Python Modules",
]
-requires-python = ">=3.10"

dependencies = [
    "huggingface_hub",
    "lightrag-hku",
@@ -31,12 +31,18 @@ dependencies = [
image = ["Pillow>=10.0.0"]
text = ["reportlab>=4.0.0"]
office = []  # Requires LibreOffice (external program)
-all = ["Pillow>=10.0.0", "reportlab>=4.0.0"]
markdown = [
    "markdown>=3.4.0",
    "weasyprint>=60.0",
    "pygments>=2.10.0",
]
+all = [
+    "Pillow>=10.0.0",
+    "reportlab>=4.0.0",
+    "markdown>=3.4.0",
+    "weasyprint>=60.0",
+    "pygments>=2.10.0"
+]

[project.urls]
Homepage = "https://github.com/HKUDS/RAG-Anything"
@@ -57,7 +63,13 @@ dev-dependencies = [
]

[tool.setuptools.packages.find]
exclude = ["tests*", "docs*"]
+include = ["raganything*"]
+
+[tool.setuptools]
+include-package-data = true
+
+[tool.setuptools.dynamic]
+version = {attr = "raganything.__version__"}

[tool.ruff]
target-version = "py310"
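The move from a pinned version to dynamic = ["version"] means setuptools now resolves the version at build time from the attribute named in the [tool.setuptools.dynamic] table above. A minimal sketch of what the package must expose for that lookup to succeed; the value shown is the previously pinned 1.2.7 and is illustrative only:

# raganything/__init__.py (sketch): the attribute that
# version = {attr = "raganything.__version__"} reads at build time
__version__ = "1.2.7"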

View File

@@ -262,20 +262,29 @@ class Parser:
        from reportlab.lib.units import inch
        from reportlab.pdfbase import pdfmetrics
        from reportlab.pdfbase.ttfonts import TTFont

        support_chinese = True
        try:
-            if 'WenQuanYi' not in pdfmetrics.getRegisteredFontNames():
-                if not Path('/usr/share/fonts/wqy-microhei/wqy-microhei.ttc').exists():
+            if "WenQuanYi" not in pdfmetrics.getRegisteredFontNames():
+                if not Path(
+                    "/usr/share/fonts/wqy-microhei/wqy-microhei.ttc"
+                ).exists():
                    support_chinese = False
                    logging.warning(
-                        "WenQuanYi font not found at /usr/share/fonts/wqy-microhei/wqy-microhei.ttc. Chinese characters may not render correctly.")
+                        "WenQuanYi font not found at /usr/share/fonts/wqy-microhei/wqy-microhei.ttc. Chinese characters may not render correctly."
+                    )
                else:
                    pdfmetrics.registerFont(
-                        TTFont('WenQuanYi', '/usr/share/fonts/wqy-microhei/wqy-microhei.ttc'))
+                        TTFont(
+                            "WenQuanYi",
+                            "/usr/share/fonts/wqy-microhei/wqy-microhei.ttc",
+                        )
+                    )
        except Exception as e:
            support_chinese = False
            logging.warning(
-                f"Failed to register WenQuanYi font: {e}. Chinese characters may not render correctly.")
+                f"Failed to register WenQuanYi font: {e}. Chinese characters may not render correctly."
+            )

        # Create PDF document
        doc = SimpleDocTemplate(
@@ -292,8 +301,8 @@ class Parser:
        normal_style = styles["Normal"]
        heading_style = styles["Heading1"]

        if support_chinese:
-            normal_style.fontName = 'WenQuanYi'
-            heading_style.fontName = 'WenQuanYi'
+            normal_style.fontName = "WenQuanYi"
+            heading_style.fontName = "WenQuanYi"

        # Try to register a font that supports Chinese characters
        try:
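Reassembled outside the diff, the font guard this hunk reformats behaves as follows. A self-contained sketch under the assumption that it runs at module scope; in the repository it sits inside a Parser method, and the FONT_PATH constant is introduced here only to keep the lines short (the diff inlines the path at every use site):

import logging
from pathlib import Path

from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont

# Hypothetical constant for the path the diff repeats verbatim.
FONT_PATH = "/usr/share/fonts/wqy-microhei/wqy-microhei.ttc"

support_chinese = True
try:
    # Register WenQuanYi at most once per process.
    if "WenQuanYi" not in pdfmetrics.getRegisteredFontNames():
        if not Path(FONT_PATH).exists():
            support_chinese = False
            logging.warning(
                "WenQuanYi font not found at %s. "
                "Chinese characters may not render correctly.",
                FONT_PATH,
            )
        else:
            pdfmetrics.registerFont(TTFont("WenQuanYi", FONT_PATH))
except Exception as e:
    # Any registration failure degrades gracefully to the default Latin fonts.
    support_chinese = False
    logging.warning(
        "Failed to register WenQuanYi font: %s. "
        "Chinese characters may not render correctly.",
        e,
    )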

uv.lock (generated): 4794 lines changed
File diff suppressed because it is too large