Author: zrguo
Date:   2025-09-22 10:42:35 +08:00
parent 8e0e05d497
commit 1d48f24b4a
4 changed files with 104 additions and 4863 deletions

View File

@@ -36,13 +36,18 @@ from raganything import RAGAnything, RAGAnythingConfig
 from lightrag.utils import EmbeddingFunc
 from lightrag.llm.openai import openai_complete_if_cache
 
-LM_BASE_URL = os.getenv('LLM_BINDING_HOST', 'http://localhost:1234/v1')
-LM_API_KEY = os.getenv('LLM_BINDING_API_KEY', 'lm-studio')
-LM_MODEL_NAME = os.getenv('LLM_MODEL', 'openai/gpt-oss-20b')
-LM_EMBED_MODEL = os.getenv('EMBEDDING_MODEL', 'text-embedding-nomic-embed-text-v1.5')
+LM_BASE_URL = os.getenv("LLM_BINDING_HOST", "http://localhost:1234/v1")
+LM_API_KEY = os.getenv("LLM_BINDING_API_KEY", "lm-studio")
+LM_MODEL_NAME = os.getenv("LLM_MODEL", "openai/gpt-oss-20b")
+LM_EMBED_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-nomic-embed-text-v1.5")
 
-async def lmstudio_llm_model_func(prompt: str, system_prompt: Optional[str] = None,
-                                  history_messages: List[Dict] = None, **kwargs) -> str:
+
+async def lmstudio_llm_model_func(
+    prompt: str,
+    system_prompt: Optional[str] = None,
+    history_messages: List[Dict] = None,
+    **kwargs,
+) -> str:
     """Top-level LLM function for LightRAG (pickle-safe)."""
     return await openai_complete_if_cache(
         model=LM_MODEL_NAME,
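
The hunk ends mid-call. A minimal sketch of how the rest of lmstudio_llm_model_func presumably reads, assuming LightRAG's openai_complete_if_cache accepts base_url/api_key overrides; the keyword arguments below are an assumption, not part of this diff:

    # Sketch of the remainder of lmstudio_llm_model_func (not shown in the hunk).
    return await openai_complete_if_cache(
        model=LM_MODEL_NAME,
        prompt=prompt,
        system_prompt=system_prompt,
        history_messages=history_messages or [],
        base_url=LM_BASE_URL,  # assumed: points the OpenAI-compatible client at LM Studio
        api_key=LM_API_KEY,
        **kwargs,
    )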
@@ -58,6 +63,7 @@ async def lmstudio_llm_model_func(prompt: str, system_prompt: Optional[str] = None,
 async def lmstudio_embedding_async(texts: List[str]) -> List[List[float]]:
     """Top-level embedding function for LightRAG (pickle-safe)."""
     from lightrag.llm.openai import openai_embed
+
     embeddings = await openai_embed(
         texts=texts,
         model=LM_EMBED_MODEL,
@@ -66,16 +72,18 @@ async def lmstudio_embedding_async(texts: List[str]) -> List[List[float]]:
     )
     return embeddings.tolist()
 
 
 class LMStudioRAGIntegration:
     """Integration class for LM Studio with RAG-Anything."""
 
     def __init__(self):
         # LM Studio configuration using standard LLM_BINDING variables
-        self.base_url = os.getenv('LLM_BINDING_HOST', 'http://localhost:1234/v1')
-        self.api_key = os.getenv('LLM_BINDING_API_KEY', 'lm-studio')
-        self.model_name = os.getenv('LLM_MODEL', 'openai/gpt-oss-20b')
-        self.embedding_model = os.getenv('EMBEDDING_MODEL', 'text-embedding-nomic-embed-text-v1.5')
+        self.base_url = os.getenv("LLM_BINDING_HOST", "http://localhost:1234/v1")
+        self.api_key = os.getenv("LLM_BINDING_API_KEY", "lm-studio")
+        self.model_name = os.getenv("LLM_MODEL", "openai/gpt-oss-20b")
+        self.embedding_model = os.getenv(
+            "EMBEDDING_MODEL", "text-embedding-nomic-embed-text-v1.5"
+        )
 
         # RAG-Anything configuration
         # Use a fresh working directory each run to avoid legacy doc_status schema conflicts
@@ -132,14 +140,17 @@ class LMStudioRAGIntegration:
                 model=self.model_name,
                 messages=[
                     {"role": "system", "content": "You are a helpful AI assistant."},
-                    {"role": "user", "content": "Hello! Please confirm you're working and tell me your capabilities."}
+                    {
+                        "role": "user",
+                        "content": "Hello! Please confirm you're working and tell me your capabilities.",
+                    },
                 ],
                 max_tokens=100,
-                temperature=0.7
+                temperature=0.7,
             )
 
             result = response.choices[0].message.content.strip()
-            print(f"✅ Chat test successful!")
+            print("✅ Chat test successful!")
             print(f"Response: {result}")
             return True
         except Exception as e:
@@ -176,6 +187,7 @@ class LMStudioRAGIntegration:
             # Older LightRAG versions may not accept this extra field in DocProcessingStatus
             async def _noop_mark_multimodal(doc_id: str):
                 return None
+
             self.rag._mark_multimodal_processing_complete = _noop_mark_multimodal
 
             print("✅ RAG-Anything initialized successfully!")
@@ -196,7 +208,7 @@ class LMStudioRAGIntegration:
                 file_path=file_path,
                 output_dir="./output_lmstudio",
                 parse_method="auto",
-                display_stats=True
+                display_stats=True,
             )
             print("✅ Document processing completed!")
         except Exception as e:
@@ -250,7 +262,7 @@ Key benefits include:
 - Performance: Direct API access to local models
 - Flexibility: Support for various document formats
 - Cost-effective: No external API usage""",
-                "page_idx": 0
+                "page_idx": 0,
             }
         ]
@@ -260,7 +272,7 @@ Key benefits include:
file_path="lmstudio_integration_demo.txt",
# Use a unique doc_id to avoid collisions and doc_status reuse across runs
doc_id=f"demo-content-{uuid.uuid4()}",
display_stats=True
display_stats=True,
)
print("✅ Sample content added to knowledge base")
@@ -269,13 +281,14 @@ Key benefits include:
             # Simple text query example
             result = await self.rag.aquery(
                 "What are the key benefits of this LM Studio integration?",
-                mode="hybrid"
+                mode="hybrid",
             )
             print(f"✅ Query result: {result[:300]}...")
         except Exception as e:
             print(f"❌ Query failed: {str(e)}")
 
+
 async def main():
     """Main example function."""
     print("=" * 70)
@@ -313,6 +326,7 @@ async def main():
     return True
 
+
 if __name__ == "__main__":
     print("🚀 Starting LM Studio integration example...")
     success = asyncio.run(main())
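
Both functions are deliberately kept at module level ("pickle-safe") so LightRAG can serialize references to them. A minimal sketch of how they might be wired into RAG-Anything, assuming the usual RAGAnything/EmbeddingFunc constructor arguments; the 768 embedding dimension for nomic-embed-text-v1.5 and the working directory are assumptions:

# Hedged sketch: wiring the module-level functions into RAGAnything.
from lightrag.utils import EmbeddingFunc
from raganything import RAGAnything, RAGAnythingConfig

config = RAGAnythingConfig(working_dir="./rag_storage_lmstudio")  # assumed path
rag = RAGAnything(
    config=config,
    llm_model_func=lmstudio_llm_model_func,  # picklable: defined at module level
    embedding_func=EmbeddingFunc(
        embedding_dim=768,  # assumed output dim of nomic-embed-text-v1.5
        max_token_size=8192,  # assumed context limit
        func=lmstudio_embedding_async,
    ),
)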

View File

@@ -1,16 +1,17 @@
 [build-system]
-requires = ["setuptools>=45", "wheel"]
+requires = ["setuptools>=64", "wheel"]
 build-backend = "setuptools.build_meta"
 
 [project]
 name = "raganything"
-version = "1.2.7"
-description = "RAGAnything: All-in-One RAG System"
-readme = "README.md"
-license = { text = "MIT" }
+dynamic = ["version"]
 authors = [
     {name = "Zirui Guo"}
 ]
+description = "RAGAnything: All-in-One RAG System"
+readme = "README.md"
+license = { text = "MIT" }
+requires-python = ">=3.10"
 classifiers = [
     "Development Status :: 4 - Beta",
     "Programming Language :: Python :: 3",
@@ -19,7 +20,6 @@ classifiers = [
"Intended Audience :: Developers",
"Topic :: Software Development :: Libraries :: Python Modules",
]
requires-python = ">=3.10"
dependencies = [
"huggingface_hub",
"lightrag-hku",
@@ -31,12 +31,18 @@ dependencies = [
 image = ["Pillow>=10.0.0"]
 text = ["reportlab>=4.0.0"]
 office = [] # Requires LibreOffice (external program)
-all = ["Pillow>=10.0.0", "reportlab>=4.0.0"]
+markdown = [
+    "markdown>=3.4.0",
+    "weasyprint>=60.0",
+    "pygments>=2.10.0",
+]
+all = [
+    "Pillow>=10.0.0",
+    "reportlab>=4.0.0",
+    "markdown>=3.4.0",
+    "weasyprint>=60.0",
+    "pygments>=2.10.0"
+]
 
 [project.urls]
 Homepage = "https://github.com/HKUDS/RAG-Anything"
@@ -57,7 +63,13 @@ dev-dependencies = [
 ]
 
 [tool.setuptools.packages.find]
-exclude = ["tests*", "docs*"]
+include = ["raganything*"]
+
+[tool.setuptools]
+include-package-data = true
+
+[tool.setuptools.dynamic]
+version = {attr = "raganything.__version__"}
 
 [tool.ruff]
 target-version = "py310"

View File

@@ -262,20 +262,29 @@ class Parser:
         from reportlab.lib.units import inch
         from reportlab.pdfbase import pdfmetrics
         from reportlab.pdfbase.ttfonts import TTFont
 
         support_chinese = True
         try:
-            if 'WenQuanYi' not in pdfmetrics.getRegisteredFontNames():
-                if not Path('/usr/share/fonts/wqy-microhei/wqy-microhei.ttc').exists():
+            if "WenQuanYi" not in pdfmetrics.getRegisteredFontNames():
+                if not Path(
+                    "/usr/share/fonts/wqy-microhei/wqy-microhei.ttc"
+                ).exists():
                     support_chinese = False
                     logging.warning(
-                        "WenQuanYi font not found at /usr/share/fonts/wqy-microhei/wqy-microhei.ttc. Chinese characters may not render correctly.")
+                        "WenQuanYi font not found at /usr/share/fonts/wqy-microhei/wqy-microhei.ttc. Chinese characters may not render correctly."
+                    )
                 else:
                     pdfmetrics.registerFont(
-                        TTFont('WenQuanYi', '/usr/share/fonts/wqy-microhei/wqy-microhei.ttc'))
+                        TTFont(
+                            "WenQuanYi",
+                            "/usr/share/fonts/wqy-microhei/wqy-microhei.ttc",
+                        )
+                    )
         except Exception as e:
             support_chinese = False
             logging.warning(
-                f"Failed to register WenQuanYi font: {e}. Chinese characters may not render correctly.")
+                f"Failed to register WenQuanYi font: {e}. Chinese characters may not render correctly."
+            )
 
         # Create PDF document
         doc = SimpleDocTemplate(
@@ -292,8 +301,8 @@ class Parser:
         normal_style = styles["Normal"]
         heading_style = styles["Heading1"]
         if support_chinese:
-            normal_style.fontName = 'WenQuanYi'
-            heading_style.fontName = 'WenQuanYi'
+            normal_style.fontName = "WenQuanYi"
+            heading_style.fontName = "WenQuanYi"
 
         # Try to register a font that supports Chinese characters
         try:
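
The reformatted block above implements a font fallback: register the WenQuanYi TTC for Chinese text when it exists on disk, otherwise keep reportlab's built-in fonts and warn. A standalone sketch of the same pattern, with the font path taken from the diff and everything else illustrative:

# Hedged sketch of the CJK font-fallback pattern used in Parser.
import logging
from pathlib import Path

from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont

FONT_PATH = "/usr/share/fonts/wqy-microhei/wqy-microhei.ttc"

def register_cjk_font() -> bool:
    """Return True if a CJK-capable font is registered and usable."""
    if "WenQuanYi" in pdfmetrics.getRegisteredFontNames():
        return True  # already registered in this process
    if not Path(FONT_PATH).exists():
        logging.warning("WenQuanYi font not found; Chinese text may not render.")
        return False
    try:
        pdfmetrics.registerFont(TTFont("WenQuanYi", FONT_PATH))
        return True
    except Exception as e:  # bad or unreadable font file
        logging.warning(f"Failed to register WenQuanYi font: {e}")
        return False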

uv.lock (generated, 4794 lines changed)

File diff suppressed because it is too large.