Upgrade lightrag-memgraph to LightRAG 1.4.9.11 (#141)

gitbuda · web-flow · commit ddb20255b38a · 2026-02-11T11:53:02.000+01:00
diff --git a/integrations/lightrag-memgraph/README.md b/integrations/lightrag-memgraph/README.md
@@ -11,9 +11,102 @@ powerful querying and analysis. Ideal for building knowledge graphs, improving
 data discovery, and leveraging advanced AI techniques on top of your domain
 data.
 
-## Notes
+## General Notes
 
 - Entity/relationship extraction is high-quality, but also high-cost and
 relatively slow.
 - The goal over time is to expose time and cost metrics (e.g., $ per your
 specific document page or chunk).
+
+## Quick start
+
+**Prerequisites:** [Memgraph](https://memgraph.com/docs/getting-started) running
+(default `bolt://localhost:7687`), and an LLM API key (e.g. `OPENAI_API_KEY` or
+`ANTHROPIC_API_KEY`).
+
+**Install:**
+
+```bash
+pip install lightrag-memgraph
+```
+
+**Minimal example** (async): create the wrapper, initialize with a working
+directory, insert text, then finalize.
+
+```python
+import asyncio
+from lightrag_memgraph import MemgraphLightRAGWrapper
+
+async def main():
+    wrapper = MemgraphLightRAGWrapper(disable_embeddings=True)
+    await wrapper.initialize(working_dir="./lightrag_storage")
+    await wrapper.ainsert(input="Your document text here.", file_paths=["doc1"])
+    # optional: rag = wrapper.get_lightrag(); print(await rag.get_graph_labels())
+    await wrapper.afinalize()
+
+asyncio.run(main())
+```
+
+See `example.py` in this repo for a full run with sample texts and graph output.
+
+## Using Anthropic (Claude) as the LLM
+
+LightRAG supports Claude via the `lightrag.llm.anthropic` module. Set your API
+key and pass the LLM function and model name when initializing the wrapper. The
+list of Anthropic models is available at
+https://platform.claude.com/docs/en/about-claude/models.
+
+1. **Set the API key** (required for Claude):
+
+   ```bash
+   export ANTHROPIC_API_KEY="your-anthropic-api-key"
+   ```
+
+2. **Use Anthropic in code** by passing `llm_model_func` and `llm_model_name` to `initialize()`:
+
+   ```python
+   from lightrag.llm.anthropic import anthropic_complete
+   from lightrag_memgraph import MemgraphLightRAGWrapper
+
+   wrapper = MemgraphLightRAGWrapper(disable_embeddings=True)  # or set embedding_func
+   await wrapper.initialize(
+       working_dir="./lightrag_storage",
+       llm_model_func=anthropic_complete,
+       llm_model_name="claude-3-5-sonnet-20241022",  # or claude-3-haiku-20240307, etc.
+   )
+   ```
+
+   Preset functions are also available: `claude_3_opus_complete`,
+   `claude_3_sonnet_complete`, `claude_3_haiku_complete` (fixed older model
+   IDs). For current models, use `anthropic_complete` with the desired
+   `llm_model_name`.
+   
+3. **Embeddings**: Anthropic does not provide embeddings. Either use
+`disable_embeddings=True` (as above), or set `embedding_func` to another
+provider (e.g. `openai_embed` from `lightrag.llm.openai` with `OPENAI_API_KEY`,
+or Voyage AI via `lightrag.llm.anthropic.anthropic_embed` with
+`VOYAGE_API_KEY`).
+
+## Using OpenAI as the LLM
+
+Set your API key and optionally choose a model.
+
+1. **Set the API key**:
+
+   ```bash
+   export OPENAI_API_KEY="your-openai-api-key"
+   ```
+
+2. **Use a specific OpenAI model** by passing `llm_model_func` and optionally `llm_model_name`:
+
+   ```python
+   from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
+   from lightrag_memgraph import MemgraphLightRAGWrapper
+
+   wrapper = MemgraphLightRAGWrapper()
+   await wrapper.initialize(
+       working_dir="./lightrag_storage",
+       llm_model_func=gpt_4o_mini_complete,
+       embedding_func=openai_embed,
+   )
+   ```
diff --git a/integrations/lightrag-memgraph/example.py b/integrations/lightrag-memgraph/example.py
@@ -4,6 +4,7 @@
 
 import asyncio
 from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
+from lightrag.llm.anthropic import anthropic_complete
 import shutil
 
 from lightrag_memgraph import MemgraphLightRAGWrapper
@@ -71,6 +72,8 @@ async def main():
         await lightrag_wrapper.initialize(
             working_dir=WORKING_DIR,
             max_parallel_insert=8,
+            llm_model_func=anthropic_complete,
+            llm_model_name="claude-haiku-4-5",
         )
 
         total_time = 0.0
diff --git a/integrations/lightrag-memgraph/pyproject.toml b/integrations/lightrag-memgraph/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "lightrag-memgraph"
-version = "0.1.3"
+version = "0.1.4"
 description = "LightRAG integration with Memgraph"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -22,9 +22,11 @@ classifiers = [
 ]
 
 dependencies = [
-    "lightrag-hku[api]==1.4.8.2",
+    "lightrag-hku[api]>=1.4.9.11",
     "memgraph-toolbox>=0.1.8",
     "numpy>=1.21.0",
+    "anthropic>=0.18.0",
+    "voyageai>=0.2.0",
 ]
 
 [project.optional-dependencies]
diff --git a/integrations/lightrag-memgraph/src/lightrag_memgraph/__init__.py b/integrations/lightrag-memgraph/src/lightrag_memgraph/__init__.py
@@ -6,4 +6,120 @@
 
 from .core import MemgraphLightRAGWrapper
 
+
+# Patch lightrag.llm.anthropic for current Anthropic SDK:
+# - require max_tokens; use top-level system= (not "system" role in messages).
+def _patch_anthropic() -> None:
+    """Replace ``lightrag.llm.anthropic.anthropic_complete_if_cache`` in place.
+
+    The replacement always sends ``max_tokens`` (default 4096), passes the
+    system prompt through the top-level ``system=`` argument instead of a
+    ``"system"`` role message, and consumes the streamed response so callers
+    receive a single ``str``.
+
+    Patching is best-effort and idempotent: calling it again is a no-op, and
+    any failure while importing or installing the patch is logged as a warning
+    and leaves the original LightRAG implementation untouched.
+    """
+    import logging
+    import os
+
+    try:
+        from typing import Any
+
+        import lightrag.llm.anthropic as _mod
+        from anthropic import (
+            AsyncAnthropic,
+            APIConnectionError,
+            RateLimitError,
+            APITimeoutError,
+        )
+        from tenacity import (
+            retry,
+            stop_after_attempt,
+            wait_exponential,
+            retry_if_exception_type,
+        )
+        from lightrag.utils import safe_unicode_decode, logger, VERBOSE_DEBUG
+        from lightrag.api import __api_version__
+
+        _orig = _mod.anthropic_complete_if_cache
+        # The marker attribute is set at the bottom of this function; it keeps
+        # repeated calls from wrapping an already patched function.
+        if getattr(_orig, "_lightrag_memgraph_patched", False):
+            return
+
+        @retry(
+            stop=stop_after_attempt(3),
+            wait=wait_exponential(multiplier=1, min=4, max=10),
+            retry=retry_if_exception_type(
+                (RateLimitError, APIConnectionError, APITimeoutError)
+            ),
+        )
+        async def _wrapped(
+            model: str,
+            prompt: str,
+            system_prompt: str | None = None,
+            history_messages: list[dict[str, Any]] | None = None,
+            enable_cot: bool = False,
+            base_url: str | None = None,
+            api_key: str | None = None,
+            **kwargs: Any,
+        ) -> str:
+            """Send one completion request to Anthropic and return the full text.
+
+            Args:
+                model: Anthropic model identifier.
+                prompt: User message appended after ``history_messages``.
+                system_prompt: Sent as the top-level ``system`` argument when truthy.
+                history_messages: Earlier conversation turns; ``None`` means none.
+                enable_cot: Accepted for signature compatibility; not used here.
+                base_url: Optional API base URL for the client.
+                api_key: API key; falls back to ``ANTHROPIC_API_KEY``.
+                **kwargs: Extra arguments forwarded to ``messages.create``.
+                    ``hashing_kv``, ``keyword_extraction``, ``timeout`` and
+                    ``stream`` are removed before forwarding.
+
+            Returns:
+                The concatenated text of all streamed text deltas.
+            """
+            kwargs.setdefault("max_tokens", 4096)
+            if not api_key:
+                api_key = os.environ.get("ANTHROPIC_API_KEY")
+
+            # These keys are not forwarded to ``messages.create``.
+            kwargs.pop("hashing_kv", None)
+            kwargs.pop("keyword_extraction", None)
+            # The response is always consumed as a stream below, so a
+            # caller-supplied ``stream`` value (e.g. ``stream=False``) must not
+            # override it; otherwise ``async for`` would fail on the response.
+            kwargs.pop("stream", None)
+            timeout = kwargs.pop("timeout", None)
+
+            # NOTE(review): ``timeout`` is forwarded even when it is None, as
+            # before; confirm whether None should instead fall back to the
+            # SDK's default timeout.
+            client_options: dict[str, Any] = {
+                "default_headers": {
+                    "User-Agent": f"Mozilla/5.0 LightRAG/{__api_version__}",
+                    "Content-Type": "application/json",
+                },
+                "api_key": api_key,
+                "timeout": timeout,
+            }
+            if base_url is not None:
+                client_options["base_url"] = base_url
+            client = AsyncAnthropic(**client_options)
+
+            # API expects top-level system=, not a message with role "system".
+            messages: list[dict[str, Any]] = [
+                *(history_messages or []),
+                {"role": "user", "content": prompt},
+            ]
+
+            create_kwargs: dict[str, Any] = {
+                **kwargs,
+                "model": model,
+                "messages": messages,
+                "stream": True,
+            }
+            if system_prompt:
+                create_kwargs["system"] = system_prompt
+
+            if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
+                logging.getLogger("anthropic").setLevel(logging.INFO)
+
+            response = await client.messages.create(**create_kwargs)
+
+            # Consume the stream and return one string. Events without a
+            # ``delta`` or whose delta has no ``text`` are skipped.
+            parts: list[str] = []
+            async for event in response:
+                content = getattr(getattr(event, "delta", None), "text", None)
+                if not content:
+                    continue
+                if r"\u" in content:
+                    content = safe_unicode_decode(content.encode("utf-8"))
+                parts.append(content)
+            return "".join(parts)
+
+        _wrapped._lightrag_memgraph_patched = True  # type: ignore[attr-defined]
+        _mod.anthropic_complete_if_cache = _wrapped
+    except Exception as exc:
+        # Best-effort: never break ``import lightrag_memgraph``, but do not hide
+        # the failure either, so a broken Anthropic setup can be diagnosed.
+        logging.getLogger(__name__).warning(
+            "Could not patch lightrag.llm.anthropic (%s: %s); "
+            "the unpatched LightRAG implementation stays in place.",
+            type(exc).__name__,
+            exc,
+        )
+
+
+_patch_anthropic()
+
 __all__ = ["MemgraphLightRAGWrapper"]
diff --git a/integrations/lightrag-memgraph/src/lightrag_memgraph/core.py b/integrations/lightrag-memgraph/src/lightrag_memgraph/core.py
@@ -5,7 +5,7 @@
 from lightrag import LightRAG
 from lightrag.kg.shared_storage import initialize_pipeline_status
 from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
-from lightrag.utils import setup_logger
+from lightrag.utils import EmbeddingFunc, setup_logger
 import numpy as np
 
 
@@ -15,12 +15,13 @@
 os.environ["MEMGRAPH_URI"] = MEMGRAPH_URL
 
 
-class DummyEmbed:
-    def __init__(self, dim: int = 1):
-        self.embedding_dim = dim
+def _dummy_embedding_func(dim: int = 1) -> EmbeddingFunc:
+    """Build an EmbeddingFunc that returns constant embeddings (for disable_embeddings=True).
+
+    The wrapped coroutine ignores the content of ``texts`` and returns an
+    all-ones float array of shape ``(len(texts), dim)``. The returned
+    ``EmbeddingFunc`` is created with ``embedding_dim=dim``.
+    """
 
-    async def __call__(self, texts: list[str]) -> np.ndarray:
-        return np.ones((len(texts), self.embedding_dim), dtype=float)
+    async def _dummy_embed_func(texts: list[str]) -> np.ndarray:
+        return np.ones((len(texts), dim), dtype=float)
+
+    return EmbeddingFunc(embedding_dim=dim, func=_dummy_embed_func)
 
 
 class MemgraphLightRAGWrapper:
@@ -40,7 +41,7 @@ async def initialize(self, **lightrag_kwargs) -> None:
         logging.getLogger("nano-vectordb").setLevel(self.log_level)
         logging.getLogger("pikepdf").setLevel(self.log_level)
         if self.disable_embeddings:
-            lightrag_kwargs["embedding_func"] = DummyEmbed(dim=1)
+            lightrag_kwargs["embedding_func"] = _dummy_embedding_func(dim=1)
             lightrag_kwargs["vector_storage"] = "NanoVectorDBStorage"
         if "working_dir" in lightrag_kwargs:
             working_dir = lightrag_kwargs["working_dir"]
diff --git a/uv.lock b/uv.lock