Skip to content

Commit ddb2025

Browse files
authored
Upgrade lightrag-memgraph to LightRAG 1.4.9.11 (#141)
1 parent eb9a6b0 commit ddb2025

6 files changed

Lines changed: 340 additions & 92 deletions

File tree

integrations/lightrag-memgraph/README.md

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,102 @@ powerful querying and analysis. Ideal for building knowledge graphs, improving
1111
data discovery, and leveraging advanced AI techniques on top of your domain
1212
data.
1313

14-
## Notes
14+
## General Notes
1515

1616
- Entity/relationship extraction is high-quality, but also high-cost and
1717
relatively slow.
1818
- The goal over time is to expose time and cost metrics (e.g., $ per your
1919
specific document page or chunk).
20+
21+
## Quick start
22+
23+
**Prerequisites:** [Memgraph](https://memgraph.com/docs/getting-started) running
24+
(default `bolt://localhost:7687`), and an LLM API key (e.g. `OPENAI_API_KEY` or
25+
`ANTHROPIC_API_KEY`).
26+
27+
**Install:**
28+
29+
```bash
30+
pip install lightrag-memgraph
31+
```
32+
33+
**Minimal example** (async): create the wrapper, initialize with a working
34+
directory, insert text, then finalize.
35+
36+
```python
37+
import asyncio
38+
from lightrag_memgraph import MemgraphLightRAGWrapper
39+
40+
async def main():
41+
wrapper = MemgraphLightRAGWrapper(disable_embeddings=True)
42+
await wrapper.initialize(working_dir="./lightrag_storage")
43+
await wrapper.ainsert(input="Your document text here.", file_paths=["doc1"])
44+
# optional: rag = wrapper.get_lightrag(); print(await rag.get_graph_labels())
45+
await wrapper.afinalize()
46+
47+
asyncio.run(main())
48+
```
49+
50+
See `example.py` in this repo for a full run with sample texts and graph output.
51+
52+
## Using Anthropic (Claude) as the LLM
53+
54+
LightRAG supports Claude via the `lightrag.llm.anthropic` module. Set your API
55+
key and pass the LLM function and model name when initializing the wrapper. The
56+
list of Anthropic models is available under
57+
https://platform.claude.com/docs/en/about-claude/models.
58+
59+
1. **Set the API key** (required for Claude):
60+
61+
```bash
62+
export ANTHROPIC_API_KEY="your-anthropic-api-key"
63+
```
64+
65+
2. **Use Anthropic in code** by passing `llm_model_func` and `llm_model_name` to `initialize()` (run the snippet inside an `async` function, as in the Quick start example):
66+
67+
```python
68+
from lightrag.llm.anthropic import anthropic_complete
69+
from lightrag_memgraph import MemgraphLightRAGWrapper
70+
71+
wrapper = MemgraphLightRAGWrapper(disable_embeddings=True) # or set embedding_func
72+
await wrapper.initialize(
73+
working_dir="./lightrag_storage",
74+
llm_model_func=anthropic_complete,
75+
llm_model_name="claude-3-5-sonnet-20241022", # or claude-3-haiku-20240307, etc.
76+
)
77+
```
78+
79+
Preset functions are also available: `claude_3_opus_complete`,
80+
`claude_3_sonnet_complete`, `claude_3_haiku_complete` (fixed older model
81+
IDs). For current models, use `anthropic_complete` with the desired
82+
`llm_model_name`.
83+
84+
3. **Embeddings**: Anthropic does not provide embeddings. Either use
85+
`disable_embeddings=True` (as above), or set `embedding_func` to another
86+
provider (e.g. `openai_embed` from `lightrag.llm.openai` with `OPENAI_API_KEY`,
87+
or Voyage AI via `lightrag.llm.anthropic.anthropic_embed` with
88+
`VOYAGE_API_KEY`).
89+
90+
## Using OpenAI as the LLM
91+
92+
Set your API key and optionally choose a model.
93+
94+
1. **Set the API key**:
95+
96+
```bash
97+
export OPENAI_API_KEY="your-openai-api-key"
98+
```
99+
100+
2. **Use a specific OpenAI model** by passing `llm_model_func` and optionally `llm_model_name`:
101+
102+
```python
103+
from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
104+
from lightrag_memgraph import MemgraphLightRAGWrapper
105+
106+
wrapper = MemgraphLightRAGWrapper()
107+
await wrapper.initialize(
108+
working_dir="./lightrag_storage",
109+
llm_model_func=gpt_4o_mini_complete,
110+
embedding_func=openai_embed,
111+
)
112+
```

integrations/lightrag-memgraph/example.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import asyncio
66
from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
7+
from lightrag.llm.anthropic import anthropic_complete
78
import shutil
89

910
from lightrag_memgraph import MemgraphLightRAGWrapper
@@ -71,6 +72,8 @@ async def main():
7172
await lightrag_wrapper.initialize(
7273
working_dir=WORKING_DIR,
7374
max_parallel_insert=8,
75+
llm_model_func=anthropic_complete,
76+
llm_model_name="claude-haiku-4-5",
7477
)
7578

7679
total_time = 0.0

integrations/lightrag-memgraph/pyproject.toml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "lightrag-memgraph"
3-
version = "0.1.3"
3+
version = "0.1.4"
44
description = "LightRAG integration with Memgraph"
55
readme = "README.md"
66
requires-python = ">=3.10"
@@ -22,9 +22,11 @@ classifiers = [
2222
]
2323

2424
dependencies = [
25-
"lightrag-hku[api]==1.4.8.2",
25+
"lightrag-hku[api]>=1.4.9.11",
2626
"memgraph-toolbox>=0.1.8",
2727
"numpy>=1.21.0",
28+
"anthropic>=0.18.0",
29+
"voyageai>=0.2.0",
2830
]
2931

3032
[project.optional-dependencies]

integrations/lightrag-memgraph/src/lightrag_memgraph/__init__.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,120 @@
66

77
from .core import MemgraphLightRAGWrapper
88

9+
10+
# Patch lightrag.llm.anthropic for current Anthropic SDK:
11+
# - require max_tokens; use top-level system= (not "system" role in messages).
12+
def _patch_anthropic() -> None:
13+
try:
14+
import os
15+
import logging
16+
from typing import Any, Union
17+
from collections.abc import AsyncIterator
18+
19+
import lightrag.llm.anthropic as _mod
20+
from anthropic import (
21+
AsyncAnthropic,
22+
APIConnectionError,
23+
RateLimitError,
24+
APITimeoutError,
25+
)
26+
from tenacity import (
27+
retry,
28+
stop_after_attempt,
29+
wait_exponential,
30+
retry_if_exception_type,
31+
)
32+
from lightrag.utils import safe_unicode_decode, logger, VERBOSE_DEBUG
33+
from lightrag.api import __api_version__
34+
35+
_orig = _mod.anthropic_complete_if_cache
36+
if getattr(_orig, "_lightrag_memgraph_patched", False):
37+
return
38+
39+
@retry(
40+
stop=stop_after_attempt(3),
41+
wait=wait_exponential(multiplier=1, min=4, max=10),
42+
retry=retry_if_exception_type(
43+
(RateLimitError, APIConnectionError, APITimeoutError)
44+
),
45+
)
46+
async def _wrapped(
47+
model: str,
48+
prompt: str,
49+
system_prompt: str | None = None,
50+
history_messages: list[dict[str, Any]] | None = None,
51+
enable_cot: bool = False,
52+
base_url: str | None = None,
53+
api_key: str | None = None,
54+
**kwargs: Any,
55+
) -> Union[str, AsyncIterator[str]]:
56+
if history_messages is None:
57+
history_messages = []
58+
kwargs.setdefault("max_tokens", 4096)
59+
if not api_key:
60+
api_key = os.environ.get("ANTHROPIC_API_KEY")
61+
62+
default_headers = {
63+
"User-Agent": f"Mozilla/5.0 LightRAG/{__api_version__}",
64+
"Content-Type": "application/json",
65+
}
66+
kwargs.pop("hashing_kv", None)
67+
kwargs.pop("keyword_extraction", None)
68+
timeout = kwargs.pop("timeout", None)
69+
70+
client = (
71+
AsyncAnthropic(
72+
default_headers=default_headers, api_key=api_key, timeout=timeout
73+
)
74+
if base_url is None
75+
else AsyncAnthropic(
76+
base_url=base_url,
77+
default_headers=default_headers,
78+
api_key=api_key,
79+
timeout=timeout,
80+
)
81+
)
82+
83+
# API expects top-level system=, not a message with role "system"
84+
messages: list[dict[str, Any]] = list(history_messages)
85+
messages.append({"role": "user", "content": prompt})
86+
87+
create_kwargs: dict[str, Any] = {
88+
"model": model,
89+
"messages": messages,
90+
"stream": True,
91+
**kwargs,
92+
}
93+
if system_prompt:
94+
create_kwargs["system"] = system_prompt
95+
96+
if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
97+
logging.getLogger("anthropic").setLevel(logging.INFO)
98+
99+
response = await client.messages.create(**create_kwargs)
100+
101+
# Consume stream and return a single string (caller expects str, not AsyncIterator)
102+
# Only content_block_delta events have delta.text; message_delta etc. have no .text
103+
parts: list[str] = []
104+
async for event in response:
105+
content = (
106+
getattr(getattr(event, "delta", None), "text", None)
107+
if hasattr(event, "delta")
108+
else None
109+
)
110+
if not content:
111+
continue
112+
if r"\u" in content:
113+
content = safe_unicode_decode(content.encode("utf-8"))
114+
parts.append(content)
115+
return "".join(parts)
116+
117+
_wrapped._lightrag_memgraph_patched = True # type: ignore[attr-defined]
118+
_mod.anthropic_complete_if_cache = _wrapped
119+
except Exception:
120+
pass
121+
122+
123+
_patch_anthropic()
124+
9125
__all__ = ["MemgraphLightRAGWrapper"]

integrations/lightrag-memgraph/src/lightrag_memgraph/core.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from lightrag import LightRAG
66
from lightrag.kg.shared_storage import initialize_pipeline_status
77
from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
8-
from lightrag.utils import setup_logger
8+
from lightrag.utils import EmbeddingFunc, setup_logger
99
import numpy as np
1010

1111

@@ -15,12 +15,13 @@
1515
os.environ["MEMGRAPH_URI"] = MEMGRAPH_URL
1616

1717

18-
class DummyEmbed:
19-
def __init__(self, dim: int = 1):
20-
self.embedding_dim = dim
18+
def _dummy_embedding_func(dim: int = 1) -> EmbeddingFunc:
19+
"""Build an EmbeddingFunc that returns constant embeddings (for disable_embeddings=True)."""
2120

22-
async def __call__(self, texts: list[str]) -> np.ndarray:
23-
return np.ones((len(texts), self.embedding_dim), dtype=float)
21+
async def _dummy_embed_func(texts: list[str]) -> np.ndarray:
22+
return np.ones((len(texts), dim), dtype=float)
23+
24+
return EmbeddingFunc(embedding_dim=dim, func=_dummy_embed_func)
2425

2526

2627
class MemgraphLightRAGWrapper:
@@ -40,7 +41,7 @@ async def initialize(self, **lightrag_kwargs) -> None:
4041
logging.getLogger("nano-vectordb").setLevel(self.log_level)
4142
logging.getLogger("pikepdf").setLevel(self.log_level)
4243
if self.disable_embeddings:
43-
lightrag_kwargs["embedding_func"] = DummyEmbed(dim=1)
44+
lightrag_kwargs["embedding_func"] = _dummy_embedding_func(dim=1)
4445
lightrag_kwargs["vector_storage"] = "NanoVectorDBStorage"
4546
if "working_dir" in lightrag_kwargs:
4647
working_dir = lightrag_kwargs["working_dir"]

0 commit comments

Comments
 (0)