Skip to content

Commit e25164d

Browse files
authored
Merge pull request #1 from ItMeDiaTech/claude/code-review-audit-011CUvC2jETLr1vpep6v8gAj
Review project for bugs and code issues
2 parents 4ec6d6f + 09ef953 commit e25164d

51 files changed

Lines changed: 3702 additions & 762 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

AUDIT_COMPLETION_SUMMARY.md

Lines changed: 428 additions & 0 deletions
Large diffs are not rendered by default.

CODE_AUDIT_REPORT.md

Lines changed: 1320 additions & 0 deletions
Large diffs are not rendered by default.

PARALLEL_AGENT_TASKS.md

Lines changed: 566 additions & 0 deletions
Large diffs are not rendered by default.

config/documentation_sources.yaml

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
# Documentation Sources Configuration
2+
#
3+
# This file defines documentation sources for various technologies that can be
4+
# automatically fetched and indexed by RAG-CLI's project indexer.
5+
#
6+
# Each source has:
7+
# - name: Technology name
8+
# - url: Documentation URL
9+
# - priority: Fetch priority (1=highest)
10+
# - doc_type: Type of documentation (official, tutorial, community)
11+
# - enabled: Whether to fetch this source
12+
13+
documentation_sources:
14+
# Programming Languages
15+
Python:
16+
- url: "https://docs.python.org/3/"
17+
priority: 1
18+
doc_type: "official"
19+
enabled: true
20+
- url: "https://realpython.com/"
21+
priority: 2
22+
doc_type: "tutorial"
23+
enabled: true
24+
25+
JavaScript:
26+
- url: "https://developer.mozilla.org/en-US/docs/Web/JavaScript"
27+
priority: 1
28+
doc_type: "official"
29+
enabled: true
30+
- url: "https://javascript.info/"
31+
priority: 2
32+
doc_type: "tutorial"
33+
enabled: true
34+
35+
TypeScript:
36+
- url: "https://www.typescriptlang.org/docs/"
37+
priority: 1
38+
doc_type: "official"
39+
enabled: true
40+
41+
Rust:
42+
- url: "https://doc.rust-lang.org/book/"
43+
priority: 1
44+
doc_type: "official"
45+
enabled: true
46+
- url: "https://rust-lang.github.io/async-book/"
47+
priority: 2
48+
doc_type: "official"
49+
enabled: true
50+
51+
Go:
52+
- url: "https://go.dev/doc/"
53+
priority: 1
54+
doc_type: "official"
55+
enabled: true
56+
57+
Java:
58+
- url: "https://docs.oracle.com/javase/tutorial/"
59+
priority: 1
60+
doc_type: "official"
61+
enabled: true
62+
63+
# Web Frameworks
64+
Django:
65+
- url: "https://docs.djangoproject.com/"
66+
priority: 1
67+
doc_type: "official"
68+
enabled: true
69+
70+
Flask:
71+
- url: "https://flask.palletsprojects.com/"
72+
priority: 1
73+
doc_type: "official"
74+
enabled: true
75+
76+
FastAPI:
77+
- url: "https://fastapi.tiangolo.com/"
78+
priority: 1
79+
doc_type: "official"
80+
enabled: true
81+
82+
React:
83+
- url: "https://react.dev/"
84+
priority: 1
85+
doc_type: "official"
86+
enabled: true
87+
88+
"Vue.js":
89+
- url: "https://vuejs.org/guide/"
90+
priority: 1
91+
doc_type: "official"
92+
enabled: true
93+
94+
Angular:
95+
- url: "https://angular.io/docs"
96+
priority: 1
97+
doc_type: "official"
98+
enabled: true
99+
100+
Express:
101+
- url: "https://expressjs.com/"
102+
priority: 1
103+
doc_type: "official"
104+
enabled: true
105+
106+
"Next.js":
107+
- url: "https://nextjs.org/docs"
108+
priority: 1
109+
doc_type: "official"
110+
enabled: true
111+
112+
# AI/ML Libraries
113+
LangChain:
114+
- url: "https://python.langchain.com/docs/"
115+
priority: 1
116+
doc_type: "official"
117+
enabled: true
118+
119+
"Anthropic SDK":
120+
- url: "https://docs.anthropic.com/"
121+
priority: 1
122+
doc_type: "official"
123+
enabled: true
124+
125+
FAISS:
126+
- url: "https://github.com/facebookresearch/faiss/wiki"
127+
priority: 1
128+
doc_type: "official"
129+
enabled: true
130+
131+
NumPy:
132+
- url: "https://numpy.org/doc/stable/"
133+
priority: 1
134+
doc_type: "official"
135+
enabled: true
136+
137+
Pandas:
138+
- url: "https://pandas.pydata.org/docs/"
139+
priority: 1
140+
doc_type: "official"
141+
enabled: true
142+
143+
PyTorch:
144+
- url: "https://pytorch.org/docs/stable/index.html"
145+
priority: 1
146+
doc_type: "official"
147+
enabled: true
148+
149+
TensorFlow:
150+
- url: "https://www.tensorflow.org/api_docs"
151+
priority: 1
152+
doc_type: "official"
153+
enabled: true

src/rag_cli/agents/maf/core/agent_communication.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@
77
import asyncio
88
import logging
99
import uuid
10+
from collections import deque
1011
from dataclasses import asdict, dataclass, field
1112
from datetime import datetime, timezone
1213
from enum import Enum
1314
from typing import Any, Dict, List, Optional, Set
1415

16+
from rag_cli.core.constants import MAX_EVENT_HISTORY
17+
1518

1619
class CommunicationType(Enum):
1720
"""Types of inter-agent communication"""
@@ -85,8 +88,8 @@ def __init__(self):
8588
constraints=[]
8689
)
8790

88-
# Communication history
89-
self.message_history: List[AgentMessage] = []
91+
# Communication history (bounded to prevent memory leaks)
92+
self.message_history = deque(maxlen=1000) # Keep last 1000 messages
9093
self.conversation_threads: Dict[str, List[str]] = {}
9194

9295
# Task coordination
@@ -392,7 +395,7 @@ class CommunicativeAgent:
392395
def __init__(self):
393396
self.communication_hub: Optional[AgentCommunicationHub] = None
394397
self.agent_id: str = ""
395-
self.received_messages: List[AgentMessage] = []
398+
self.received_messages = deque(maxlen=MAX_EVENT_HISTORY) # Bounded to prevent memory leaks
396399

397400
def connect_to_hub(self, hub: AgentCommunicationHub, agent_id: str, expertise: List[str] = None):
398401
"""Connect this agent to the communication hub"""

src/rag_cli/agents/maf/core/memory.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def encode(self, texts: List[str]) -> np.ndarray:
8484
embeddings = []
8585
for text in texts:
8686
# Generate deterministic mock embedding from text hash
87-
hash_obj = hashlib.md5(text.encode())
87+
hash_obj = hashlib.blake2b(text.encode(), digest_size=16)
8888
hash_bytes = hash_obj.digest()
8989

9090
# Convert to floats
@@ -201,8 +201,9 @@ async def store(self, memory_data: Dict[str, Any]) -> str:
201201
"""Store a memory"""
202202

203203
# Generate ID
204-
memory_id = hashlib.md5(
205-
f"{memory_data.get('content', '')}{time.time()}".encode()
204+
memory_id = hashlib.blake2b(
205+
f"{memory_data.get('content', '')}{time.time()}".encode(),
206+
digest_size=16
206207
).hexdigest()
207208

208209
# Extract content
@@ -275,7 +276,7 @@ async def search(self, query: str, limit: Optional[int] = None) -> List[Memory]:
275276
self.logger.debug("Searching for: %s...", query[:100])
276277

277278
# Check cache first
278-
cache_key = hashlib.md5(f"{query}{limit}".encode()).hexdigest()
279+
cache_key = hashlib.blake2b(f"{query}{limit}".encode(), digest_size=16).hexdigest()
279280
if cache_key in self.memory_cache:
280281
self.cache_hits += 1
281282
self.logger.debug("Cache hit for query")
@@ -437,7 +438,7 @@ async def consolidate(self):
437438
summary = f"Consolidated {len(old_memories)} memories from before {cutoff_date}"
438439

439440
# Store consolidation
440-
consolidation_id = hashlib.md5(f"{summary}{time.time()}".encode()).hexdigest()
441+
consolidation_id = hashlib.blake2b(f"{summary}{time.time()}".encode(), digest_size=16).hexdigest()
441442

442443
cursor.execute('''
443444
INSERT INTO consolidations (id, summary, memory_ids, timestamp)
@@ -450,9 +451,10 @@ async def consolidate(self):
450451
))
451452

452453
# Delete old memories
453-
cursor.execute('''
454+
placeholders = ','.join('?' * len(memory_ids))
455+
cursor.execute(f'''
454456
DELETE FROM memories
455-
WHERE id IN ({','.join(['?'] * len(memory_ids))})
457+
WHERE id IN ({placeholders})
456458
''', memory_ids)
457459

458460
conn.commit()

src/rag_cli/agents/maf/core/orchestrator.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import logging
77
import time
88
import uuid
9-
from collections import Counter
9+
from collections import Counter, deque
1010
from dataclasses import dataclass
1111
from datetime import datetime, timezone
1212
from enum import Enum
@@ -94,7 +94,7 @@ def __init__(self, agents: Dict[str, Any], message_bus: Any = None):
9494
self.total_workflows = 0
9595
self.completed_workflows = 0
9696
self.failed_workflows = 0
97-
self.workflow_history = []
97+
self.workflow_history = deque(maxlen=1000) # Bounded to prevent memory leaks
9898

9999
self.logger.info("Orchestrator initialized with %s agents", len(agents))
100100

@@ -626,19 +626,19 @@ def check_maf_status() -> Dict[str, Any]:
626626
"""
627627
try:
628628
# Try to import and initialize orchestrator
629-
from agents.maf.core.agent import AgentRegistry
630-
from agents.maf.core.orchestrator import WorkflowOrchestrator
631-
629+
from rag_cli.agents.maf.core.agent import AgentRegistry
630+
from rag_cli.agents.maf.core.orchestrator import WorkflowOrchestrator
631+
632632
# Get agent registry
633633
registry = AgentRegistry()
634634
agents = registry.get_all_agents()
635-
635+
636636
# Create orchestrator instance
637637
orchestrator = WorkflowOrchestrator(agents=agents)
638-
638+
639639
# Get stats
640640
stats = orchestrator.get_stats()
641-
641+
642642
return {
643643
'available': True,
644644
'orchestrator_initialized': True,

src/rag_cli/agents/maf/core/task_classifier.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
Automatically determines the appropriate workflow and agents based on task description
55
"""
66

7-
from typing import Dict, List, Optional
7+
from typing import Dict, List, Optional, Any
88
from dataclasses import dataclass
99

1010

@@ -247,7 +247,7 @@ async def get_claude_classification(self, task_description: str, claude_cli) ->
247247

248248
return None
249249

250-
def get_task_summary(self, classification: TaskClassification, task_description: str) -> Dict[str, any]:
250+
def get_task_summary(self, classification: TaskClassification, task_description: str) -> Dict[str, Any]:
251251
"""Generate a summary of the task classification"""
252252

253253
return {

src/rag_cli/agents/query_decomposer.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import re
2020
import asyncio
21+
import threading
2122
from typing import List, Dict, Any, Optional, Tuple
2223
from dataclasses import dataclass
2324
from enum import Enum
@@ -456,18 +457,21 @@ def _create_result(
456457

457458
# Singleton instance
458459
_decomposer: Optional[QueryDecomposer] = None
460+
_decomposer_lock = threading.Lock()
459461

460462

461463
def get_query_decomposer() -> QueryDecomposer:
462-
"""Get or create the global query decomposer instance.
464+
"""Get or create the global query decomposer instance with thread-safe initialization.
463465
464466
Returns:
465467
Query decomposer instance
466468
"""
467469
global _decomposer
468470

469471
if _decomposer is None:
470-
_decomposer = QueryDecomposer()
472+
with _decomposer_lock:
473+
if _decomposer is None:
474+
_decomposer = QueryDecomposer()
471475

472476
return _decomposer
473477

src/rag_cli/agents/result_synthesizer.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import hashlib
2323

2424
from rag_cli.core.retrieval_pipeline import RetrievalResult
25+
from rag_cli.core.constants import SIMILARITY_THRESHOLD
2526
from rag_cli.agents.query_decomposer import SubQuery
2627
from rag_cli.utils.logger import get_logger
2728

@@ -44,7 +45,7 @@ class ResultSynthesizer:
4445

4546
def __init__(self):
4647
"""Initialize result synthesizer."""
47-
self.similarity_threshold = 0.85 # For deduplication
48+
self.similarity_threshold = SIMILARITY_THRESHOLD # For deduplication
4849
self.max_merged_results = 15 # Limit final result set
4950

5051
logger.info(
@@ -161,7 +162,7 @@ def _deduplicate_results(
161162

162163
for sq_idx, result in results_with_source:
163164
# Create hash of text content
164-
text_hash = hashlib.md5(result.text.encode()).hexdigest()
165+
text_hash = hashlib.blake2b(result.text.encode(), digest_size=16).hexdigest()
165166

166167
# Check exact duplicate
167168
if text_hash in seen_hashes:
@@ -371,7 +372,7 @@ async def test_synthesizer():
371372
print("Testing Result Synthesizer...")
372373
print("=" * 70)
373374

374-
from agents.query_decomposer import SubQuery
375+
from rag_cli.agents.query_decomposer import SubQuery
375376

376377
synthesizer = get_result_synthesizer()
377378

0 commit comments

Comments (0)