Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
428 changes: 428 additions & 0 deletions AUDIT_COMPLETION_SUMMARY.md

Large diffs are not rendered by default.

1,320 changes: 1,320 additions & 0 deletions CODE_AUDIT_REPORT.md

Large diffs are not rendered by default.

566 changes: 566 additions & 0 deletions PARALLEL_AGENT_TASKS.md

Large diffs are not rendered by default.

153 changes: 153 additions & 0 deletions config/documentation_sources.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
# Documentation Sources Configuration
#
# This file defines documentation sources for various technologies that can be
# automatically fetched and indexed by RAG-CLI's project indexer.
#
# Structure: `documentation_sources` is a mapping keyed by technology name.
# Each technology maps to a list of sources, where each source has:
# - url: Documentation URL
# - priority: Fetch priority (1=highest)
# - doc_type: Type of documentation (official, tutorial, community)
# - enabled: Whether to fetch this source

documentation_sources:
  # Programming Languages
  Python:
    - url: "https://docs.python.org/3/"
      priority: 1
      doc_type: "official"
      enabled: true
    - url: "https://realpython.com/"
      priority: 2
      doc_type: "tutorial"
      enabled: true

  JavaScript:
    - url: "https://developer.mozilla.org/en-US/docs/Web/JavaScript"
      priority: 1
      doc_type: "official"
      enabled: true
    - url: "https://javascript.info/"
      priority: 2
      doc_type: "tutorial"
      enabled: true

  TypeScript:
    - url: "https://www.typescriptlang.org/docs/"
      priority: 1
      doc_type: "official"
      enabled: true

  Rust:
    - url: "https://doc.rust-lang.org/book/"
      priority: 1
      doc_type: "official"
      enabled: true
    - url: "https://rust-lang.github.io/async-book/"
      priority: 2
      doc_type: "official"
      enabled: true

  Go:
    - url: "https://go.dev/doc/"
      priority: 1
      doc_type: "official"
      enabled: true

  Java:
    - url: "https://docs.oracle.com/javase/tutorial/"
      priority: 1
      doc_type: "official"
      enabled: true

  # Web Frameworks
  Django:
    - url: "https://docs.djangoproject.com/"
      priority: 1
      doc_type: "official"
      enabled: true

  Flask:
    - url: "https://flask.palletsprojects.com/"
      priority: 1
      doc_type: "official"
      enabled: true

  FastAPI:
    - url: "https://fastapi.tiangolo.com/"
      priority: 1
      doc_type: "official"
      enabled: true

  React:
    - url: "https://react.dev/"
      priority: 1
      doc_type: "official"
      enabled: true

  "Vue.js":
    - url: "https://vuejs.org/guide/"
      priority: 1
      doc_type: "official"
      enabled: true

  Angular:
    - url: "https://angular.io/docs"
      priority: 1
      doc_type: "official"
      enabled: true

  Express:
    - url: "https://expressjs.com/"
      priority: 1
      doc_type: "official"
      enabled: true

  "Next.js":
    - url: "https://nextjs.org/docs"
      priority: 1
      doc_type: "official"
      enabled: true

  # AI/ML Libraries
  LangChain:
    - url: "https://python.langchain.com/docs/"
      priority: 1
      doc_type: "official"
      enabled: true

  "Anthropic SDK":
    - url: "https://docs.anthropic.com/"
      priority: 1
      doc_type: "official"
      enabled: true

  FAISS:
    - url: "https://github.com/facebookresearch/faiss/wiki"
      priority: 1
      doc_type: "official"
      enabled: true

  NumPy:
    - url: "https://numpy.org/doc/stable/"
      priority: 1
      doc_type: "official"
      enabled: true

  Pandas:
    - url: "https://pandas.pydata.org/docs/"
      priority: 1
      doc_type: "official"
      enabled: true

  PyTorch:
    - url: "https://pytorch.org/docs/stable/index.html"
      priority: 1
      doc_type: "official"
      enabled: true

  TensorFlow:
    - url: "https://www.tensorflow.org/api_docs"
      priority: 1
      doc_type: "official"
      enabled: true
9 changes: 6 additions & 3 deletions src/rag_cli/agents/maf/core/agent_communication.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@
import asyncio
import logging
import uuid
from collections import deque
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Dict, List, Optional, Set

from rag_cli.core.constants import MAX_EVENT_HISTORY


class CommunicationType(Enum):
"""Types of inter-agent communication"""
Expand Down Expand Up @@ -85,8 +88,8 @@ def __init__(self):
constraints=[]
)

# Communication history
self.message_history: List[AgentMessage] = []
# Communication history (bounded to prevent memory leaks)
self.message_history = deque(maxlen=1000) # Keep last 1000 messages
self.conversation_threads: Dict[str, List[str]] = {}

# Task coordination
Expand Down Expand Up @@ -392,7 +395,7 @@ class CommunicativeAgent:
def __init__(self):
self.communication_hub: Optional[AgentCommunicationHub] = None
self.agent_id: str = ""
self.received_messages: List[AgentMessage] = []
self.received_messages = deque(maxlen=MAX_EVENT_HISTORY) # Bounded to prevent memory leaks

def connect_to_hub(self, hub: AgentCommunicationHub, agent_id: str, expertise: List[str] = None):
"""Connect this agent to the communication hub"""
Expand Down
16 changes: 9 additions & 7 deletions src/rag_cli/agents/maf/core/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def encode(self, texts: List[str]) -> np.ndarray:
embeddings = []
for text in texts:
# Generate deterministic mock embedding from text hash
hash_obj = hashlib.md5(text.encode())
hash_obj = hashlib.blake2b(text.encode(), digest_size=16)
hash_bytes = hash_obj.digest()

# Convert to floats
Expand Down Expand Up @@ -201,8 +201,9 @@ async def store(self, memory_data: Dict[str, Any]) -> str:
"""Store a memory"""

# Generate ID
memory_id = hashlib.md5(
f"{memory_data.get('content', '')}{time.time()}".encode()
memory_id = hashlib.blake2b(
f"{memory_data.get('content', '')}{time.time()}".encode(),
digest_size=16
).hexdigest()

# Extract content
Expand Down Expand Up @@ -275,7 +276,7 @@ async def search(self, query: str, limit: Optional[int] = None) -> List[Memory]:
self.logger.debug("Searching for: %s...", query[:100])

# Check cache first
cache_key = hashlib.md5(f"{query}{limit}".encode()).hexdigest()
cache_key = hashlib.blake2b(f"{query}{limit}".encode(), digest_size=16).hexdigest()
if cache_key in self.memory_cache:
self.cache_hits += 1
self.logger.debug("Cache hit for query")
Expand Down Expand Up @@ -437,7 +438,7 @@ async def consolidate(self):
summary = f"Consolidated {len(old_memories)} memories from before {cutoff_date}"

# Store consolidation
consolidation_id = hashlib.md5(f"{summary}{time.time()}".encode()).hexdigest()
consolidation_id = hashlib.blake2b(f"{summary}{time.time()}".encode(), digest_size=16).hexdigest()

cursor.execute('''
INSERT INTO consolidations (id, summary, memory_ids, timestamp)
Expand All @@ -450,9 +451,10 @@ async def consolidate(self):
))

# Delete old memories
cursor.execute('''
placeholders = ','.join('?' * len(memory_ids))
cursor.execute(f'''
DELETE FROM memories
WHERE id IN ({','.join(['?'] * len(memory_ids))})
WHERE id IN ({placeholders})
''', memory_ids)

conn.commit()
Expand Down
16 changes: 8 additions & 8 deletions src/rag_cli/agents/maf/core/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import logging
import time
import uuid
from collections import Counter
from collections import Counter, deque
from dataclasses import dataclass
from datetime import datetime, timezone
from enum import Enum
Expand Down Expand Up @@ -94,7 +94,7 @@ def __init__(self, agents: Dict[str, Any], message_bus: Any = None):
self.total_workflows = 0
self.completed_workflows = 0
self.failed_workflows = 0
self.workflow_history = []
self.workflow_history = deque(maxlen=1000) # Bounded to prevent memory leaks

self.logger.info("Orchestrator initialized with %s agents", len(agents))

Expand Down Expand Up @@ -626,19 +626,19 @@ def check_maf_status() -> Dict[str, Any]:
"""
try:
# Try to import and initialize orchestrator
from agents.maf.core.agent import AgentRegistry
from agents.maf.core.orchestrator import WorkflowOrchestrator
from rag_cli.agents.maf.core.agent import AgentRegistry
from rag_cli.agents.maf.core.orchestrator import WorkflowOrchestrator

# Get agent registry
registry = AgentRegistry()
agents = registry.get_all_agents()

# Create orchestrator instance
orchestrator = WorkflowOrchestrator(agents=agents)

# Get stats
stats = orchestrator.get_stats()

return {
'available': True,
'orchestrator_initialized': True,
Expand Down
4 changes: 2 additions & 2 deletions src/rag_cli/agents/maf/core/task_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
Automatically determines the appropriate workflow and agents based on task description
"""

from typing import Dict, List, Optional
from typing import Dict, List, Optional, Any
from dataclasses import dataclass


Expand Down Expand Up @@ -247,7 +247,7 @@ async def get_claude_classification(self, task_description: str, claude_cli) ->

return None

def get_task_summary(self, classification: TaskClassification, task_description: str) -> Dict[str, any]:
def get_task_summary(self, classification: TaskClassification, task_description: str) -> Dict[str, Any]:
"""Generate a summary of the task classification"""

return {
Expand Down
8 changes: 6 additions & 2 deletions src/rag_cli/agents/query_decomposer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import re
import asyncio
import threading
from typing import List, Dict, Any, Optional, Tuple
from dataclasses import dataclass
from enum import Enum
Expand Down Expand Up @@ -456,18 +457,21 @@ def _create_result(

# Singleton instance
_decomposer: Optional[QueryDecomposer] = None
_decomposer_lock = threading.Lock()


def get_query_decomposer() -> QueryDecomposer:
"""Get or create the global query decomposer instance.
"""Get or create the global query decomposer instance with thread-safe initialization.

Returns:
Query decomposer instance
"""
global _decomposer

if _decomposer is None:
_decomposer = QueryDecomposer()
with _decomposer_lock:
if _decomposer is None:
_decomposer = QueryDecomposer()

return _decomposer

Expand Down
7 changes: 4 additions & 3 deletions src/rag_cli/agents/result_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import hashlib

from rag_cli.core.retrieval_pipeline import RetrievalResult
from rag_cli.core.constants import SIMILARITY_THRESHOLD
from rag_cli.agents.query_decomposer import SubQuery
from rag_cli.utils.logger import get_logger

Expand All @@ -44,7 +45,7 @@ class ResultSynthesizer:

def __init__(self):
"""Initialize result synthesizer."""
self.similarity_threshold = 0.85 # For deduplication
self.similarity_threshold = SIMILARITY_THRESHOLD # For deduplication
self.max_merged_results = 15 # Limit final result set

logger.info(
Expand Down Expand Up @@ -161,7 +162,7 @@ def _deduplicate_results(

for sq_idx, result in results_with_source:
# Create hash of text content
text_hash = hashlib.md5(result.text.encode()).hexdigest()
text_hash = hashlib.blake2b(result.text.encode(), digest_size=16).hexdigest()

# Check exact duplicate
if text_hash in seen_hashes:
Expand Down Expand Up @@ -371,7 +372,7 @@ async def test_synthesizer():
print("Testing Result Synthesizer...")
print("=" * 70)

from agents.query_decomposer import SubQuery
from rag_cli.agents.query_decomposer import SubQuery

synthesizer = get_result_synthesizer()

Expand Down
Loading
Loading