Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
428 changes: 428 additions & 0 deletions AUDIT_COMPLETION_SUMMARY.md

Large diffs are not rendered by default.

1,320 changes: 1,320 additions & 0 deletions CODE_AUDIT_REPORT.md

Large diffs are not rendered by default.

566 changes: 566 additions & 0 deletions PARALLEL_AGENT_TASKS.md

Large diffs are not rendered by default.

153 changes: 153 additions & 0 deletions config/documentation_sources.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
# Documentation Sources Configuration
#
# This file defines documentation sources for various technologies that can be
# automatically fetched and indexed by RAG-CLI's project indexer.
#
# Structure: `documentation_sources` is a mapping keyed by technology name.
# Each technology maps to a list of sources, where each source has:
# - url: Documentation URL
# - priority: Fetch priority (1=highest)
# - doc_type: Type of documentation (official, tutorial, community)
# - enabled: Whether to fetch this source

documentation_sources:
  # Programming Languages
  Python:
    - url: "https://docs.python.org/3/"
      priority: 1
      doc_type: "official"
      enabled: true
    - url: "https://realpython.com/"
      priority: 2
      doc_type: "tutorial"
      enabled: true

  JavaScript:
    - url: "https://developer.mozilla.org/en-US/docs/Web/JavaScript"
      priority: 1
      doc_type: "official"
      enabled: true
    - url: "https://javascript.info/"
      priority: 2
      doc_type: "tutorial"
      enabled: true

  TypeScript:
    - url: "https://www.typescriptlang.org/docs/"
      priority: 1
      doc_type: "official"
      enabled: true

  Rust:
    - url: "https://doc.rust-lang.org/book/"
      priority: 1
      doc_type: "official"
      enabled: true
    - url: "https://rust-lang.github.io/async-book/"
      priority: 2
      doc_type: "official"
      enabled: true

  Go:
    - url: "https://go.dev/doc/"
      priority: 1
      doc_type: "official"
      enabled: true

  Java:
    - url: "https://docs.oracle.com/javase/tutorial/"
      priority: 1
      doc_type: "official"
      enabled: true

  # Web Frameworks
  Django:
    - url: "https://docs.djangoproject.com/"
      priority: 1
      doc_type: "official"
      enabled: true

  Flask:
    - url: "https://flask.palletsprojects.com/"
      priority: 1
      doc_type: "official"
      enabled: true

  FastAPI:
    - url: "https://fastapi.tiangolo.com/"
      priority: 1
      doc_type: "official"
      enabled: true

  React:
    - url: "https://react.dev/"
      priority: 1
      doc_type: "official"
      enabled: true

  "Vue.js":
    - url: "https://vuejs.org/guide/"
      priority: 1
      doc_type: "official"
      enabled: true

  Angular:
    - url: "https://angular.io/docs"
      priority: 1
      doc_type: "official"
      enabled: true

  Express:
    - url: "https://expressjs.com/"
      priority: 1
      doc_type: "official"
      enabled: true

  "Next.js":
    - url: "https://nextjs.org/docs"
      priority: 1
      doc_type: "official"
      enabled: true

  # AI/ML Libraries
  LangChain:
    - url: "https://python.langchain.com/docs/"
      priority: 1
      doc_type: "official"
      enabled: true

  "Anthropic SDK":
    - url: "https://docs.anthropic.com/"
      priority: 1
      doc_type: "official"
      enabled: true

  FAISS:
    - url: "https://github.com/facebookresearch/faiss/wiki"
      priority: 1
      doc_type: "official"
      enabled: true

  NumPy:
    - url: "https://numpy.org/doc/stable/"
      priority: 1
      doc_type: "official"
      enabled: true

  Pandas:
    - url: "https://pandas.pydata.org/docs/"
      priority: 1
      doc_type: "official"
      enabled: true

  PyTorch:
    - url: "https://pytorch.org/docs/stable/index.html"
      priority: 1
      doc_type: "official"
      enabled: true

  TensorFlow:
    - url: "https://www.tensorflow.org/api_docs"
      priority: 1
      doc_type: "official"
      enabled: true
9 changes: 6 additions & 3 deletions src/rag_cli/agents/maf/core/agent_communication.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@
import asyncio
import logging
import uuid
from collections import deque
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Dict, List, Optional, Set

from rag_cli.core.constants import MAX_EVENT_HISTORY


class CommunicationType(Enum):
"""Types of inter-agent communication"""
Expand Down Expand Up @@ -85,8 +88,8 @@ def __init__(self):
constraints=[]
)

# Communication history
self.message_history: List[AgentMessage] = []
# Communication history (bounded to prevent memory leaks)
self.message_history = deque(maxlen=1000) # Keep last 1000 messages
self.conversation_threads: Dict[str, List[str]] = {}

# Task coordination
Expand Down Expand Up @@ -392,7 +395,7 @@ class CommunicativeAgent:
def __init__(self):
self.communication_hub: Optional[AgentCommunicationHub] = None
self.agent_id: str = ""
self.received_messages: List[AgentMessage] = []
self.received_messages = deque(maxlen=MAX_EVENT_HISTORY) # Bounded to prevent memory leaks

def connect_to_hub(self, hub: AgentCommunicationHub, agent_id: str, expertise: List[str] = None):
"""Connect this agent to the communication hub"""
Expand Down
16 changes: 9 additions & 7 deletions src/rag_cli/agents/maf/core/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def encode(self, texts: List[str]) -> np.ndarray:
embeddings = []
for text in texts:
# Generate deterministic mock embedding from text hash
hash_obj = hashlib.md5(text.encode())
hash_obj = hashlib.blake2b(text.encode(), digest_size=16)
hash_bytes = hash_obj.digest()

# Convert to floats
Expand Down Expand Up @@ -201,8 +201,9 @@ async def store(self, memory_data: Dict[str, Any]) -> str:
"""Store a memory"""

# Generate ID
memory_id = hashlib.md5(
f"{memory_data.get('content', '')}{time.time()}".encode()
memory_id = hashlib.blake2b(
f"{memory_data.get('content', '')}{time.time()}".encode(),
digest_size=16
).hexdigest()

# Extract content
Expand Down Expand Up @@ -275,7 +276,7 @@ async def search(self, query: str, limit: Optional[int] = None) -> List[Memory]:
self.logger.debug("Searching for: %s...", query[:100])

# Check cache first
cache_key = hashlib.md5(f"{query}{limit}".encode()).hexdigest()
cache_key = hashlib.blake2b(f"{query}{limit}".encode(), digest_size=16).hexdigest()
if cache_key in self.memory_cache:
self.cache_hits += 1
self.logger.debug("Cache hit for query")
Expand Down Expand Up @@ -437,7 +438,7 @@ async def consolidate(self):
summary = f"Consolidated {len(old_memories)} memories from before {cutoff_date}"

# Store consolidation
consolidation_id = hashlib.md5(f"{summary}{time.time()}".encode()).hexdigest()
consolidation_id = hashlib.blake2b(f"{summary}{time.time()}".encode(), digest_size=16).hexdigest()

cursor.execute('''
INSERT INTO consolidations (id, summary, memory_ids, timestamp)
Expand All @@ -450,9 +451,10 @@ async def consolidate(self):
))

# Delete old memories
cursor.execute('''
placeholders = ','.join('?' * len(memory_ids))
cursor.execute(f'''
DELETE FROM memories
WHERE id IN ({','.join(['?'] * len(memory_ids))})
WHERE id IN ({placeholders})
''', memory_ids)

conn.commit()
Expand Down
16 changes: 8 additions & 8 deletions src/rag_cli/agents/maf/core/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import logging
import time
import uuid
from collections import Counter
from collections import Counter, deque
from dataclasses import dataclass
from datetime import datetime, timezone
from enum import Enum
Expand Down Expand Up @@ -94,7 +94,7 @@ def __init__(self, agents: Dict[str, Any], message_bus: Any = None):
self.total_workflows = 0
self.completed_workflows = 0
self.failed_workflows = 0
self.workflow_history = []
self.workflow_history = deque(maxlen=1000) # Bounded to prevent memory leaks

self.logger.info("Orchestrator initialized with %s agents", len(agents))

Expand Down Expand Up @@ -626,19 +626,19 @@ def check_maf_status() -> Dict[str, Any]:
"""
try:
# Try to import and initialize orchestrator
from agents.maf.core.agent import AgentRegistry
from agents.maf.core.orchestrator import WorkflowOrchestrator
from rag_cli.agents.maf.core.agent import AgentRegistry
from rag_cli.agents.maf.core.orchestrator import WorkflowOrchestrator

# Get agent registry
registry = AgentRegistry()
agents = registry.get_all_agents()

# Create orchestrator instance
orchestrator = WorkflowOrchestrator(agents=agents)

# Get stats
stats = orchestrator.get_stats()

return {
'available': True,
'orchestrator_initialized': True,
Expand Down
4 changes: 2 additions & 2 deletions src/rag_cli/agents/maf/core/task_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
Automatically determines the appropriate workflow and agents based on task description
"""

from typing import Dict, List, Optional
from typing import Dict, List, Optional, Any
from dataclasses import dataclass


Expand Down Expand Up @@ -247,7 +247,7 @@ async def get_claude_classification(self, task_description: str, claude_cli) ->

return None

def get_task_summary(self, classification: TaskClassification, task_description: str) -> Dict[str, any]:
def get_task_summary(self, classification: TaskClassification, task_description: str) -> Dict[str, Any]:
"""Generate a summary of the task classification"""

return {
Expand Down
8 changes: 6 additions & 2 deletions src/rag_cli/agents/query_decomposer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import re
import asyncio
import threading
from typing import List, Dict, Any, Optional, Tuple
from dataclasses import dataclass
from enum import Enum
Expand Down Expand Up @@ -456,18 +457,21 @@ def _create_result(

# Singleton instance
_decomposer: Optional[QueryDecomposer] = None
_decomposer_lock = threading.Lock()


def get_query_decomposer() -> QueryDecomposer:
"""Get or create the global query decomposer instance.
"""Get or create the global query decomposer instance with thread-safe initialization.

Returns:
Query decomposer instance
"""
global _decomposer

if _decomposer is None:
_decomposer = QueryDecomposer()
with _decomposer_lock:
if _decomposer is None:
_decomposer = QueryDecomposer()

return _decomposer

Expand Down
7 changes: 4 additions & 3 deletions src/rag_cli/agents/result_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import hashlib

from rag_cli.core.retrieval_pipeline import RetrievalResult
from rag_cli.core.constants import SIMILARITY_THRESHOLD
from rag_cli.agents.query_decomposer import SubQuery
from rag_cli.utils.logger import get_logger

Expand All @@ -44,7 +45,7 @@ class ResultSynthesizer:

def __init__(self):
"""Initialize result synthesizer."""
self.similarity_threshold = 0.85 # For deduplication
self.similarity_threshold = SIMILARITY_THRESHOLD # For deduplication
self.max_merged_results = 15 # Limit final result set

logger.info(
Expand Down Expand Up @@ -161,7 +162,7 @@ def _deduplicate_results(

for sq_idx, result in results_with_source:
# Create hash of text content
text_hash = hashlib.md5(result.text.encode()).hexdigest()
text_hash = hashlib.blake2b(result.text.encode(), digest_size=16).hexdigest()

# Check exact duplicate
if text_hash in seen_hashes:
Expand Down Expand Up @@ -371,7 +372,7 @@ async def test_synthesizer():
print("Testing Result Synthesizer...")
print("=" * 70)

from agents.query_decomposer import SubQuery
from rag_cli.agents.query_decomposer import SubQuery

synthesizer = get_result_synthesizer()

Expand Down
Loading
Loading