add radix cache hit rate

sufubao · sufubao · commit ee5c4df6d37b · 2025-12-10T09:15:48.000Z
diff --git a/lightllm/server/router/dynamic_prompt/radix_cache.py b/lightllm/server/router/dynamic_prompt/radix_cache.py
@@ -125,6 +125,16 @@ def __init__(self, unique_name, total_token_num, rank_in_node, mem_manager=None)
         )
         self.tree_total_tokens_num.arr[0] = 0
 
+        # Hit rate tracking
+        self.match_prefix_total_calls = SharedArray(
+            f"{unique_name}_match_prefix_total_calls_{rank_in_node}", (1,), dtype=np.int64
+        )
+        self.match_prefix_total_calls.arr[0] = 0
+        self.match_prefix_hit_tokens = SharedArray(
+            f"{unique_name}_match_prefix_hit_tokens_{rank_in_node}", (1,), dtype=np.int64
+        )
+        self.match_prefix_hit_tokens.arr[0] = 0
+
     def insert(self, key, value=None, buffer_idx=None) -> Tuple[int, Optional[TreeNode]]:
         if value is None:
             value = key
@@ -232,13 +242,21 @@ def _insert_helper_no_recursion(
 
     def match_prefix(self, key, update_refs=False):
         assert len(key) != 0
+
+        # Track total calls
+        self.match_prefix_total_calls.arr[0] += 1
+
         ans_value_list = []
         tree_node = self._match_prefix_helper(self.root_node, key, ans_value_list, update_refs=update_refs)
         if tree_node != self.root_node:
             if len(ans_value_list) != 0:
                 value = torch.concat(ans_value_list)
             else:
                 value = torch.zeros((0,), device="cpu", dtype=self._value_dtype)
+
+            # Track hit tokens
+            self.match_prefix_hit_tokens.arr[0] += len(value)
+
             return tree_node, len(value), value
         else:
             self.dec_node_ref_counter(self.root_node)
@@ -509,6 +527,24 @@ def release_mem(mem_index):
             self.mem_manager.free(mem_index)
         return
 
+    def get_match_prefix_hit_rate(self):
+        """Get the hit rate as a ratio of hit tokens to total requested tokens"""
+        total_calls = self.match_prefix_total_calls.arr[0]
+        if total_calls == 0:
+            return 0.0
+        # We calculate hit rate as the average hit tokens per call
+        # Note: This is a simplified metric. For true hit rate, you might want to track total requested tokens
+        total_hit_tokens = self.match_prefix_hit_tokens.arr[0]
+        return total_hit_tokens / total_calls if total_calls > 0 else 0.0
+
+    def get_match_prefix_stats(self):
+        """Get detailed match_prefix statistics"""
+        return {
+            "total_calls": self.match_prefix_total_calls.arr[0],
+            "total_hit_tokens": self.match_prefix_hit_tokens.arr[0],
+            "hit_rate": self.get_match_prefix_hit_rate(),
+        }
+
 
 class _RadixCacheReadOnlyClient:
     """
@@ -520,6 +556,13 @@ def __init__(self, unique_name, total_token_num, rank_in_node):
         self.tree_total_tokens_num = SharedArray(
             f"{unique_name}_tree_total_tokens_num_{rank_in_node}", (1,), dtype=np.int64
         )
+        # Hit rate tracking
+        self.match_prefix_total_calls = SharedArray(
+            f"{unique_name}_match_prefix_total_calls_{rank_in_node}", (1,), dtype=np.int64
+        )
+        self.match_prefix_hit_tokens = SharedArray(
+            f"{unique_name}_match_prefix_hit_tokens_{rank_in_node}", (1,), dtype=np.int64
+        )
 
     def get_refed_tokens_num(self):
         return self.refed_tokens_num.arr[0]
@@ -530,6 +573,22 @@ def get_tree_total_tokens_num(self):
     def get_unrefed_tokens_num(self):
         return self.tree_total_tokens_num.arr[0] - self.refed_tokens_num.arr[0]
 
+    def get_match_prefix_hit_rate(self):
+        """Get the hit rate as a ratio of hit tokens to total calls"""
+        total_calls = self.match_prefix_total_calls.arr[0]
+        if total_calls == 0:
+            return 0.0
+        total_hit_tokens = self.match_prefix_hit_tokens.arr[0]
+        return total_hit_tokens / total_calls if total_calls > 0 else 0.0
+
+    def get_match_prefix_stats(self):
+        """Get detailed match_prefix statistics"""
+        return {
+            "total_calls": self.match_prefix_total_calls.arr[0],
+            "total_hit_tokens": self.match_prefix_hit_tokens.arr[0],
+            "hit_rate": self.get_match_prefix_hit_rate(),
+        }
+
 
 class RadixCacheReadOnlyClient:
     def __init__(self, unique_name, total_token_num, node_world_size, dp_world_size):
@@ -546,3 +605,9 @@ def get_tree_total_tokens_num(self, dp_rank_in_node):
 
     def get_unrefed_tokens_num(self, dp_rank_in_node):
         return self.dp_rank_clients[dp_rank_in_node].get_unrefed_tokens_num()
+
+    def get_match_prefix_hit_rate(self, dp_rank_in_node):
+        return self.dp_rank_clients[dp_rank_in_node].get_match_prefix_hit_rate()
+
+    def get_match_prefix_stats(self, dp_rank_in_node):
+        return self.dp_rank_clients[dp_rank_in_node].get_match_prefix_stats()