
Commit d3b27ee

rename vars for clarity
1 parent af3c758 commit d3b27ee

File tree

3 files changed: +7, -17 lines


python/sglang/srt/layers/moe/lora_moe.py

Lines changed: 1 addition & 3 deletions
@@ -104,12 +104,10 @@ def _compute_lora_delta(
         num_loras = self.lora_a_weights.shape[0]
 
         # Dispatch tokens to experts
-        token_ids, expert_ids, _, lora_ids = moe_dispatch(
+        token_ids, expert_ids, sorted_topk_weights, lora_ids = moe_dispatch(
             topk_ids=topk_ids,
             topk_weights=topk_weights,
             lora_indices=lora_indices,
-            num_experts=num_experts,
-            num_loras=num_loras,
         )
 
 
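Note that the call site now binds the router weights, which were previously discarded as `_`. This commit does not show how `sorted_topk_weights` is consumed downstream; a minimal self-contained sketch of one plausible use, scaling each dispatched entry's LoRA delta by its router weight before accumulating per token (all names and shapes here are hypothetical, not from this commit):

import torch

# Hypothetical sizes, not from the commit: 8 dispatched entries over 4 tokens.
num_dispatched, num_tokens, hidden_dim = 8, 4, 16
delta = torch.randn(num_dispatched, hidden_dim)     # per-entry LoRA delta
sorted_topk_weights = torch.rand(num_dispatched)    # as returned by moe_dispatch
token_ids = torch.randint(0, num_tokens, (num_dispatched,))

# Weight each dispatched entry by its router weight, then scatter-add the
# results back into a per-token output buffer.
output = torch.zeros(num_tokens, hidden_dim)
output.index_add_(0, token_ids, delta * sorted_topk_weights.unsqueeze(-1))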

python/sglang/srt/lora/moe_dispatch.py

Lines changed: 3 additions & 10 deletions
@@ -21,8 +21,6 @@ def moe_dispatch(
     topk_ids: torch.Tensor,
     topk_weights: torch.Tensor,
     lora_indices: torch.Tensor,
-    num_experts: int,
-    num_loras: int,
 ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
     """
     Dispatch tokens to experts for MoE computation.
@@ -31,13 +29,11 @@
         topk_ids: [num_tokens, top_k] - Expert IDs selected by router
         topk_weights: [num_tokens, top_k] - Router weights
         lora_indices: [num_tokens] - LoRA adapter ID for each token
-        num_experts: Total number of experts
-        num_loras: Total number of LoRA adapters
 
     Returns:
         sorted_token_ids: Token indices sorted by expert_id
         sorted_expert_ids: Corresponding expert IDs
-        sorted_weights: Corresponding router weights
+        sorted_topk_weights: Corresponding router weights
         sorted_lora_ids: LoRA adapter IDs for each dispatched token
     """
     num_tokens, top_k = topk_ids.shape
@@ -54,10 +50,7 @@
 
     sorted_token_ids = flat_token_ids[sorted_indices]
     sorted_expert_ids = flat_topk_ids[sorted_indices]
-    sorted_weights = flat_topk_weights[sorted_indices]
-
-    if flat_lora_ids.shape != sorted_indices.shape:
-        y = 1 # need to pause
+    sorted_topk_weights = flat_topk_weights[sorted_indices]
     sorted_lora_ids = flat_lora_ids[sorted_indices]
 
-    return sorted_token_ids, sorted_expert_ids, sorted_weights, sorted_lora_ids
+    return sorted_token_ids, sorted_expert_ids, sorted_topk_weights, sorted_lora_ids
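Pieced together from the hunks above, the post-commit function plausibly reads as follows. The signature, docstring fields, gather lines, and return statement come from the diff; the flattening and argsort steps in the middle are assumptions, since the diff never shows them:

import torch

def moe_dispatch(
    topk_ids: torch.Tensor,
    topk_weights: torch.Tensor,
    lora_indices: torch.Tensor,
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    num_tokens, top_k = topk_ids.shape

    # Flatten the [num_tokens, top_k] routing tables into one row per
    # (token, expert) pair. (Assumed: these lines are not in the diff.)
    flat_token_ids = torch.arange(num_tokens, device=topk_ids.device).repeat_interleave(top_k)
    flat_topk_ids = topk_ids.reshape(-1)
    flat_topk_weights = topk_weights.reshape(-1)
    flat_lora_ids = lora_indices.repeat_interleave(top_k)

    # Stable sort by expert ID so all entries for one expert are contiguous.
    # (Assumed: the diff only shows the gathers below.)
    sorted_indices = torch.argsort(flat_topk_ids, stable=True)

    sorted_token_ids = flat_token_ids[sorted_indices]
    sorted_expert_ids = flat_topk_ids[sorted_indices]
    sorted_topk_weights = flat_topk_weights[sorted_indices]
    sorted_lora_ids = flat_lora_ids[sorted_indices]

    return sorted_token_ids, sorted_expert_ids, sorted_topk_weights, sorted_lora_ids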

test/srt/lora/test_lora_moe.py

Lines changed: 3 additions & 4 deletions
@@ -592,19 +592,18 @@ def test_moe_lora_basic_functionality(self):
         lora_indices = torch.tensor([0, 0, 1, 1], dtype=torch.int32)  # tokens 0,1 use lora 0; tokens 2,3 use lora 1
 
         # Run dispatch
-        token_ids, expert_ids, weights = moe_dispatch(
+        token_ids, expert_ids, sorted_topk_weights, lora_ids = moe_dispatch(
             topk_ids=topk_ids,
             topk_weights=topk_weights,
             lora_indices=lora_indices,
-            num_experts=num_experts,
-            num_loras=2,
         )
 
         # Verify results
         # Should have 4 tokens * 2 experts each = 8 dispatched entries
         self.assertEqual(len(token_ids), 8)
         self.assertEqual(len(expert_ids), 8)
-        self.assertEqual(len(weights), 8)
+        self.assertEqual(len(sorted_topk_weights), 8)
+        self.assertEqual(len(lora_ids), 8)
 
         # Check that tokens are grouped by expert (not by LoRA)
         # All tokens going to expert 0 should come first, then expert 1, etc.
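The hunk is cut off at the grouping check; a minimal sketch of an assertion that would express it, assuming the test continues in the same unittest style (these lines are not part of the commit):

        # Expert IDs must be non-decreasing, i.e. entries grouped by expert.
        self.assertTrue(torch.all(expert_ids[:-1] <= expert_ids[1:]).item())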
