
Commit c10886d

fix; refactor codes
1 parent 9ecfef2 commit c10886d

File tree

7 files changed: +89 -50 lines changed


rtp_llm/config/gpt_init_model_parameters.py

Lines changed: 20 additions & 3 deletions
@@ -16,6 +16,7 @@
     StaticConfig,
     get_env_bool,
     get_env_int,
+    get_env_optional_bool,
     get_env_str,
 )
 from rtp_llm.config.quant_config import (
@@ -537,6 +538,8 @@ def update_gpt_init_params_from_env(
     ):

         # ParallelismDistributedConfig
+        # USE_ALL_GATHER: Enable all-gather communication for pure TP (ep_size == tp_size).
+        # When enabled, DeepEP should not be used. Default is False.
         # Calculate use_all_gather: (USE_ALL_GATHER env is True) and (ep_size == tp_size)
         use_all_gather_env = get_env_bool("USE_ALL_GATHER", True)
         use_all_gather = use_all_gather_env and (
@@ -689,10 +692,24 @@ def update_gpt_init_params_from_env(
         )

         # MoeConfig
+        use_deepep_moe_env = get_env_optional_bool("USE_DEEPEP_MOE")
+        use_deepep_internode_env = get_env_optional_bool("USE_DEEPEP_INTERNODE")
+        use_deepep_low_latency_env = get_env_optional_bool("USE_DEEPEP_LOW_LATENCY")
+
         self.gpt_init_params.moe_config = MoeConfig(
-            use_deepep_moe=get_env_bool("USE_DEEPEP_MOE", False),
-            use_deepep_internode=get_env_bool("USE_DEEPEP_INTERNODE", False),
-            use_deepep_low_latency=get_env_bool("USE_DEEPEP_LOW_LATENCY", True),
+            use_deepep_moe=(
+                use_deepep_moe_env if use_deepep_moe_env is not None else False
+            ),
+            use_deepep_internode=(
+                use_deepep_internode_env
+                if use_deepep_internode_env is not None
+                else False
+            ),
+            use_deepep_low_latency=(
+                use_deepep_low_latency_env
+                if use_deepep_low_latency_env is not None
+                else True
+            ),
             use_deepep_p2p_low_latency=get_env_bool(
                 "USE_DEEPEP_P2P_LOW_LATENCY", False
             ),
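
Net effect: the MoeConfig flags move from two-state env reads to a tri-state resolution, where an unset variable yields None and only then falls back to the per-flag default. A minimal standalone sketch of the pattern, assuming nothing beyond what the diff shows (the condensed `in (...)` test is equivalent to the or-chain in the helper):

import os

def get_env_optional_bool(name: str):
    # None when the variable is unset or empty; otherwise the parsed bool.
    v = os.environ.get(name, None)
    if v is None or v == "":
        return None
    return v.lower() in ("1", "on", "true")

# Resolve the tri-state read against a per-flag default, as the diff does for
# use_deepep_moe (default False) and use_deepep_low_latency (default True).
use_deepep_moe_env = get_env_optional_bool("USE_DEEPEP_MOE")
use_deepep_moe = use_deepep_moe_env if use_deepep_moe_env is not None else False
print(use_deepep_moe)  # False when USE_DEEPEP_MOE is unset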

rtp_llm/config/py_config_modules.py

Lines changed: 30 additions & 0 deletions
@@ -40,6 +40,17 @@ def get_env_bool(name: str, default: bool = False):
     return v.lower() == "1" or v.lower() == "on" or v.lower() == "true"


+def get_env_optional_bool(name: str):
+    """
+    Get an optional bool from an environment variable.
+    Returns None if the environment variable is not set, otherwise the parsed bool value.
+    """
+    v = os.environ.get(name, None)
+    if v is None or v == "":
+        return None
+    return v.lower() == "1" or v.lower() == "on" or v.lower() == "true"
+
+
 class ServerConfig:
     def __init__(self):
         self.frontend_server_count = 4
@@ -882,6 +893,25 @@ def update_from_env(self):
         self.py_hw_kernel_config.update_from_env()
         logging.info(self.to_string())

+    def should_auto_configure_deepep(self) -> bool:
+        """
+        Check whether DeepEP should be auto-configured.
+        Returns True if none of the DeepEP environment variables are set (the user
+        has not configured them manually); False if any of them is set.
+        """
+        use_deepep_moe_env = get_env_optional_bool("USE_DEEPEP_MOE")
+        use_deepep_internode_env = get_env_optional_bool("USE_DEEPEP_INTERNODE")
+        use_deepep_low_latency_env = get_env_optional_bool("USE_DEEPEP_LOW_LATENCY")
+
+        # If all three are unset (None), auto-configure; if any is set, the user
+        # has configured DeepEP manually, so do not auto-configure.
+        return (
+            use_deepep_moe_env is None
+            and use_deepep_internode_env is None
+            and use_deepep_low_latency_env is None
+        )
+
     def to_string(self):
         return (
             "[server_config]\n" + self.server_config.to_string() + "\n\n"

rtp_llm/cpp/pybind/ConfigInit.cc

Lines changed: 1 addition & 1 deletion
@@ -35,7 +35,7 @@ void register_parallelism_distributed_config(pybind11::module& m) {
          pybind11::arg("world_rank") = 0,
          pybind11::arg("local_world_size") = 1,
          pybind11::arg("ffn_sp_size") = 1,
-         pybind11::arg("use_all_gather") = false)
+         pybind11::arg("use_all_gather") = true)
     .def("to_string", &ParallelismDistributedConfig::to_string)
     .def("update_from_env", &ParallelismDistributedConfig::update_from_env_for_test)
     .def_readwrite("tp_size", &ParallelismDistributedConfig::tp_size)
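
On the Python side, a ParallelismDistributedConfig constructed without arguments now starts with use_all_gather=True. A hypothetical usage sketch: the import path is an assumption, and it presumes use_all_gather is exposed via def_readwrite like the fields shown above:

from rtp_llm.ops import ParallelismDistributedConfig  # import path is an assumption

cfg = ParallelismDistributedConfig()  # no explicit use_all_gather
assert cfg.use_all_gather is True     # was False before this commit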

rtp_llm/server/server_args/moe_group_args.py

Lines changed: 6 additions & 6 deletions
@@ -10,24 +10,24 @@ def init_moe_group_args(parser):
         "--use_deepep_moe",
         env_name="USE_DEEPEP_MOE",
         type=str2bool,
-        default=False,
-        help="Set to `True` to enable DeepEP for the expert part of MoE models.",
+        default=None,
+        help="Set to `True` to enable DeepEP for the expert part of MoE models. Defaults to None, which allows auto-configuration.",
     )

     moe_group.add_argument(
         "--use_deepep_internode",
         env_name="USE_DEEPEP_INTERNODE",
         type=str2bool,
-        default=False,
-        help="Set to `True` to enable DeepEP to optimize inter-node communication.",
+        default=None,
+        help="Set to `True` to enable DeepEP to optimize inter-node communication. Defaults to None, which allows auto-configuration.",
     )

     moe_group.add_argument(
         "--use_deepep_low_latency",
         env_name="USE_DEEPEP_LOW_LATENCY",
         type=str2bool,
-        default=True,
-        help="Set to `True` to enable DeepEP's low-latency mode.",
+        default=None,
+        help="Set to `True` to enable DeepEP's low-latency mode. Defaults to None, which allows auto-configuration.",
     )

     moe_group.add_argument(
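
With default=None and a str2bool that passes None through, the CLI flags become tri-state too: an omitted flag now means "let the server decide" rather than a hard False or True. A minimal argparse sketch under those assumptions (simplified local str2bool; the real parser also wires env_name, omitted here):

import argparse

def str2bool(v):  # simplified copy; the real helper may accept other spellings
    if v is None:
        return None
    if isinstance(v, bool):
        return v
    return v.lower() in ("yes", "true", "t", "1", "on")

parser = argparse.ArgumentParser()
parser.add_argument("--use_deepep_moe", type=str2bool, default=None)

print(parser.parse_args([]).use_deepep_moe)                            # None
print(parser.parse_args(["--use_deepep_moe", "true"]).use_deepep_moe)  # True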

rtp_llm/server/server_args/test/server_args_test.py

Lines changed: 12 additions & 8 deletions
@@ -115,9 +115,13 @@ def test_default_args_env(self):
         self.assertIsNone(env.get("ACEXT_GEMM_CONFIG_DIR"))

         # 9. MoE expert parallelism
-        self.assertEqual(env.get("USE_DEEPEP_MOE"), "0")
-        self.assertEqual(env.get("USE_DEEPEP_INTERNODE"), "0")
-        self.assertEqual(env.get("USE_DEEPEP_LOW_LATENCY"), "1")
+        self.assertIsNone(env.get("USE_DEEPEP_MOE"))  # defaults to None, allowing auto-configuration
+        self.assertIsNone(
+            env.get("USE_DEEPEP_INTERNODE")
+        )  # defaults to None, allowing auto-configuration
+        self.assertIsNone(
+            env.get("USE_DEEPEP_LOW_LATENCY")
+        )  # defaults to None, allowing auto-configuration
         self.assertEqual(env.get("USE_DEEPEP_P2P_LOW_LATENCY"), "0")
         self.assertEqual(env.get("DEEP_EP_NUM_SM"), "0")
         self.assertEqual(env.get("FAKE_BALANCE_EXPERT"), "0")
@@ -945,8 +949,10 @@ def test_all_args_set_env(self):
         self.assertEqual(env["DASHSCOPE_HTTP_URL"], "http://test.url")
         self.assertEqual(env["DASHSCOPE_WEBSOCKET_URL"], "ws://test.url")
         self.assertEqual(env["OPENAI_API_KEY"], "test_openai_key")
-        self.assertEqual(env["JSON_MODEL_OVERRIDE_ARGS"],
-                         '{"rope_scaling":{"type":"yarn","factor":2.0,"original_max_position_embeddings":32768,"beta_slow":1.0,"beta_fast":1.0,"mscale":1.0,"extrapolation_factor":1.0}}')
+        self.assertEqual(
+            env["JSON_MODEL_OVERRIDE_ARGS"],
+            '{"rope_scaling":{"type":"yarn","factor":2.0,"original_max_position_embeddings":32768,"beta_slow":1.0,"beta_fast":1.0,"mscale":1.0,"extrapolation_factor":1.0}}',
+        )

         # 27. Lora Configuration
         self.assertEqual(env["LORA_INFO"], '{"lora1": "/path/to/lora1"}')
@@ -966,9 +972,7 @@ def test_all_args_set_env(self):

         # 30. Miscellaneous Configuration
         self.assertEqual(env["DISABLE_PDL"], "1")
-        self.assertEqual(
-            env["AUX_STRING"], ""
-        )
+        self.assertEqual(env["AUX_STRING"], "")

         # 31. PD-Separation Configuration
         self.assertEqual(env["PREFILL_RETRY_TIMES"], "2")

rtp_llm/server/server_args/util.py

Lines changed: 2 additions & 0 deletions
@@ -2,6 +2,8 @@


 def str2bool(v):
+    if v is None:
+        return None
     if isinstance(v, bool):
         return v
     if v.lower() in ("yes", "true", "t", "1", "on"):
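
The guard exists because the env-aware argument wiring can hand str2bool an absent value; without it, v.lower() raises on None. A small self-contained illustration (str2bool_unguarded is a hypothetical name for the pre-commit behavior):

def str2bool_unguarded(v):
    if isinstance(v, bool):
        return v
    return v.lower() in ("yes", "true", "t", "1", "on")

try:
    str2bool_unguarded(None)
except AttributeError as e:
    print(f"unguarded: {e}")  # 'NoneType' object has no attribute 'lower'

def str2bool(v):
    if v is None:
        return None  # preserve "unset" as a distinct state
    return str2bool_unguarded(v)

print(str2bool(None))  # None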

rtp_llm/start_server.py

Lines changed: 18 additions & 32 deletions
@@ -5,6 +5,7 @@
 import sys
 import time
 import traceback
+from math import log

 import requests

@@ -153,41 +154,19 @@ def monitor_and_release_process(backend_process, frontend_process):
 def should_auto_configure_deepep() -> bool:
     """
     Check if DeepEP should be auto-configured.
-    Returns True if current values match defaults (user hasn't manually set them).
-    Returns False if user has manually set any of the DeepEP values to non-default.
-
-    This function reads values from StaticConfig.moe_config and compares them with
-    default values to determine if user has manually configured.
-
-    Default values:
-    - USE_DEEPEP_MOE: False
-    - USE_DEEPEP_INTERNODE: False
-    - USE_DEEPEP_LOW_LATENCY: True
+    Returns True if the DeepEP environment variables are not set (None), meaning
+    the user has not configured them manually; False if any of them is set.
     """
-    # Default values
-    default_use_deepep_moe = False
-    default_use_deepep_internode = False
-    default_use_deepep_low_latency = True
-
-    # Read current values from StaticConfig.moe_config
-    current_use_deepep_moe = StaticConfig.moe_config.use_deepep_moe
-    current_use_deepep_internode = StaticConfig.moe_config.use_deepep_internode
-    current_use_deepep_low_latency = StaticConfig.moe_config.use_deepep_low_latency
-
-    # Check if current values match defaults
-    # If all match defaults, user hasn't manually set them, so we should auto-configure
-    # If any value differs from default, user has manually configured, so we shouldn't auto-configure
-    return (
-        current_use_deepep_moe == default_use_deepep_moe
-        and current_use_deepep_internode == default_use_deepep_internode
-        and current_use_deepep_low_latency == default_use_deepep_low_latency
-    )
+    return StaticConfig.should_auto_configure_deepep()


 def auto_configure_deepep(args: argparse.Namespace):
     """
     Automatically configure DeepEP settings based on deployment scenario.

+    Note: USE_ALL_GATHER should be enabled for pure TP scenarios (ep_size == tp_size).
+    When USE_ALL_GATHER is enabled, DeepEP should not be used.
+
     Configuration rules (for an 8-GPU machine):
     - Non-PD separation + Inference node + Single GPU (1TP): 0, 0, 0
     - Non-PD separation + Inference node + Single-node multi-GPU (>1TP): 1, 0, 0
@@ -199,10 +178,19 @@ def auto_configure_deepep(args: argparse.Namespace):
     - PD separation + Prefill node + Multi-node multi-GPU (>=9 GPUs): 1, 0, 1
     - PD separation + Decode node + Multi-node multi-GPU (>=9 GPUs): 1, 1, 1
     """
-    # If USE_ALL_GATHER is enabled, disable all DeepEP settings
-    use_all_gather = StaticConfig.parallelism_distributed_config.use_all_gather
+    logging.info("auto-configuring DeepEP")
+    # Get parallelism info for the use_all_gather calculation
+    world_size = g_parallel_info.world_size
+    tp_size = g_parallel_info.tp_size
+    ep_size = g_parallel_info.ep_size
+    logging.info(f"world_size: {world_size}, tp_size: {tp_size}, ep_size: {ep_size}")
+    # If USE_ALL_GATHER is enabled (for pure TP scenarios), disable all DeepEP settings
+    # Calculate use_all_gather: (USE_ALL_GATHER env is True) and (ep_size == tp_size)
+    use_all_gather_env = StaticConfig.parallelism_distributed_config.use_all_gather
+    use_all_gather = use_all_gather_env and (ep_size == tp_size)

     if use_all_gather:
+        logging.info("use_all_gather enabled in `auto_configure_deepep`; disabling DeepEP")
         os.environ["USE_DEEPEP_MOE"] = "0"
         os.environ["USE_DEEPEP_LOW_LATENCY"] = "0"
         os.environ["USE_DEEPEP_INTERNODE"] = "0"
@@ -217,8 +205,6 @@ def auto_configure_deepep(args: argparse.Namespace):
     role_type = (
         role_type_enum.name if hasattr(role_type_enum, "name") else str(role_type_enum)
     )
-    world_size = g_parallel_info.world_size
-    tp_size = g_parallel_info.tp_size

     # Get number of nodes
     try:
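
The gating that auto_configure_deepep now computes can be reduced to a pure function of three inputs: the USE_ALL_GATHER setting, tp_size, and ep_size. A standalone sketch of that calculation (values are illustrative; the env writes mirror the diff):

import os

def resolve_all_gather(use_all_gather_env: bool, tp_size: int, ep_size: int) -> bool:
    # All-gather applies only to pure TP (ep_size == tp_size); when it applies,
    # every DeepEP switch is forced off.
    use_all_gather = use_all_gather_env and (ep_size == tp_size)
    if use_all_gather:
        os.environ["USE_DEEPEP_MOE"] = "0"
        os.environ["USE_DEEPEP_LOW_LATENCY"] = "0"
        os.environ["USE_DEEPEP_INTERNODE"] = "0"
    return use_all_gather

print(resolve_all_gather(True, tp_size=8, ep_size=8))  # True: pure TP
print(resolve_all_gather(True, tp_size=8, ep_size=4))  # False: EP differs from TP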
