Skip to content

Commit 9fc9a39

Browse files
committed
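Fix scheduler_reserve_resource_ratio: read it from fifo_scheduler_config and remove the separate GptInitParameter field (Python attribute, C++ member, pybind property, and .pyi stub)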
1 parent b4b0211 commit 9fc9a39

File tree

6 files changed

+2
-13
lines changed

6 files changed

+2
-13
lines changed

rtp_llm/config/gpt_init_model_parameters.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,6 @@ class GptInitModelParameters:
349349
rotary_factor2: float
350350
partial_rotary_factor: float
351351
rotary_embedding_extrapolation_factor: float
352-
scheduler_reserve_resource_ratio: int
353352
scoring_func: int
354353
seq_size_per_block: int
355354
size_per_head: int
@@ -1140,13 +1139,6 @@ def update_common(
11401139
)
11411140
logging.info(f"decode_entrance: {self.decode_entrance}")
11421141

1143-
self.scheduler_reserve_resource_ratio = int(
1144-
os.environ.get("SCHEDULER_RESERVE_RESOURCE_RATIO", 5)
1145-
)
1146-
logging.info(
1147-
f"scheduler_reserve_resource_ratio: {self.scheduler_reserve_resource_ratio}"
1148-
)
1149-
11501142
self.reuse_cache = self.py_env_configs.py_kv_cache_config.reuse_cache
11511143
logging.info(f"reuse_cache: {self.reuse_cache}")
11521144
self.pre_allocate_op_mem = bool(int(os.environ.get("PRE_ALLOCATE_OP_MEM", 1)))

rtp_llm/cpp/config/GptInitParameter.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,6 @@ class GptInitParameter {
209209
int64_t max_block_size_per_item_ = 16;
210210

211211
int64_t block_nums_ = 0;
212-
int64_t scheduler_reserve_resource_ratio_ = 5;
213212
int64_t reserve_runtime_mem_mb_ = 0;
214213
int64_t kv_cache_mem_mb_ = 0;
215214
bool reuse_cache_ = false;

rtp_llm/cpp/engine_base/schedulers/FIFOScheduler.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ FIFOScheduler::FIFOScheduler(const rtp_llm::GptInitParameter& params,
2424
need_fill_fake_stream_(params.dp_size_ > 1 && params.tp_rank_ == 0),
2525
fast_gen_max_context_len_(params.fast_gen_max_context_len_),
2626
metrics_reporter_(metrics_reporter) {
27-
reserve_block_num_ = params.scheduler_reserve_resource_ratio_ * cache_manager->availableBlockNums() / 100;
27+
reserve_block_num_ = params.fifo_scheduler_config.scheduler_reserve_resource_ratio * cache_manager->availableBlockNums() / 100;
2828
RTP_LLM_LOG_INFO("max_generate_batch_size is [%d], max_batch_tokens_size is [%d], reserve_block_num is [%d]",
2929
max_generate_batch_size_,
3030
max_batch_tokens_size_,

rtp_llm/cpp/model_rpc/DecodeRpcServer.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ void DecodeRpcServer::allocateResource(DecodeGenerateContext& decode_context) {
9090

9191
auto cache_manager = engine_->resourceContext().cache_manager;
9292
auto reserve_block_num =
93-
maga_init_params_.gpt_init_parameter.scheduler_reserve_resource_ratio_ * cache_manager->totalBlocks() / 100;
93+
maga_init_params_.gpt_init_parameter.fifo_scheduler_config.scheduler_reserve_resource_ratio * cache_manager->totalBlocks() / 100;
9494
auto current_blocks = cache_manager->availableBlockNums();
9595
if (current_blocks < reserve_block_num) {
9696
string error_msg = "request: [" + decode_context.request_key + "] malloc kv cache block failed at decode node, "

rtp_llm/cpp/pybind/ConfigInit.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -632,7 +632,6 @@ void registerGptInitParameter(py::module m) {
632632
DEF_PROPERTY(seq_size_per_block, seq_size_per_block_) \
633633
DEF_PROPERTY(max_block_size_per_item, max_block_size_per_item_) \
634634
DEF_PROPERTY(block_nums, block_nums_) \
635-
DEF_PROPERTY(scheduler_reserve_resource_ratio, scheduler_reserve_resource_ratio_) \
636635
DEF_PROPERTY(kv_cache_mem_mb, kv_cache_mem_mb_) \
637636
DEF_PROPERTY(reserve_runtime_mem_mb, reserve_runtime_mem_mb_) \
638637
DEF_PROPERTY(reuse_cache, reuse_cache_) \

rtp_llm/ops/libth_transformer_config.pyi

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,6 @@ class GptInitParameter:
439439
routed_scaling_factor: float
440440
sampler_config: SamplerConfig
441441
scheduler_config: SchedulerConfig
442-
scheduler_reserve_resource_ratio: int
443442
scoring_func: int
444443
seq_size_per_block: int
445444
service_discovery_config: ServiceDiscoveryConfig

0 commit comments

Comments
 (0)