@@ -56,7 +56,12 @@ ScannerContext::ScannerContext(
5656 RuntimeState* state, pipeline::ScanLocalStateBase* local_state,
5757 const TupleDescriptor* output_tuple_desc, const RowDescriptor* output_row_descriptor,
5858 const std::list<std::shared_ptr<vectorized::ScannerDelegate>>& scanners, int64_t limit_,
59- std::shared_ptr<pipeline::Dependency> dependency, int parallism_of_scan_operator)
59+ std::shared_ptr<pipeline::Dependency> dependency
60+ #ifdef BE_TEST
61+ ,
62+ int num_parallel_instances
63+ #endif
64+ )
6065 : HasTaskExecutionCtx(state),
6166 _state (state),
6267 _local_state(local_state),
@@ -68,9 +73,14 @@ ScannerContext::ScannerContext(
6873 limit(limit_),
6974 _scanner_scheduler_global(state->exec_env ()->scanner_scheduler()),
7075 _all_scanners(scanners.begin(), scanners.end()),
71- _parallism_of_scan_operator(parallism_of_scan_operator),
7276 _min_scan_concurrency_of_scan_scheduler(_state->min_scan_concurrency_of_scan_scheduler ()),
7377 _min_scan_concurrency(_state->min_scan_concurrency_of_scanner ()) {
78+ #ifndef BE_TEST
79+ _max_scan_concurrency =
80+ std::min (local_state->max_scanners_concurrency (state), cast_set<int >(scanners.size ()));
81+ #else
82+ _max_scan_concurrency = num_parallel_instances;
83+ #endif
7484 DCHECK (_state != nullptr );
7585 DCHECK (_output_row_descriptor == nullptr ||
7686 _output_row_descriptor->tuple_descriptors ().size () == 1 );
@@ -143,33 +153,6 @@ Status ScannerContext::init() {
143153 _set_scanner_done ();
144154 }
145155
146- // The overall target of our system is to make full utilization of the resources.
147- // At the same time, we dont want too many tasks are queued by scheduler, that is not necessary.
148- // Each scan operator can submit _max_scan_concurrency scanner to scheduelr if scheduler has enough resource.
149- // So that for a single query, we can make sure it could make full utilization of the resource.
150- _max_scan_concurrency = _state->num_scanner_threads ();
151- if (_max_scan_concurrency == 0 ) {
152- // Why this is safe:
153- /*
154- 1. If num cpu cores is less than or equal to 24:
155- _max_concurrency_of_scan_scheduler will be 96. _parallism_of_scan_operator will be 1 or C/2.
156- so _max_scan_concurrency will be 96 or (96 * 2 / C).
157- For a single scan node, most scanner it can submit will be 96 or (96 * 2 / C) * (C / 2) which is 96 too.
158- So a single scan node could make full utilization of the resource without sumbiting all its tasks.
159- 2. If num cpu cores greater than 24:
160- _max_concurrency_of_scan_scheduler will be 4 * C. _parallism_of_scan_operator will be 1 or C/2.
161- so _max_scan_concurrency will be 4 * C or (4 * C * 2 / C).
162- For a single scan node, most scanner it can submit will be 4 * C or (4 * C * 2 / C) * (C / 2) which is 4 * C too.
163-
164- So, in all situations, when there is only one scan node, it could make full utilization of the resource.
165- */
166- _max_scan_concurrency =
167- _min_scan_concurrency_of_scan_scheduler / _parallism_of_scan_operator;
168- _max_scan_concurrency = _max_scan_concurrency == 0 ? 1 : _max_scan_concurrency;
169- }
170-
171- _max_scan_concurrency = std::min (_max_scan_concurrency, (int32_t )_pending_scanners.size ());
172-
173156 // When the user does not specify scan_thread_num, we can try to downgrade _max_thread_num,
174157 // because we found that in a table with 5k columns, the column reader may occupy too much memory.
175158 // See https://github.com/apache/doris/issues/35340 for details.
0 commit comments