Skip to content

Commit 47afaf0

Browse files
fix(tracing): avoid raising index errors on partial flushes (#15409)
## Description Fixes an `IndexError` in `SpanAggregator.on_span_finish()` when partial flush is enabled. The error occurred when `num_finished >= partial_flush_min_spans` but `remove_finished()` returned an empty list due to counter inconsistency. Fix: - Add defensive check before accessing `finished[0]` - Prevents partial flushing min spans from being set to a value less than zero ## Risks Low. Minimal change maintains O(1) performance for counting finished spans. Partial flush behavior unchanged when spans are available. --------- Co-authored-by: Brett Langdon <[email protected]>
1 parent 27b38c9 commit 47afaf0

File tree

7 files changed

+91
-10
lines changed

7 files changed

+91
-10
lines changed

ddtrace/_trace/processor/__init__.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,11 @@ def on_span_finish(self, span: Span) -> None:
378378
elif self.partial_flush_enabled and num_finished >= self.partial_flush_min_spans:
379379
should_partial_flush = True
380380
finished = trace.remove_finished()
381-
finished[0].set_metric("_dd.py.partial_flush", num_finished)
381+
if finished:
382+
finished[0].set_metric("_dd.py.partial_flush", num_finished)
383+
else:
384+
# num_finished was out of sync with the actual finished spans, skip partial flush
385+
return
382386
else:
383387
return
384388

@@ -446,17 +450,16 @@ def shutdown(self, timeout: Optional[float]) -> None:
446450
self._queue_span_count_metrics("spans_finished", "integration_name", 1)
447451
# Log a warning if the tracer is shutdown before spans are finished
448452
if log.isEnabledFor(logging.WARNING):
449-
unfinished_spans = [
453+
unsent_spans = [
450454
f"trace_id={s.trace_id} parent_id={s.parent_id} span_id={s.span_id} name={s.name} resource={s.resource} started={s.start} sampling_priority={s.context.sampling_priority}" # noqa: E501
451455
for t in self._traces.values()
452456
for s in t.spans
453-
if not s.finished
454457
]
455-
if unfinished_spans:
458+
if unsent_spans:
456459
log.warning(
457-
"Shutting down tracer with %d unfinished spans. Unfinished spans will not be sent to Datadog: %s",
458-
len(unfinished_spans),
459-
", ".join(unfinished_spans),
460+
"Shutting down tracer with %d spans. These spans will not be sent to Datadog: %s",
461+
len(unsent_spans),
462+
", ".join(unsent_spans),
460463
)
461464

462465
try:

ddtrace/internal/settings/_config.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,12 @@ def __init__(self):
446446
self._trace_rate_limit,
447447
)
448448
self._partial_flush_enabled = _get_config("DD_TRACE_PARTIAL_FLUSH_ENABLED", True, asbool)
449-
self._partial_flush_min_spans = _get_config("DD_TRACE_PARTIAL_FLUSH_MIN_SPANS", 300, int)
449+
partial_flush_min_spans = _get_config("DD_TRACE_PARTIAL_FLUSH_MIN_SPANS", 300, int)
450+
if partial_flush_min_spans < 1:
451+
log.warning("DD_TRACE_PARTIAL_FLUSH_MIN_SPANS must be >= 1, defaulting to 1")
452+
self._partial_flush_min_spans = 1
453+
else:
454+
self._partial_flush_min_spans = partial_flush_min_spans
450455

451456
self._http = HttpConfig(header_tags=self._trace_http_header_tags)
452457
self._remote_config_enabled = _get_config("DD_REMOTE_CONFIGURATION_ENABLED", True, asbool)
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
fixes:
3+
- |
4+
tracing: Fixed a potential ``IndexError`` in partial flush when the finished span counter was out of sync with actual finished spans.
5+
- |
6+
tracing: ``DD_TRACE_PARTIAL_FLUSH_MIN_SPANS`` values less than 1 now default to 1 with a warning.

tests/integration/test_integration_snapshots.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,3 +352,22 @@ def test_encode_span_with_large_unicode_string_attributes(encoding):
352352
with override_global_config(dict(_trace_api=encoding)):
353353
with tracer.trace(name="á" * 25000, resource="â" * 25001) as span:
354354
span.set_tag(key="å" * 25001, value="ä" * 2000)
355+
356+
357+
@pytest.mark.snapshot
358+
@pytest.mark.subprocess(env={"DD_TRACE_PARTIAL_FLUSH_ENABLED": "true", "DD_TRACE_PARTIAL_FLUSH_MIN_SPANS": "1"})
359+
def test_aggregator_partial_flush_finished_counter_out_of_sync():
360+
"""Regression test for IndexError when num_finished counter is out of sync with finished spans."""
361+
from ddtrace import tracer
362+
363+
span1 = tracer.start_span("span1")
364+
span2 = tracer.start_span("span2", child_of=span1)
365+
# Manually set duration_ns before calling finish() to trigger the race condition
366+
# where trace.num_finished == 1 but len(trace.spans) == 0 after span1.finish().
367+
# In this scenario, span1.finish() does NOT trigger encoding, both span1 and span2
368+
# are encoded during span2.finish(). This occurs because _Trace.remove_finished()
369+
# removes all spans that have a duration (end state). This test ensures that calling
370+
# span1.finish() in this state does not cause unexpected behavior or crash trace encoding.
371+
span1.duration_ns = 1
372+
span2.finish()
373+
span1.finish()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
[[
2+
{
3+
"name": "span1",
4+
"service": "ddtrace_subprocess_dir",
5+
"resource": "span1",
6+
"trace_id": 0,
7+
"span_id": 1,
8+
"parent_id": 0,
9+
"type": "",
10+
"error": 0,
11+
"meta": {
12+
"_dd.p.dm": "-0",
13+
"_dd.p.tid": "692a0cfd00000000",
14+
"language": "python",
15+
"runtime-id": "48593bb50c174935bfdd35e973b6247b"
16+
},
17+
"metrics": {
18+
"_dd.py.partial_flush": 1.0,
19+
"_dd.tracer_kr": 1.0,
20+
"_sampling_priority_v1": 1.0,
21+
"process_id": 74422.0
22+
},
23+
"duration": 1,
24+
"start": 1764363517274022000
25+
},
26+
{
27+
"name": "span2",
28+
"service": "ddtrace_subprocess_dir",
29+
"resource": "span2",
30+
"trace_id": 0,
31+
"span_id": 2,
32+
"parent_id": 1,
33+
"type": "",
34+
"error": 0,
35+
"duration": 46000,
36+
"start": 1764363517274097000
37+
}]]

tests/tracer/test_processors.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,17 @@ def test_aggregator_multi_span():
282282
assert writer.pop() == [parent, child]
283283

284284

285+
@pytest.mark.subprocess(
286+
parametrize={"DD_TRACE_PARTIAL_FLUSH_MIN_SPANS": ["0", "-1", "-20"]},
287+
err=b"DD_TRACE_PARTIAL_FLUSH_MIN_SPANS must be >= 1, defaulting to 1\n",
288+
)
289+
def test_config_partial_flush_min_spans_validation():
290+
"""Test that DD_TRACE_PARTIAL_FLUSH_MIN_SPANS < 1 defaults to 1 with warning."""
291+
from ddtrace import config
292+
293+
assert config._partial_flush_min_spans == 1
294+
295+
285296
def test_aggregator_partial_flush_0_spans():
286297
writer = DummyWriter()
287298
aggr = SpanAggregator(partial_flush_enabled=True, partial_flush_min_spans=0)

tests/tracer/test_tracer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1042,8 +1042,8 @@ def test_enable():
10421042

10431043

10441044
@pytest.mark.subprocess(
1045-
err=b"Shutting down tracer with 2 unfinished spans. "
1046-
b"Unfinished spans will not be sent to Datadog: "
1045+
err=b"Shutting down tracer with 2 spans. "
1046+
b"These spans will not be sent to Datadog: "
10471047
b"trace_id=123 parent_id=0 span_id=456 name=unfinished_span1 "
10481048
b"resource=my_resource1 started=46121775360.0 sampling_priority=2, "
10491049
b"trace_id=123 parent_id=456 span_id=666 name=unfinished_span2 "

0 commit comments

Comments
 (0)