Skip to content

Commit 9ca9bc4

Browse files
refactor(profiling): change how GenInfo::is_running works (#15508)
## Description This change replicates P403n1x87/echion#202. This PR changes the way `GenInfo::is_running` works. Previously, it would indicate whether the _current_ coroutine was on CPU; now, it indicates whether the current coroutine _or the coroutine it (recursively) awaits_ is on CPU. Making that change also allows us to do less work when we check whether the current coroutine is on CPU or not. Because a Coroutine / Generator / `GenInfo` can only be running if it is not awaiting another Generator, we do not need to compute `is_running` if we have `await != nullptr` (and we take `await->is_running` for the value in that case). Making that change makes it easier to check whether a Task is currently on-CPU; and allows to do less work when we decide how to unwind `asyncio` Tasks (cf the changes in `TaskInfo` which doesn't need the `is_on_cpu` method that iterates on the `await` chain anymore). Note that I checked whether `GenInfo::is_running` was used in any other way than the one I describe and simplify; it is not, so I do think this change is safe to make as-is. **Note** This PR makes sense on its own, but it is in the context of making P403n1x87/echion#198 simpler. **Note** This doesn't need a changelog entry as it makes no difference to the user, it's purely a refactor.
1 parent 9f1242e commit 9ca9bc4

File tree

2 files changed

+21
-42
lines changed

2 files changed

+21
-42
lines changed

ddtrace/internal/datadog/profiling/stack_v2/echion/echion/tasks.h

Lines changed: 19 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,13 @@ class GenInfo
7171
public:
7272
typedef std::unique_ptr<GenInfo> Ptr;
7373

74-
PyObject* origin = NULL;
75-
PyObject* frame = NULL;
74+
PyObject* origin = nullptr;
75+
PyObject* frame = nullptr;
7676

77+
// The coroutine awaited by this coroutine, if any
7778
GenInfo::Ptr await = nullptr;
7879

80+
// Whether the coroutine, or the coroutine it awaits, is currently running (on CPU)
7981
bool is_running = false;
8082

8183
[[nodiscard]] static Result<GenInfo::Ptr> create(PyObject* gen_addr);
@@ -149,13 +151,21 @@ GenInfo::create(PyObject* gen_addr)
149151
}
150152
}
151153

154+
// A coroutine awaiting another coroutine is never running itself,
155+
// so when the coroutine is awaiting another coroutine, we use the running state of the awaited coroutine.
156+
// Otherwise, we use the running state of the coroutine itself.
157+
bool is_running = false;
158+
if (await) {
159+
is_running = await->is_running;
160+
} else {
152161
#if PY_VERSION_HEX >= 0x030b0000
153-
auto is_running = (gen.gi_frame_state == FRAME_EXECUTING);
162+
is_running = (gen.gi_frame_state == FRAME_EXECUTING);
154163
#elif PY_VERSION_HEX >= 0x030a0000
155-
auto is_running = (frame != NULL) ? _PyFrame_IsExecuting(&f) : false;
164+
is_running = (frame != NULL) ? _PyFrame_IsExecuting(&f) : false;
156165
#else
157-
auto is_running = gen.gi_running;
166+
is_running = gen.gi_running;
158167
#endif
168+
}
159169

160170
recursion_depth--;
161171
return std::make_unique<GenInfo>(gen_addr, frame, std::move(await), is_running);
@@ -178,7 +188,7 @@ class TaskInfo
178188

179189
// Information to reconstruct the async stack as best as we can
180190
TaskInfo::Ptr waiter = nullptr;
181-
std::optional<bool> is_on_cpu_ = std::nullopt;
191+
bool is_on_cpu = false;
182192

183193
[[nodiscard]] static Result<TaskInfo::Ptr> create(TaskObj*);
184194
TaskInfo(PyObject* origin, PyObject* loop, GenInfo::Ptr coro, StringTable::Key name, TaskInfo::Ptr waiter)
@@ -187,32 +197,12 @@ class TaskInfo
187197
, coro(std::move(coro))
188198
, name(name)
189199
, waiter(std::move(waiter))
200+
, is_on_cpu(this->coro && this->coro->is_running)
190201
{
191202
}
192203

193204
[[nodiscard]] static Result<TaskInfo::Ptr> current(PyObject*);
194205
inline size_t unwind(FrameStack&);
195-
196-
// Check if any coroutine in the chain is currently running (on CPU)
197-
inline bool is_on_cpu()
198-
{
199-
if (is_on_cpu_.has_value()) {
200-
return *is_on_cpu_;
201-
}
202-
203-
auto* last_coro = static_cast<GenInfo*>(nullptr);
204-
auto* current_coro = this->coro.get();
205-
206-
// Check if the innermost coroutine is running.
207-
// We don't need to test all the other coroutines as they are awaiting, by definition.
208-
while (current_coro) {
209-
last_coro = current_coro;
210-
current_coro = current_coro->await.get();
211-
}
212-
213-
is_on_cpu_ = last_coro && last_coro->is_running;
214-
return *is_on_cpu_;
215-
}
216206
};
217207

218208
inline std::unordered_map<PyObject*, PyObject*> task_link_map;
@@ -242,8 +232,6 @@ TaskInfo::create(TaskObj* task_addr)
242232
return ErrorKind::TaskInfoGeneratorError;
243233
}
244234

245-
auto origin = reinterpret_cast<PyObject*>(task_addr);
246-
247235
auto maybe_name = string_table.key(task.task_name);
248236
if (!maybe_name) {
249237
recursion_depth--;
@@ -262,7 +250,8 @@ TaskInfo::create(TaskObj* task_addr)
262250
}
263251

264252
recursion_depth--;
265-
return std::make_unique<TaskInfo>(origin, loop, std::move(*maybe_coro), name, std::move(waiter));
253+
return std::make_unique<TaskInfo>(
254+
reinterpret_cast<PyObject*>(task_addr), loop, std::move(*maybe_coro), name, std::move(waiter));
266255
}
267256

268257
// ----------------------------------------------------------------------------

ddtrace/internal/datadog/profiling/stack_v2/echion/echion/threads.h

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -252,25 +252,15 @@ ThreadInfo::unwind_tasks()
252252
}
253253
}
254254

255-
// Only one Task can be on CPU at a time.
256-
// Since determining if a task is on CPU is somewhat costly, we
257-
// stop checking if Tasks are on CPU after seeing the first one.
258-
bool on_cpu_task_seen = false;
259255
for (auto& leaf_task : leaf_tasks) {
260-
bool on_cpu = false;
261-
if (!on_cpu_task_seen) {
262-
on_cpu = leaf_task.get().is_on_cpu();
263-
on_cpu_task_seen = on_cpu;
264-
}
265-
266-
auto stack_info = std::make_unique<StackInfo>(leaf_task.get().name, on_cpu);
256+
auto stack_info = std::make_unique<StackInfo>(leaf_task.get().name, leaf_task.get().is_on_cpu);
267257
auto& stack = stack_info->stack;
268258
for (auto current_task = leaf_task;;) {
269259
auto& task = current_task.get();
270260

271261
size_t stack_size = task.unwind(stack);
272262

273-
if (on_cpu) {
263+
if (task.is_on_cpu) {
274264
// Undo the stack unwinding
275265
// TODO[perf]: not super-efficient :(
276266
for (size_t i = 0; i < stack_size; i++)

0 commit comments

Comments
 (0)