
Commit 7cc4990

[turbopack] Track task durations in the task_statistics file (vercel#83522)
# Track task execution duration in TaskStatistics

## What?

This PR adds tracking of task execution duration in Turbopack's task system, enabling better performance analysis and optimization.

## Why?

Understanding how long tasks take to execute helps identify optimization opportunities, especially for determining whether caching is beneficial for specific tasks.

## How?

- Added a `track_task_duration` method to record execution time for tasks
- Updated task statistics to track execution count and duration
- Added a Python script, `analyze_cache_effectiveness.py`, to identify tasks that would benefit from removing caching
- Updated tests to account for the new statistics fields

Sample output from the script:

```
Tasks ranked by estimated time savings from removing caching layer

Savings    Hit Rate Exec Time  Operations Task Name
---------------------------------------------------
2.33s      39.5%    765ns      661,176    turbopack-ecmascript@turbopack_ecmascript::references::esm::base::EsmAssetReference::ChunkableModuleReference::chunking_type
2.29s      18.5%    1.6μs      490,488    turbopack-ecmascript@turbopack_ecmascript::references::esm::base::EsmAssetReference::ChunkableModuleReference::export_usage
1.99s      9.0%     9.8μs      430,149    turbopack@turbopack::ModuleAssetContext::AssetContext::resolve_asset
1.17s      51.7%    1.2μs      462,164    turbopack-ecmascript@turbopack_ecmascript::EcmascriptModuleAsset::ResolveOrigin::get_inner_asset
1.10s      54.1%    1.2μs      462,387    turbopack-core@turbopack_core::resolve::ModuleResolveResult::is_unresolvable
916.01ms   0.0%     19.2μs     152,669    turbopack@turbopack::apply_module_type
807.37ms   74.5%    1.1μs      722,106    turbopack-ecmascript@turbopack_ecmascript::references::esm::base::ReferencedAsset::from_resolve_result
782.16ms   69.7%    1.5μs      680,828    turbopack-core@turbopack_core::resolve::ModuleResolveResult::primary_modules
749.54ms   4.0%     80ns       129,625    turbopack-core@turbopack_core::ident::AssetIdent::new_inner
717.59ms   94.5%    5ns        887,040    turbopack-ecmascript@turbopack_ecmascript::EcmascriptModuleAsset::ResolveOrigin::asset_context
522.31ms   30.2%    1.7μs      136,180    turbopack-core@turbopack_core::resolve::ResolveResult::is_unresolvable
452.88ms   0.0%     5.2μs      75,484     next-core@next_core::next_server::resolve::ExternalCjsModulesResolvePlugin::AfterResolvePlugin::after_resolve
415.54ms   45.2%    937ns      134,377    turbopack-core@turbopack_core::resolve::pattern::Pattern::new_internal
388.03ms   0.0%     191.1μs    64,672     turbopack-ecmascript@turbopack_ecmascript::parse::parse
```

The script analyzes task statistics to find tasks where the overhead of caching exceeds the benefit, providing recommendations for optimization based on execution patterns. It leverages data from the `overhead.rs` benchmark, which is also enhanced to provide an estimate of the delta between the measured duration and the actual duration.

## Conclusions?

There are a few items of low-hanging fruit, but the real issue is `trait` items. We need to provide more flexibility to `value_trait` items to make it possible to have non-turbo-task items that are `async`.
1 parent 9db7750 commit 7cc4990
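The savings figures in the sample output can be reproduced from the constants in `analyze_cache_effectiveness.py`. Below is a minimal sketch of the arithmetic for the top row; the hit/miss split is inferred from the printed 39.5% hit rate, so the exact counts are assumptions:

```python
# Sketch: reproducing the "Savings" column for the chunking_type row above.
# The hit/miss counts are inferred from the printed hit rate (an assumption);
# the constants match the analysis script.
CACHE_HIT_COST_NS = 500        # cost of serving a cache hit
EXECUTION_OVERHEAD_NS = 6_000  # caching-layer overhead per execution

total_ops = 661_176
cache_hit = round(total_ops * 0.395)  # ~261,165 hits
cache_miss = total_ops - cache_hit    # ~400,011 misses
exec_ns = 765                         # average execution time per call

with_cache = cache_hit * CACHE_HIT_COST_NS + cache_miss * (EXECUTION_OVERHEAD_NS + exec_ns)
without_cache = total_ops * exec_ns
print(f"{(with_cache - without_cache) / 1e9:.2f}s")  # -> 2.33s saved by removing caching
```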

File tree: 5 files changed, +280 -14 lines

turbopack/crates/turbo-tasks-backend/benches/overhead.rs

Lines changed: 36 additions & 1 deletion

```diff
@@ -3,7 +3,7 @@ use std::time::{Duration, Instant};
 use criterion::{BenchmarkId, Criterion, black_box};
 use futures::{FutureExt, StreamExt, stream::FuturesUnordered};
 use tokio::spawn;
-use turbo_tasks::TurboTasks;
+use turbo_tasks::{TurboTasks, TurboTasksApi};
 use turbo_tasks_backend::{BackendOptions, TurboTasksBackend, noop_backing_storage};
 
 #[global_allocator]
@@ -79,6 +79,15 @@ pub fn overhead(c: &mut Criterion) {
             run_turbo::<Uncached>(&rt, b, d, false);
         },
     );
+    // Same as turbo-uncached but reports the time as measured by turbotasks itself
+    // This allows us to understand the cost of the indirection within turbotasks
+    group.bench_with_input(
+        BenchmarkId::new("turbo-uncached-stats", micros),
+        &duration,
+        |b, &d| {
+            run_turbo_stats(&rt, b, d);
+        },
+    );
 
     group.bench_with_input(
         BenchmarkId::new("turbo-cached-same-keys", micros),
@@ -215,3 +224,29 @@ fn run_turbo<Mode: TurboMode>(
         }
     });
 }
+
+fn run_turbo_stats(rt: &tokio::runtime::Runtime, b: &mut criterion::Bencher<'_>, d: Duration) {
+    b.to_async(rt).iter_custom(|iters| {
+        // It is important to create the tt instance here to ensure the cache is not shared across
+        // iterations.
+        let tt = TurboTasks::new(TurboTasksBackend::new(
+            BackendOptions {
+                storage_mode: None,
+                ..Default::default()
+            },
+            noop_backing_storage(),
+        ));
+        let stats = tt.task_statistics().enable().clone();
+
+        async move {
+            tt.run_once(async move {
+                for i in 0..iters {
+                    black_box(busy_turbo(i, black_box(d)).await?);
+                }
+                Ok(stats.get(&BUSY_TURBO_FUNCTION).duration)
+            })
+            .await
+            .unwrap()
+        }
+    });
+}
```
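The `turbo-uncached-stats` bench exists so the overhead constants used by the analysis script can be derived by comparison against the known busy-wait duration `d`. A rough sketch of that derivation follows; the three means are hypothetical stand-ins for real criterion output, not measured numbers:

```python
# Hypothetical criterion means at a busy-wait duration d of 5μs.
busy_ns = 5_000                  # the requested busy-wait duration d
turbo_uncached_ns = 11_000       # wall-clock mean of "turbo-uncached"
turbo_uncached_stats_ns = 5_750  # mean of "turbo-uncached-stats" (turbotasks-reported)

# Wall time minus busy work ~= the caching layer's per-execution overhead (~4-6μs).
execution_overhead_ns = turbo_uncached_ns - busy_ns
# Internally reported time minus busy work ~= overhead baked into the reported
# duration itself (~260-750ns), i.e. the script's MEASUREMENT_OVERHEAD.
measurement_overhead_ns = turbo_uncached_stats_ns - busy_ns
print(execution_overhead_ns, measurement_overhead_ns)  # 6000 750
```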

turbopack/crates/turbo-tasks-backend/src/backend/mod.rs

Lines changed: 12 additions & 2 deletions

```diff
@@ -388,6 +388,14 @@ impl<B: BackingStorage> TurboTasksBackendInner<B> {
         self.task_statistics
             .map(|stats| stats.increment_cache_miss(task_type.native_fn));
     }
+
+    fn track_task_duration(&self, task_id: TaskId, duration: std::time::Duration) {
+        self.task_statistics.map(|stats| {
+            if let Some(task_type) = self.task_cache.lookup_reverse(&task_id) {
+                stats.increment_execution_duration(task_type.native_fn, duration);
+            }
+        });
+    }
 }
 
 pub(crate) struct OperationGuard<'a, B: BackingStorage> {
@@ -1690,7 +1698,7 @@ impl<B: BackingStorage> TurboTasksBackendInner<B> {
     fn task_execution_completed(
         &self,
         task_id: TaskId,
-        _duration: Duration,
+        duration: Duration,
         _memory_usage: usize,
         cell_counters: &AutoMap<ValueTypeId, u32, BuildHasherDefault<FxHasher>, 8>,
         stateful: bool,
@@ -1708,12 +1716,14 @@ impl<B: BackingStorage> TurboTasksBackendInner<B> {
         // ok, since the dirty flag won't be removed until step 3 and step 4 is only affecting the
         // in-memory representation.
 
-        // The task might be invalidated during this process, so we need to change the stale flag
+        // The task might be invalidated during this process, so we need to check the stale flag
         // at the start of every step.
 
         let span = tracing::trace_span!("task execution completed", immutable = Empty).entered();
         let mut ctx = self.execute_context(turbo_tasks);
 
+        self.track_task_duration(task_id, duration);
+
         //// STEP 1 ////
 
         let mut task = ctx.task(task_id, TaskDataCategory::All);
```

turbopack/crates/turbo-tasks-backend/tests/task_statistics.rs

Lines changed: 9 additions & 2 deletions

```diff
@@ -254,16 +254,23 @@ fn enable_stats() {
 
 fn stats_json() -> serde_json::Value {
     let tt = turbo_tasks::turbo_tasks();
-    remove_crate(serde_json::to_value(tt.task_statistics().get()).unwrap())
+    make_stats_deterministic(serde_json::to_value(tt.task_statistics().get()).unwrap())
 }
 
 // Global task identifiers can contain the crate name, remove it to simplify test assertions
-fn remove_crate(mut json: serde_json::Value) -> serde_json::Value {
+fn make_stats_deterministic(mut json: serde_json::Value) -> serde_json::Value {
     static HASH_RE: Lazy<Regex> = Lazy::new(|| Regex::new("^[^:@]+@[^:]+:+").unwrap());
     match &mut json {
         serde_json::Value::Object(map) => {
             let old_map = std::mem::take(map);
             for (k, v) in old_map {
+                // Remove fields that vary between runs to simplify test assertions
+                let mut v = v.clone();
+                let object = v.as_object_mut().unwrap();
+                // These are only populated after the task has finalized execution, so it is racy
+                // to assert on them.
+                object.remove("duration");
+                object.remove("executions");
                 map.insert(HASH_RE.replace(&k, "").into_owned(), v);
             }
         }
```
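For reference, `HASH_RE` strips only the leading `crate@module:` prefix from a task key. A quick Python equivalent, applied to one of the keys from the sample output in the commit message:

```python
import re

# Python analogue of the test's HASH_RE; the key comes from the sample output.
HASH_RE = re.compile(r"^[^:@]+@[^:]+:+")
key = "turbopack-core@turbopack_core::resolve::ModuleResolveResult::is_unresolvable"
print(HASH_RE.sub("", key))  # -> resolve::ModuleResolveResult::is_unresolvable
```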

turbopack/crates/turbo-tasks/src/task_statistics.rs

Lines changed: 24 additions & 9 deletions

```diff
@@ -19,17 +19,13 @@ impl TaskStatisticsApi {
         })
     }
 
-    pub fn is_enabled(&self) -> bool {
-        self.inner.get().is_some()
-    }
-
     // Calls `func` if statistics have been enabled (via
     // [`TaskStatisticsApi::enable`]).
     pub fn map<T>(&self, func: impl FnOnce(&Arc<TaskStatistics>) -> T) -> Option<T> {
         self.get().map(func)
     }
 
-    // Calls `func` if statistics have been enabled (via
+    // Returns the statistics if they have been enabled (via
     // [`TaskStatisticsApi::enable`]).
     pub fn get(&self) -> Option<&Arc<TaskStatistics>> {
         self.inner.get()
@@ -50,20 +46,39 @@ impl TaskStatistics {
         self.with_task_type_statistics(native_fn, |stats| stats.cache_miss += 1)
     }
 
+    pub fn increment_execution_duration(
+        &self,
+        native_fn: &'static NativeFunction,
+        duration: std::time::Duration,
+    ) {
+        self.with_task_type_statistics(native_fn, |stats| {
+            stats.executions += 1;
+            stats.duration += duration
+        })
+    }
+
     fn with_task_type_statistics(
         &self,
         native_fn: &'static NativeFunction,
         func: impl Fn(&mut TaskFunctionStatistics),
     ) {
         func(self.inner.entry(native_fn).or_default().value_mut())
     }
+
+    pub fn get(&self, f: &'static NativeFunction) -> TaskFunctionStatistics {
+        self.inner.get(f).unwrap().value().clone()
+    }
 }
 
 /// Statistics for an individual function.
-#[derive(Default, Serialize)]
-struct TaskFunctionStatistics {
-    cache_hit: u32,
-    cache_miss: u32,
+#[derive(Default, Serialize, Clone)]
+pub struct TaskFunctionStatistics {
+    pub cache_hit: u32,
+    pub cache_miss: u32,
+    // Generally executions == cache_miss, however they can diverge when there are invalidations.
+    // The caller gets one cache miss but we might execute multiple times.
+    pub executions: u32,
+    pub duration: std::time::Duration,
 }
 
 impl Serialize for TaskStatistics {
```
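Given the derived `Serialize` and the `std::time::Duration` field, each entry in the emitted stats file should look roughly like the sketch below (serde's default `{secs, nanos}` representation for `Duration`; the counts are illustrative, not real data). This is the shape the analysis script's `parse_duration` consumes:

```python
import json

# Assumed shape of a single stats.json entry, based on the struct fields above;
# the numbers are illustrative only.
entry = json.loads("""
{
  "cache_hit": 250151,
  "cache_miss": 212236,
  "executions": 212236,
  "duration": { "secs": 0, "nanos": 555000000 }
}
""")
# Same conversion as the script's parse_duration:
duration_ns = entry["duration"]["secs"] * 1_000_000_000 + entry["duration"]["nanos"]
print(duration_ns)  # 555000000
```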
analyze_cache_effectiveness.py

Lines changed: 199 additions & 0 deletions

```python
#!/usr/bin/env python3
"""
Cache Effectiveness Analysis Script

This script analyzes task statistics to identify which tasks are not getting
significant benefit from caching and would be candidates for removing the
caching layer.

To use this script, run a build with `NEXT_TURBOPACK_TASK_STATISTICS=path/to/stats.json` set.

Then run this script with the path to the stats.json file to get a report on
optimization opportunities.

Based on benchmarking data from the `turbopack/crates/turbo-tasks-backend/benches/overhead.rs`
benchmark we have the following estimates:
- Cache hit cost: 200-500ns
- Execution overhead: 4-6us
- Measurement overhead: 260ns-750ns

This script assumes the best-case scenario and reports on the potential time
savings from removing the caching layer.
"""

import json
import sys
from typing import Dict, List, Tuple
from dataclasses import dataclass


@dataclass
class TaskStats:
    name: str
    cache_hit: int
    cache_miss: int
    executions: int
    duration_ns: int

    @property
    def total_operations(self) -> int:
        return self.cache_hit + self.cache_miss

    @property
    def cache_hit_rate(self) -> float:
        if self.total_operations == 0:
            return 0.0
        return self.cache_hit / self.total_operations

    @property
    def avg_execution_time_ns(self) -> int:
        MEASUREMENT_OVERHEAD = 750  # Overhead implicit in the reported duration
        if self.executions == 0:
            return 0
        return max(0, (self.duration_ns - MEASUREMENT_OVERHEAD * self.executions) // self.executions)


def parse_duration(duration_dict: Dict) -> int:
    """Convert duration dict to nanoseconds."""
    return duration_dict.get("secs", 0) * 1_000_000_000 + duration_dict.get("nanos", 0)


def load_task_stats(file_path: str) -> List[TaskStats]:
    """Load and parse task statistics from JSON file."""
    with open(file_path, 'r') as f:
        data = json.load(f)

    tasks = []
    for task_name, stats in data.items():
        duration_ns = parse_duration(stats["duration"])
        task = TaskStats(
            name=task_name,
            cache_hit=stats["cache_hit"],
            cache_miss=stats["cache_miss"],
            executions=stats["executions"],
            duration_ns=duration_ns
        )
        tasks.append(task)

    return tasks


def calculate_cache_effectiveness(task: TaskStats) -> float:
    """
    Calculate the effectiveness of caching for a task.

    Returns:
        Time savings from removing caching (negative means caching is beneficial)
    """
    # Constants based on benchmarking
    # These are optimistic estimates
    CACHE_HIT_COST_NS = 500  # Upper end of the 200-500ns range
    EXECUTION_OVERHEAD_NS = 6000  # Upper end of the 4-6us range (caching layer overhead)
    # Measurement overhead (750ns) is already removed via avg_execution_time_ns

    if task.total_operations == 0:
        return 0.0

    # Current cost with caching
    # Cache hits: just the cache lookup cost
    # Cache misses: cache overhead + actual execution time
    cache_hit_cost = task.cache_hit * CACHE_HIT_COST_NS
    cache_miss_cost = task.cache_miss * (EXECUTION_OVERHEAD_NS + task.avg_execution_time_ns)
    current_total_cost = cache_hit_cost + cache_miss_cost

    # Cost without caching (all operations would be direct executions, no overhead)
    no_cache_cost = task.total_operations * task.avg_execution_time_ns

    # Time savings from removing caching (positive means we save time by removing cache)
    time_savings = current_total_cost - no_cache_cost

    return time_savings


def analyze_tasks(tasks: List[TaskStats]) -> List[Tuple[TaskStats, float]]:
    """Analyze all tasks and return sorted by potential time savings."""
    results = []

    for task in tasks:
        results.append((task, calculate_cache_effectiveness(task)))

    # Sort by time savings (descending - highest savings first)
    results.sort(key=lambda x: x[1], reverse=True)

    return results


def format_time(nanoseconds: float) -> str:
    """Format time in appropriate units (ns, μs, ms, s)."""
    sign = "-" if nanoseconds < 0 else ""
    nanoseconds = abs(nanoseconds)
    if nanoseconds >= 1_000_000_000:  # >= 1 second
        return f"{sign}{nanoseconds / 1_000_000_000:.2f}s"
    elif nanoseconds >= 1_000_000:  # >= 1 millisecond
        return f"{sign}{nanoseconds / 1_000_000:.2f}ms"
    elif nanoseconds >= 1_000:  # >= 1 microsecond
        return f"{sign}{nanoseconds / 1_000:.1f}μs"
    else:  # nanoseconds
        return f"{sign}{nanoseconds:.0f}ns"


def print_analysis(results: List[Tuple[TaskStats, float]]):
    """Print the analysis results."""
    print("Tasks ranked by estimated time savings from removing caching layer")
    print()

    if not results:
        print("No tasks would benefit from removing caching.")
        return

    # Print header
    header = (f"{'Savings':<10} {'Hit Rate':<8} {'Exec Time':<10} "
              f"{'Operations':<10} {'Task Name'}")
    print(header)
    print("-" * len(header))

    # Print results
    for (task, time_savings) in results:
        savings_str = format_time(time_savings)
        hit_rate_str = f"{task.cache_hit_rate:.1%}"
        exec_time_str = format_time(task.avg_execution_time_ns)
        operations_str = f"{task.total_operations:,}"

        print(f"{savings_str:<10} {hit_rate_str:<8} {exec_time_str:<10} "
              f"{operations_str:<10} {task.name}")

    # Print summary
    total_savings = sum(time_savings if time_savings > 0 else 0 for _, time_savings in results)
    print()
    print(f"Summary: {sum(1 if time_savings > 0 else 0 for _, time_savings in results)} tasks would benefit from removing caching")
    print(f"Total potential savings: {format_time(total_savings)}")
    print()
    print("Legend:")
    print("- Savings: Time saved by removing caching layer")
    print("- Hit Rate: Percentage of operations that were cache hits")
    print("- Exec Time: Average execution time per operation")
    print("- Operations: Total number of cache hits + misses")


def main():
    if len(sys.argv) != 2:
        print("Usage: python analyze_cache_effectiveness.py <stats-durations.json>")
        sys.exit(1)

    file_path = sys.argv[1]

    try:
        tasks = load_task_stats(file_path)
        results = analyze_tasks(tasks)
        print_analysis(results)

    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found")
        sys.exit(1)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
```
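A negative result from `calculate_cache_effectiveness` means caching pays for itself. A hypothetical smoke test (assuming the script is importable as a module named `analyze_cache_effectiveness`) showing that a slow task body with a high hit rate flips the sign:

```python
# Hypothetical smoke test; assumes the script sits on sys.path as
# analyze_cache_effectiveness. The task values are made up.
from analyze_cache_effectiveness import (TaskStats, calculate_cache_effectiveness,
                                         format_time)

# A 100μs task body with a 90% hit rate; duration_ns includes the 750ns
# per-execution measurement overhead that avg_execution_time_ns subtracts.
task = TaskStats(name="slow_task", cache_hit=9_000, cache_miss=1_000,
                 executions=1_000, duration_ns=1_000 * (100_000 + 750))
print(format_time(calculate_cache_effectiveness(task)))  # -> -889.50ms (keep the cache)
```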
