
Commit 7cc4990

[turbopack] Track task durations in the task_statistics file (vercel#83522)
# Track task execution duration in TaskStatistics

## What?

This PR adds tracking of task execution duration in Turbopack's task system, enabling better performance analysis and optimization.

## Why?

Understanding how long tasks take to execute helps identify optimization opportunities, especially for determining whether caching is beneficial for specific tasks.

## How?

- Added a `track_task_duration` method to record execution time for tasks
- Updated task statistics to track execution count and duration
- Added a Python script, `analyze_cache_effectiveness.py`, to identify tasks that would benefit from removing caching
- Updated tests to account for the new statistics fields

Sample output from the script:

```
Tasks ranked by estimated time savings from removing caching layer

Savings    Hit Rate Exec Time  Operations Task Name
---------------------------------------------------
2.33s      39.5%    765ns      661,176    turbopack-ecmascript@turbopack_ecmascript::references::esm::base::EsmAssetReference::ChunkableModuleReference::chunking_type
2.29s      18.5%    1.6μs      490,488    turbopack-ecmascript@turbopack_ecmascript::references::esm::base::EsmAssetReference::ChunkableModuleReference::export_usage
1.99s      9.0%     9.8μs      430,149    turbopack@turbopack::ModuleAssetContext::AssetContext::resolve_asset
1.17s      51.7%    1.2μs      462,164    turbopack-ecmascript@turbopack_ecmascript::EcmascriptModuleAsset::ResolveOrigin::get_inner_asset
1.10s      54.1%    1.2μs      462,387    turbopack-core@turbopack_core::resolve::ModuleResolveResult::is_unresolvable
916.01ms   0.0%     19.2μs     152,669    turbopack@turbopack::apply_module_type
807.37ms   74.5%    1.1μs      722,106    turbopack-ecmascript@turbopack_ecmascript::references::esm::base::ReferencedAsset::from_resolve_result
782.16ms   69.7%    1.5μs      680,828    turbopack-core@turbopack_core::resolve::ModuleResolveResult::primary_modules
749.54ms   4.0%     80ns       129,625    turbopack-core@turbopack_core::ident::AssetIdent::new_inner
717.59ms   94.5%    5ns        887,040    turbopack-ecmascript@turbopack_ecmascript::EcmascriptModuleAsset::ResolveOrigin::asset_context
522.31ms   30.2%    1.7μs      136,180    turbopack-core@turbopack_core::resolve::ResolveResult::is_unresolvable
452.88ms   0.0%     5.2μs      75,484     next-core@next_core::next_server::resolve::ExternalCjsModulesResolvePlugin::AfterResolvePlugin::after_resolve
415.54ms   45.2%    937ns      134,377    turbopack-core@turbopack_core::resolve::pattern::Pattern::new_internal
388.03ms   0.0%     191.1μs    64,672     turbopack-ecmascript@turbopack_ecmascript::parse::parse
```

The script analyzes task statistics to find tasks where the overhead of caching exceeds the benefit, providing recommendations for optimization based on execution patterns. It leverages data from the `overhead.rs` benchmark, which is also enhanced to provide an estimate of the delta between the measured duration and the actual duration.

## Conclusions?

There are a few items of low-hanging fruit, but the real issue is `trait` items. We need to provide more flexibility to `value_trait` items to make it possible to have non-turbo-task items that are `async`.
1 parent 9db7750 commit 7cc4990
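The savings figures in the sample output can be reproduced from the constants in `analyze_cache_effectiveness.py`. Below is a minimal sketch of the arithmetic for the top row; the hit/miss split is inferred from the printed 39.5% hit rate, so the exact counts are assumptions:

```python
# Sketch: reproducing the "Savings" column for the chunking_type row above.
# The hit/miss counts are inferred from the printed hit rate (an assumption);
# the constants match the analysis script.
CACHE_HIT_COST_NS = 500        # cost of serving a cache hit
EXECUTION_OVERHEAD_NS = 6_000  # caching-layer overhead per execution

total_ops = 661_176
cache_hit = round(total_ops * 0.395)  # ~261,165 hits
cache_miss = total_ops - cache_hit    # ~400,011 misses
exec_ns = 765                         # average execution time per call

with_cache = cache_hit * CACHE_HIT_COST_NS + cache_miss * (EXECUTION_OVERHEAD_NS + exec_ns)
without_cache = total_ops * exec_ns
print(f"{(with_cache - without_cache) / 1e9:.2f}s")  # -> 2.33s saved by removing caching
```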

File tree: 5 files changed, +280 -14 lines

turbopack/crates/turbo-tasks-backend/benches/overhead.rs

Lines changed: 36 additions & 1 deletion

```diff
@@ -3,7 +3,7 @@ use std::time::{Duration, Instant};
 use criterion::{BenchmarkId, Criterion, black_box};
 use futures::{FutureExt, StreamExt, stream::FuturesUnordered};
 use tokio::spawn;
-use turbo_tasks::TurboTasks;
+use turbo_tasks::{TurboTasks, TurboTasksApi};
 use turbo_tasks_backend::{BackendOptions, TurboTasksBackend, noop_backing_storage};
 
 #[global_allocator]
@@ -79,6 +79,15 @@ pub fn overhead(c: &mut Criterion) {
             run_turbo::<Uncached>(&rt, b, d, false);
         },
     );
+    // Same as turbo-uncached but reports the time as measured by turbotasks itself
+    // This allows us to understand the cost of the indirection within turbotasks
+    group.bench_with_input(
+        BenchmarkId::new("turbo-uncached-stats", micros),
+        &duration,
+        |b, &d| {
+            run_turbo_stats(&rt, b, d);
+        },
+    );
 
     group.bench_with_input(
         BenchmarkId::new("turbo-cached-same-keys", micros),
@@ -215,3 +224,29 @@ fn run_turbo<Mode: TurboMode>(
         }
     });
 }
+
+fn run_turbo_stats(rt: &tokio::runtime::Runtime, b: &mut criterion::Bencher<'_>, d: Duration) {
+    b.to_async(rt).iter_custom(|iters| {
+        // It is important to create the tt instance here to ensure the cache is not shared across
+        // iterations.
+        let tt = TurboTasks::new(TurboTasksBackend::new(
+            BackendOptions {
+                storage_mode: None,
+                ..Default::default()
+            },
+            noop_backing_storage(),
+        ));
+        let stats = tt.task_statistics().enable().clone();
+
+        async move {
+            tt.run_once(async move {
+                for i in 0..iters {
+                    black_box(busy_turbo(i, black_box(d)).await?);
+                }
+                Ok(stats.get(&BUSY_TURBO_FUNCTION).duration)
+            })
+            .await
+            .unwrap()
+        }
+    });
+}
```
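The `turbo-uncached-stats` bench exists so the overhead constants used by the analysis script can be derived by comparison against the known busy-wait duration `d`. A rough sketch of that derivation follows; the three means are hypothetical stand-ins for real criterion output, not measured numbers:

```python
# Hypothetical criterion means at a busy-wait duration d of 5μs.
busy_ns = 5_000                  # the requested busy-wait duration d
turbo_uncached_ns = 11_000       # wall-clock mean of "turbo-uncached"
turbo_uncached_stats_ns = 5_750  # mean of "turbo-uncached-stats" (turbotasks-reported)

# Wall time minus busy work ~= the caching layer's per-execution overhead (~4-6μs).
execution_overhead_ns = turbo_uncached_ns - busy_ns
# Internally reported time minus busy work ~= overhead baked into the reported
# duration itself (~260-750ns), i.e. the script's MEASUREMENT_OVERHEAD.
measurement_overhead_ns = turbo_uncached_stats_ns - busy_ns
print(execution_overhead_ns, measurement_overhead_ns)  # 6000 750
```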

turbopack/crates/turbo-tasks-backend/src/backend/mod.rs

Lines changed: 12 additions & 2 deletions

```diff
@@ -388,6 +388,14 @@ impl<B: BackingStorage> TurboTasksBackendInner<B> {
         self.task_statistics
             .map(|stats| stats.increment_cache_miss(task_type.native_fn));
     }
+
+    fn track_task_duration(&self, task_id: TaskId, duration: std::time::Duration) {
+        self.task_statistics.map(|stats| {
+            if let Some(task_type) = self.task_cache.lookup_reverse(&task_id) {
+                stats.increment_execution_duration(task_type.native_fn, duration);
+            }
+        });
+    }
 }
 
 pub(crate) struct OperationGuard<'a, B: BackingStorage> {
@@ -1690,7 +1698,7 @@ impl<B: BackingStorage> TurboTasksBackendInner<B> {
     fn task_execution_completed(
         &self,
         task_id: TaskId,
-        _duration: Duration,
+        duration: Duration,
         _memory_usage: usize,
         cell_counters: &AutoMap<ValueTypeId, u32, BuildHasherDefault<FxHasher>, 8>,
         stateful: bool,
@@ -1708,12 +1716,14 @@ impl<B: BackingStorage> TurboTasksBackendInner<B> {
         // ok, since the dirty flag won't be removed until step 3 and step 4 is only affecting the
         // in-memory representation.
 
-        // The task might be invalidated during this process, so we need to change the stale flag
+        // The task might be invalidated during this process, so we need to check the stale flag
         // at the start of every step.
 
         let span = tracing::trace_span!("task execution completed", immutable = Empty).entered();
         let mut ctx = self.execute_context(turbo_tasks);
 
+        self.track_task_duration(task_id, duration);
+
         //// STEP 1 ////
 
         let mut task = ctx.task(task_id, TaskDataCategory::All);
```

turbopack/crates/turbo-tasks-backend/tests/task_statistics.rs

Lines changed: 9 additions & 2 deletions

```diff
@@ -254,16 +254,23 @@ fn enable_stats() {
 
 fn stats_json() -> serde_json::Value {
     let tt = turbo_tasks::turbo_tasks();
-    remove_crate(serde_json::to_value(tt.task_statistics().get()).unwrap())
+    make_stats_deterministic(serde_json::to_value(tt.task_statistics().get()).unwrap())
 }
 
 // Global task identifiers can contain the crate name, remove it to simplify test assertions
-fn remove_crate(mut json: serde_json::Value) -> serde_json::Value {
+fn make_stats_deterministic(mut json: serde_json::Value) -> serde_json::Value {
     static HASH_RE: Lazy<Regex> = Lazy::new(|| Regex::new("^[^:@]+@[^:]+:+").unwrap());
     match &mut json {
         serde_json::Value::Object(map) => {
             let old_map = std::mem::take(map);
             for (k, v) in old_map {
+                // Remove fields that vary between runs to simplify test assertions
+                let mut v = v.clone();
+                let object = v.as_object_mut().unwrap();
+                // These are only populated after the task has finalized execution, so it is racy
+                // to assert on them.
+                object.remove("duration");
+                object.remove("executions");
                 map.insert(HASH_RE.replace(&k, "").into_owned(), v);
             }
         }
```
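For reference, `HASH_RE` strips only the leading `crate@module:` prefix from a task key. A quick Python equivalent, applied to one of the keys from the sample output in the commit message:

```python
import re

# Python analogue of the test's HASH_RE; the key comes from the sample output.
HASH_RE = re.compile(r"^[^:@]+@[^:]+:+")
key = "turbopack-core@turbopack_core::resolve::ModuleResolveResult::is_unresolvable"
print(HASH_RE.sub("", key))  # -> resolve::ModuleResolveResult::is_unresolvable
```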

turbopack/crates/turbo-tasks/src/task_statistics.rs

Lines changed: 24 additions & 9 deletions

```diff
@@ -19,17 +19,13 @@ impl TaskStatisticsApi {
         })
     }
 
-    pub fn is_enabled(&self) -> bool {
-        self.inner.get().is_some()
-    }
-
     // Calls `func` if statistics have been enabled (via
     // [`TaskStatisticsApi::enable`]).
     pub fn map<T>(&self, func: impl FnOnce(&Arc<TaskStatistics>) -> T) -> Option<T> {
         self.get().map(func)
     }
 
-    // Calls `func` if statistics have been enabled (via
+    // Returns the statistics if they have been enabled (via
     // [`TaskStatisticsApi::enable`]).
     pub fn get(&self) -> Option<&Arc<TaskStatistics>> {
         self.inner.get()
@@ -50,20 +46,39 @@ impl TaskStatistics {
         self.with_task_type_statistics(native_fn, |stats| stats.cache_miss += 1)
     }
 
+    pub fn increment_execution_duration(
+        &self,
+        native_fn: &'static NativeFunction,
+        duration: std::time::Duration,
+    ) {
+        self.with_task_type_statistics(native_fn, |stats| {
+            stats.executions += 1;
+            stats.duration += duration
+        })
+    }
+
     fn with_task_type_statistics(
         &self,
         native_fn: &'static NativeFunction,
         func: impl Fn(&mut TaskFunctionStatistics),
     ) {
         func(self.inner.entry(native_fn).or_default().value_mut())
     }
+
+    pub fn get(&self, f: &'static NativeFunction) -> TaskFunctionStatistics {
+        self.inner.get(f).unwrap().value().clone()
+    }
 }
 
 /// Statistics for an individual function.
-#[derive(Default, Serialize)]
-struct TaskFunctionStatistics {
-    cache_hit: u32,
-    cache_miss: u32,
+#[derive(Default, Serialize, Clone)]
+pub struct TaskFunctionStatistics {
+    pub cache_hit: u32,
+    pub cache_miss: u32,
+    // Generally executions == cache_miss, however they can diverge when there are invalidations.
+    // The caller gets one cache miss but we might execute multiple times.
+    pub executions: u32,
+    pub duration: std::time::Duration,
 }
 
 impl Serialize for TaskStatistics {
```
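Given the derived `Serialize` and the `std::time::Duration` field, each entry in the emitted stats file should look roughly like the sketch below (serde's default `{secs, nanos}` representation for `Duration`; the counts are illustrative, not real data). This is the shape the analysis script's `parse_duration` consumes:

```python
import json

# Assumed shape of a single stats.json entry, based on the struct fields above;
# the numbers are illustrative only.
entry = json.loads("""
{
  "cache_hit": 250151,
  "cache_miss": 212236,
  "executions": 212236,
  "duration": { "secs": 0, "nanos": 555000000 }
}
""")
# Same conversion as the script's parse_duration:
duration_ns = entry["duration"]["secs"] * 1_000_000_000 + entry["duration"]["nanos"]
print(duration_ns)  # 555000000
```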
analyze_cache_effectiveness.py

Lines changed: 199 additions & 0 deletions

```python
#!/usr/bin/env python3
"""
Cache Effectiveness Analysis Script

This script analyzes task statistics to identify which tasks are not getting
significant benefit from caching and would be candidates for removing the
caching layer.

To use this script, run a build with `NEXT_TURBOPACK_TASK_STATISTICS=path/to/stats.json` set.

Then run this script with the path to the stats.json file to get a report on
optimization opportunities.

Based on benchmarking data from the `turbopack/crates/turbo-tasks-backend/benches/overhead.rs`
benchmark we have the following estimates:
- Cache hit cost: 200-500ns
- Execution overhead: 4-6us
- Measurement overhead: 260ns-750ns

This script assumes the best-case scenario and reports on the potential time
savings from removing the caching layer.
"""

import json
import sys
from typing import Dict, List, Tuple
from dataclasses import dataclass


@dataclass
class TaskStats:
    name: str
    cache_hit: int
    cache_miss: int
    executions: int
    duration_ns: int

    @property
    def total_operations(self) -> int:
        return self.cache_hit + self.cache_miss

    @property
    def cache_hit_rate(self) -> float:
        if self.total_operations == 0:
            return 0.0
        return self.cache_hit / self.total_operations

    @property
    def avg_execution_time_ns(self) -> int:
        MEASUREMENT_OVERHEAD = 750  # Overhead implicit in the reported duration
        if self.executions == 0:
            return 0
        return max(0, (self.duration_ns - MEASUREMENT_OVERHEAD * self.executions) // self.executions)


def parse_duration(duration_dict: Dict) -> int:
    """Convert duration dict to nanoseconds."""
    return duration_dict.get("secs", 0) * 1_000_000_000 + duration_dict.get("nanos", 0)


def load_task_stats(file_path: str) -> List[TaskStats]:
    """Load and parse task statistics from JSON file."""
    with open(file_path, 'r') as f:
        data = json.load(f)

    tasks = []
    for task_name, stats in data.items():
        duration_ns = parse_duration(stats["duration"])
        task = TaskStats(
            name=task_name,
            cache_hit=stats["cache_hit"],
            cache_miss=stats["cache_miss"],
            executions=stats["executions"],
            duration_ns=duration_ns
        )
        tasks.append(task)

    return tasks


def calculate_cache_effectiveness(task: TaskStats) -> float:
    """
    Calculate the effectiveness of caching for a task.

    Returns:
        Time savings from removing caching (negative means caching is beneficial)
    """
    # Constants based on benchmarking
    # These are optimistic estimates
    CACHE_HIT_COST_NS = 500  # Upper end of the 200-500ns range
    EXECUTION_OVERHEAD_NS = 6000  # Upper end of the 4-6us range (caching layer overhead)
    # Measurement overhead (750ns) is already removed via avg_execution_time_ns

    if task.total_operations == 0:
        return 0.0

    # Current cost with caching
    # Cache hits: just the cache lookup cost
    # Cache misses: cache overhead + actual execution time
    cache_hit_cost = task.cache_hit * CACHE_HIT_COST_NS
    cache_miss_cost = task.cache_miss * (EXECUTION_OVERHEAD_NS + task.avg_execution_time_ns)
    current_total_cost = cache_hit_cost + cache_miss_cost

    # Cost without caching (all operations would be direct executions, no overhead)
    no_cache_cost = task.total_operations * task.avg_execution_time_ns

    # Time savings from removing caching (positive means we save time by removing cache)
    time_savings = current_total_cost - no_cache_cost

    return time_savings


def analyze_tasks(tasks: List[TaskStats]) -> List[Tuple[TaskStats, float]]:
    """Analyze all tasks and return sorted by potential time savings."""
    results = []

    for task in tasks:
        results.append((task, calculate_cache_effectiveness(task)))

    # Sort by time savings (descending - highest savings first)
    results.sort(key=lambda x: x[1], reverse=True)

    return results


def format_time(nanoseconds: float) -> str:
    """Format time in appropriate units (ns, μs, ms, s)."""
    sign = "-" if nanoseconds < 0 else ""
    nanoseconds = abs(nanoseconds)
    if nanoseconds >= 1_000_000_000:  # >= 1 second
        return f"{sign}{nanoseconds / 1_000_000_000:.2f}s"
    elif nanoseconds >= 1_000_000:  # >= 1 millisecond
        return f"{sign}{nanoseconds / 1_000_000:.2f}ms"
    elif nanoseconds >= 1_000:  # >= 1 microsecond
        return f"{sign}{nanoseconds / 1_000:.1f}μs"
    else:  # nanoseconds
        return f"{sign}{nanoseconds:.0f}ns"


def print_analysis(results: List[Tuple[TaskStats, float]]):
    """Print the analysis results."""
    print("Tasks ranked by estimated time savings from removing caching layer")
    print()

    if not results:
        print("No tasks would benefit from removing caching.")
        return

    # Print header
    header = (f"{'Savings':<10} {'Hit Rate':<8} {'Exec Time':<10} "
              f"{'Operations':<10} {'Task Name'}")
    print(header)
    print("-" * len(header))

    # Print results
    for (task, time_savings) in results:
        savings_str = format_time(time_savings)
        hit_rate_str = f"{task.cache_hit_rate:.1%}"
        exec_time_str = format_time(task.avg_execution_time_ns)
        operations_str = f"{task.total_operations:,}"

        print(f"{savings_str:<10} {hit_rate_str:<8} {exec_time_str:<10} "
              f"{operations_str:<10} {task.name}")

    # Print summary
    total_savings = sum(time_savings if time_savings > 0 else 0 for _, time_savings in results)
    print()
    print(f"Summary: {sum(1 if time_savings > 0 else 0 for _, time_savings in results)} tasks would benefit from removing caching")
    print(f"Total potential savings: {format_time(total_savings)}")
    print()
    print("Legend:")
    print("- Savings: Time saved by removing caching layer")
    print("- Hit Rate: Percentage of operations that were cache hits")
    print("- Exec Time: Average execution time per operation")
    print("- Operations: Total number of cache hits + misses")


def main():
    if len(sys.argv) != 2:
        print("Usage: python analyze_cache_effectiveness.py <stats-durations.json>")
        sys.exit(1)

    file_path = sys.argv[1]

    try:
        tasks = load_task_stats(file_path)
        results = analyze_tasks(tasks)
        print_analysis(results)

    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found")
        sys.exit(1)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
```
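A negative result from `calculate_cache_effectiveness` means caching pays for itself. A hypothetical smoke test (assuming the script is importable as a module named `analyze_cache_effectiveness`) showing that a slow task body with a high hit rate flips the sign:

```python
# Hypothetical smoke test; assumes the script sits on sys.path as
# analyze_cache_effectiveness. The task values are made up.
from analyze_cache_effectiveness import (TaskStats, calculate_cache_effectiveness,
                                         format_time)

# A 100μs task body with a 90% hit rate; duration_ns includes the 750ns
# per-execution measurement overhead that avg_execution_time_ns subtracts.
task = TaskStats(name="slow_task", cache_hit=9_000, cache_miss=1_000,
                 executions=1_000, duration_ns=1_000 * (100_000 + 750))
print(format_time(calculate_cache_effectiveness(task)))  # -> -889.50ms (keep the cache)
```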
