Only run agent evals on request

ConradIrwin · ConradIrwin · commit d2f9a06c4f88 · 2025-11-10T15:39:34.000-07:00
Also pass the `model_name` workflow input through to the eval run as `--model ${MODEL_NAME}`.
diff --git a/.github/workflows/run_agent_evals.yml b/.github/workflows/run_agent_evals.yml
@@ -10,15 +10,6 @@ env:
   ZED_EVAL_TELEMETRY: '1'
   MODEL_NAME: ${{ inputs.model_name }}
 on:
-  pull_request:
-    types:
-    - synchronize
-    - reopened
-    - labeled
-    branches:
-    - '**'
-  schedule:
-  - cron: 0 0 * * *
   workflow_dispatch:
     inputs:
       model_name:
@@ -27,9 +18,6 @@ on:
         type: string
 jobs:
   agent_evals:
-    if: |
-      github.repository_owner == 'zed-industries' &&
-      (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
     runs-on: namespace-profile-16x32-ubuntu-2204
     steps:
     - name: steps::checkout_repo
@@ -58,7 +46,7 @@ jobs:
       run: cargo build --package=eval
       shell: bash -euxo pipefail {0}
     - name: run_agent_evals::agent_evals::run_eval
-      run: cargo run --package=eval -- --repetitions=8 --concurrency=1
+      run: cargo run --package=eval -- --repetitions=8 --concurrency=1 --model ${MODEL_NAME}
       shell: bash -euxo pipefail {0}
     - name: steps::cleanup_cargo_config
       if: always()
@@ -69,3 +57,4 @@ jobs:
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
   cancel-in-progress: true
+timeout-minutes: 720
diff --git a/tooling/xtask/src/tasks/workflows/run_agent_evals.rs b/tooling/xtask/src/tasks/workflows/run_agent_evals.rs
@@ -1,7 +1,4 @@
-use gh_workflow::{
-    Event, Expression, Job, PullRequest, PullRequestType, Run, Schedule, Step, Use, Workflow,
-    WorkflowDispatch,
-};
+use gh_workflow::{Event, Expression, Job, Run, Schedule, Step, Use, Workflow, WorkflowDispatch};
 
 use crate::tasks::workflows::{
     runners::{self, Platform},
@@ -14,16 +11,10 @@ pub(crate) fn run_agent_evals() -> Workflow {
     let model_name = Input::string("model_name", None);
 
     named::workflow()
-        .on(Event::default()
-            .schedule([Schedule::default().cron("0 0 * * *")])
-            .pull_request(PullRequest::default().add_branch("**").types([
-                PullRequestType::Synchronize,
-                PullRequestType::Reopened,
-                PullRequestType::Labeled,
-            ]))
-            .workflow_dispatch(
-                WorkflowDispatch::default().add_input(model_name.name, model_name.input()),
-            ))
+        .timeout_minutes(12u32 * 60)
+        .on(Event::default().workflow_dispatch(
+            WorkflowDispatch::default().add_input(model_name.name, model_name.input()),
+        ))
         .concurrency(vars::one_workflow_per_non_main_branch())
         .add_env(("CARGO_TERM_COLOR", "always"))
         .add_env(("CARGO_INCREMENTAL", 0))
@@ -37,15 +28,13 @@ pub(crate) fn run_agent_evals() -> Workflow {
 
 fn agent_evals() -> NamedJob {
     fn run_eval() -> Step<Run> {
-        named::bash("cargo run --package=eval -- --repetitions=8 --concurrency=1")
+        named::bash(
+            "cargo run --package=eval -- --repetitions=8 --concurrency=1 --model ${MODEL_NAME}",
+        )
     }
 
     named::job(
         Job::default()
-            .cond(Expression::new(indoc::indoc!{r#"
-                github.repository_owner == 'zed-industries' &&
-                (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
-            "#}))
             .runs_on(runners::LINUX_DEFAULT)
             .timeout_minutes(60_u32)
             .add_step(steps::checkout_repo())
@@ -54,7 +43,7 @@ fn agent_evals() -> NamedJob {
             .add_step(setup_cargo_config(Platform::Linux))
             .add_step(steps::script("cargo build --package=eval"))
             .add_step(run_eval())
-            .add_step(steps::cleanup_cargo_config(Platform::Linux))
+            .add_step(steps::cleanup_cargo_config(Platform::Linux)),
     )
 }