Skip to content

Commit d2f9a06

Browse files
committed
Only run agent evals on request
Also, maybe pass model param?
1 parent 40d1902 commit d2f9a06

File tree

2 files changed

+11
-33
lines changed

2 files changed

+11
-33
lines changed

.github/workflows/run_agent_evals.yml

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,6 @@ env:
1010
ZED_EVAL_TELEMETRY: '1'
1111
MODEL_NAME: ${{ inputs.model_name }}
1212
on:
13-
pull_request:
14-
types:
15-
- synchronize
16-
- reopened
17-
- labeled
18-
branches:
19-
- '**'
20-
schedule:
21-
- cron: 0 0 * * *
2213
workflow_dispatch:
2314
inputs:
2415
model_name:
@@ -27,9 +18,6 @@ on:
2718
type: string
2819
jobs:
2920
agent_evals:
30-
if: |
31-
github.repository_owner == 'zed-industries' &&
32-
(github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
3321
runs-on: namespace-profile-16x32-ubuntu-2204
3422
steps:
3523
- name: steps::checkout_repo
@@ -58,7 +46,7 @@ jobs:
5846
run: cargo build --package=eval
5947
shell: bash -euxo pipefail {0}
6048
- name: run_agent_evals::agent_evals::run_eval
61-
run: cargo run --package=eval -- --repetitions=8 --concurrency=1
49+
run: cargo run --package=eval -- --repetitions=8 --concurrency=1 --model ${MODEL_NAME}
6250
shell: bash -euxo pipefail {0}
6351
- name: steps::cleanup_cargo_config
6452
if: always()
@@ -69,3 +57,4 @@ jobs:
6957
concurrency:
7058
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
7159
cancel-in-progress: true
60+
timeout-minutes: 720

tooling/xtask/src/tasks/workflows/run_agent_evals.rs

Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
use gh_workflow::{
2-
Event, Expression, Job, PullRequest, PullRequestType, Run, Schedule, Step, Use, Workflow,
3-
WorkflowDispatch,
4-
};
1+
use gh_workflow::{Event, Expression, Job, Run, Schedule, Step, Use, Workflow, WorkflowDispatch};
52

63
use crate::tasks::workflows::{
74
runners::{self, Platform},
@@ -14,16 +11,10 @@ pub(crate) fn run_agent_evals() -> Workflow {
1411
let model_name = Input::string("model_name", None);
1512

1613
named::workflow()
17-
.on(Event::default()
18-
.schedule([Schedule::default().cron("0 0 * * *")])
19-
.pull_request(PullRequest::default().add_branch("**").types([
20-
PullRequestType::Synchronize,
21-
PullRequestType::Reopened,
22-
PullRequestType::Labeled,
23-
]))
24-
.workflow_dispatch(
25-
WorkflowDispatch::default().add_input(model_name.name, model_name.input()),
26-
))
14+
.timeout_minutes(12u32 * 60)
15+
.on(Event::default().workflow_dispatch(
16+
WorkflowDispatch::default().add_input(model_name.name, model_name.input()),
17+
))
2718
.concurrency(vars::one_workflow_per_non_main_branch())
2819
.add_env(("CARGO_TERM_COLOR", "always"))
2920
.add_env(("CARGO_INCREMENTAL", 0))
@@ -37,15 +28,13 @@ pub(crate) fn run_agent_evals() -> Workflow {
3728

3829
fn agent_evals() -> NamedJob {
3930
fn run_eval() -> Step<Run> {
40-
named::bash("cargo run --package=eval -- --repetitions=8 --concurrency=1")
31+
named::bash(
32+
"cargo run --package=eval -- --repetitions=8 --concurrency=1 --model ${MODEL_NAME}",
33+
)
4134
}
4235

4336
named::job(
4437
Job::default()
45-
.cond(Expression::new(indoc::indoc!{r#"
46-
github.repository_owner == 'zed-industries' &&
47-
(github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
48-
"#}))
4938
.runs_on(runners::LINUX_DEFAULT)
5039
.timeout_minutes(60_u32)
5140
.add_step(steps::checkout_repo())
@@ -54,7 +43,7 @@ fn agent_evals() -> NamedJob {
5443
.add_step(setup_cargo_config(Platform::Linux))
5544
.add_step(steps::script("cargo build --package=eval"))
5645
.add_step(run_eval())
57-
.add_step(steps::cleanup_cargo_config(Platform::Linux))
46+
.add_step(steps::cleanup_cargo_config(Platform::Linux)),
5847
)
5948
}
6049

0 commit comments

Comments
 (0)