# Builds and runs the `eval` package against the agent.
#
# Triggers:
#   - nightly, via cron `0 0 * * *` (GitHub evaluates schedules in UTC);
#   - pull requests, but only when they carry the `run-eval` label
#     (see the job-level `if:` below);
#   - manual dispatch.
name: Run Agent Eval

on:
  schedule:
    - cron: "0 0 * * *"

  pull_request:
    branches:
      - "**"
    # `labeled` is included so that adding the `run-eval` label starts a run.
    types: [synchronize, reopened, labeled]

  workflow_dispatch:

concurrency:
  # Allow only one workflow per any non-`main` branch.
  # On `main` the commit SHA is part of the group, so each commit gets its own group.
  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
  cancel-in-progress: true

env:
  CARGO_TERM_COLOR: always
  CARGO_INCREMENTAL: 0
  RUST_BACKTRACE: 1
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
  ZED_EVAL_TELEMETRY: 1

jobs:
  run_eval:
    timeout-minutes: 60
    name: Run Agent Eval
    # Run only in the `zed-industries` organization. For pull requests,
    # additionally require the `run-eval` label; other events always run.
    if: >
      github.repository_owner == 'zed-industries' &&
      (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
    runs-on:
      - namespace-profile-16x32-ubuntu-2204
    steps:
      - name: Add Rust to the PATH
        run: echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"

      - name: Checkout repo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
        with:
          clean: false

      - name: Cache dependencies
        uses: swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2
        with:
          # Only runs on `main` write the cache; other refs just restore it.
          save-if: ${{ github.ref == 'refs/heads/main' }}
          # cache-provider: "buildjet"

      - name: Install Linux dependencies
        run: ./script/linux

      # Copies the CI cargo config into the parent directory's `.cargo`;
      # the final step of this job removes that directory again.
      - name: Configure CI
        run: |
          mkdir -p ./../.cargo
          cp ./.cargo/ci-config.toml ./../.cargo/config.toml

      - name: Compile eval
        run: cargo build --package=eval

      - name: Run eval
        run: cargo run --package=eval -- --repetitions=8 --concurrency=1

      # The Linux runner is not stateful, so in theory this cleanup is not needed.
      # It is kept as a precaution: if we ever switch to a stateful Linux runner and
      # forget to add code to clean up the config file, a stale config could cause issues.
      # It is not strictly necessary today, but it is better to err on the side of caution.
      - name: Clean CI config file
        if: always()
        run: rm -rf ./../.cargo