eval: Add eval unit tests as a CI job (#32152)
We run the unit evals once a day in the middle of the night, and trigger a Slack post if it fails. Release Notes: - N/A --------- Co-authored-by: Oleksiy Syvokon <oleksiy.syvokon@gmail.com>
This commit is contained in:
parent
fa9da6ad5b
commit
dda614091a
2 changed files with 86 additions and 1 deletions
85
.github/workflows/unit_evals.yml
vendored
Normal file
85
.github/workflows/unit_evals.yml
vendored
Normal file
|
@ -0,0 +1,85 @@
|
|||
name: Run Unit Evals
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# GitHub might drop jobs at busy times, so we choose a random time in the middle of the night.
|
||||
- cron: "47 1 * * *"
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
# Allow only one workflow per any non-`main` branch.
|
||||
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
CARGO_INCREMENTAL: 0
|
||||
RUST_BACKTRACE: 1
|
||||
ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
|
||||
|
||||
jobs:
|
||||
unit_evals:
|
||||
timeout-minutes: 60
|
||||
name: Run unit evals
|
||||
runs-on:
|
||||
- buildjet-16vcpu-ubuntu-2204
|
||||
steps:
|
||||
- name: Add Rust to the PATH
|
||||
run: echo "$HOME/.cargo/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
|
||||
with:
|
||||
clean: false
|
||||
|
||||
- name: Cache dependencies
|
||||
uses: swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2
|
||||
with:
|
||||
save-if: ${{ github.ref == 'refs/heads/main' }}
|
||||
cache-provider: "buildjet"
|
||||
|
||||
- name: Install Linux dependencies
|
||||
run: ./script/linux
|
||||
|
||||
- name: Configure CI
|
||||
run: |
|
||||
mkdir -p ./../.cargo
|
||||
cp ./.cargo/ci-config.toml ./../.cargo/config.toml
|
||||
|
||||
- name: Install Rust
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
cargo install cargo-nextest --locked
|
||||
|
||||
- name: Install Node
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
with:
|
||||
node-version: "18"
|
||||
|
||||
- name: Limit target directory size
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: script/clear-target-dir-if-larger-than 100
|
||||
|
||||
- name: Run unit evals
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: cargo nextest run --workspace --no-fail-fast --features eval --no-capture -E 'test(::eval_)' --test-threads 1
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
|
||||
- name: Send the pull request link into the Slack channel
|
||||
if: ${{ failure() }}
|
||||
uses: slackapi/slack-github-action@b0fa283ad8fea605de13dc3f449259339835fc52
|
||||
with:
|
||||
method: chat.postMessage
|
||||
token: ${{ secrets.SLACK_APP_ZED_UNIT_EVALS_BOT_TOKEN }}
|
||||
payload: |
|
||||
channel: C04UDRNNJFQ
|
||||
text: "Unit Evals Failed: https://github.com/zed-industries/zed/actions/runs/${{ github.run_id }}"
|
||||
|
||||
# Even the Linux runner is not stateful, in theory there is no need to do this cleanup.
|
||||
# But, to avoid potential issues in the future if we choose to use a stateful Linux runner and forget to add code
|
||||
# to clean up the config file, I’ve included the cleanup code here as a precaution.
|
||||
# While it’s not strictly necessary at this moment, I believe it’s better to err on the side of caution.
|
||||
- name: Clean CI config file
|
||||
if: always()
|
||||
run: rm -rf ./../.cargo
|
Loading…
Add table
Add a link
Reference in a new issue