Skip to content

Pstjohn/llama3 lingua fixes (#1364) #1456

Pstjohn/llama3 lingua fixes (#1364)

Pstjohn/llama3 lingua fixes (#1364) #1456

# BioNeMo Framework CI Workflow
#
# This workflow runs comprehensive tests for the BioNeMo framework on various triggers:
#
# TRIGGERS:
# - Push to main branch, pull-request branches, or dependabot branches
# - Merge group events (when PRs are merged via merge queue)
# - Scheduled runs (daily at 7 AM UTC)
#
# WORKFLOW OVERVIEW:
# 1. changed-files: Detects which files have changed compared to the merge-base with the main branch
#    - Tracks changes in: All files (**) except:
#      * bionemo-recipes/** (recipes have separate workflow)
#      * **.md (markdown documentation files)
#      * .github/** (GitHub configuration, except this workflow file)
#      * .gitignore, .devcontainer/**
#      * ci/scripts/recipes_local_test.py
#      * ci/scripts/check_copied_files.py
#    - Includes: .github/workflows/unit-tests-framework.yml (this workflow file)
# 2. pre-commit: Runs static code checks and linting
# 3. get-pr-labels: Retrieves PR labels for conditional job execution
# 4. build-bionemo-image: Builds Docker image (conditional on triggers/labels)
# 5. run-tests: Runs unit tests (when image build succeeds)
# 6. run-tests-slow: Runs slow tests (when image succeeds + ciflow:slow label OR schedule/merge_group/ciflow:all)
# 7. run-tests-notebooks: Runs notebook tests (when image succeeds + ciflow:notebooks label OR schedule/merge_group/ciflow:all)
# 8. verify-tests-status: Verifies all test jobs completed successfully
#
# CONDITIONAL EXECUTION:
# - build-bionemo-image runs on: schedule, ciflow:all label, (no ciflow:skip + modified files), (merge_group + modified files)
# - run-tests runs when: build-bionemo-image succeeds
# - run-tests-slow runs when: build-bionemo-image succeeds AND (schedule OR merge_group OR ciflow:all OR ciflow:slow)
# - run-tests-notebooks runs when: build-bionemo-image succeeds AND (schedule OR merge_group OR ciflow:all OR ciflow:notebooks)
name: BioNeMo Framework CI

# Events that start this workflow.
on:
  push:
    branches:
      - main
      - "pull-request/[0-9]+"
      - "dependabot/**"
  merge_group:
    types:
      - checks_requested
  schedule:
    # Nightly at 07:00 UTC (midnight MST).
    - cron: "0 7 * * *"

# Shell used by every `run` step that does not set its own `shell`:
# trace commands (-x), stop on the first error (-e), treat unset variables as
# errors (-u), and fail a pipeline if any command in it fails (pipefail).
defaults:
  run:
    shell: bash -x -e -u -o pipefail {0}

# One active run per workflow and PR number (when the event carries one) or
# ref; a newer run in the same group cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
changed-files:
  # Reports, via the `any_changed` output, whether any file matched by the
  # `files` patterns below differs from the merge-base with origin/main.
  runs-on: ubuntu-latest
  outputs:
    any_changed: ${{ steps.changed-files.outputs.any_changed }}
  steps:
    - uses: actions/checkout@v4
      with:
        # Fetch the full history; the merge-base step below runs
        # `git merge-base` against origin/main.
        fetch-depth: 0
        submodules: "recursive"
    - name: Get merge-base commit
      id: merge-base
      run: |
        # Get the merge-base between current branch and main
        MERGE_BASE=$(git merge-base HEAD origin/main)
        echo "merge-base=$MERGE_BASE" >> "$GITHUB_OUTPUT"
        echo "Merge-base commit: $MERGE_BASE"
    - uses: step-security/changed-files@v46
      id: changed-files
      with:
        base_sha: ${{ steps.merge-base.outputs.merge-base }}
        # Everything counts as a change except the `!` patterns; this
        # workflow file itself is listed explicitly as an inclusion.
        files: |
          **
          !bionemo-recipes/**
          !**.md
          !.github/**
          !.gitignore
          !.devcontainer/**
          !ci/scripts/recipes_local_test.py
          !ci/scripts/check_copied_files.py
          .github/workflows/unit-tests-framework.yml
    - name: Show output
      # The outputs contain file names taken from the pushed branch. Hand them
      # to the shell through an environment variable instead of expanding the
      # expression inside the script text, so a file name containing a quote
      # cannot terminate the string and inject shell commands.
      env:
        CHANGED_FILES_OUTPUTS: ${{ toJSON(steps.changed-files.outputs) }}
      run: |
        echo "$CHANGED_FILES_OUTPUTS"
      shell: bash
pre-commit:
  # Static checks and linting, run through ci/scripts/static_checks.sh.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0
    - uses: actions/setup-python@v5
      with:
        python-version: "3.13"
    - name: Setup UV
      uses: astral-sh/setup-uv@v6
      with:
        enable-cache: true
    - run: |
        uv tool install pre-commit --with pre-commit-uv --force-reinstall
        # The requirement must be quoted: unquoted, the shell parses `>=0.9.0`
        # as a redirection of stdout to a file named `=0.9.0`, and tach is
        # installed without any version constraint.
        uv tool install "tach>=0.9.0"
        uv tool update-shell
    - run: ./ci/scripts/static_checks.sh
# With copy-pr-bot, we need to get the PR labels from the PR API rather than from the event metadata.
get-pr-labels:
  runs-on: ubuntu-latest
  outputs:
    # JSON array of label names; "[]" for branches that are not pull-request/<N>.
    labels: ${{ steps.get-labels.outputs.labels || steps.get-labels-empty.outputs.labels }}
  steps:
    - name: Get PR number from branch
      if: startsWith(github.ref, 'refs/heads/pull-request/')
      id: get-pr-num
      # Context values reach the script through environment variables rather
      # than being expanded into the script text.
      env:
        REF_NAME: ${{ github.ref_name }}
      run: |
        # The PR number is the trailing run of digits in the branch name.
        PR_NUM=$(echo "$REF_NAME" | grep -oE '[0-9]+$')
        echo "pr_num=$PR_NUM" >> "$GITHUB_OUTPUT"
    - name: Get PR labels
      id: get-labels
      if: startsWith(github.ref, 'refs/heads/pull-request/')
      env:
        GH_TOKEN: ${{ github.token }}
        REPO: ${{ github.repository }}
        PR_NUM: ${{ steps.get-pr-num.outputs.pr_num }}
      run: |
        # Fall back to an empty list when the API call fails. The fallback
        # replaces the value outright, so any partial output from a failed
        # call is never written to the single-line `labels` output.
        if ! LABELS=$(gh api "repos/${REPO}/pulls/${PR_NUM}" --jq '[.labels[].name]'); then
          LABELS="[]"
        fi
        echo "labels=$LABELS" >> "$GITHUB_OUTPUT"
        echo "Retrieved labels: $LABELS"
    - name: Set empty labels for non-PR branches
      if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
      id: get-labels-empty
      run: |
        echo "labels=[]" >> "$GITHUB_OUTPUT"
        echo "Set empty labels for non-PR branch"
build-bionemo-image:
  # Builds the framework image and pushes it, tagged (among other tags) with
  # this run's id.
  runs-on: linux-amd64-cpu16
  needs:
    - pre-commit
    - get-pr-labels
    - changed-files
  # Build when any of the following holds:
  #   - this is a scheduled run
  #   - the PR carries the ciflow:all label
  #   - tracked files changed and the PR is not labelled ciflow:skip
  #   - this is a merge-queue run and tracked files changed
  if: |
    (github.event_name == 'schedule') ||
    contains(fromJSON(needs.get-pr-labels.outputs.labels || '[]'), 'ciflow:all') ||
    (
      !contains(fromJSON(needs.get-pr-labels.outputs.labels || '[]'), 'ciflow:skip') &&
      (needs.changed-files.outputs.any_changed == 'true')
    ) ||
    (
      (github.event_name == 'merge_group') &&
      (needs.changed-files.outputs.any_changed == 'true')
    )
  steps:
    - name: Login to Docker Hub
      uses: docker/login-action@v3
      with:
        username: ${{ vars.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_PASSWORD }}

    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        submodules: true

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Docker Metadata
      id: metadata
      uses: docker/metadata-action@v5
      with:
        images: svcbionemo023/bionemo-framework
        tags: |
          type=schedule
          type=ref,event=branch
          type=ref,event=tag
          type=ref,event=pr
          type=raw,value=${{ github.run_id }}

    # Computes suitable cache-from / cache-to values (see the action's README).
    # It does not appear to cache correctly for merge_group events, so the
    # build step below adds the `main` cache ref as an extra cache-from source.
    # Caching from the pr- caches for merge_group events may be a further small
    # optimization; see
    # https://github.com/int128/docker-build-cache-config-action/issues/1005.
    - id: cache
      uses: int128/docker-build-cache-config-action@v1
      with:
        image: svcbionemo023/bionemo-build-cache

    - name: Build and push
      uses: docker/build-push-action@v5
      with:
        push: true
        tags: ${{ steps.metadata.outputs.tags }}
        labels: ${{ steps.metadata.outputs.labels }}
        cache-from: |
          ${{ steps.cache.outputs.cache-from }}
          type=registry,ref=svcbionemo023/bionemo-build-cache:main
        cache-to: ${{ steps.cache.outputs.cache-to }}
run-tests:
  # Unit tests, executed inside the image tagged with this run's id.
  # Runs only when the image build succeeded.
  if: needs.build-bionemo-image.result == 'success'
  needs:
    - build-bionemo-image
    - get-pr-labels
  runs-on: linux-amd64-gpu-l4-latest-1
  container:
    image: svcbionemo023/bionemo-framework:${{ github.run_id }}
    credentials:
      username: ${{ vars.DOCKER_USERNAME }}
      password: ${{ secrets.DOCKER_PASSWORD }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    # This stage generates the repository's code-coverage metrics; the
    # following steps upload them to Codecov.
    - name: Run tests
      env:
        BIONEMO_DATA_SOURCE: ngc
      run: |
        chmod +x ./ci/scripts/run_pytest_unittests.sh
        ./ci/scripts/run_pytest_unittests.sh

    # Skipped on merge-queue and nightly runs to avoid duplicate Codecov
    # reports. See https://github.com/matplotlib/napari-matplotlib/issues/155
    - name: Upload coverage to Codecov
      if: github.event_name != 'merge_group' && github.event_name != 'schedule'
      uses: codecov/codecov-action@v5
      with:
        token: ${{ secrets.CODECOV_TOKEN }}

    # Same skip rule as the coverage upload; `!cancelled()` lets this step run
    # after a failing test step as long as the run was not cancelled.
    # See https://github.com/matplotlib/napari-matplotlib/issues/155
    - name: Upload test results to Codecov
      if: ${{ !cancelled() && github.event_name != 'merge_group' && github.event_name != 'schedule' }}
      uses: codecov/test-results-action@v1
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
run-tests-slow:
  # Slow tests. Requires a successful image build and one of: scheduled run,
  # merge-queue run, ciflow:all label, ciflow:slow label.
  if: |
    (needs.build-bionemo-image.result == 'success') &&
    (
      (github.event_name == 'schedule') ||
      (github.event_name == 'merge_group') ||
      contains(fromJSON(needs.get-pr-labels.outputs.labels || '[]'), 'ciflow:all') ||
      contains(fromJSON(needs.get-pr-labels.outputs.labels || '[]'), 'ciflow:slow')
    )
  needs:
    - build-bionemo-image
    - get-pr-labels
  runs-on: linux-amd64-gpu-l4-latest-1
  container:
    image: svcbionemo023/bionemo-framework:${{ github.run_id }}
    credentials:
      username: ${{ vars.DOCKER_USERNAME }}
      password: ${{ secrets.DOCKER_PASSWORD }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    # Not every sub-package has slow tests, and some sub-packages have tests
    # with the same name, so the tests are run package by package, as with the
    # fast tests.
    - name: Run slow tests
      env:
        BIONEMO_DATA_SOURCE: ngc
      run: |
        chmod +x ./ci/scripts/run_pytest_slow.sh
        ./ci/scripts/run_pytest_slow.sh
run-tests-notebooks:
  # Notebook tests. Requires a successful image build and one of: scheduled
  # run, merge-queue run, ciflow:all label, ciflow:notebooks label.
  if: |
    (needs.build-bionemo-image.result == 'success') &&
    (
      (github.event_name == 'schedule') ||
      (github.event_name == 'merge_group') ||
      contains(fromJSON(needs.get-pr-labels.outputs.labels || '[]'), 'ciflow:all') ||
      contains(fromJSON(needs.get-pr-labels.outputs.labels || '[]'), 'ciflow:notebooks')
    )
  needs:
    - build-bionemo-image
    - get-pr-labels
  runs-on: linux-amd64-gpu-l4-latest-1
  container:
    image: svcbionemo023/bionemo-framework:${{ github.run_id }}
    credentials:
      username: ${{ vars.DOCKER_USERNAME }}
      password: ${{ secrets.DOCKER_PASSWORD }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Run notebook tests
      env:
        BIONEMO_DATA_SOURCE: ngc
      run: |
        chmod +x ./ci/scripts/run_pytest_notebooks.sh
        ./ci/scripts/run_pytest_notebooks.sh
verify-tests-status:
  # Based on the status of this job, the unit-tests workflow succeeds or fails.
  # This step checks the result of every job listed in `needs` and fails if any
  # of them failed or was cancelled; skipped jobs are accepted.
  # It is a workaround for the lack of a built-in feature to finalize a
  # pipeline by checking the status of multiple jobs.
  needs: # List every job whose failure must fail the workflow, including all run-*-test jobs
    # changed-files is listed explicitly: when it fails, the image build and
    # all test jobs are skipped rather than failed, and a skipped result alone
    # would be reported as success by the check below.
    - changed-files
    - pre-commit
    - get-pr-labels
    - build-bionemo-image
    - run-tests
    - run-tests-slow
    - run-tests-notebooks
    # Add all other run-*-test jobs
  runs-on: ubuntu-latest
  if: always() # This ensures the job runs even if previous jobs fail
  steps:
    - name: Check test job statuses
      run: |
        # The expression is evaluated before the script runs and expands to
        # the literal text "true" or "false".
        if [[ "${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
          echo "Some test jobs have failed or been cancelled!"
          exit 1
        else
          echo "All test jobs have completed successfully or been skipped!"
          exit 0
        fi