Skip to content

Support sanity checking weight consistency especially for RL #8899

Support sanity checking weight consistency especially for RL

Support sanity checking weight consistency especially for RL #8899

Workflow file for this run

# Workflow identity, triggers, and run de-duplication for the XPU PR test.
name: PR Test (XPU)

# Runs for pushes to and pull requests against main, and can also be started
# manually from the Actions tab.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:

# One concurrency group per ref: a newer run for the same ref cancels the run
# that is still in progress.
concurrency:
  group: pr-test-xpu-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # ==================== Check Changes ==================== #
  # Publishes `main_package` ('true'/'false') telling the downstream jobs
  # whether the change touches any path listed in the filter below.
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      main_package: ${{ steps.filter.outputs.main_package }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        with:
          filters: |
            main_package:
              - "python/sglang/!(multimodal_gen)/**"
              - "python/*.toml"
              - "scripts/ci/**"
              - "test/**"
              - "sgl-kernel/**"
              - ".github/workflows/pr-test-xpu.yml"
              - "docker/xpu.Dockerfile"

  # ==================== PR Gate ==================== #
  # Delegates to the reusable pr-gate workflow; only evaluated when relevant
  # files changed.
  pr-gate:
    needs: check-changes
    if: needs.check-changes.outputs.main_package == 'true'
    uses: ./.github/workflows/pr-gate.yml
    secrets: inherit

  # ==================== Build and Test ==================== #
  # Builds the XPU image, starts a container from it, installs test
  # dependencies inside the container, and runs the per-commit-xpu suite.
  build-and-test:
    needs: [check-changes, pr-gate]
    if: needs.check-changes.outputs.main_package == 'true'
    runs-on: intel-bmg
    env:
      HF_HOME: /home/sdp/.cache/huggingface
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build Docker image
        # The PR branch name and fork URL are attacker-controlled. They are
        # handed to the script as environment variables instead of being
        # pasted into the script text via ${{ }}, so a crafted branch name
        # cannot inject shell commands on the self-hosted runner.
        # Both values are empty on non-PR events; the ${VAR:+...} expansions
        # below then drop the corresponding --build-arg entirely.
        env:
          PR_REPO: ${{ github.event.pull_request.head.repo.clone_url }}
          PR_HEAD_REF: ${{ github.head_ref }}
        run: |
          docker build \
            ${PR_REPO:+--build-arg SG_LANG_REPO=$PR_REPO} \
            ${PR_HEAD_REF:+--build-arg SG_LANG_BRANCH=$PR_HEAD_REF} \
            --no-cache --progress=plain -f docker/xpu.Dockerfile -t xpu_sglang_main:bmg .

      - name: Run container
        id: start_container
        # Starts a detached container with /dev/dri passed through and the
        # host Hugging Face cache mounted, then exports its id for the
        # following steps.
        # NOTE(review): group 992 is presumably the runner's `render` group
        # GID -- confirm against the intel-bmg host.
        run: |
          container_id=$(docker run -dt \
            --group-add 992 \
            --group-add $(getent group video | cut -d: -f3) \
            -v ${HF_HOME}:/root/.cache/huggingface \
            --device /dev/dri \
            -e HF_TOKEN="$(cat ~/huggingface_token.txt)" \
            xpu_sglang_main:bmg)
          echo "Started container: $container_id"
          echo "container_id=$container_id" >> "$GITHUB_OUTPUT"

      - name: Install Dependency
        timeout-minutes: 20
        # The last command symlinks the env's interpreter to /usr/bin/python3
        # so that later steps can invoke plain `python3`.
        run: |
          cid="${{ steps.start_container.outputs.container_id }}"
          docker exec "$cid" /home/sdp/miniforge3/envs/py3.10/bin/python3 -m pip install --upgrade pip
          docker exec "$cid" /home/sdp/miniforge3/envs/py3.10/bin/python3 -m pip install pytest expecttest ray huggingface_hub
          docker exec "$cid" /home/sdp/miniforge3/envs/py3.10/bin/python3 -m pip uninstall -y flashinfer-python
          docker exec "$cid" /bin/bash -c '/home/sdp/miniforge3/envs/py3.10/bin/huggingface-cli login --token ${HF_TOKEN} '
          docker exec -u root "$cid" /bin/bash -c "ln -sf /home/sdp/miniforge3/envs/py3.10/bin/python3 /usr/bin/python3"

      - name: Run E2E Bfloat16 tests
        timeout-minutes: 20
        # The inner script is single-quoted so LD_LIBRARY_PATH is expanded
        # inside the container (not on the runner), and it is exported so the
        # python3 child process actually inherits it. The ${VAR:+:...} form
        # avoids a trailing ':' when the container has no prior value.
        run: |
          cid="${{ steps.start_container.outputs.container_id }}"
          docker exec -w /home/sdp/sglang/ "$cid" \
            bash -c 'export LD_LIBRARY_PATH="/home/sdp/miniforge3/envs/py3.10/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" && cd ./test/srt && python3 run_suite.py --suite per-commit-xpu'

      - name: Cleanup container
        # Always runs; `|| true` keeps removal failures (for example, the
        # container never started) from failing the job.
        if: always()
        run: |
          cid="${{ steps.start_container.outputs.container_id }}"
          docker rm -f "$cid" || true

  # ==================== Finish ==================== #
  # Single summary job. It always runs and fails unless every job it depends
  # on either succeeded or was skipped. Checking pr-gate as well matters
  # because a failed gate makes build-and-test "skipped", which on its own
  # would be accepted as a pass.
  finish:
    if: always()
    needs: [build-and-test, pr-gate]
    runs-on: ubuntu-latest
    steps:
      - name: Check job status
        run: |
          result="${{ needs.build-and-test.result }}"
          gate_result="${{ needs.pr-gate.result }}"
          for r in "$gate_result" "$result"; do
            if [ "$r" != "success" ] && [ "$r" != "skipped" ]; then
              echo "Job failed with result: $r"
              exit 1
            fi
          done
          echo "All jobs completed successfully (result: $result)"
          exit 0