Skip to content

Commit 7c21500

Browse files
authored
Merge branch 'main' into fix/sglang-multimodal-worker-registration
2 parents 44a18bd + 44e8600 commit 7c21500

File tree

227 files changed

+13050
-2388
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

227 files changed

+13050
-2388
lines changed

.github/actions/docker-build/action.yml

Lines changed: 114 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ runs:
6262
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 #v3.11.1
6363
with:
6464
driver: docker
65+
# Pass --debug to buildkitd for more verbose build output
66+
buildkitd-flags: --debug
6567
- name: Login to ECR
6668
shell: bash
6769
env:
@@ -88,6 +90,8 @@ runs:
8890
AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }}
8991
AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }}
9092
PLATFORM: ${{ inputs.platform }}
93+
GITHUB_RUN_ID: ${{ github.run_id }}
94+
GITHUB_JOB: ${{ github.job }}
9195
run: |
9296
# Determine image tag
9397
if [ -n "${{ inputs.image_tag }}" ]; then
@@ -97,10 +101,16 @@ runs:
97101
fi
98102
99103
BUILD_START_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ)
100-
echo "🕐 Build started at: ${BUILD_START_TIME}"
101104
echo "BUILD_START_TIME=${BUILD_START_TIME}" >> $GITHUB_ENV
102105
103106
echo "image_tag=$IMAGE_TAG" >> $GITHUB_OUTPUT
107+
108+
# Create build logs directory
109+
mkdir -p build-logs
110+
BUILD_LOG_FILE="build-logs/build-${{ inputs.framework }}-$(echo '${{ inputs.platform }}' | sed 's/linux\///').log"
111+
echo "BUILD_LOG_FILE=${BUILD_LOG_FILE}" >> $GITHUB_ENV
112+
echo "📝 Build log will be saved to: ${BUILD_LOG_FILE}"
113+
104114
# Collect optional overrides provided by the workflow
105115
EXTRA_ARGS=""
106116
if [ -n "${{ inputs.base_image_tag }}" ]; then
@@ -116,24 +126,28 @@ runs:
116126
EXTRA_ARGS+=" --build-arg TORCH_BACKEND=${{ inputs.torch_backend }}"
117127
fi
118128
129+
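# Execute build and capture output (show on console AND save to file)
# NOTE(review): if this step runs under bash -eo pipefail, a failing build aborts at the pipeline below, before BUILD_EXIT_CODE and BUILD_END_TIME are recorded — confirm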
119130
./container/build.sh --tag "$IMAGE_TAG" \
120131
--target ${{ inputs.target }} \
121132
--vllm-max-jobs 10 \
122133
--framework ${{ inputs.framework }} \
123134
--platform ${{ inputs.platform }} \
124135
--use-sccache \
125136
--sccache-bucket "$SCCACHE_S3_BUCKET" \
126-
--sccache-region "$AWS_DEFAULT_REGION" $EXTRA_ARGS
137+
--sccache-region "$AWS_DEFAULT_REGION" $EXTRA_ARGS 2>&1 | tee "${BUILD_LOG_FILE}"
138+
139+
BUILD_EXIT_CODE=${PIPESTATUS[0]}
127140
128141
BUILD_END_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ)
129-
echo "🕐 Build ended at: ${BUILD_END_TIME}"
130142
echo "BUILD_END_TIME=${BUILD_END_TIME}" >> $GITHUB_ENV
131143
144+
# Exit with the build's exit code
145+
exit ${BUILD_EXIT_CODE}
146+
132147
- name: Capture Build Metrics
133148
id: metrics
134149
shell: bash
135150
run: |
136-
echo "📊 Capturing build metrics for ${{ inputs.framework }}..."
137151
138152
# Create metrics directory
139153
mkdir -p build-metrics
@@ -162,21 +176,17 @@ runs:
162176
echo "⚠️ No image tag available"
163177
fi
164178
165-
echo "📊 Final metrics captured"
166-
167-
# Create consolidated metrics JSON file
168-
echo "🔍 Debug: inputs.platform = '${{ inputs.platform }}'"
169179
PLATFORM_ARCH=$(echo "${{ inputs.platform }}" | sed 's/linux\///')
170-
echo "🔍 Debug: PLATFORM_ARCH = '${PLATFORM_ARCH}'"
180+
echo " Architecture: ${PLATFORM_ARCH}"
171181
echo "PLATFORM_ARCH=${PLATFORM_ARCH}" >> $GITHUB_ENV
172182
JOB_KEY="${{ inputs.framework }}-${PLATFORM_ARCH}"
173-
echo "🔍 Debug: JOB_KEY = '${JOB_KEY}'"
183+
echo " Job Key: ${JOB_KEY}"
174184
175185
# Create job-specific metrics file
176186
mkdir -p build-metrics
177187
METRICS_FILE="build-metrics/metrics-${{ inputs.framework }}-${PLATFORM_ARCH}-${{ github.run_id }}-${{ job.check_run_id }}.json"
178188
179-
# Create the job metrics file directly
189+
# Create the job metrics file
180190
cat > "$METRICS_FILE" << EOF
181191
{
182192
"framework": "${{ inputs.framework }}",
@@ -190,15 +200,103 @@ runs:
190200
}
191201
EOF
192202
193-
echo "📁 Created build metrics file for ${JOB_KEY}:"
194203
cat "$METRICS_FILE"
195204
196-
# Metrics captured and saved to JSON file
205+
- name: Generate Comprehensive Build Metrics
206+
id: comprehensive-metrics
207+
if: always()
208+
shell: bash
209+
run: |
210+
echo "=========================================="
211+
echo "📊 GENERATING COMPREHENSIVE BUILD METRICS"
212+
echo "=========================================="
213+
214+
# Create metrics directory
215+
mkdir -p build-metrics
216+
217+
PLATFORM_ARCH="${{ env.PLATFORM_ARCH }}"
218+
WORKFLOW_ID="${{ github.run_id }}"
219+
JOB_ID="${{ job.check_run_id }}"
220+
FRAMEWORK_LOWER=$(echo "${{ inputs.framework }}" | tr '[:upper:]' '[:lower:]')
221+
222+
# Make parser executable
223+
chmod +x .github/scripts/parse_buildkit_output.py
224+
225+
# Expected build log paths; stage arguments are assembled below from whichever logs exist
226+
BASE_BUILD_LOG="build-logs/base-image-build.log"
227+
FRAMEWORK_BUILD_LOG="build-logs/framework-${FRAMEWORK_LOWER}-build.log"
228+
229+
# Path to container metadata created in previous step
230+
CONTAINER_METADATA="build-metrics/metrics-${{ inputs.framework }}-${PLATFORM_ARCH}-${WORKFLOW_ID}-${JOB_ID}.json"
231+
232+
# Output single comprehensive JSON with all build stages
233+
COMPREHENSIVE_JSON="build-metrics/build-${{ inputs.framework }}-${PLATFORM_ARCH}-${WORKFLOW_ID}-${JOB_ID}.json"
234+
235+
echo "🚀 Parsing BuildKit outputs and merging with container metrics..."
197236
198-
# Upload job-specific build metrics as artifact
199-
- name: Upload Build Metrics
237+
# Build stage arguments dynamically based on which logs exist
238+
STAGE_ARGS=()
239+
240+
if [ -f "$BASE_BUILD_LOG" ]; then
241+
echo " ✓ Found base image log: ${BASE_BUILD_LOG}"
242+
STAGE_ARGS+=("base:${BASE_BUILD_LOG}")
243+
else
244+
echo " ℹ️ No base image log found"
245+
fi
246+
247+
if [ -f "$FRAMEWORK_BUILD_LOG" ]; then
248+
echo " ✓ Found framework log: ${FRAMEWORK_BUILD_LOG}"
249+
STAGE_ARGS+=("runtime:${FRAMEWORK_BUILD_LOG}")
250+
else
251+
echo " ℹ️ No framework log found"
252+
fi
253+
254+
# Check for any additional stage logs (e.g., build-logs/stage3-*.log)
255+
for extra_log in build-logs/stage*.log; do
256+
if [ -f "$extra_log" ]; then
257+
stage_name=$(basename "$extra_log" .log)
258+
echo " ✓ Found additional stage log: ${extra_log} (${stage_name})"
259+
STAGE_ARGS+=("${stage_name}:${extra_log}")
260+
fi
261+
done
262+
263+
echo "Container Metadata: ${CONTAINER_METADATA}"
264+
echo "Output: ${COMPREHENSIVE_JSON}"
265+
echo ""
266+
267+
# Run parser with all discovered stages
268+
# Usage: parse_buildkit_output.py <output_json> <stage1_name:log_file> [stage2_name:log_file] ... [--metadata=<file>]
269+
set +e
270+
python3 .github/scripts/parse_buildkit_output.py \
271+
"$COMPREHENSIVE_JSON" \
272+
"${STAGE_ARGS[@]}" \
273+
"--metadata=${CONTAINER_METADATA}"
274+
PARSER_EXIT_CODE=$?
275+
set -e
276+
277+
echo ""
278+
echo "📊 Parser exit code: ${PARSER_EXIT_CODE}"
279+
280+
if [ ${PARSER_EXIT_CODE} -eq 0 ] && [ -f "$COMPREHENSIVE_JSON" ]; then
281+
echo "✅ Comprehensive build metrics generated successfully"
282+
echo "📄 Output file: ${COMPREHENSIVE_JSON}"
283+
echo ""
284+
echo "=========================================="
285+
echo "📋 FULL JSON OUTPUT (for debugging)"
286+
echo "=========================================="
287+
cat "$COMPREHENSIVE_JSON"
288+
echo ""
289+
echo "=========================================="
290+
else
291+
echo "⚠️ Metrics generation had issues but continuing..."
292+
fi
293+
294+
# Upload comprehensive build metrics as artifact
295+
- name: Upload Comprehensive Build Metrics
200296
uses: actions/upload-artifact@v4
297+
if: always()
201298
with:
202299
name: build-metrics-${{ inputs.framework }}-${{ env.PLATFORM_ARCH }}-${{ github.run_id }}-${{ job.check_run_id }}
203-
path: build-metrics/metrics-${{ inputs.framework }}-${{ env.PLATFORM_ARCH }}-${{ github.run_id }}-${{ job.check_run_id }}.json
300+
path: build-metrics/build-${{ inputs.framework }}-${{ env.PLATFORM_ARCH }}-${{ github.run_id }}-${{ job.check_run_id }}.json
204301
retention-days: 7
302+

.github/actions/pytest/action.yml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,16 @@ runs:
5454
# Run pytest with detailed output and JUnit XML
5555
set +e # Don't exit on test failures
5656
57-
docker run --runtime=nvidia --gpus all -w /workspace \
57+
# Detect GPU availability and conditionally add GPU flags
58+
GPU_FLAGS=""
59+
if command -v nvidia-smi &> /dev/null && nvidia-smi &> /dev/null; then
60+
echo "GPU detected, enabling GPU runtime"
61+
GPU_FLAGS="--runtime=nvidia --gpus all"
62+
else
63+
echo "No GPU detected, running in CPU-only mode"
64+
fi
65+
66+
docker run ${GPU_FLAGS} --rm -w /workspace \
5867
--cpus=${NUM_CPUS} \
5968
--network host \
6069
--name ${{ env.CONTAINER_ID }}_pytest \

0 commit comments

Comments
 (0)