7676 with :
7777 aws_default_region : ${{ secrets.AWS_DEFAULT_REGION }}
7878 aws_account_id : ${{ secrets.AWS_ACCOUNT_ID }}
79+ azure_acr_hostname : ${{ secrets.AZURE_ACR_HOSTNAME }}
80+ azure_acr_user : ${{ secrets.AZURE_ACR_USER }}
81+ azure_acr_password : ${{ secrets.AZURE_ACR_PASSWORD }}
7982 - name : Linter
8083 shell : bash
8184 env :
@@ -416,6 +419,7 @@ jobs:
416419 export KUBECONFIG=$(pwd)/.kubeconfig
417420 kubectl config set-context --current --namespace=$NAMESPACE
418421 - name : Run Fault Tolerance Tests
422+ id : run-ft-tests
419423 run : |
420424 set -x
421425 export KUBECONFIG=$(pwd)/.kubeconfig
@@ -437,14 +441,49 @@ jobs:
437441 pip install -r container/deps/requirements.test.txt
438442 pip install kubernetes==32.0.1 kubernetes_asyncio kr8s pyyaml requests tabulate pydantic
439443
440- # Run the pytest command (tests orchestrate K8s, don't need dynamo package)
444+ # Create test-results directory
445+ mkdir -p test-results
446+
447+ # Run the pytest command with JUnit XML output
448+ set +e # Don't exit on test failures
441449 pytest tests/fault_tolerance/deploy/test_deployment.py \
442450 -m 'k8s and fault_tolerance' \
443451 -k '${{ matrix.framework.test_scenario }}' \
444452 -s -v \
445453 --namespace ${NAMESPACE} \
446454 --image ${IMAGE} \
447- --client-type legacy
455+ --client-type legacy \
456+ --junitxml=test-results/pytest_ft_report.xml \
457+ --tb=short
458+
459+ TEST_EXIT_CODE=$?
460+ echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> $GITHUB_ENV
461+ echo "🧪 Fault tolerance tests completed with exit code: ${TEST_EXIT_CODE}"
462+
463+ exit ${TEST_EXIT_CODE}
464+ continue-on-error : true
465+
# Renames the JUnit report to a run-unique filename, then surfaces the real
# test result. The test step runs with continue-on-error so that this step
# and the artifact upload always execute — without the re-fail below the job
# would report success even when the fault-tolerance tests failed.
- name: Process Fault Tolerance Test Results
  if: always()
  run: |
    set -x

    # Rename JUnit XML with unique naming if it exists
    if [ -f "test-results/pytest_ft_report.xml" ]; then
      mv "test-results/pytest_ft_report.xml" "test-results/pytest_ft_report_${{ matrix.framework.name }}_amd64_${{ github.run_id }}_${{ job.check_run_id }}.xml"
      echo "✅ JUnit XML report renamed with unique identifier"
    else
      echo "⚠️ JUnit XML report not found"
    fi

    # Propagate the test step's result. TEST_EXIT_CODE is exported to
    # $GITHUB_ENV by the test step; fall back to the step outcome in case
    # the step died before setting it (e.g. a pip install failure).
    if [ "${{ steps.run-ft-tests.outcome }}" != "success" ]; then
      echo "::error::Fault tolerance tests failed (exit code ${TEST_EXIT_CODE:-unknown})"
      exit "${TEST_EXIT_CODE:-1}"
    fi
478+
# Publish the uniquely-named JUnit report as a workflow artifact
# (7-day retention); if: always() keeps it uploaded even when the
# preceding test step failed.
479+       - name : Upload Fault Tolerance Test Results
480+         uses : actions/upload-artifact@v4
481+         if : always()
482+         with :
483+           name : test-results-${{ matrix.framework.name }}-fault_tolerance-amd64-${{ github.run_id }}-${{ job.check_run_id }}
484+           path : test-results/pytest_ft_report_${{ matrix.framework.name }}_amd64_${{ github.run_id }}_${{ job.check_run_id }}.xml
485+           retention-days : 7
486+
448487 - name : Cleanup
449488 if : always()
450489 timeout-minutes : 5
@@ -468,56 +507,6 @@ jobs:
468507 kubectl delete namespace $NAMESPACE || true
469508 echo "Namespace $NAMESPACE completed."
470509
471- # Upload metrics for this workflow and all its jobs
472- upload-workflow-metrics :
473- name : Upload Workflow Metrics
474- runs-on : gitlab
475- if : always() # Always run, even if other jobs fail
476- needs : [backend-status-check] # Wait for the status check which waits for all build jobs
477-
478- steps :
479- - name : Check out repository
480- uses : actions/checkout@v4
481-
482- - name : Set up Python
483- uses : actions/setup-python@v4
484- with :
485- python-version : ' 3.x'
486-
487- - name : Install dependencies
488- run : |
489- python -m pip install --upgrade pip
490- pip install requests
491-
492- - name : Download build metrics
493- uses : actions/download-artifact@v4
494- with :
495- pattern : build-metrics-*
496- path : build-metrics/
497- merge-multiple : true
498- continue-on-error : true # Don't fail if artifacts don't exist
499-
500- - name : Download test results
501- uses : actions/download-artifact@v4
502- with :
503- pattern : test-results-*
504- path : test-results/
505- merge-multiple : true
506- continue-on-error : true # Don't fail if artifacts don't exist
507-
508- - name : Upload Complete Workflow Metrics
509- env :
510- GITHUB_TOKEN : ${{ secrets.GITHUB_TOKEN }}
511- WORKFLOW_INDEX : ${{ secrets.WORKFLOW_INDEX }}
512- JOB_INDEX : ${{ secrets.JOB_INDEX }}
513- STEPS_INDEX : ${{ secrets.STEPS_INDEX }}
514- # Container and test index configuration
515- CONTAINER_INDEX : ${{ secrets.CONTAINER_INDEX }}
516- TEST_INDEX : ${{ secrets.TEST_INDEX }}
517- run : |
518- # Upload complete workflow metrics including container metrics
519- python3 .github/workflows/upload_complete_workflow_metrics.py
520-
521510 deploy-operator :
522511 runs-on : cpu-amd-m5-2xlarge
523512 # TODO: Uncomment this when we have a way to test the deploy-operator job in CI.
@@ -637,13 +626,17 @@ jobs:
637626 kubectl config set-context --current --namespace=$NAMESPACE --kubeconfig "${KUBECONFIG}"
638627 kubectl config get-contexts
639628 - name : Run Tests
629+ id : run-tests
640630 env :
641631 NAMESPACE : ${{ needs.deploy-operator.outputs.NAMESPACE }}
642632 run : |
643633 set -x
644634 export KUBECONFIG=$(pwd)/.kubeconfig
645635 kubectl config set-context --current --namespace=$NAMESPACE
646636
637+ # Redirect all output to a log file while still showing it
638+ exec > >(tee -a test-output.log) 2>&1
639+
647640 cd examples/backends/$FRAMEWORK
648641 export FRAMEWORK_RUNTIME_IMAGE="${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-${FRAMEWORK}-amd64"
649642 export KUBE_NS=$NAMESPACE
@@ -736,6 +729,32 @@ jobs:
736729 echo "Test passed: Response matches expected format and content"
737730 fi
738731 exit $TEST_RESULT
732+ continue-on-error : true
733+
# Copies the captured test log to a run-unique filename, then surfaces the
# real test result. The test step runs with continue-on-error so this step
# and the artifact upload always execute — without the re-fail below the
# `exit $TEST_RESULT` of the test step is swallowed and the job goes green
# even when the deployment tests failed.
- name: Process Deployment Test Results
  if: always()
  run: |
    set -x

    # Create test-results directory
    mkdir -p test-results

    # Copy and rename the test output log with unique naming
    if [ -f "test-output.log" ]; then
      cp test-output.log "test-results/deploy_test_output_${{ env.FRAMEWORK }}_${{ matrix.profile }}_amd64_${{ github.run_id }}_${{ job.check_run_id }}.log"
      echo "✅ Test output log copied to test-results/"
    else
      echo "⚠️ test-output.log not found"
    fi

    # Propagate the outcome of the (continue-on-error) test step so the
    # job still fails when the deployment tests failed.
    if [ "${{ steps.run-tests.outcome }}" != "success" ]; then
      echo "::error::Deployment tests failed"
      exit 1
    fi
749+
# Publish the captured deployment test log as a workflow artifact
# (7-day retention); if: always() keeps it uploaded even when the
# preceding test step failed.
750+       - name : Upload Deployment Test Results
751+         uses : actions/upload-artifact@v4
752+         if : always()
753+         with :
754+           name : test-results-${{ env.FRAMEWORK }}-deploy-${{ matrix.profile }}-amd64-${{ github.run_id }}-${{ job.check_run_id }}
755+           path : test-results/deploy_test_output_${{ env.FRAMEWORK }}_${{ matrix.profile }}_amd64_${{ github.run_id }}_${{ job.check_run_id }}.log
756+           retention-days : 7
757+
739758 - name : Cleanup
740759 if : always()
741760 timeout-minutes : 5
0 commit comments