From f0113d6c10340786f4bb419dce9c2457cfcfc6e5 Mon Sep 17 00:00:00 2001 From: Alexander Poddubny Date: Fri, 6 Mar 2026 14:36:23 -0800 Subject: [PATCH 01/13] Adding docker image caching --- .github/actions/docker-build/action.yml | 91 ++++++++++++----- .github/workflows/build.yml | 130 ++++++++++++++++++------ .github/workflows/postmerge-ci.yml | 14 ++- 3 files changed, 174 insertions(+), 61 deletions(-) diff --git a/.github/actions/docker-build/action.yml b/.github/actions/docker-build/action.yml index 4595d47fd033..8f7086898ef7 100644 --- a/.github/actions/docker-build/action.yml +++ b/.github/actions/docker-build/action.yml @@ -24,10 +24,25 @@ inputs: description: 'Build context path' default: '.' required: false + push: + description: 'Push the built image to the registry instead of loading it locally' + default: 'false' + required: false + ecr-cache-tags: + description: 'Space-separated ECR image refs to use as registry layer-cache sources (--cache-from type=registry,ref=...). Falls back to GitHub Actions cache when empty.' + default: '' + required: false + ecr-write-cache-tag: + description: 'ECR image ref to write the build cache to (--cache-to type=registry,ref=...,mode=max). Falls back to GitHub Actions cache when empty.' + default: '' + required: false runs: using: composite steps: + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: NGC Login shell: sh run: | @@ -44,35 +59,55 @@ runs: - name: Build Docker Image shell: sh run: | - # Function to build Docker image - build_docker_image() { - local image_tag="$1" - local isaacsim_base_image="$2" - local isaacsim_version="$3" - local dockerfile_path="$4" - local context_path="$5" + IMAGE_TAG="${{ inputs.image-tag }}" + PUSH="${{ inputs.push }}" + ECR_CACHE_TAGS="${{ inputs.ecr-cache-tags }}" + ECR_WRITE_CACHE_TAG="${{ inputs.ecr-write-cache-tag }}" - echo "Building Docker image: $image_tag" - echo "Using Dockerfile: $dockerfile_path" - echo "Build context: $context_path" + echo "Building Docker image: $IMAGE_TAG" + echo "Using Dockerfile: ${{ inputs.dockerfile-path }}" + echo "Build context: ${{ inputs.context-path }}" - # Build Docker image - docker buildx build --progress=plain --platform linux/amd64 \ - -t isaac-lab-dev \ - -t $image_tag \ - --build-arg ISAACSIM_BASE_IMAGE_ARG="$isaacsim_base_image" \ - --build-arg ISAACSIM_VERSION_ARG="$isaacsim_version" \ - --build-arg ISAACSIM_ROOT_PATH_ARG=/isaac-sim \ - --build-arg ISAACLAB_PATH_ARG=/workspace/isaaclab \ - --build-arg DOCKER_USER_HOME_ARG=/root \ - --cache-from type=gha \ - --cache-to type=gha,mode=max \ - -f $dockerfile_path \ - --load $context_path + # Assemble --cache-from flags: prefer ECR registry cache, fall back to GHA cache + if [ -n "$ECR_CACHE_TAGS" ]; then + CACHE_FROM_ARGS="" + for tag in $ECR_CACHE_TAGS; do + CACHE_FROM_ARGS="$CACHE_FROM_ARGS --cache-from type=registry,ref=$tag" + done + else + CACHE_FROM_ARGS="--cache-from type=gha" + fi - echo "✅ Docker image built successfully: $image_tag" - docker images | grep isaac-lab-dev - } + # Assemble --cache-to flag: prefer ECR registry cache, fall back to GHA cache + if [ -n "$ECR_WRITE_CACHE_TAG" ]; then + CACHE_TO_ARG="--cache-to type=registry,ref=$ECR_WRITE_CACHE_TAG,mode=max" + else + CACHE_TO_ARG="--cache-to type=gha,mode=max" + fi + + # Assemble output mode: push to registry or load into local daemon + if [ "$PUSH" = "true" ]; then + OUTPUT_ARG="--push" + TAG_ARGS="-t $IMAGE_TAG" + else + OUTPUT_ARG="--load" + TAG_ARGS="-t isaac-lab-dev -t $IMAGE_TAG" + fi + + # shellcheck disable=SC2086 + docker buildx build --progress=plain --platform linux/amd64 \ + $TAG_ARGS \ + --build-arg ISAACSIM_BASE_IMAGE_ARG="${{ inputs.isaacsim-base-image }}" \ + --build-arg ISAACSIM_VERSION_ARG="${{ inputs.isaacsim-version }}" \ + --build-arg ISAACSIM_ROOT_PATH_ARG=/isaac-sim \ + --build-arg ISAACLAB_PATH_ARG=/workspace/isaaclab \ + --build-arg DOCKER_USER_HOME_ARG=/root \ + $CACHE_FROM_ARGS \ + $CACHE_TO_ARG \ + -f "${{ inputs.dockerfile-path }}" \ + $OUTPUT_ARG "${{ inputs.context-path }}" - # Call the function with provided parameters - build_docker_image "${{ inputs.image-tag }}" "${{ inputs.isaacsim-base-image }}" "${{ inputs.isaacsim-version }}" "${{ inputs.dockerfile-path }}" "${{ inputs.context-path }}" + echo "✅ Docker image built successfully: $IMAGE_TAG" + if [ "$PUSH" != "true" ]; then + docker images | grep "isaac-lab-dev" || true + fi diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 28df43417118..ca0e5f1b14c7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,13 +28,20 @@ env: NGC_API_KEY: ${{ secrets.NGC_API_KEY }} ISAACSIM_BASE_IMAGE: nvcr.io/nvidian/isaac-sim #${{ vars.ISAACSIM_BASE_IMAGE || 'nvcr.io/nvidia/isaac-sim' }} ISAACSIM_BASE_VERSION: 'latest-develop' #${{ vars.ISAACSIM_BASE_VERSION || '5.1.0' }} - DOCKER_IMAGE_TAG: isaac-lab-dev:${{ github.event_name == 'pull_request' && format('pr-{0}', github.event.pull_request.number) || github.ref_name }}-${{ github.sha }} jobs: - test-isaaclab-tasks: + # ── Build Phase ────────────────────────────────────────────────────────────── + # Each Dockerfile is built once per pipeline and pushed to ECR with the commit + # SHA as the image tag. All test jobs pull from ECR rather than rebuilding, + # eliminating redundant multi-gigabyte builds. Layer caching is stored in ECR + # with per-branch keys so that feature branches inherit the develop cache on + # first run and build incrementally on subsequent runs. + + build-base-image: runs-on: [self-hosted, gpu] - timeout-minutes: 180 - continue-on-error: true + timeout-minutes: 60 + outputs: + ecr-image-tag: ${{ steps.tags.outputs.ecr_image_tag }} steps: - name: Checkout Code @@ -43,12 +50,85 @@ jobs: fetch-depth: 0 lfs: true - - name: Build Docker Image + - name: Compute ECR image and cache tags + id: tags + run: | + BRANCH=$(echo "${{ github.ref_name }}" | tr '/' '-') + SHA="${{ github.sha }}" + if [ "${{ github.event_name }}" = "pull_request" ]; then + ECR_IMAGE_TAG="${ECR_CACHE_URL}:pr-${{ github.event.pull_request.number }}-${SHA}" + else + ECR_IMAGE_TAG="${ECR_CACHE_URL}:${BRANCH}-${SHA}" + fi + echo "ecr_image_tag=${ECR_IMAGE_TAG}" >> $GITHUB_OUTPUT + echo "ecr_cache_tag=${ECR_CACHE_URL}:cache-base-${BRANCH}" >> $GITHUB_OUTPUT + echo "ecr_cache_fallback=${ECR_CACHE_URL}:cache-base-develop" >> $GITHUB_OUTPUT + + - name: Build and Push Base Docker Image uses: ./.github/actions/docker-build with: - image-tag: ${{ env.DOCKER_IMAGE_TAG }} + image-tag: ${{ steps.tags.outputs.ecr_image_tag }} isaacsim-base-image: ${{ env.ISAACSIM_BASE_IMAGE }} isaacsim-version: ${{ env.ISAACSIM_BASE_VERSION }} + push: 'true' + ecr-cache-tags: "${{ steps.tags.outputs.ecr_cache_tag }} ${{ steps.tags.outputs.ecr_cache_fallback }}" + ecr-write-cache-tag: ${{ steps.tags.outputs.ecr_cache_tag }} + + build-curobo-image: + runs-on: [self-hosted, gpu] + timeout-minutes: 90 + outputs: + ecr-image-tag: ${{ steps.tags.outputs.ecr_image_tag }} + + steps: + - name: Checkout Code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + lfs: true + + - name: Compute ECR image and cache tags + id: tags + run: | + BRANCH=$(echo "${{ github.ref_name }}" | tr '/' '-') + SHA="${{ github.sha }}" + if [ "${{ github.event_name }}" = "pull_request" ]; then + ECR_IMAGE_TAG="${ECR_CACHE_URL}:pr-${{ github.event.pull_request.number }}-${SHA}-curobo" + else + ECR_IMAGE_TAG="${ECR_CACHE_URL}:${BRANCH}-${SHA}-curobo" + fi + echo "ecr_image_tag=${ECR_IMAGE_TAG}" >> $GITHUB_OUTPUT + echo "ecr_cache_tag=${ECR_CACHE_URL}:cache-curobo-${BRANCH}" >> $GITHUB_OUTPUT + echo "ecr_cache_fallback=${ECR_CACHE_URL}:cache-curobo-develop" >> $GITHUB_OUTPUT + + - name: Build and Push cuRobo Docker Image + uses: ./.github/actions/docker-build + with: + image-tag: ${{ steps.tags.outputs.ecr_image_tag }} + isaacsim-base-image: ${{ env.ISAACSIM_BASE_IMAGE }} + isaacsim-version: ${{ env.ISAACSIM_BASE_VERSION }} + dockerfile-path: 'docker/Dockerfile.curobo' + push: 'true' + ecr-cache-tags: "${{ steps.tags.outputs.ecr_cache_tag }} ${{ steps.tags.outputs.ecr_cache_fallback }}" + ecr-write-cache-tag: ${{ steps.tags.outputs.ecr_cache_tag }} + + # ── Test Phase ─────────────────────────────────────────────────────────────── + + test-isaaclab-tasks: + needs: [build-base-image] + runs-on: [self-hosted, gpu] + timeout-minutes: 180 + continue-on-error: true + + steps: + - name: Checkout Code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + lfs: true + + - name: Pull Base Docker Image from ECR + run: docker pull ${{ needs.build-base-image.outputs.ecr-image-tag }} - name: Run IsaacLab Tasks Tests uses: ./.github/actions/run-tests @@ -56,7 +136,7 @@ jobs: test-path: "tools" result-file: "isaaclab-tasks-report.xml" container-name: "isaac-lab-tasks-test-$$" - image-tag: ${{ env.DOCKER_IMAGE_TAG }} + image-tag: ${{ needs.build-base-image.outputs.ecr-image-tag }} pytest-options: "" filter-pattern: "isaaclab_tasks" include-files: "test_multi_agent_environments.py,test_pickplace_stack_environments.py,test_environments.py,test_factory_environments.py,test_environments_training.py,test_cartpole_showcase_environments.py,test_teleop_environments.py" @@ -93,6 +173,7 @@ jobs: fi test-isaaclab-tasks-2: + needs: [build-base-image] runs-on: [self-hosted, gpu] timeout-minutes: 180 continue-on-error: true @@ -104,12 +185,8 @@ jobs: fetch-depth: 0 lfs: true - - name: Build Docker Image - uses: ./.github/actions/docker-build - with: - image-tag: ${{ env.DOCKER_IMAGE_TAG }} - isaacsim-base-image: ${{ env.ISAACSIM_BASE_IMAGE }} - isaacsim-version: ${{ env.ISAACSIM_BASE_VERSION }} + - name: Pull Base Docker Image from ECR + run: docker pull ${{ needs.build-base-image.outputs.ecr-image-tag }} - name: Run IsaacLab Tasks Tests 2 uses: ./.github/actions/run-tests @@ -117,7 +194,7 @@ jobs: test-path: "tools" result-file: "isaaclab-tasks-2-report.xml" container-name: "isaac-lab-tasks-2-test-$$" - image-tag: ${{ env.DOCKER_IMAGE_TAG }} + image-tag: ${{ needs.build-base-image.outputs.ecr-image-tag }} pytest-options: "" filter-pattern: "isaaclab_tasks" include-files: "test_teleop_environments_with_stage_in_memory.py,test_lift_teddy_bear.py,test_environment_determinism.py,test_hydra.py,test_env_cfg_no_forbidden_imports.py,test_rl_device_separation.py,test_cartpole_showcase_environments_with_stage_in_memory.py,test_environments_with_stage_in_memory.py" @@ -154,6 +231,7 @@ jobs: fi test-general: + needs: [build-base-image] runs-on: [self-hosted, gpu] timeout-minutes: 180 @@ -164,12 +242,8 @@ jobs: fetch-depth: 0 lfs: true - - name: Build Docker Image - uses: ./.github/actions/docker-build - with: - image-tag: ${{ env.DOCKER_IMAGE_TAG }} - isaacsim-base-image: ${{ env.ISAACSIM_BASE_IMAGE }} - isaacsim-version: ${{ env.ISAACSIM_BASE_VERSION }} + - name: Pull Base Docker Image from ECR + run: docker pull ${{ needs.build-base-image.outputs.ecr-image-tag }} - name: Run General Tests id: run-general-tests @@ -178,7 +252,7 @@ jobs: test-path: "tools" result-file: "general-tests-report.xml" container-name: "isaac-lab-general-test-$$" - image-tag: ${{ env.DOCKER_IMAGE_TAG }} + image-tag: ${{ needs.build-base-image.outputs.ecr-image-tag }} pytest-options: "" filter-pattern: "not isaaclab_tasks" @@ -214,6 +288,7 @@ jobs: fi test-curobo: + needs: [build-curobo-image] runs-on: [self-hosted, gpu] timeout-minutes: 120 continue-on-error: true @@ -225,13 +300,8 @@ jobs: fetch-depth: 0 lfs: true - - name: Build Docker Image (cuRobo) - uses: ./.github/actions/docker-build - with: - image-tag: ${{ env.DOCKER_IMAGE_TAG }}-curobo - isaacsim-base-image: ${{ env.ISAACSIM_BASE_IMAGE }} - isaacsim-version: ${{ env.ISAACSIM_BASE_VERSION }} - dockerfile-path: 'docker/Dockerfile.curobo' + - name: Pull cuRobo Docker Image from ECR + run: docker pull ${{ needs.build-curobo-image.outputs.ecr-image-tag }} - name: Run cuRobo and SkillGen Tests uses: ./.github/actions/run-tests @@ -239,7 +309,7 @@ jobs: test-path: "tools" result-file: "curobo-tests-report.xml" container-name: "isaac-lab-curobo-test-$$" - image-tag: ${{ env.DOCKER_IMAGE_TAG }}-curobo + image-tag: ${{ needs.build-curobo-image.outputs.ecr-image-tag }} pytest-options: "" filter-pattern: "" curobo-only: "true" @@ -277,7 +347,7 @@ jobs: combine-results: needs: [test-isaaclab-tasks, test-isaaclab-tasks-2, test-general, test-curobo] - runs-on: [self-hosted, gpu] + runs-on: [self-hosted] if: always() steps: diff --git a/.github/workflows/postmerge-ci.yml b/.github/workflows/postmerge-ci.yml index e19613cd9dd3..71800e0a582c 100644 --- a/.github/workflows/postmerge-ci.yml +++ b/.github/workflows/postmerge-ci.yml @@ -93,6 +93,10 @@ jobs: echo "Repository name: $REPO_SHORT_NAME" echo "IsaacSim versions: ${{ env.ISAACSIM_BASE_VERSIONS_STRING }}" + # ECR layer-cache tags: write to branch-specific key, read branch + develop as fallback + ECR_CACHE_WRITE="${ECR_CACHE_URL}:cache-base-${SAFE_BRANCH_NAME}" + ECR_CACHE_READ_FALLBACK="${ECR_CACHE_URL}:cache-base-develop" + # Parse the env variable string into an array IMAGE_BASE_VERSIONS_STRING="${{ env.ISAACSIM_BASE_VERSIONS_STRING }}" # Use set to split the string into positional parameters, then convert to array @@ -149,8 +153,11 @@ jobs: echo "IsaacSim version: $IMAGE_BASE_VERSION" echo "Base image: $BASE_IMAGE_FULL" echo "Target platforms: $BUILD_PLATFORMS" + echo "ECR layer cache: read=${ECR_CACHE_WRITE},${ECR_CACHE_READ_FALLBACK} write=${ECR_CACHE_WRITE}" - # Build Docker image once with both tags for multiple architectures + # Build Docker image once with both tags for multiple architectures. + # Layer cache is read from / written to ECR so subsequent runs and + # parallel PR builds on the same branch skip already-built layers. docker buildx build \ --platform $BUILD_PLATFORMS \ --progress=plain \ @@ -161,8 +168,9 @@ jobs: --build-arg ISAACSIM_ROOT_PATH_ARG=/isaac-sim \ --build-arg ISAACLAB_PATH_ARG=/workspace/isaaclab \ --build-arg DOCKER_USER_HOME_ARG=/root \ - --cache-from type=gha \ - --cache-to type=gha,mode=max \ + --cache-from type=registry,ref=${ECR_CACHE_WRITE} \ + --cache-from type=registry,ref=${ECR_CACHE_READ_FALLBACK} \ + --cache-to type=registry,ref=${ECR_CACHE_WRITE},mode=max \ -f docker/Dockerfile.base \ --push . From 2ca875a82869199a26a45b8f8c8149c5e3451bf8 Mon Sep 17 00:00:00 2001 From: Mikhail Yurasov Date: Fri, 6 Mar 2026 17:20:09 -0800 Subject: [PATCH 02/13] Hardcode ECR endpoint for now --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ca0e5f1b14c7..afb5f570bb8e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,6 +28,7 @@ env: NGC_API_KEY: ${{ secrets.NGC_API_KEY }} ISAACSIM_BASE_IMAGE: nvcr.io/nvidian/isaac-sim #${{ vars.ISAACSIM_BASE_IMAGE || 'nvcr.io/nvidia/isaac-sim' }} ISAACSIM_BASE_VERSION: 'latest-develop' #${{ vars.ISAACSIM_BASE_VERSION || '5.1.0' }} + ECR_CACHE_URL: 968945269301.dkr.ecr.us-west-2.amazonaws.com/gitci-docker-cache jobs: # ── Build Phase ────────────────────────────────────────────────────────────── From f5187511bfeaa0964319814e12ee9a359a929223 Mon Sep 17 00:00:00 2001 From: Mikhail Yurasov Date: Fri, 6 Mar 2026 17:22:00 -0800 Subject: [PATCH 03/13] Add ECR login step --- .github/workflows/build.yml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index afb5f570bb8e..00ede45bf2f7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -51,6 +51,11 @@ jobs: fetch-depth: 0 lfs: true + - name: Login to ECR + run: | + aws ecr get-login-password --region us-west-2 \ + | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }} + - name: Compute ECR image and cache tags id: tags run: | @@ -88,6 +93,11 @@ jobs: fetch-depth: 0 lfs: true + - name: Login to ECR + run: | + aws ecr get-login-password --region us-west-2 \ + | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }} + - name: Compute ECR image and cache tags id: tags run: | @@ -128,6 +138,11 @@ jobs: fetch-depth: 0 lfs: true + - name: Login to ECR + run: | + aws ecr get-login-password --region us-west-2 \ + | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }} + - name: Pull Base Docker Image from ECR run: docker pull ${{ needs.build-base-image.outputs.ecr-image-tag }} @@ -186,6 +201,11 @@ jobs: fetch-depth: 0 lfs: true + - name: Login to ECR + run: | + aws ecr get-login-password --region us-west-2 \ + | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }} + - name: Pull Base Docker Image from ECR run: docker pull ${{ needs.build-base-image.outputs.ecr-image-tag }} @@ -243,6 +263,11 @@ jobs: fetch-depth: 0 lfs: true + - name: Login to ECR + run: | + aws ecr get-login-password --region us-west-2 \ + | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }} + - name: Pull Base Docker Image from ECR run: docker pull ${{ needs.build-base-image.outputs.ecr-image-tag }} @@ -301,6 +326,11 @@ jobs: fetch-depth: 0 lfs: true + - name: Login to ECR + run: | + aws ecr get-login-password --region us-west-2 \ + | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }} + - name: Pull cuRobo Docker Image from ECR run: docker pull ${{ needs.build-curobo-image.outputs.ecr-image-tag }} From 432a16c2303c01dab71d9917e52914915f37da99 Mon Sep 17 00:00:00 2001 From: Mikhail Yurasov Date: Fri, 6 Mar 2026 17:32:15 -0800 Subject: [PATCH 04/13] Add caching explanation --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 00ede45bf2f7..82b4a95e92f5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -56,6 +56,8 @@ jobs: aws ecr get-login-password --region us-west-2 \ | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }} + # Caching: Each branch maintains its own warm cache layer in AWS ECR, + # falling back to "develop"'s cache if no branch cache exists yet. - name: Compute ECR image and cache tags id: tags run: | From d4d1b362bfe698482a2b2ef055fb1eb39810c0f5 Mon Sep 17 00:00:00 2001 From: Mikhail Yurasov Date: Fri, 6 Mar 2026 17:45:11 -0800 Subject: [PATCH 05/13] Test on gpu-int runners --- .github/workflows/build.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 82b4a95e92f5..9e89603208da 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -39,7 +39,7 @@ jobs: # first run and build incrementally on subsequent runs. build-base-image: - runs-on: [self-hosted, gpu] + runs-on: [self-hosted, gpu-int] timeout-minutes: 60 outputs: ecr-image-tag: ${{ steps.tags.outputs.ecr_image_tag }} @@ -83,7 +83,7 @@ jobs: ecr-write-cache-tag: ${{ steps.tags.outputs.ecr_cache_tag }} build-curobo-image: - runs-on: [self-hosted, gpu] + runs-on: [self-hosted, gpu-int] timeout-minutes: 90 outputs: ecr-image-tag: ${{ steps.tags.outputs.ecr_image_tag }} @@ -129,7 +129,7 @@ jobs: test-isaaclab-tasks: needs: [build-base-image] - runs-on: [self-hosted, gpu] + runs-on: [self-hosted, gpu-int] timeout-minutes: 180 continue-on-error: true @@ -192,7 +192,7 @@ jobs: test-isaaclab-tasks-2: needs: [build-base-image] - runs-on: [self-hosted, gpu] + runs-on: [self-hosted, gpu-int] timeout-minutes: 180 continue-on-error: true @@ -255,7 +255,7 @@ jobs: test-general: needs: [build-base-image] - runs-on: [self-hosted, gpu] + runs-on: [self-hosted, gpu-int] timeout-minutes: 180 steps: @@ -317,7 +317,7 @@ jobs: test-curobo: needs: [build-curobo-image] - runs-on: [self-hosted, gpu] + runs-on: [self-hosted, gpu-int] timeout-minutes: 120 continue-on-error: true From d705220ed8015ae219f74473c8f9581ae1717d6a Mon Sep 17 00:00:00 2001 From: Mikhail Yurasov Date: Fri, 6 Mar 2026 17:55:53 -0800 Subject: [PATCH 06/13] Make sure docker config exists --- .github/workflows/build.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9e89603208da..a8b74ad572c5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -53,6 +53,8 @@ jobs: - name: Login to ECR run: | + mkdir -p ~/.docker + echo '{}' > ~/.docker/config.json aws ecr get-login-password --region us-west-2 \ | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }} @@ -97,6 +99,8 @@ jobs: - name: Login to ECR run: | + mkdir -p ~/.docker + echo '{}' > ~/.docker/config.json aws ecr get-login-password --region us-west-2 \ | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }} @@ -142,6 +146,8 @@ jobs: - name: Login to ECR run: | + mkdir -p ~/.docker + echo '{}' > ~/.docker/config.json aws ecr get-login-password --region us-west-2 \ | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }} @@ -205,6 +211,8 @@ jobs: - name: Login to ECR run: | + mkdir -p ~/.docker + echo '{}' > ~/.docker/config.json aws ecr get-login-password --region us-west-2 \ | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }} @@ -267,6 +275,8 @@ jobs: - name: Login to ECR run: | + mkdir -p ~/.docker + echo '{}' > ~/.docker/config.json aws ecr get-login-password --region us-west-2 \ | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }} @@ -330,6 +340,8 @@ jobs: - name: Login to ECR run: | + mkdir -p ~/.docker + echo '{}' > ~/.docker/config.json aws ecr get-login-password --region us-west-2 \ | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }} From 5814e23b75138070375b2da3306f0bb20cf42301 Mon Sep 17 00:00:00 2001 From: Mikhail Yurasov Date: Fri, 6 Mar 2026 21:45:10 -0800 Subject: [PATCH 07/13] Update runner labels from gpu-int to int-gpu --- .github/workflows/build.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a8b74ad572c5..3df70c2e79a5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -39,7 +39,7 @@ jobs: # first run and build incrementally on subsequent runs. build-base-image: - runs-on: [self-hosted, gpu-int] + runs-on: [self-hosted, int-gpu] timeout-minutes: 60 outputs: ecr-image-tag: ${{ steps.tags.outputs.ecr_image_tag }} @@ -85,7 +85,7 @@ jobs: ecr-write-cache-tag: ${{ steps.tags.outputs.ecr_cache_tag }} build-curobo-image: - runs-on: [self-hosted, gpu-int] + runs-on: [self-hosted, int-gpu] timeout-minutes: 90 outputs: ecr-image-tag: ${{ steps.tags.outputs.ecr_image_tag }} @@ -133,7 +133,7 @@ jobs: test-isaaclab-tasks: needs: [build-base-image] - runs-on: [self-hosted, gpu-int] + runs-on: [self-hosted, int-gpu] timeout-minutes: 180 continue-on-error: true @@ -198,7 +198,7 @@ jobs: test-isaaclab-tasks-2: needs: [build-base-image] - runs-on: [self-hosted, gpu-int] + runs-on: [self-hosted, int-gpu] timeout-minutes: 180 continue-on-error: true @@ -263,7 +263,7 @@ jobs: test-general: needs: [build-base-image] - runs-on: [self-hosted, gpu-int] + runs-on: [self-hosted, int-gpu] timeout-minutes: 180 steps: @@ -327,7 +327,7 @@ jobs: test-curobo: needs: [build-curobo-image] - runs-on: [self-hosted, gpu-int] + runs-on: [self-hosted, int-gpu] timeout-minutes: 120 continue-on-error: true From 2eec28ac58f8ef46e1fa6e6a6dbca7f86e2de799 Mon Sep 17 00:00:00 2001 From: Mikhail Yurasov Date: Fri, 6 Mar 2026 21:45:46 -0800 Subject: [PATCH 08/13] Temporarily disable all CI jobs by adding conditional checks --- .github/workflows/check-links.yml | 1 + .github/workflows/daily-compatibility.yml | 7 +++++-- .github/workflows/docs.yaml | 4 +++- .github/workflows/labeler.yml | 1 + .github/workflows/license-check.yaml | 1 + .github/workflows/postmerge-ci.yml | 1 + 6 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.github/workflows/check-links.yml b/.github/workflows/check-links.yml index ac142552172d..446f303b21f0 100644 --- a/.github/workflows/check-links.yml +++ b/.github/workflows/check-links.yml @@ -35,6 +35,7 @@ concurrency: jobs: check-links: + if: ${{ false }} #TMP name: Check for Broken Links runs-on: ubuntu-latest diff --git a/.github/workflows/daily-compatibility.yml b/.github/workflows/daily-compatibility.yml index bbf59e45160d..3ade002ede41 100644 --- a/.github/workflows/daily-compatibility.yml +++ b/.github/workflows/daily-compatibility.yml @@ -33,6 +33,7 @@ env: jobs: setup-versions: + if: ${{ false }} #TMP runs-on: ubuntu-latest outputs: versions: ${{ steps.set-versions.outputs.versions }} @@ -52,6 +53,7 @@ jobs: fi test-isaaclab-tasks-compat: + if: ${{ false }} #TMP needs: setup-versions runs-on: [self-hosted, gpu] timeout-minutes: 180 @@ -110,6 +112,7 @@ jobs: compression-level: 9 test-general-compat: + if: ${{ false }} #TMP needs: setup-versions runs-on: [self-hosted, gpu] timeout-minutes: 180 @@ -169,7 +172,7 @@ jobs: combine-compat-results: needs: [test-isaaclab-tasks-compat, test-general-compat] runs-on: [self-hosted, gpu] - if: always() + if: ${{ false }} #TMP steps: - name: Checkout Code @@ -218,7 +221,7 @@ jobs: notify-compatibility-status: needs: [setup-versions, combine-compat-results] runs-on: [self-hosted, gpu] - if: always() + if: ${{ false }} #TMP steps: - name: Checkout Code diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 9b27d63c1562..7e9fc8da034f 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -21,6 +21,7 @@ concurrency: jobs: check-secrets: + if: ${{ false }} #TMP name: Check secrets runs-on: ubuntu-latest outputs: @@ -33,6 +34,7 @@ jobs: run: echo "defined=true" >> "$GITHUB_OUTPUT" build-docs: + if: ${{ false }} #TMP name: Build Docs runs-on: ubuntu-latest needs: [check-secrets] @@ -69,10 +71,10 @@ jobs: path: ./docs/_build deploy-docs: + if: ${{ false }} #TMP name: Deploy Docs runs-on: ubuntu-latest needs: [check-secrets, build-docs] - if: needs.check-secrets.outputs.trigger-deploy == 'true' steps: - name: Download docs artifact diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 593aec9a2cb0..609d0fb86fab 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -9,6 +9,7 @@ on: jobs: labeler: + if: ${{ false }} #TMP permissions: contents: read pull-requests: write diff --git a/.github/workflows/license-check.yaml b/.github/workflows/license-check.yaml index ca7ead9aa345..240029497372 100644 --- a/.github/workflows/license-check.yaml +++ b/.github/workflows/license-check.yaml @@ -15,6 +15,7 @@ concurrency: jobs: license-check: + if: ${{ false }} #TMP runs-on: ubuntu-24.04 steps: diff --git a/.github/workflows/postmerge-ci.yml b/.github/workflows/postmerge-ci.yml index 71800e0a582c..ef3e9adcfeb5 100644 --- a/.github/workflows/postmerge-ci.yml +++ b/.github/workflows/postmerge-ci.yml @@ -29,6 +29,7 @@ env: jobs: build-and-push-images: + if: ${{ false }} #TMP runs-on: [self-hosted, gpu] timeout-minutes: 180 environment: From 23124ee6dda8202b3fabe1f04754aa2d7dd79190 Mon Sep 17 00:00:00 2001 From: Mikhail Yurasov Date: Fri, 6 Mar 2026 21:47:26 -0800 Subject: [PATCH 09/13] Rename workflow to avoid GH bug that skips it --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3df70c2e79a5..8dec486daec1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -3,7 +3,7 @@ # # SPDX-License-Identifier: BSD-3-Clause -name: Build and Test +name: Build and Tests on: pull_request: From 186bea366ed1e9f139b6a6b42b6b693afbcb6553 Mon Sep 17 00:00:00 2001 From: Mikhail Yurasov Date: Fri, 6 Mar 2026 21:49:31 -0800 Subject: [PATCH 10/13] Add new CI workflow for Build and Test v2 with enhanced concurrency and caching --- .github/workflows/{build.yml => build2.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename .github/workflows/{build.yml => build2.yml} (99%) diff --git a/.github/workflows/build.yml b/.github/workflows/build2.yml similarity index 99% rename from .github/workflows/build.yml rename to .github/workflows/build2.yml index 8dec486daec1..1894a00aa6db 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build2.yml @@ -3,7 +3,7 @@ # # SPDX-License-Identifier: BSD-3-Clause -name: Build and Tests +name: Build and Test v2 on: pull_request: From 9fe6c1edb7144734105f9d874534677eb10c2078 Mon Sep 17 00:00:00 2001 From: Mikhail Yurasov Date: Fri, 6 Mar 2026 22:16:23 -0800 Subject: [PATCH 11/13] Revert build job renaming --- .github/workflows/{build2.yml => build.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename .github/workflows/{build2.yml => build.yml} (99%) diff --git a/.github/workflows/build2.yml b/.github/workflows/build.yml similarity index 99% rename from .github/workflows/build2.yml rename to .github/workflows/build.yml index 1894a00aa6db..3df70c2e79a5 100644 --- a/.github/workflows/build2.yml +++ b/.github/workflows/build.yml @@ -3,7 +3,7 @@ # # SPDX-License-Identifier: BSD-3-Clause -name: Build and Test v2 +name: Build and Test on: pull_request: From 9f322ba8f1bcb463869b9dd2e6f4cf0362f8ff2a Mon Sep 17 00:00:00 2001 From: Mikhail Yurasov Date: Fri, 6 Mar 2026 22:16:26 -0800 Subject: [PATCH 12/13] Revert "Temporarily disable all CI jobs by adding conditional checks" This reverts commit 2eec28ac58f8ef46e1fa6e6a6dbca7f86e2de799. --- .github/workflows/check-links.yml | 1 - .github/workflows/daily-compatibility.yml | 7 ++----- .github/workflows/docs.yaml | 4 +--- .github/workflows/labeler.yml | 1 - .github/workflows/license-check.yaml | 1 - .github/workflows/postmerge-ci.yml | 1 - 6 files changed, 3 insertions(+), 12 deletions(-) diff --git a/.github/workflows/check-links.yml b/.github/workflows/check-links.yml index 446f303b21f0..ac142552172d 100644 --- a/.github/workflows/check-links.yml +++ b/.github/workflows/check-links.yml @@ -35,7 +35,6 @@ concurrency: jobs: check-links: - if: ${{ false }} #TMP name: Check for Broken Links runs-on: ubuntu-latest diff --git a/.github/workflows/daily-compatibility.yml b/.github/workflows/daily-compatibility.yml index 3ade002ede41..bbf59e45160d 100644 --- a/.github/workflows/daily-compatibility.yml +++ b/.github/workflows/daily-compatibility.yml @@ -33,7 +33,6 @@ env: jobs: setup-versions: - if: ${{ false }} #TMP runs-on: ubuntu-latest outputs: versions: ${{ steps.set-versions.outputs.versions }} @@ -53,7 +52,6 @@ jobs: fi test-isaaclab-tasks-compat: - if: ${{ false }} #TMP needs: setup-versions runs-on: [self-hosted, gpu] timeout-minutes: 180 @@ -112,7 +110,6 @@ jobs: compression-level: 9 test-general-compat: - if: ${{ false }} #TMP needs: setup-versions runs-on: [self-hosted, gpu] timeout-minutes: 180 @@ -172,7 +169,7 @@ jobs: combine-compat-results: needs: [test-isaaclab-tasks-compat, test-general-compat] runs-on: [self-hosted, gpu] - if: ${{ false }} #TMP + if: always() steps: - name: Checkout Code @@ -221,7 +218,7 @@ jobs: notify-compatibility-status: needs: [setup-versions, combine-compat-results] runs-on: [self-hosted, gpu] - if: ${{ false }} #TMP + if: always() steps: - name: Checkout Code diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 7e9fc8da034f..9b27d63c1562 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -21,7 +21,6 @@ concurrency: jobs: check-secrets: - if: ${{ false }} #TMP name: Check secrets runs-on: ubuntu-latest outputs: @@ -34,7 +33,6 @@ jobs: run: echo "defined=true" >> "$GITHUB_OUTPUT" build-docs: - if: ${{ false }} #TMP name: Build Docs runs-on: ubuntu-latest needs: [check-secrets] @@ -71,10 +69,10 @@ jobs: path: ./docs/_build deploy-docs: - if: ${{ false }} #TMP name: Deploy Docs runs-on: ubuntu-latest needs: [check-secrets, build-docs] + if: needs.check-secrets.outputs.trigger-deploy == 'true' steps: - name: Download docs artifact diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 609d0fb86fab..593aec9a2cb0 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -9,7 +9,6 @@ on: jobs: labeler: - if: ${{ false }} #TMP permissions: contents: read pull-requests: write diff --git a/.github/workflows/license-check.yaml b/.github/workflows/license-check.yaml index 240029497372..ca7ead9aa345 100644 --- a/.github/workflows/license-check.yaml +++ b/.github/workflows/license-check.yaml @@ -15,7 +15,6 @@ concurrency: jobs: license-check: - if: ${{ false }} #TMP runs-on: ubuntu-24.04 steps: diff --git a/.github/workflows/postmerge-ci.yml b/.github/workflows/postmerge-ci.yml index ef3e9adcfeb5..71800e0a582c 100644 --- a/.github/workflows/postmerge-ci.yml +++ b/.github/workflows/postmerge-ci.yml @@ -29,7 +29,6 @@ env: jobs: build-and-push-images: - if: ${{ false }} #TMP runs-on: [self-hosted, gpu] timeout-minutes: 180 environment: From a12bd11142b7c6f3544708b7956c467a90042db7 Mon Sep 17 00:00:00 2001 From: Mikhail Yurasov Date: Fri, 6 Mar 2026 22:17:43 -0800 Subject: [PATCH 13/13] Use 'gpu' tags --- .github/workflows/build.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3df70c2e79a5..8258c03e1445 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -39,7 +39,7 @@ jobs: # first run and build incrementally on subsequent runs. build-base-image: - runs-on: [self-hosted, int-gpu] + runs-on: [self-hosted, gpu] timeout-minutes: 60 outputs: ecr-image-tag: ${{ steps.tags.outputs.ecr_image_tag }} @@ -85,7 +85,7 @@ jobs: ecr-write-cache-tag: ${{ steps.tags.outputs.ecr_cache_tag }} build-curobo-image: - runs-on: [self-hosted, int-gpu] + runs-on: [self-hosted, gpu] timeout-minutes: 90 outputs: ecr-image-tag: ${{ steps.tags.outputs.ecr_image_tag }} @@ -133,7 +133,7 @@ jobs: test-isaaclab-tasks: needs: [build-base-image] - runs-on: [self-hosted, int-gpu] + runs-on: [self-hosted, gpu] timeout-minutes: 180 continue-on-error: true @@ -198,7 +198,7 @@ jobs: test-isaaclab-tasks-2: needs: [build-base-image] - runs-on: [self-hosted, int-gpu] + runs-on: [self-hosted, gpu] timeout-minutes: 180 continue-on-error: true @@ -263,7 +263,7 @@ jobs: test-general: needs: [build-base-image] - runs-on: [self-hosted, int-gpu] + runs-on: [self-hosted, gpu] timeout-minutes: 180 steps: @@ -327,7 +327,7 @@ jobs: test-curobo: needs: [build-curobo-image] - runs-on: [self-hosted, int-gpu] + runs-on: [self-hosted, gpu] timeout-minutes: 120 continue-on-error: true