diff --git a/.github/actions/docker-build/action.yml b/.github/actions/docker-build/action.yml
index 4595d47fd033..8f7086898ef7 100644
--- a/.github/actions/docker-build/action.yml
+++ b/.github/actions/docker-build/action.yml
@@ -24,10 +24,26 @@ inputs:
     description: 'Build context path'
     default: '.'
     required: false
+  push:
+    description: 'Push the built image to the registry instead of loading it locally'
+    default: 'false'
+    required: false
+  ecr-cache-tags:
+    description: 'Space-separated ECR image refs to use as registry layer-cache sources (--cache-from type=registry,ref=...). Falls back to GitHub Actions cache when empty.'
+    default: ''
+    required: false
+  ecr-write-cache-tag:
+    description: 'ECR image ref to write the build cache to (--cache-to type=registry,ref=...,mode=max,image-manifest=true,oci-mediatypes=true). Falls back to GitHub Actions cache when empty.'
+    default: ''
+    required: false
 
 runs:
   using: composite
   steps:
+    # The cache exporters used below (type=registry / type=gha) need a BuildKit builder; this step creates one.
+    - name: Set up Docker Buildx
+      uses: docker/setup-buildx-action@v3
+
     - name: NGC Login
       shell: sh
       run: |
@@ -44,35 +60,64 @@ runs:
     - name: Build Docker Image
       shell: sh
+      env:
+        IMAGE_TAG: ${{ inputs.image-tag }}
+        PUSH: ${{ inputs.push }}
+        ECR_CACHE_TAGS: ${{ inputs.ecr-cache-tags }}
+        ECR_WRITE_CACHE_TAG: ${{ inputs.ecr-write-cache-tag }}
+        BASE_IMAGE: ${{ inputs.isaacsim-base-image }}
+        BASE_VERSION: ${{ inputs.isaacsim-version }}
+        DOCKERFILE_PATH: ${{ inputs.dockerfile-path }}
+        CONTEXT_PATH: ${{ inputs.context-path }}
       run: |
-        # Function to build Docker image
-        build_docker_image() {
-          local image_tag="$1"
-          local isaacsim_base_image="$2"
-          local isaacsim_version="$3"
-          local dockerfile_path="$4"
-          local context_path="$5"
+        # Inputs are passed through the step's env: mapping instead of being
+        # expanded into the script text, so a value containing quotes or $(...)
+        # is treated as data and cannot alter the commands below.
 
-          echo "Building Docker image: $image_tag"
-          echo "Using Dockerfile: $dockerfile_path"
-          echo "Build context: $context_path"
+        echo "Building Docker image: $IMAGE_TAG"
+        echo "Using Dockerfile: $DOCKERFILE_PATH"
+        echo "Build context: $CONTEXT_PATH"
 
-          # Build Docker image
-          docker buildx build --progress=plain --platform linux/amd64 \
-            -t isaac-lab-dev \
-            -t $image_tag \
-            --build-arg ISAACSIM_BASE_IMAGE_ARG="$isaacsim_base_image" \
-            --build-arg ISAACSIM_VERSION_ARG="$isaacsim_version" \
-            --build-arg ISAACSIM_ROOT_PATH_ARG=/isaac-sim \
-            --build-arg ISAACLAB_PATH_ARG=/workspace/isaaclab \
-            --build-arg DOCKER_USER_HOME_ARG=/root \
-            --cache-from type=gha \
-            --cache-to type=gha,mode=max \
-            -f $dockerfile_path \
-            --load $context_path
+        # Assemble --cache-from flags: prefer ECR registry cache, fall back to GHA cache
+        if [ -n "$ECR_CACHE_TAGS" ]; then
+          CACHE_FROM_ARGS=""
+          for tag in $ECR_CACHE_TAGS; do
+            CACHE_FROM_ARGS="$CACHE_FROM_ARGS --cache-from type=registry,ref=$tag"
+          done
+        else
+          CACHE_FROM_ARGS="--cache-from type=gha"
+        fi
 
-          echo "✅ Docker image built successfully: $image_tag"
-          docker images | grep isaac-lab-dev
-        }
+        # Assemble --cache-to flag: prefer ECR registry cache, fall back to GHA cache.
+        # image-manifest/oci-mediatypes export the cache as an OCI image manifest, the form AWS documents for ECR.
+        if [ -n "$ECR_WRITE_CACHE_TAG" ]; then
+          CACHE_TO_ARG="--cache-to type=registry,ref=$ECR_WRITE_CACHE_TAG,mode=max,image-manifest=true,oci-mediatypes=true"
+        else
+          CACHE_TO_ARG="--cache-to type=gha,mode=max"
+        fi
+
+        # Assemble output mode: push to registry or load into local daemon
+        if [ "$PUSH" = "true" ]; then
+          OUTPUT_ARG="--push"
+          TAG_ARGS="-t $IMAGE_TAG"
+        else
+          OUTPUT_ARG="--load"
+          TAG_ARGS="-t isaac-lab-dev -t $IMAGE_TAG"
+        fi
+
+        # shellcheck disable=SC2086
+        docker buildx build --progress=plain --platform linux/amd64 \
+          $TAG_ARGS \
+          --build-arg ISAACSIM_BASE_IMAGE_ARG="$BASE_IMAGE" \
+          --build-arg ISAACSIM_VERSION_ARG="$BASE_VERSION" \
+          --build-arg ISAACSIM_ROOT_PATH_ARG=/isaac-sim \
+          --build-arg ISAACLAB_PATH_ARG=/workspace/isaaclab \
+          --build-arg DOCKER_USER_HOME_ARG=/root \
+          $CACHE_FROM_ARGS \
+          $CACHE_TO_ARG \
+          -f "$DOCKERFILE_PATH" \
+          $OUTPUT_ARG "$CONTEXT_PATH"
 
-        # Call the function with provided parameters
-        build_docker_image "${{ inputs.image-tag }}" "${{ inputs.isaacsim-base-image }}" "${{ inputs.isaacsim-version }}" "${{ inputs.dockerfile-path }}" "${{ inputs.context-path }}"
+        echo "✅ Docker image built successfully: $IMAGE_TAG"
+        if [ "$PUSH" != "true" ]; then
+          docker images | grep "isaac-lab-dev" || true
+        fi
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 28df43417118..8258c03e1445 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -28,13 +28,21 @@ env:
   NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
   ISAACSIM_BASE_IMAGE: nvcr.io/nvidian/isaac-sim #${{ vars.ISAACSIM_BASE_IMAGE || 'nvcr.io/nvidia/isaac-sim' }}
   ISAACSIM_BASE_VERSION: 'latest-develop' #${{ vars.ISAACSIM_BASE_VERSION || '5.1.0' }}
-  DOCKER_IMAGE_TAG: isaac-lab-dev:${{ github.event_name == 'pull_request' && format('pr-{0}', github.event.pull_request.number) || github.ref_name }}-${{ github.sha }}
+  ECR_CACHE_URL: 968945269301.dkr.ecr.us-west-2.amazonaws.com/gitci-docker-cache
 
 jobs:
-  test-isaaclab-tasks:
+  # ── Build Phase ──────────────────────────────────────────────────────────────
+  # Each Dockerfile is built once per pipeline and pushed to ECR with the commit
+  # SHA as the image tag. All test jobs pull from ECR rather than rebuilding,
+  # eliminating redundant multi-gigabyte builds. Layer caching is stored in ECR
+  # with per-ref keys (github.ref_name: the branch, or "<number>/merge" for PRs);
+  # a ref that has no cache of its own yet falls back to the develop cache.
+
+  build-base-image:
     runs-on: [self-hosted, gpu]
-    timeout-minutes: 180
-    continue-on-error: true
+    timeout-minutes: 60
+    outputs:
+      ecr-image-tag: ${{ steps.tags.outputs.ecr_image_tag }}
 
     steps:
       - name: Checkout Code
@@ -43,12 +51,116 @@ jobs:
           fetch-depth: 0
           lfs: true
 
-      - name: Build Docker Image
+      - name: Login to ECR
+        run: |
+          mkdir -p ~/.docker
+          echo '{}' > ~/.docker/config.json
+          aws ecr get-login-password --region us-west-2 \
+            | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }}
+
+      # Caching: Each ref (branch or PR merge ref) maintains its own warm cache layer in AWS ECR,
+      # falling back to "develop"'s cache if no cache exists for that ref yet.
+      - name: Compute ECR image and cache tags
+        id: tags
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: |
+          # Ref, event and SHA come from the runner's GITHUB_* variables, not from workflow-expression
+          # expansion into the script. Characters that Docker tags disallow are mapped to '-'.
+          BRANCH=$(printf '%s' "$GITHUB_REF_NAME" | tr -c 'A-Za-z0-9_.-' '-')
+          SHA="$GITHUB_SHA"
+          if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
+            ECR_IMAGE_TAG="${ECR_CACHE_URL}:pr-${PR_NUMBER}-${SHA}"
+          else
+            ECR_IMAGE_TAG="${ECR_CACHE_URL}:${BRANCH}-${SHA}"
+          fi
+          echo "ecr_image_tag=${ECR_IMAGE_TAG}" >> "$GITHUB_OUTPUT"
+          echo "ecr_cache_tag=${ECR_CACHE_URL}:cache-base-${BRANCH}" >> "$GITHUB_OUTPUT"
+          echo "ecr_cache_fallback=${ECR_CACHE_URL}:cache-base-develop" >> "$GITHUB_OUTPUT"
+
+      - name: Build and Push Base Docker Image
         uses: ./.github/actions/docker-build
         with:
-          image-tag: ${{ env.DOCKER_IMAGE_TAG }}
+          image-tag: ${{ steps.tags.outputs.ecr_image_tag }}
           isaacsim-base-image: ${{ env.ISAACSIM_BASE_IMAGE }}
           isaacsim-version: ${{ env.ISAACSIM_BASE_VERSION }}
+          push: 'true'
+          ecr-cache-tags: "${{ steps.tags.outputs.ecr_cache_tag }} ${{ steps.tags.outputs.ecr_cache_fallback }}"
+          ecr-write-cache-tag: ${{ steps.tags.outputs.ecr_cache_tag }}
+
+  build-curobo-image:
+    runs-on: [self-hosted, gpu]
+    timeout-minutes: 90
+    outputs:
+      ecr-image-tag: ${{ steps.tags.outputs.ecr_image_tag }}
+
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          lfs: true
+
+      - name: Login to ECR
+        run: |
+          mkdir -p ~/.docker
+          echo '{}' > ~/.docker/config.json
+          aws ecr get-login-password --region us-west-2 \
+            | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }}
+
+      - name: Compute ECR image and cache tags
+        id: tags
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: |
+          # Ref, event and SHA come from the runner's GITHUB_* variables, not from workflow-expression
+          # expansion into the script. Characters that Docker tags disallow are mapped to '-'.
+          BRANCH=$(printf '%s' "$GITHUB_REF_NAME" | tr -c 'A-Za-z0-9_.-' '-')
+          SHA="$GITHUB_SHA"
+          if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
+            ECR_IMAGE_TAG="${ECR_CACHE_URL}:pr-${PR_NUMBER}-${SHA}-curobo"
+          else
+            ECR_IMAGE_TAG="${ECR_CACHE_URL}:${BRANCH}-${SHA}-curobo"
+          fi
+          echo "ecr_image_tag=${ECR_IMAGE_TAG}" >> "$GITHUB_OUTPUT"
+          echo "ecr_cache_tag=${ECR_CACHE_URL}:cache-curobo-${BRANCH}" >> "$GITHUB_OUTPUT"
+          echo "ecr_cache_fallback=${ECR_CACHE_URL}:cache-curobo-develop" >> "$GITHUB_OUTPUT"
+
+      - name: Build and Push cuRobo Docker Image
+        uses: ./.github/actions/docker-build
+        with:
+          image-tag: ${{ steps.tags.outputs.ecr_image_tag }}
+          isaacsim-base-image: ${{ env.ISAACSIM_BASE_IMAGE }}
+          isaacsim-version: ${{ env.ISAACSIM_BASE_VERSION }}
+          dockerfile-path: 'docker/Dockerfile.curobo'
+          push: 'true'
+          ecr-cache-tags: "${{ steps.tags.outputs.ecr_cache_tag }} ${{ steps.tags.outputs.ecr_cache_fallback }}"
+          ecr-write-cache-tag: ${{ steps.tags.outputs.ecr_cache_tag }}
+
+  # ── Test Phase ───────────────────────────────────────────────────────────────
+
+  test-isaaclab-tasks:
+    needs: [build-base-image]
+    runs-on: [self-hosted, gpu]
+    timeout-minutes: 180
+    continue-on-error: true
+
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          lfs: true
+
+      - name: Login to ECR
+        run: |
+          mkdir -p ~/.docker
+          echo '{}' > ~/.docker/config.json
+          aws ecr get-login-password --region us-west-2 \
+            | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }}
+
+      - name: Pull Base Docker Image from ECR
+        run: docker pull ${{ needs.build-base-image.outputs.ecr-image-tag }}
 
       - name: Run IsaacLab Tasks Tests
         uses: ./.github/actions/run-tests
@@ -56,7 +168,7 @@ jobs:
           test-path: "tools"
           result-file: "isaaclab-tasks-report.xml"
           container-name: "isaac-lab-tasks-test-$$"
-          image-tag: ${{ env.DOCKER_IMAGE_TAG }}
+          image-tag: ${{ needs.build-base-image.outputs.ecr-image-tag }}
           pytest-options: ""
           filter-pattern: "isaaclab_tasks"
           include-files: "test_multi_agent_environments.py,test_pickplace_stack_environments.py,test_environments.py,test_factory_environments.py,test_environments_training.py,test_cartpole_showcase_environments.py,test_teleop_environments.py"
@@ -93,6 +205,7 @@ jobs:
           fi
 
   test-isaaclab-tasks-2:
+    needs: [build-base-image]
     runs-on: [self-hosted, gpu]
     timeout-minutes: 180
     continue-on-error: true
@@ -104,12 +217,15 @@ jobs:
           fetch-depth: 0
           lfs: true
 
-      - name: Build Docker Image
-        uses: ./.github/actions/docker-build
-        with:
-          image-tag: ${{ env.DOCKER_IMAGE_TAG }}
-          isaacsim-base-image: ${{ env.ISAACSIM_BASE_IMAGE }}
-          isaacsim-version: ${{ env.ISAACSIM_BASE_VERSION }}
+      - name: Login to ECR
+        run: |
+          mkdir -p ~/.docker
+          echo '{}' > ~/.docker/config.json
+          aws ecr get-login-password --region us-west-2 \
+            | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }}
+
+      - name: Pull Base Docker Image from ECR
+        run: docker pull ${{ needs.build-base-image.outputs.ecr-image-tag }}
 
       - name: Run IsaacLab Tasks Tests 2
         uses: ./.github/actions/run-tests
@@ -117,7 +233,7 @@ jobs:
           test-path: "tools"
           result-file: "isaaclab-tasks-2-report.xml"
           container-name: "isaac-lab-tasks-2-test-$$"
-          image-tag: ${{ env.DOCKER_IMAGE_TAG }}
+          image-tag: ${{ needs.build-base-image.outputs.ecr-image-tag }}
           pytest-options: ""
           filter-pattern: "isaaclab_tasks"
           include-files: "test_teleop_environments_with_stage_in_memory.py,test_lift_teddy_bear.py,test_environment_determinism.py,test_hydra.py,test_env_cfg_no_forbidden_imports.py,test_rl_device_separation.py,test_cartpole_showcase_environments_with_stage_in_memory.py,test_environments_with_stage_in_memory.py"
@@ -154,6 +270,7 @@ jobs:
           fi
 
   test-general:
+    needs: [build-base-image]
     runs-on: [self-hosted, gpu]
     timeout-minutes: 180
 
@@ -164,12 +281,15 @@ jobs:
           fetch-depth: 0
           lfs: true
 
-      - name: Build Docker Image
-        uses: ./.github/actions/docker-build
-        with:
-          image-tag: ${{ env.DOCKER_IMAGE_TAG }}
-          isaacsim-base-image: ${{ env.ISAACSIM_BASE_IMAGE }}
-          isaacsim-version: ${{ env.ISAACSIM_BASE_VERSION }}
+      - name: Login to ECR
+        run: |
+          mkdir -p ~/.docker
+          echo '{}' > ~/.docker/config.json
+          aws ecr get-login-password --region us-west-2 \
+            | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }}
+
+      - name: Pull Base Docker Image from ECR
+        run: docker pull ${{ needs.build-base-image.outputs.ecr-image-tag }}
 
       - name: Run General Tests
         id: run-general-tests
@@ -178,7 +298,7 @@ jobs:
           test-path: "tools"
           result-file: "general-tests-report.xml"
           container-name: "isaac-lab-general-test-$$"
-          image-tag: ${{ env.DOCKER_IMAGE_TAG }}
+          image-tag: ${{ needs.build-base-image.outputs.ecr-image-tag }}
           pytest-options: ""
           filter-pattern: "not isaaclab_tasks"
 
@@ -214,6 +334,7 @@ jobs:
           fi
 
   test-curobo:
+    needs: [build-curobo-image]
     runs-on: [self-hosted, gpu]
     timeout-minutes: 120
     continue-on-error: true
@@ -225,13 +346,15 @@ jobs:
           fetch-depth: 0
           lfs: true
 
-      - name: Build Docker Image (cuRobo)
-        uses: ./.github/actions/docker-build
-        with:
-          image-tag: ${{ env.DOCKER_IMAGE_TAG }}-curobo
-          isaacsim-base-image: ${{ env.ISAACSIM_BASE_IMAGE }}
-          isaacsim-version: ${{ env.ISAACSIM_BASE_VERSION }}
-          dockerfile-path: 'docker/Dockerfile.curobo'
+      - name: Login to ECR
+        run: |
+          mkdir -p ~/.docker
+          echo '{}' > ~/.docker/config.json
+          aws ecr get-login-password --region us-west-2 \
+            | docker login --username AWS --password-stdin ${{ env.ECR_CACHE_URL }}
+
+      - name: Pull cuRobo Docker Image from ECR
+        run: docker pull ${{ needs.build-curobo-image.outputs.ecr-image-tag }}
 
       - name: Run cuRobo and SkillGen Tests
         uses: ./.github/actions/run-tests
@@ -239,7 +362,7 @@ jobs:
           test-path: "tools"
           result-file: "curobo-tests-report.xml"
           container-name: "isaac-lab-curobo-test-$$"
-          image-tag: ${{ env.DOCKER_IMAGE_TAG }}-curobo
+          image-tag: ${{ needs.build-curobo-image.outputs.ecr-image-tag }}
           pytest-options: ""
           filter-pattern: ""
           curobo-only: "true"
@@ -277,7 +400,7 @@ jobs:
 
   combine-results:
     needs: [test-isaaclab-tasks, test-isaaclab-tasks-2, test-general, test-curobo]
-    runs-on: [self-hosted, gpu]
+    runs-on: [self-hosted]
     if: always()
 
     steps:
diff --git a/.github/workflows/postmerge-ci.yml b/.github/workflows/postmerge-ci.yml
index e19613cd9dd3..71800e0a582c 100644
--- a/.github/workflows/postmerge-ci.yml
+++ b/.github/workflows/postmerge-ci.yml
@@ -93,6 +93,12 @@ jobs:
           echo "Repository name: $REPO_SHORT_NAME"
           echo "IsaacSim versions: ${{ env.ISAACSIM_BASE_VERSIONS_STRING }}"
 
+          # ECR layer-cache tags: write to branch-specific key, read branch + develop as fallback.
+          # Fail fast when the repository URL is missing rather than building refs such as ":cache-base-x".
+          : "${ECR_CACHE_URL:?ECR_CACHE_URL must be set to the ECR repository used for layer caching}"
+          ECR_CACHE_WRITE="${ECR_CACHE_URL}:cache-base-${SAFE_BRANCH_NAME}"
+          ECR_CACHE_READ_FALLBACK="${ECR_CACHE_URL}:cache-base-develop"
+
           # Parse the env variable string into an array
           IMAGE_BASE_VERSIONS_STRING="${{ env.ISAACSIM_BASE_VERSIONS_STRING }}"
           # Use set to split the string into positional parameters, then convert to array
@@ -149,8 +155,11 @@ jobs:
             echo "IsaacSim version: $IMAGE_BASE_VERSION"
             echo "Base image: $BASE_IMAGE_FULL"
             echo "Target platforms: $BUILD_PLATFORMS"
+            echo "ECR layer cache: read=${ECR_CACHE_WRITE},${ECR_CACHE_READ_FALLBACK} write=${ECR_CACHE_WRITE}"
 
-            # Build Docker image once with both tags for multiple architectures
+            # Build Docker image once with both tags for multiple architectures.
+            # Layer cache is read from / written to ECR so subsequent runs and
+            # parallel PR builds on the same branch skip already-built layers.
             docker buildx build \
               --platform $BUILD_PLATFORMS \
               --progress=plain \
@@ -161,8 +170,9 @@ jobs:
               --build-arg ISAACSIM_ROOT_PATH_ARG=/isaac-sim \
               --build-arg ISAACLAB_PATH_ARG=/workspace/isaaclab \
               --build-arg DOCKER_USER_HOME_ARG=/root \
-              --cache-from type=gha \
-              --cache-to type=gha,mode=max \
+              --cache-from type=registry,ref=${ECR_CACHE_WRITE} \
+              --cache-from type=registry,ref=${ECR_CACHE_READ_FALLBACK} \
+              --cache-to type=registry,ref=${ECR_CACHE_WRITE},mode=max,image-manifest=true,oci-mediatypes=true \
               -f docker/Dockerfile.base \
               --push .
 