Add HuggingFace PTQ pipeline to launcher #1100
Changes from all commits: `677da32`, `bdf06f8`, `cfcfbeb`, `a81451e`, `0ee2df3`, `3bcf6a7`, `6a88cb1`
New file: `common/hf/ptq.sh` (62 lines added)

```bash
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# HuggingFace PTQ wrapper: downloads the model if needed, then runs huggingface_example.sh.
#
# Usage:
#   ptq.sh --repo <org/model> --local-dir <path> -- [huggingface_example.sh args...]
#
# Everything before "--" is handled by this wrapper (download logic).
# Everything after "--" is passed directly to huggingface_example.sh.
# The --model arg is automatically set to <local-dir> for huggingface_example.sh.

set -e

REPO=""
LOCAL_DIR=""
PTQ_ARGS=()

# Parse wrapper args up to "--", collect the rest for huggingface_example.sh
while [[ $# -gt 0 ]]; do
  case "$1" in
    --repo) REPO="$2"; shift 2 ;;
    --local-dir) LOCAL_DIR="$2"; shift 2 ;;
    --) shift; PTQ_ARGS=("$@"); break ;;
    *) echo "Unknown argument: $1 (use -- to separate PTQ args)" >&2; exit 1 ;;
  esac
done

if [ -z "$REPO" ] || [ -z "$LOCAL_DIR" ]; then
  echo "Usage: ptq.sh --repo <org/model> --local-dir <path> -- [huggingface_example.sh args...]" >&2
  exit 1
fi

# --- Step 1: Download model if not already present ---
if [ -f "$LOCAL_DIR/config.json" ]; then
  echo "Model already exists at $LOCAL_DIR, skipping download."
else
  echo "Downloading $REPO to $LOCAL_DIR ..."
  pip install -q huggingface_hub 2>/dev/null || true
  huggingface-cli download "$REPO" --local-dir "$LOCAL_DIR"
  echo "Download complete: $LOCAL_DIR"
fi

# --- Step 2: Run huggingface_example.sh ---
script_dir="$(dirname "$(readlink -f "$0")")"
HF_EXAMPLE="${script_dir}/../../modules/Model-Optimizer/examples/llm_ptq/scripts/huggingface_example.sh"

echo "Running huggingface_example.sh --model $LOCAL_DIR --trust_remote_code ${PTQ_ARGS[*]}"
exec bash "$HF_EXAMPLE" --model "$LOCAL_DIR" --trust_remote_code "${PTQ_ARGS[@]}"
```
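To make the wrapper's `--` contract concrete, here is an illustrative sketch of the same parsing logic (Python is used for illustration only; the real implementation is the bash `while`/`case` loop in the script, and `split_wrapper_args` is a hypothetical name):

```python
def split_wrapper_args(argv: list[str]) -> tuple[dict, list[str]]:
    """Mimic ptq.sh parsing: wrapper flags before "--", passthrough after."""
    opts = {"repo": None, "local_dir": None}
    rest: list[str] = []
    i = 0
    while i < len(argv):
        arg = argv[i]
        if arg == "--repo":
            opts["repo"] = argv[i + 1]
            i += 2
        elif arg == "--local-dir":
            opts["local_dir"] = argv[i + 1]
            i += 2
        elif arg == "--":
            # everything after "--" goes untouched to huggingface_example.sh
            rest = argv[i + 1:]
            break
        else:
            raise ValueError(f"Unknown argument: {arg} (use -- to separate PTQ args)")
    return opts, rest

opts, rest = split_wrapper_args(
    ["--repo", "Qwen/Qwen3-8B", "--local-dir", "/hf-local/Qwen/Qwen3-8B",
     "--", "--quant", "nvfp4", "--tasks", "quant"]
)
print(opts)  # {'repo': 'Qwen/Qwen3-8B', 'local_dir': '/hf-local/Qwen/Qwen3-8B'}
print(rest)  # ['--quant', 'nvfp4', '--tasks', 'quant']
```

The key property is that the wrapper never interprets anything after `--`, so new `huggingface_example.sh` flags need no wrapper changes.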
This file was deleted.
cjluo-nv marked this conversation as resolved.
New file: `examples/llm_ptq/hf_ptq.yaml` (51 lines added)

```yaml
# HuggingFace PTQ via huggingface_example.sh
#
# Quantizes a HuggingFace model using examples/llm_ptq/scripts/huggingface_example.sh.
# Default: Qwen/Qwen3.5-9B with nvfp4_mlp_only on 8xH200.
```
**Review comment (Contributor):** Documentation mismatch with actual configuration. Line 4 states "Default: Qwen/Qwen3.5-9B" but the actual configuration at line 34 uses `Qwen/Qwen3-8B` with `nvfp4` on 1 GPU. Suggested fix:

```diff
-# Default: Qwen/Qwen3.5-9B with nvfp4_mlp_only on 8xH200.
+# Default: Qwen/Qwen3-8B with nvfp4 on 1 GPU.
```
```yaml
#
# Usage (Slurm):
#   export SLURM_HOST=<slurm-host>
#   export SLURM_ACCOUNT=<your-team>
#   export SLURM_PARTITION=<your-partition>   # default: batch
#   export SLURM_JOB_DIR=/home/scratch.<user>/experiments
#   export SLURM_HF_LOCAL=/home/scratch.<user>/hf-local
#   export HF_TOKEN=<your-hf-token>           # for gated models; auto-injected into all tasks
#   cd tools/launcher
#   uv run launch.py --yaml examples/llm_ptq/hf_ptq.yaml --yes
#
# Usage (local Docker):
#   cd tools/launcher
#   uv run launch.py --yaml examples/llm_ptq/hf_ptq.yaml hf_local=/mnt/hf-local --yes
#
# Override model/quant via CLI:
#   uv run launch.py --yaml examples/llm_ptq/hf_ptq.yaml \
#     pipeline.global_vars.hf_model=Qwen/Qwen3-8B \
#     pipeline.task_0.args='[--model,<<global_vars.hf_local>>Qwen/Qwen3-8B,--quant,nvfp4]' \
#     --yes
```
**Review comment (Contributor, on lines 14 to 24):** Usage examples are inconsistent with this file and the wrapper argument contract. Suggested doc fix:

```diff
-# uv run launch.py --yaml examples/llm_ptq/hf_ptq.yaml --yes
+# uv run launch.py --yaml examples/Qwen/Qwen3-8B/hf_ptq.yaml --yes
 ...
-# uv run launch.py --yaml examples/llm_ptq/hf_ptq.yaml hf_local=/mnt/hf-local --yes
+# uv run launch.py --yaml examples/Qwen/Qwen3-8B/hf_ptq.yaml hf_local=/mnt/hf-local --yes
 ...
-# uv run launch.py --yaml examples/llm_ptq/hf_ptq.yaml \
-#   pipeline.global_vars.hf_model=Qwen/Qwen3-8B \
-#   pipeline.task_0.args='[--model,<<global_vars.hf_local>>Qwen/Qwen3-8B,--quant,nvfp4]' \
+# uv run launch.py --yaml examples/Qwen/Qwen3-8B/hf_ptq.yaml \
+#   pipeline.global_vars.hf_model=Qwen/Qwen3-8B \
 #   --yes
```
```yaml
job_name: hf_ptq_nvfp4
pipeline:
  skip: false
  allow_to_fail: false
  note: "HF PTQ with nvfp4"

  global_vars:
    hf_local: /hf-local/
    hf_model: Qwen/Qwen3-8B

  # Downloads model if needed, then runs huggingface_example.sh
  task_0:
    script: common/hf/ptq.sh
    args:
      - --repo <<global_vars.hf_model>>
      - --local-dir <<global_vars.hf_local>><<global_vars.hf_model>>
      - --
      - --quant nvfp4
      - --tasks quant
    slurm_config:
      _factory_: "slurm_factory"
      nodes: 1
      ntasks_per_node: 1
      gpus_per_node: 1
      time: "04:00:00"
      container: nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc7
```
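The launcher's actual templating engine is not part of this diff; as an illustration only, a naive substitution of the `<<global_vars.*>>` placeholders in `task_0` with the defaults above would resolve the args like this (the regex-based stand-in is an assumption, not the launcher's real mechanism):

```python
import re

global_vars = {"hf_local": "/hf-local/", "hf_model": "Qwen/Qwen3-8B"}

args = [
    "--repo <<global_vars.hf_model>>",
    "--local-dir <<global_vars.hf_local>><<global_vars.hf_model>>",
    "--",
    "--quant nvfp4",
    "--tasks quant",
]

# naive stand-in for the launcher's substitution step (illustration only)
resolved = [
    re.sub(r"<<global_vars\.(\w+)>>", lambda m: global_vars[m.group(1)], a)
    for a in args
]
print(resolved)
# ['--repo Qwen/Qwen3-8B', '--local-dir /hf-local/Qwen/Qwen3-8B', '--', '--quant nvfp4', '--tasks quant']
```

So with the defaults, `ptq.sh` is effectively invoked with `--repo Qwen/Qwen3-8B --local-dir /hf-local/Qwen/Qwen3-8B -- --quant nvfp4 --tasks quant`, matching the wrapper's `--` contract.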
**Review comment (Contributor, on lines 32 to 51):** Add required model metadata environment variables for this new model config. This new model YAML does not define the model metadata environment variables that the coding guidelines require model configs to set.
This file was deleted.
Changed file (Python launcher entry point):

```
@@ -30,6 +30,7 @@
import getpass
import os
import subprocess  # nosec B404
```
**Review comment (Contributor):** Remove the `# nosec` suppressions. These suppressions bypass enforced security checks without documented justification/approval in this PR. Suggested change:

```diff
-import subprocess  # nosec B404
+import subprocess
 ...
-    subprocess.run(["git", "clean", "-xdf", "."], cwd=examples_dir, check=True)  # nosec B603 B607
+    subprocess.run(["git", "clean", "-xdf", "."], cwd=examples_dir, check=True)
```

As per coding guidelines, Bandit security checks are enforced via pre-commit hooks. Also applies to: 95-95.
```
import warnings

import nemo_run as run

@@ -61,10 +62,11 @@
        "modules/Megatron-LM/examples/*",
        "modules/Megatron-LM/*.py",
        "modules/Model-Optimizer/modelopt/*",
        "modules/Model-Optimizer/modelopt_recipes/*",
        "modules/Model-Optimizer/examples/*",
        "common/*",
    ],
    relative_path=[LAUNCHER_DIR] * 6,
    relative_path=[LAUNCHER_DIR] * 7,
)

MODELOPT_SRC_PATH = os.path.join(LAUNCHER_DIR, "modules/Model-Optimizer/modelopt")

@@ -84,8 +86,14 @@ def launch(
    user: str = getpass.getuser(),
    identity: str = None,  # noqa: RUF013
    detach: bool = False,
    clean: bool = False,
) -> None:
    """Launch ModelOpt jobs on Slurm or locally with Docker."""
    if clean:
        examples_dir = os.path.join(_mo_symlink, "examples")
        print(f"Cleaning {examples_dir} with git clean -xdf ...")
        subprocess.run(["git", "clean", "-xdf", "."], cwd=examples_dir, check=True)  # nosec B603 B607

    if "NEMORUN_HOME" not in os.environ:
        warnings.warn("NEMORUN_HOME is not set. Defaulting to current working directory.")
    run.config.set_nemorun_home(os.environ.get("NEMORUN_HOME", os.getcwd()))
```
Changed file: `tools/launcher/slurm_config.py`

```
@@ -41,6 +41,7 @@ class SlurmConfig:
    nodes: int = 1
    ntasks_per_node: int = 1
    gpus_per_node: int = 1
    time: str = "04:00:00"
    local: bool = False

@@ -49,7 +50,7 @@ class SlurmConfig:
def slurm_factory(
    host: str = os.environ.get("SLURM_HOST", ""),
    account: str = os.environ.get("SLURM_ACCOUNT", ""),
    partition: str = "batch",
    partition: str = os.environ.get("SLURM_PARTITION", "batch"),
```
**Review comment (Contributor):** Add explicit partition isolation to tests. Tests at lines 71–85 of `tools/launcher/tests/test_slurm_config.py` call `slurm_factory()` without explicitly setting `partition`, and they do not patch `SLURM_PARTITION`; with this change their results now depend on whatever `SLURM_PARTITION` happens to be set in the test environment. The tests should clear or pin the variable (e.g. with `monkeypatch`).
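The reviewer's concern can be demonstrated with a small sketch (`resolve_partition` is a hypothetical stand-in for the parameter's default expression, shown as a function so the env lookup is repeatable):

```python
import os

def resolve_partition() -> str:
    # mirrors the changed default: SLURM_PARTITION wins, else "batch"
    return os.environ.get("SLURM_PARTITION", "batch")

# without isolation, the result depends on the ambient environment:
os.environ.pop("SLURM_PARTITION", None)
print(resolve_partition())  # batch

os.environ["SLURM_PARTITION"] = "interactive"
print(resolve_partition())  # interactive

os.environ.pop("SLURM_PARTITION", None)  # clean up, as isolated tests should
```

Note that in the real signature the expression is a parameter default, so it is evaluated once when the module is imported; tests therefore need to pin or clear `SLURM_PARTITION` before import as well as before each call.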
```
    nodes: int = 1,
    ntasks_per_node: int = 1,
    gpus_per_node: int = 1,

@@ -60,6 +61,7 @@ def slurm_factory(
    ],
    srun_args: list[str] = ["--no-container-mount-home"],
    array: str = None,  # noqa: RUF013
    time: str = "04:00:00",
) -> SlurmConfig:
    """Generic Slurm factory — configure via environment variables or CLI overrides."""
    return SlurmConfig(

@@ -74,4 +76,5 @@ def slurm_factory(
        container_mounts=container_mounts,
        srun_args=srun_args,
        array=array,
        time=time,
    )
```
**Review comment (Contributor, on `ptq.sh`):** Critical: hardcoded `--trust_remote_code` enables arbitrary code execution. The `--trust_remote_code` flag is hardcoded, which allows execution of arbitrary Python code bundled with HuggingFace checkpoints. If `REPO` points to a malicious or compromised model, this creates a remote code execution (RCE) vector. Per coding guidelines: "Do not hardcode `trust_remote_code=True`... Let the caller decide via a parameter; default to `False`." Suggested fix: make `trust_remote_code` configurable in the wrapper, pass it through at the exec line only when requested, and have the YAML config explicitly opt in.
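The direction of the reviewer's fix can be sketched as follows (Python for illustration only; the actual wrapper is bash, and `build_hf_example_cmd` is a hypothetical helper, not code from this PR):

```python
def build_hf_example_cmd(
    hf_example: str,
    local_dir: str,
    ptq_args: list[str],
    trust_remote_code: bool = False,  # opt-in, per the guideline
) -> list[str]:
    """Assemble the huggingface_example.sh invocation without hardcoding
    --trust_remote_code; the caller (e.g. the YAML config) must opt in."""
    cmd = ["bash", hf_example, "--model", local_dir]
    if trust_remote_code:
        cmd.append("--trust_remote_code")
    return cmd + list(ptq_args)

# default: flag absent, so untrusted repos cannot run bundled code
print(build_hf_example_cmd("huggingface_example.sh", "/hf-local/m", ["--quant", "nvfp4"]))
# explicit opt-in: flag present
print(build_hf_example_cmd("huggingface_example.sh", "/hf-local/m",
                           ["--quant", "nvfp4"], trust_remote_code=True))
```

Defaulting to `False` shifts the trust decision to the config that names the repo, which is the only place that knows whether the checkpoint's bundled code is acceptable.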