diff --git a/docs/source/_static/tasks/manipulation/g1_assemble_trocar.jpg b/docs/source/_static/tasks/manipulation/g1_assemble_trocar.jpg new file mode 100644 index 00000000000..ad62167a293 Binary files /dev/null and b/docs/source/_static/tasks/manipulation/g1_assemble_trocar.jpg differ diff --git a/docs/source/experimental-features/bleeding-edge.rst b/docs/source/experimental-features/bleeding-edge.rst index 5927ba1ae8d..860c08611d1 100644 --- a/docs/source/experimental-features/bleeding-edge.rst +++ b/docs/source/experimental-features/bleeding-edge.rst @@ -9,3 +9,144 @@ Directly integrating such features before they are complete and without feedback To address this, some major features will be released as Experimental Feature Branches. This way, the community can experiment with and contribute to the feature before it's fully integrated, reducing the likelihood of being derailed by unexpected and new errors. + +RL Post-Training for VLA Models +--------------------------------- + +`RLinf <https://github.com/RLinf/RLinf>`_ is a flexible and scalable open-source RL infrastructure designed for +Embodied and Agentic AI. This integration enables **reinforcement learning fine-tuning of Vision-Language-Action +(VLA) models** (e.g., GR00T, OpenVLA) on Isaac Lab simulation tasks. + +The typical workflow follows three stages: + +1. **Data collection** — Collect demonstration data from the Isaac Lab environment (e.g., via teleoperation or scripted policy). +2. **Base model training** — Train a VLA base model (e.g., GR00T) on the collected demonstrations using supervised learning. +3. **RL fine-tuning** — Fine-tune the pretrained VLA model on the Isaac Lab task using RLinf with PPO / Actor-Critic / SAC. 
+ +Overview +~~~~~~~~ + +The RLinf integration allows Isaac Lab users to: + +- Fine-tune pretrained VLA models on Isaac Lab tasks using PPO / Actor-Critic / SAC +- Leverage RLinf's FSDP-based distributed training across multiple GPUs/nodes +- Define observation/action mappings from Isaac Lab to GR00T format via a single YAML config +- Register Isaac Lab tasks into RLinf without modifying RLinf source code + +Architecture +~~~~~~~~~~~~ + +.. code-block:: text + + ┌────────────────────────────────────────────────────────────────┐ + │ RLinf Runner │ + │ (EmbodiedRunner / EvalRunner) │ + ├────────────────┬──────────────────────┬────────────────────────┤ + │ Actor Worker │ Rollout Worker │ Env Worker │ + │ (FSDP) │ (HF Inference) │ (IsaacLab Sim) │ + │ │ │ │ + │ Policy │ Multi-step rollout │ IsaacLabGenericEnv │ + │ Update │ with VLA model │ ├─ _make_env_function │ + │ │ │ ├─ _wrap_obs │ + │ │ │ └─ _wrap_action │ + └────────────────┴──────────────────────┴────────────────────────┘ + +**Data flow:** + +1. ``EnvWorker`` runs Isaac Lab simulation and converts observations to RLinf format +2. ``RolloutWorker`` runs VLA model inference (e.g., GR00T) to produce actions +3. Actions are converted back to Isaac Lab format and stepped in the environment +4. ``ActorWorker`` updates the VLA model with PPO/actor-critic loss via FSDP + +Prerequisites +~~~~~~~~~~~~~ + +- **Isaac Lab** installed and configured +- **Isaac-GR00T** repo (for VLA inference and data transforms) +- A **pretrained VLA checkpoint** in HuggingFace format +- Multi-GPU setup recommended (FSDP requires at least 1 GPU) + +Installation +~~~~~~~~~~~~ + +From the Isaac Lab root directory: + +.. 
code-block:: bash + + # Install isaaclab_contrib with the RLinf extra + pip install -e "source/isaaclab_contrib[rlinf]" --ignore-requires-python + + # Install Isaac-GR00T (pinned version) + git clone https://github.com/NVIDIA/Isaac-GR00T.git + cd Isaac-GR00T + git checkout 4af2b622892f7dcb5aae5a3fb70bcb02dc217b96 + pip install -e .[base] --no-deps + cd ../ + +Quick Start +~~~~~~~~~~~ + +**Training** — RL fine-tuning of a pretrained VLA model: + +.. code-block:: bash + + python scripts/reinforcement_learning/rlinf/train.py \ + --task Isaac-Assemble-Trocar-G129-Dex3-v0 \ + --config_path source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config \ + --config_name isaaclab_ppo_gr00t_assemble_trocar + +**Evaluation** — Evaluate a trained checkpoint with video recording: + +.. code-block:: bash + + python scripts/reinforcement_learning/rlinf/play.py \ + --task Isaac-Assemble-Trocar-G129-Dex3-Eval-v0 \ + --model_path /path/to/checkpoint \ + --config_path source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config \ + --config_name isaaclab_ppo_gr00t_assemble_trocar \ + --video + +Configuration +~~~~~~~~~~~~~ + +All configuration lives in a **single YAML file** loaded by `Hydra <https://hydra.cc/>`_. +The key configuration block is the ``env.train.isaaclab`` section, which defines how Isaac Lab observations +are converted to GR00T format: + +.. code-block:: yaml + + isaaclab: &isaaclab_config + task_description: "assemble trocar from tray" + + # IsaacLab → RLinf observation mapping + main_images: "front_camera" + extra_view_images: + - "left_wrist_camera" + - "right_wrist_camera" + states: + - key: "robot_joint_state" + slice: [15, 29] + - key: "robot_dex3_joint_state" + + # GR00T → IsaacLab action conversion + action_mapping: + prefix_pad: 15 + suffix_pad: 0 + +Key Files +~~~~~~~~~ + +.. 
code-block:: text + + scripts/reinforcement_learning/rlinf/ + ├── README.md # Detailed documentation + ├── train.py # Training entry point + ├── play.py # Evaluation entry point + └── cli_args.py # Shared CLI argument definitions + + source/isaaclab_contrib/isaaclab_contrib/rl/rlinf/ + ├── __init__.py + └── extension.py # Task registration, obs/action conversion + +For detailed configuration options, CLI arguments, and how to add new tasks, +see ``scripts/reinforcement_learning/rlinf/README.md``. diff --git a/docs/source/overview/environments.rst b/docs/source/overview/environments.rst index a28d129f702..39536c32883 100644 --- a/docs/source/overview/environments.rst +++ b/docs/source/overview/environments.rst @@ -204,6 +204,8 @@ for the lift-cube environment: +-------------------------+------------------------------+-----------------------------------------------------------------------------+------------------------------+ | |cabi_openarm_uni| | |cabi_openarm_uni-link| | Grasp the handle of a cabinet's drawer and open it with the OpenArm robot | | +-------------------------+------------------------------+-----------------------------------------------------------------------------+------------------------------+ + | |g1_assemble_trocar| | |g1_assemble_trocar-link| | Assemble trocar with a Unitree G1 humanoid robot with Dex3 hands | | + +-------------------------+------------------------------+-----------------------------------------------------------------------------+------------------------------+ .. |reach-franka| image:: ../_static/tasks/manipulation/franka_reach.jpg .. |reach-ur10| image:: ../_static/tasks/manipulation/ur10_reach.jpg @@ -228,6 +230,7 @@ for the lift-cube environment: .. |reach_openarm_uni| image:: ../_static/tasks/manipulation/openarm_uni_reach.jpg .. |lift_openarm_uni| image:: ../_static/tasks/manipulation/openarm_uni_lift.jpg .. |cabi_openarm_uni| image:: ../_static/tasks/manipulation/openarm_uni_open_drawer.jpg +.. 
|g1_assemble_trocar| image:: ../_static/tasks/manipulation/g1_assemble_trocar.jpg .. |reach-franka-link| replace:: `Isaac-Reach-Franka-v0 <../../../source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/joint_pos_env_cfg.py>`__ .. |reach-ur10-link| replace:: `Isaac-Reach-UR10-v0 <../../../source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/ur_10/joint_pos_env_cfg.py>`__ @@ -261,6 +264,7 @@ for the lift-cube environment: .. |reach_openarm_uni-link| replace:: `Isaac-Reach-OpenArm-v0 <../../../source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/joint_pos_env_cfg.py>`__ .. |lift_openarm_uni-link| replace:: `Isaac-Lift-Cube-OpenArm-v0 <../../../source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/joint_pos_env_cfg.py>`__ .. |cabi_openarm_uni-link| replace:: `Isaac-Open-Drawer-OpenArm-v0 <../../../source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/joint_pos_env_cfg.py>`__ +.. 
|g1_assemble_trocar-link| replace:: `Isaac-Assemble-Trocar-G129-Dex3-v0 <../../../source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/g129_dex3_env_cfg.py>`__ Contact-rich Manipulation @@ -769,6 +773,11 @@ inferencing, including reading from an already trained checkpoint and disabling - Manager Based - **rsl_rl** (PPO), **rl_games** (PPO), **skrl** (PPO), **sb3** (PPO) - ``newton_mjwarp``, ``physx`` + * - Isaac-Assemble-Trocar-G129-Dex3-v0 + - Isaac-Assemble-Trocar-G129-Dex3-Eval-v0 + - Manager Based + - **rlinf** (PPO) + - * - Isaac-Cart-Double-Pendulum-Direct-v0 - - Direct diff --git a/scripts/reinforcement_learning/rlinf/README.md b/scripts/reinforcement_learning/rlinf/README.md index 4ca96ba4fd2..725079782f9 100644 --- a/scripts/reinforcement_learning/rlinf/README.md +++ b/scripts/reinforcement_learning/rlinf/README.md @@ -81,7 +81,7 @@ python train.py python train.py --config_name isaaclab_ppo_gr00t_assemble_trocar # Training with task override -python train.py --task Isaac-Assemble-Trocar-G129-Dex3-RLinf-v0 +python train.py --task Isaac-Assemble-Trocar-G129-Dex3-v0 # Training with custom settings python train.py --num_envs 64 --max_epochs 1000 @@ -94,13 +94,13 @@ python train.py --list_tasks ```bash # Evaluate a trained checkpoint -python play.py --model_path /path/to/checkpoint +python play.py --task Isaac-Assemble-Trocar-G129-Dex3-Eval-v0 --model_path /path/to/checkpoint # Evaluate with video recording -python play.py --model_path /path/to/checkpoint --video +python play.py --task Isaac-Assemble-Trocar-G129-Dex3-Eval-v0 --model_path /path/to/checkpoint --video # Evaluate with specific number of environments -python play.py --model_path /path/to/checkpoint --num_envs 8 +python play.py --task Isaac-Assemble-Trocar-G129-Dex3-Eval-v0 --model_path /path/to/checkpoint --num_envs 8 ``` ## Configuration @@ -132,7 +132,7 @@ env: total_num_envs: 4 max_episode_steps: 256 init_params: - id: "Isaac-Assemble-Trocar-G129-Dex3-RLinf-v0" + id: 
"Isaac-Assemble-Trocar-G129-Dex3-v0" isaaclab: &isaaclab_config # IsaacLab ↔ RLinf mapping (see below) ... eval: diff --git a/source/isaaclab_assets/changelog.d/Adds-Assemble-Trocar-task-Based-RLinf.rst b/source/isaaclab_assets/changelog.d/Adds-Assemble-Trocar-task-Based-RLinf.rst new file mode 100644 index 00000000000..2a79e0a27d5 --- /dev/null +++ b/source/isaaclab_assets/changelog.d/Adds-Assemble-Trocar-task-Based-RLinf.rst @@ -0,0 +1,5 @@ +Added +^^^^^ + +* Added :class:`~isaaclab_assets.robots.unitree.G129_CFG_WITH_DEX3_BASE_FIX` robot configuration + for the Unitree G1 29-DOF with Dex3 hands. diff --git a/source/isaaclab_assets/isaaclab_assets/robots/unitree.py b/source/isaaclab_assets/isaaclab_assets/robots/unitree.py index 7a02c6eff29..8e4f692ca6d 100644 --- a/source/isaaclab_assets/isaaclab_assets/robots/unitree.py +++ b/source/isaaclab_assets/isaaclab_assets/robots/unitree.py @@ -21,10 +21,12 @@ """ import isaaclab.sim as sim_utils -from isaaclab.actuators import ActuatorNetMLPCfg, DCMotorCfg, ImplicitActuatorCfg +from isaaclab.actuators import ActuatorNetMLPCfg, DCMotorCfg, IdealPDActuatorCfg, ImplicitActuatorCfg from isaaclab.assets.articulation import ArticulationCfg from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR, ISAACLAB_NUCLEUS_DIR +HEALTHCARE_S3 = "https://omniverse-content-production.s3-us-west-2.amazonaws.com/Assets/Isaac/Healthcare/0.5.0/132c82d" + ## # Configuration - Actuators. 
# Unitree G1 (29 body joints) with Dex3 hands, loaded from the ``HEALTHCARE_S3`` asset root.
# Joint groups below: 12 leg joints, 3 waist joints, 14 arm joints and 14 Dex3 hand joints.
G129_CFG_WITH_DEX3_BASE_FIX = ArticulationCfg(
    spawn=sim_utils.UsdFileCfg(
        usd_path=f"{HEALTHCARE_S3}/Robots/UnitreeG1/g1_29dof_with_dex3_base_fix/g1_29dof_with_dex3_base_fix.usd",
        activate_contact_sensors=False,
        rigid_props=sim_utils.RigidBodyPropertiesCfg(
            disable_gravity=False,
            retain_accelerations=False,
            linear_damping=0.0,
            angular_damping=0.0,
            max_linear_velocity=1000.0,
            max_angular_velocity=1000.0,
            max_depenetration_velocity=1.0,
            solver_position_iteration_count=4,
            solver_velocity_iteration_count=0,
        ),
        articulation_props=sim_utils.ArticulationRootPropertiesCfg(
            enabled_self_collisions=False, solver_position_iteration_count=4, solver_velocity_iteration_count=0
        ),
    ),
    # Unlike a bare asset config, the prim path is already bound to the per-env robot prim.
    prim_path="/World/envs/env_.*/Robot",
    init_state=ArticulationCfg.InitialStateCfg(
        pos=(0.0, 0.0, 0.75),
        joint_pos={
            # Legs: slightly bent stance (hip pitch / knee / ankle pitch are non-zero).
            "left_hip_yaw_joint": 0.0,
            "left_hip_roll_joint": 0.0,
            "left_hip_pitch_joint": -0.05,
            "left_knee_joint": 0.2,
            "left_ankle_pitch_joint": -0.15,
            "left_ankle_roll_joint": 0.0,
            "right_hip_yaw_joint": 0.0,
            "right_hip_roll_joint": 0.0,
            "right_hip_pitch_joint": -0.05,
            "right_knee_joint": 0.2,
            "right_ankle_pitch_joint": -0.15,
            "right_ankle_roll_joint": 0.0,
            # Waist.
            "waist_yaw_joint": 0.0,
            "waist_roll_joint": 0.0,
            "waist_pitch_joint": 0.0,
            # Arms: 7 joints per side; only the elbows start away from zero.
            "left_shoulder_pitch_joint": 0.0,
            "left_shoulder_roll_joint": 0.0,
            "left_shoulder_yaw_joint": 0.0,
            "left_elbow_joint": -0.3,
            "left_wrist_roll_joint": 0.0,
            "left_wrist_pitch_joint": 0.0,
            "left_wrist_yaw_joint": 0.0,
            "right_shoulder_pitch_joint": 0.0,
            "right_shoulder_roll_joint": 0.0,
            "right_shoulder_yaw_joint": 0.0,
            "right_elbow_joint": -0.3,
            "right_wrist_roll_joint": 0.0,
            "right_wrist_pitch_joint": 0.0,
            "right_wrist_yaw_joint": 0.0,
            # Dex3 hands: 7 joints per hand, all starting at zero.
            "left_hand_index_0_joint": 0.0,
            "left_hand_middle_0_joint": 0.0,
            "left_hand_thumb_0_joint": 0.0,
            "left_hand_index_1_joint": 0.0,
            "left_hand_middle_1_joint": 0.0,
            "left_hand_thumb_1_joint": 0.0,
            "left_hand_thumb_2_joint": 0.0,
            "right_hand_index_0_joint": 0.0,
            "right_hand_middle_0_joint": 0.0,
            "right_hand_thumb_0_joint": 0.0,
            "right_hand_index_1_joint": 0.0,
            "right_hand_middle_1_joint": 0.0,
            "right_hand_thumb_1_joint": 0.0,
            "right_hand_thumb_2_joint": 0.0,
        },
        joint_vel={".*": 0.0},
    ),
    soft_joint_pos_limit_factor=0.9,
    actuators={
        # Hip and knee joints; the ankles are handled by the separate "feet" group.
        "legs": IdealPDActuatorCfg(
            joint_names_expr=[
                ".*_hip_yaw_joint",
                ".*_hip_roll_joint",
                ".*_hip_pitch_joint",
                ".*_knee_joint",
            ],
            effort_limit={
                ".*_hip_yaw_joint": 88.0,
                ".*_hip_roll_joint": 88.0,
                ".*_hip_pitch_joint": 88.0,
                ".*_knee_joint": 139.0,
            },
            velocity_limit={
                ".*_hip_yaw_joint": 32.0,
                ".*_hip_roll_joint": 32.0,
                ".*_hip_pitch_joint": 32.0,
                ".*_knee_joint": 20.0,
            },
            stiffness={
                ".*_hip_yaw_joint": 150.0,
                ".*_hip_roll_joint": 150.0,
                ".*_hip_pitch_joint": 150.0,
                ".*_knee_joint": 300.0,
            },
            damping={
                ".*_hip_yaw_joint": 2.0,
                ".*_hip_roll_joint": 2.0,
                ".*_hip_pitch_joint": 2.0,
                ".*_knee_joint": 4.0,
            },
            armature={
                ".*_hip_.*": 0.03,
                ".*_knee_joint": 0.03,
            },
        ),
        # Ankle pitch/roll joints.
        "feet": IdealPDActuatorCfg(
            joint_names_expr=[".*_ankle_pitch_joint", ".*_ankle_roll_joint"],
            stiffness={
                ".*_ankle_pitch_joint": 40.0,
                ".*_ankle_roll_joint": 40.0,
            },
            damping={
                ".*_ankle_pitch_joint": 2,
                ".*_ankle_roll_joint": 2,
            },
            effort_limit={
                ".*_ankle_pitch_joint": 50.0,
                ".*_ankle_roll_joint": 50.0,
            },
            velocity_limit={
                ".*_ankle_pitch_joint": 37.0,
                ".*_ankle_roll_joint": 37.0,
            },
            armature=0.03,
            friction=0.03,
        ),
        # The waist is the only group using an implicit actuator, with very high PD gains.
        # NOTE(review): the gains and the zero velocity limit presumably hold the waist rigid
        # during manipulation -- confirm this is intentional and that 0.0 is interpreted as
        # "locked" rather than "unlimited" by the actuator model.
        "waist": ImplicitActuatorCfg(
            joint_names_expr=["waist_yaw_joint", "waist_roll_joint", "waist_pitch_joint"],
            effort_limit=1000.0,
            velocity_limit=0.0,
            stiffness={"waist_yaw_joint": 10000.0, "waist_roll_joint": 10000.0, "waist_pitch_joint": 10000.0},
            damping={"waist_yaw_joint": 10000.0, "waist_roll_joint": 10000.0, "waist_pitch_joint": 10000.0},
            armature=None,
        ),
        # Shoulder, elbow and wrist joints of both arms; wrist pitch/yaw have lower limits
        # than the rest of the arm.
        "arms": IdealPDActuatorCfg(
            joint_names_expr=[
                ".*_shoulder_pitch_joint",
                ".*_shoulder_roll_joint",
                ".*_shoulder_yaw_joint",
                ".*_elbow_joint",
                ".*_wrist_.*_joint",
            ],
            effort_limit={
                ".*_shoulder_pitch_joint": 25.0,
                ".*_shoulder_roll_joint": 25.0,
                ".*_shoulder_yaw_joint": 25.0,
                ".*_elbow_joint": 25.0,
                ".*_wrist_roll_joint": 25.0,
                ".*_wrist_pitch_joint": 5.0,
                ".*_wrist_yaw_joint": 5.0,
            },
            velocity_limit={
                ".*_shoulder_pitch_joint": 37.0,
                ".*_shoulder_roll_joint": 37.0,
                ".*_shoulder_yaw_joint": 37.0,
                ".*_elbow_joint": 37.0,
                ".*_wrist_roll_joint": 37.0,
                ".*_wrist_pitch_joint": 22.0,
                ".*_wrist_yaw_joint": 22.0,
            },
            stiffness={
                ".*_shoulder_pitch_joint": 100.0,
                ".*_shoulder_roll_joint": 100.0,
                ".*_shoulder_yaw_joint": 40.0,
                ".*_elbow_joint": 40.0,
                ".*_wrist_.*_joint": 20.0,
            },
            damping={
                ".*_shoulder_pitch_joint": 15.0,
                ".*_shoulder_roll_joint": 15.0,
                ".*_shoulder_yaw_joint": 8.0,
                ".*_elbow_joint": 8.0,
                ".*_wrist_.*_joint": 4.0,
            },
            armature={".*_shoulder_.*": 0.03, ".*_elbow_.*": 0.03, ".*_wrist_.*_joint": 0.03},
            friction=0.03,
        ),
        # All Dex3 finger joints share one set of scalar gains and limits.
        "hands": IdealPDActuatorCfg(
            joint_names_expr=[
                ".*_hand_.*",
            ],
            effort_limit=5.0,
            velocity_limit=10.0,
            stiffness=8.0,
            damping=1.5,
            armature=0.03,
            friction=0.5,
        ),
    },
)
"""Configuration for the Unitree G1 29DOF robot with Dex3 hands and fixed base.

This configuration is designed for high-precision manipulation tasks such as trocar assembly.
"""
def _record_metrics(self, step_reward, terminations, infos):
    """Update the running episode statistics and publish them under ``infos["episode"]``.

    This override latches ``success_once`` from ``terminations`` (which the
    original author describes as signalling task completion).

    Args:
        step_reward: Reward for the current step; added in place to ``self.returns``.
        terminations: Per-env termination flags; converted with ``.bool()`` and OR-ed
            into ``self.success_once``, so a success stays set once it has been seen.
        infos: Info dictionary for the current step. It is mutated in place.

    Returns:
        The same ``infos`` object, with an ``"episode"`` entry holding cloned
        ``success_once``, ``return`` and ``episode_len`` tensors plus ``reward``
        (the return divided by the episode length).
    """
    # Accumulate first so the snapshots below reflect the current step.
    self.returns += step_reward
    # Rebind rather than update in place, exactly as the running flag was handled before.
    self.success_once = self.success_once | terminations.bool()

    total_return = self.returns.clone()
    steps_so_far = self.elapsed_steps.clone()

    infos["episode"] = {
        "success_once": self.success_once.clone(),
        "return": total_return,
        "episode_len": steps_so_far,
        # Mean reward per elapsed step.
        "reward": total_return / steps_so_far,
    }
    return infos
Args: diff --git a/source/isaaclab_tasks/changelog.d/Adds-Assemble-Trocar-task-Based-RLinf.rst b/source/isaaclab_tasks/changelog.d/Adds-Assemble-Trocar-task-Based-RLinf.rst new file mode 100644 index 00000000000..f5d918d3680 --- /dev/null +++ b/source/isaaclab_tasks/changelog.d/Adds-Assemble-Trocar-task-Based-RLinf.rst @@ -0,0 +1,7 @@ +Added +^^^^^ + +* Added ``Isaac-Assemble-Trocar-G129-Dex3-v0`` and + ``Isaac-Assemble-Trocar-G129-Dex3-Eval-v0`` manipulation tasks: a Unitree G1 + 29-DOF humanoid with Dex3 hands assembles a trocar from a tray, trained via + RL post-training of a VLA model using RLinf. diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/__init__.py new file mode 100644 index 00000000000..624d0226981 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Configurations for the assemble trocar environments.""" diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/__init__.py new file mode 100644 index 00000000000..cd8c26c840a --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
@configclass
class CameraBaseCfg:
    """Factory for pinhole camera configurations with shared defaults.

    Every argument of :meth:`get_camera_config` has a default value, so
    scene-specific presets only need to pass the parameters that differ.
    """

    @classmethod
    def get_camera_config(
        cls,
        prim_path: str = "/World/envs/env_.*/Robot/d435_link/front_cam",
        update_period: float = 0.02,
        height: int = 480,
        width: int = 640,
        focal_length: float = 7.6,
        focus_distance: float = 400.0,
        horizontal_aperture: float = 20.0,
        clipping_range: tuple[float, float] = (0.1, 1.0e5),
        pos_offset: tuple[float, float, float] = (0.0, 0.0, 0.0),
        rot_offset: tuple[float, float, float, float] = (0.5, -0.5, 0.5, -0.5),
        data_types: Sequence[str] | None = None,
    ) -> CameraCfg:
        """Build a pinhole camera configuration.

        Args:
            prim_path: Prim path of the camera in the scene.
            update_period: Sensor update period, in seconds.
            height: Image height, in pixels.
            width: Image width, in pixels.
            focal_length: Focal length of the pinhole model.
            focus_distance: Focus distance of the pinhole model.
            horizontal_aperture: Horizontal aperture of the pinhole model.
            clipping_range: Near and far clipping planes.
            pos_offset: Position offset (x, y, z) of the camera.
            rot_offset: Rotation offset quaternion of the camera; it is passed
                to the offset config together with the ``"ros"`` convention.
            data_types: Data types to record. ``None`` selects RGB only.

        Returns:
            The assembled camera configuration.
        """
        # Copy into a fresh list so the caller's sequence is never shared with the config.
        channels = ["rgb"] if data_types is None else list(data_types)

        lens = sim_utils.PinholeCameraCfg(
            focal_length=focal_length,
            focus_distance=focus_distance,
            horizontal_aperture=horizontal_aperture,
            clipping_range=clipping_range,
        )
        mount = CameraCfg.OffsetCfg(pos=pos_offset, rot=rot_offset, convention="ros")

        return CameraCfg(
            prim_path=prim_path,
            update_period=update_period,
            height=height,
            width=width,
            data_types=channels,
            spawn=lens,
            offset=mount,
        )


@configclass
class CameraPresets:
    """Ready-made camera configurations for the G1 + Dex3 setup.

    Each preset accepts keyword ``overrides`` that replace its own values
    before they are forwarded to :meth:`CameraBaseCfg.get_camera_config`.
    """

    @classmethod
    def g1_front_camera(cls, **overrides) -> CameraCfg:
        """Front camera configuration (224x224; prim path left at the base default)."""
        preset = {
            "height": 224,
            "width": 224,
            "focal_length": 10.5,
            "horizontal_aperture": 14.25,  # Match original vertical FOV after crop
        }
        return CameraBaseCfg.get_camera_config(**{**preset, **overrides})

    @classmethod
    def left_dex3_wrist_camera(cls, **overrides) -> CameraCfg:
        """Left wrist camera configuration (224x224 RGB)."""
        preset = {
            "prim_path": "/World/envs/env_.*/Robot/left_hand_camera_base_link/left_wrist_camera",
            "height": 224,
            "width": 224,
            "update_period": 0.02,
            "data_types": ["rgb"],
            "focal_length": 12.0,
            "focus_distance": 400.0,
            "horizontal_aperture": 14.25,  # Match original vertical FOV after crop
            "clipping_range": (0.1, 1.0e5),
            "pos_offset": (-0.04012, -0.07441, 0.15711),
            "rot_offset": (0.00539, 0.86024, 0.0424, 0.50809),
        }
        return CameraBaseCfg.get_camera_config(**{**preset, **overrides})

    @classmethod
    def right_dex3_wrist_camera(cls, **overrides) -> CameraCfg:
        """Right wrist camera configuration (224x224 RGB).

        Identical to the left wrist preset except for the prim path and the
        sign of the y position offset.
        """
        preset = {
            "prim_path": "/World/envs/env_.*/Robot/right_hand_camera_base_link/right_wrist_camera",
            "height": 224,
            "width": 224,
            "update_period": 0.02,
            "data_types": ["rgb"],
            "focal_length": 12.0,
            "focus_distance": 400.0,
            "horizontal_aperture": 14.25,  # Match original vertical FOV after crop
            "clipping_range": (0.1, 1.0e5),
            "pos_offset": (-0.04012, 0.07441, 0.15711),
            "rot_offset": (0.00539, 0.86024, 0.0424, 0.50809),
        }
        return CameraBaseCfg.get_camera_config(**{**preset, **overrides})
class IsaacLabDataConfig(BaseDataConfig):
    """GR00T data configuration for IsaacLab tasks using the G1 robot with Dex3 hands.

    The class attributes list the modality keys and time indices; the two
    methods turn them into GR00T modality configs and a transform pipeline.
    """

    # Video modality keys (from gr00t_mapping.video in RLINF_OBS_MAP_JSON)
    video_keys = [
        "video.left_wrist_view",
        "video.right_wrist_view",
        "video.room_view",
    ]

    # State modality keys (from gr00t_mapping.state in RLINF_OBS_MAP_JSON)
    state_keys = [
        "state.left_arm",
        "state.right_arm",
        "state.left_hand",
        "state.right_hand",
    ]

    # Action modality keys (output from GR00T model)
    action_keys = [
        "action.left_arm",
        "action.right_arm",
        "action.left_hand",
        "action.right_hand",
    ]

    # Language annotation key
    language_keys = ["annotation.human.task_description"]

    # A single observation step and a 16-step action chunk
    observation_indices = [0]
    action_indices = list(range(16))

    def modality_config(self) -> dict[str, ModalityConfig]:
        """Return one :class:`ModalityConfig` per modality.

        Video, state and language use the observation indices; actions use
        the action indices.
        """
        obs_steps = self.observation_indices
        return {
            "video": ModalityConfig(delta_indices=obs_steps, modality_keys=self.video_keys),
            "state": ModalityConfig(delta_indices=obs_steps, modality_keys=self.state_keys),
            "action": ModalityConfig(delta_indices=self.action_indices, modality_keys=self.action_keys),
            "language": ModalityConfig(delta_indices=obs_steps, modality_keys=self.language_keys),
        }

    def transform(self):
        """Return the composed transform pipeline for observations and actions.

        Stages run in this order: video, state, action, concatenation and the
        final GR00T model transform.
        """
        # Crop/resize is intentionally left out: the original author notes the cameras
        # already output 224x224 frames. To avoid VideoToTensor size-check errors, either
        # disable its input size validation or set the modality meta height/width to 224.
        # Re-add VideoCrop(scale=0.95) and VideoResize(height=224, width=224,
        # interpolation="linear") after VideoToTensor if the camera resolution changes.
        video_stage = [
            VideoToTensor(apply_to=self.video_keys),
            VideoColorJitter(
                apply_to=self.video_keys,
                brightness=0.3,
                contrast=0.4,
                saturation=0.5,
                hue=0.08,
            ),
            VideoToNumpy(apply_to=self.video_keys),
        ]

        state_stage = [
            StateActionToTensor(apply_to=self.state_keys),
            StateActionSinCosTransform(apply_to=self.state_keys),
        ]

        # Every action key is normalized with the same min-max mode.
        action_stage = [
            StateActionToTensor(apply_to=self.action_keys),
            StateActionTransform(
                apply_to=self.action_keys,
                normalization_modes=dict.fromkeys(self.action_keys, "min_max"),
            ),
        ]

        final_stage = [
            ConcatTransform(
                video_concat_order=self.video_keys,
                state_concat_order=self.state_keys,
                action_concat_order=self.action_keys,
            ),
            # Model-specific transform
            GR00TTransform(
                state_horizon=len(self.observation_indices),
                action_horizon=len(self.action_indices),
                max_state_dim=64,
                max_action_dim=32,
            ),
        ]

        pipeline = [*video_stage, *state_stage, *action_stage, *final_stage]
        return ComposedModalityTransform(transforms=pipeline)
load_data_config("policy.gr00t_config:IsaacLabDataConfig") to work +DATA_CONFIG_MAP["isaaclab_g1_dex3"] = IsaacLabDataConfig() diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/isaaclab_ppo_gr00t_assemble_trocar.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/isaaclab_ppo_gr00t_assemble_trocar.yaml new file mode 100644 index 00000000000..b130a12a8a5 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/isaaclab_ppo_gr00t_assemble_trocar.yaml @@ -0,0 +1,298 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +defaults: + - override hydra/job_logging: stdout + +hydra: + run: + dir: . + output_subdir: null + +cluster: + num_nodes: 1 + component_placement: + actor,env,rollout: all + +runner: + task_type: embodied + logger: + log_path: "../results" + project_name: rlinf + experiment_name: "test_gr00t" + logger_backends: ["tensorboard"] # wandb, swanlab + + max_epochs: 1000 + max_steps: -1 + + only_eval: False + eval_policy_path: null # Optional: .pt file or None, if None, will use the checkpoint in rollout.model.model_path + val_check_interval: -1 + save_interval: 2 + seq_length: 4096 + max_prompt_length: 30 + + resume_dir: null + +algorithm: + normalize_advantages: True + kl_penalty: kl # how to estimate kl divergence: kl or kl_penalty + group_size: 1 + reward_coef: 1.0 + rollout_epoch: 2 + eval_rollout_epoch: 1 # set eval_rollout_epoch > 0 when enable runner.only_eval or runner.val_check_interval > 0 + + reward_type: chunk_level + logprob_type: chunk_level + entropy_type: chunk_level + + update_epoch: 4 + adv_type: gae + loss_type: actor_critic + loss_agg_func: "token-mean" + kl_beta: 0.0 + entropy_bonus: 0 + clip_ratio_high: 0.2 + clip_ratio_low: 0.2 + clip_ratio_c: 3.0 + 
value_clip: 0.2 + huber_delta: 10.0 + + gamma: 0.99 + gae_lambda: 0.95 + + filter_rewards: False + rewards_lower_bound: 0.1 + rewards_upper_bound: 0.9 + # params for generation + sampling_params: + do_sample: True + temperature_train: 1.0 + temperature_eval: 0.6 + top_k: 50 + top_p: 1.0 + repetition_penalty: 1.0 + add_BOS: False + + # length argument for autoregressive sampling + # max length means max amount of tokens to generate + length_params: + max_new_token: null + max_length: 1024 + min_length: 1 + +# --------------------------------------------------------------------------- +# Environment +# --------------------------------------------------------------------------- +env: + group_name: "EnvGroup" + channel: + name: "env_buffer_list" + queue_name: "obs_buffer" + queue_size: 0 + enable_offload: False + + train: + env_type: isaaclab + total_num_envs: 4 + auto_reset: False + ignore_terminations: False + use_rel_reward: True + seed: 0 + group_size: 1 + reward_coef: 1.0 + use_fixed_reset_state_ids: True + max_steps_per_rollout_epoch: 256 + max_episode_steps: 256 + video_cfg: + save_video: False + info_on_video: True + video_base_dir: ${runner.logger.log_path}/video/train + init_params: + id: "Isaac-Assemble-Trocar-G129-Dex3-v0" + num_envs: null + max_episode_steps: ${env.train.max_episode_steps} + task_description: "assemble trocar from tray" + + # ======================================================================== + # IsaacLab -> RLinf -> GR00T observation/action mapping configuration + # This section defines how IsaacLab observations are converted to GR00T format + # ======================================================================== + isaaclab: &isaaclab_config # YAML anchor for reuse in eval + # Task description for language conditioning + task_description: "assemble trocar from tray" + + # --- IsaacLab -> RLinf observation mapping --- + # main_images: single camera key for main view + main_images: "front_camera" + # extra_view_images: list of 
camera keys to stack as (B, N, H, W, C) + extra_view_images: + - "left_wrist_camera" + - "right_wrist_camera" + # states: list of state specs with optional slicing + # Each entry can be a string (use full tensor) or dict with "key" and "slice" + states: + - key: "robot_joint_state" + slice: [15, 29] # G129 arm joints (7 left + 7 right) + - key: "robot_dex3_joint_state" + # slice: null # Use full tensor + + # --- RLinf -> GR00T format conversion --- + gr00t_mapping: + video: + main_images: "video.room_view" + extra_view_images: + - "video.left_wrist_view" + - "video.right_wrist_view" + state: + # Slice concatenated states into GR00T state keys + # Total states: 14 (arms) + 14 (dex3) = 28 dims + - gr00t_key: "state.left_arm" + slice: [0, 7] + - gr00t_key: "state.right_arm" + slice: [7, 14] + - gr00t_key: "state.left_hand" + slice: [14, 21] + - gr00t_key: "state.right_hand" + slice: [21, 28] + + # --- GR00T -> IsaacLab action conversion --- + action_mapping: + prefix_pad: 15 # Pad zeros at front for G129 body joints (not controlled) + suffix_pad: 0 + + # --- GR00T model configuration (single source of truth) --- + # actor.model.embodiment_tag and obs_converter_type reference these values via ${} + obs_converter_type: "dex3" + embodiment_tag: "new_embodiment" + embodiment_tag_id: 31 + data_config_class: "gr00t_config:IsaacLabDataConfig" + + eval: + env_type: isaaclab + total_num_envs: 4 + auto_reset: True + ignore_terminations: True + use_rel_reward: True + seed: 0 + group_size: 1 + reward_coef: 1.0 + use_fixed_reset_state_ids: True + max_steps_per_rollout_epoch: 256 + max_episode_steps: 256 + video_cfg: + save_video: True + info_on_video: True + video_base_dir: ${runner.logger.log_path}/video/eval + init_params: + id: "Isaac-Assemble-Trocar-G129-Dex3-Eval-v0" + num_envs: null + max_episode_steps: ${env.eval.max_episode_steps} + task_description: "install trocar from box" + # Reuse IsaacLab config from train section via YAML anchor + isaaclab: *isaaclab_config + +# 
--------------------------------------------------------------------------- +# Rollout +# --------------------------------------------------------------------------- +rollout: + group_name: "RolloutGroup" + channel: + name: ${env.channel.name} + queue_name: "action_buffer" + queue_size: 0 + mode: "colocate" + backend: "huggingface" + enable_offload: True + pipeline_stage_num: 1 + + model: + model_path: "/mnt/ckpt/g1_install_trocar_sim_box_v3_60_train_bs32_1_gpus_cos_30k_tune_visual/" + precision: ${actor.model.precision} + obs_converter_type: ${env.train.isaaclab.obs_converter_type} + embodiment_tag: ${env.train.isaaclab.embodiment_tag} + +# --------------------------------------------------------------------------- +# Actor +# --------------------------------------------------------------------------- +actor: + group_name: "ActorGroup" + channel: + name: ${env.channel.name} + queue_name: "replay_buffer" + queue_size: 0 + training_backend: "fsdp" + micro_batch_size: 2 + global_batch_size: 4 + seed: 1234 + enable_offload: False + + model: + model_type: "gr00t" + model_path: "/mnt/ckpt/g1_install_trocar_sim_box_v3_60_train_bs32_1_gpus_cos_30k_tune_visual/" + precision: "bf16" + trust_remote_code: True + is_lora: false + action_dim: 28 + num_action_chunks: 1 + denoising_steps: 4 + policy_setup: "widowx_bridge" + obs_converter_type: ${env.train.isaaclab.obs_converter_type} + embodiment_tag: ${env.train.isaaclab.embodiment_tag} + add_value_head: True + rl_head_config: + joint_logprob: False + noise_method: "flow_sde" + ignore_last: False + safe_get_logprob: False + noise_anneal: False + noise_params: [0.7, 0.3, 400] + noise_level: 0.3 + add_value_head: ${actor.model.add_value_head} + chunk_critic_input: False + detach_critic_input: True + disable_dropout: True + use_vlm_value: False + value_vlm_mode: "mean_token" + padding_value: 850 + + optim: + lr: 5e-6 + value_lr: 1e-4 + adam_beta1: 0.9 + adam_beta2: 0.95 + adam_eps: 1.0e-08 + clip_grad: 1.0 + weight_decay: 0.01 + 
critic_warmup_steps: 0 + + fsdp_config: + strategy: "fsdp" + sharding_strategy: "full_shard" + gradient_checkpointing: False + cpu_offload: False + offload_pin_memory: False + reshard_after_forward: True + enable_gradient_accumulation: True + forward_prefetch: False + limit_all_gathers: False + backward_prefetch: null + use_orig_params: False + use_liger_kernel: False + fsdp_size: -1 + mixed_precision: + param_dtype: ${actor.model.precision} + reduce_dtype: ${actor.model.precision} + buffer_dtype: ${actor.model.precision} + amp: + enabled: False + precision: "bf16" + use_grad_scaler: False + +reward: + use_reward_model: False + +critic: + use_critic_model: False diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/robot_config.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/robot_config.py new file mode 100644 index 00000000000..81c60741b78 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/config/robot_config.py @@ -0,0 +1,147 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Robot configuration for the `assemble_trocar` task. + +This file is intentionally **minimal**: +- Supported robot: **Unitree G1 (29 DOF body)** +- Supported hands: **Dex3** + +The only public entry point expected by the task is +`G1RobotPresets.g1_29dof_dex3_base_fix(...)`. +""" + +import numpy as np + +from isaaclab.assets import ArticulationCfg +from isaaclab.utils import configclass + +from isaaclab_assets.robots.unitree import G129_CFG_WITH_DEX3_BASE_FIX + +# Joint indices in the full robot joint vector for observation extraction. 
+# Body joints: 29 DOF (legs, waist, arms, wrists) +G1_29DOF_BODY_JOINT_INDICES: list[int] = [ + 0, + 3, + 6, + 9, + 13, + 17, + 1, + 4, + 7, + 10, + 14, + 18, + 2, + 5, + 8, + 11, + 15, + 19, + 21, + 23, + 25, + 27, + 12, + 16, + 20, + 22, + 24, + 26, + 28, +] + +# Dex3 hand joints: 14 DOF (left + right) +G1_DEX3_JOINT_INDICES: list[int] = [31, 37, 41, 30, 36, 29, 35, 34, 40, 42, 33, 39, 32, 38] + +# Default joint positions for the supported setup (G1 29DOF + Dex3). +DEFAULT_JOINT_POS: dict[str, float] = { + # legs + "left_hip_pitch_joint": 0.0, + "left_hip_roll_joint": 0.0, + "left_hip_yaw_joint": 0.0, + "left_knee_joint": 0.0, + "left_ankle_pitch_joint": 0.0, + "left_ankle_roll_joint": 0.0, + "right_hip_pitch_joint": 0.0, + "right_hip_roll_joint": 0.0, + "right_hip_yaw_joint": 0.0, + "right_knee_joint": 0.0, + "right_ankle_pitch_joint": 0.0, + "right_ankle_roll_joint": 0.0, + # waist + "waist_yaw_joint": 0.0, + "waist_roll_joint": 0.0, + "waist_pitch_joint": 0.0, + # arms + "left_shoulder_pitch_joint": -0.754599, + "left_shoulder_roll_joint": 0.550010, + "left_shoulder_yaw_joint": -0.399298, + "left_elbow_joint": 0.278886, + "left_wrist_roll_joint": 0.320559, + "left_wrist_pitch_joint": -0.203525, + "left_wrist_yaw_joint": -0.387435, + "right_shoulder_pitch_joint": -0.340858, + "right_shoulder_roll_joint": -0.186152, + "right_shoulder_yaw_joint": 0.015023, + "right_elbow_joint": -0.777159, + "right_wrist_roll_joint": 0.019805, + "right_wrist_pitch_joint": 1.182285, + "right_wrist_yaw_joint": -0.022848, + # dex3 hands (left) + "left_hand_index_0_joint": -60.0 * np.pi / 180.0, + "left_hand_middle_0_joint": -60.0 * np.pi / 180.0, + "left_hand_thumb_0_joint": 0.0, + "left_hand_index_1_joint": -40.0 * np.pi / 180.0, + "left_hand_middle_1_joint": -40.0 * np.pi / 180.0, + "left_hand_thumb_1_joint": 0.0, + "left_hand_thumb_2_joint": 0.0, + # dexterous hand joint - right hand + "right_hand_index_0_joint": 60.0 * np.pi / 180.0, + "right_hand_middle_0_joint": 60.0 * np.pi 
/ 180.0, + "right_hand_thumb_0_joint": 0.0, + "right_hand_index_1_joint": 40.0 * np.pi / 180.0, + "right_hand_middle_1_joint": 40.0 * np.pi / 180.0, + "right_hand_thumb_1_joint": 0.0, + "right_hand_thumb_2_joint": 0.0, +} + + +def make_g1_29dof_dex3_cfg( + *, + prim_path: str = "/World/envs/env_.*/Robot", + init_pos: tuple[float, float, float] = (-0.15, 0.0, 0.744), + init_rot: tuple[float, float, float, float] = (0, 0, 0.7071, 0.7071), + custom_joint_pos: dict[str, float] | None = None, + base_config: ArticulationCfg = G129_CFG_WITH_DEX3_BASE_FIX, +) -> ArticulationCfg: + """Create the only supported robot articulation cfg for this task.""" + joint_pos = DEFAULT_JOINT_POS.copy() + if custom_joint_pos: + joint_pos.update(custom_joint_pos) + return base_config.replace( + prim_path=prim_path, + init_state=ArticulationCfg.InitialStateCfg( + pos=init_pos, + rot=init_rot, + joint_pos=joint_pos, + joint_vel={".*": 0.0}, + ), + ) + + +@configclass +class G1RobotPresets: + """G1 robot preset configuration collection""" + + @classmethod + def g1_29dof_dex3_base_fix( + cls, + init_pos: tuple[float, float, float] = (-0.15, 0.0, 0.76), + init_rot: tuple[float, float, float, float] = (0, 0, 0.7071, 0.7071), + ) -> ArticulationCfg: + """Build the G1 29-DOF + Dex3 base-fix robot cfg for the assemble_trocar task.""" + return make_g1_29dof_dex3_cfg(init_pos=init_pos, init_rot=init_rot) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/g129_dex3_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/g129_dex3_env_cfg.py new file mode 100644 index 00000000000..50e58134f14 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/g129_dex3_env_cfg.py @@ -0,0 +1,444 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_physx.physics import PhysxCfg + +import isaaclab.envs.mdp as base_mdp +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg +from isaaclab.envs import ManagerBasedRLEnvCfg, ViewerCfg +from isaaclab.managers import EventTermCfg, SceneEntityCfg +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg +from isaaclab.utils import configclass + +from isaaclab_tasks.manager_based.manipulation.assemble_trocar import mdp + +from isaaclab_tasks.manager_based.manipulation.assemble_trocar.config import ( # isort: skip + CameraPresets, + G1RobotPresets, +) + +joint_names = [ + "left_hip_pitch_joint", + "right_hip_pitch_joint", + "left_hip_roll_joint", + "right_hip_roll_joint", + "left_hip_yaw_joint", + "right_hip_yaw_joint", + "left_knee_joint", + "right_knee_joint", + "left_ankle_pitch_joint", + "right_ankle_pitch_joint", + "left_ankle_roll_joint", + "right_ankle_roll_joint", + "waist_yaw_joint", + "waist_roll_joint", + "waist_pitch_joint", + "left_shoulder_pitch_joint", + "left_shoulder_roll_joint", + "left_shoulder_yaw_joint", + "left_elbow_joint", + "left_wrist_roll_joint", + "left_wrist_pitch_joint", + "left_wrist_yaw_joint", + "right_shoulder_pitch_joint", + "right_shoulder_roll_joint", + "right_shoulder_yaw_joint", + "right_elbow_joint", + "right_wrist_roll_joint", + "right_wrist_pitch_joint", + "right_wrist_yaw_joint", + "left_hand_thumb_0_joint", + "left_hand_thumb_1_joint", + "left_hand_thumb_2_joint", + "left_hand_middle_0_joint", + "left_hand_middle_1_joint", + "left_hand_index_0_joint", + "left_hand_index_1_joint", + "right_hand_thumb_0_joint", + 
"right_hand_thumb_1_joint", + "right_hand_thumb_2_joint", + "right_hand_middle_0_joint", + "right_hand_middle_1_joint", + "right_hand_index_0_joint", + "right_hand_index_1_joint", +] +offset_dict = { + "left_elbow_joint": -0.3, + "right_elbow_joint": -0.3, +} + +HEALTHCARE_S3 = "https://omniverse-content-production.s3-us-west-2.amazonaws.com/Assets/Isaac/Healthcare/0.5.0/132c82d" +USD_ROOT = f"{HEALTHCARE_S3}/Props/LightWheel" + + +@configclass +class AssembleTrocarSceneCfg(InteractiveSceneCfg): + """Scene configuration for the assemble_trocar task (robot + objects + lights).""" + + # humanoid robot configuration + robot: ArticulationCfg = G1RobotPresets.g1_29dof_dex3_base_fix( + init_pos=(-1.84919, 1.94, 0.81168), init_rot=(0.0, 0.0, 0.0, 1.0) + ) + # add camera configuration + front_camera = CameraPresets.g1_front_camera() + left_wrist_camera = CameraPresets.left_dex3_wrist_camera() + right_wrist_camera = CameraPresets.right_dex3_wrist_camera() + + scene = AssetBaseCfg( + prim_path="/World/envs/env_.*/Scene", + spawn=UsdFileCfg( + usd_path=f"{USD_ROOT}/scene03.usd", + ), + ) + + trocar_1 = RigidObjectCfg( + prim_path="/World/envs/env_.*/trocar_1", + spawn=UsdFileCfg( + usd_path=f"{USD_ROOT}/Assets/Trocar002/Trocar002-xform-wo.usd", + collision_props=sim_utils.CollisionPropertiesCfg( + collision_enabled=True, + contact_offset=0.001, + rest_offset=-0.001, + ), + ), + init_state=RigidObjectCfg.InitialStateCfg( + pos=[-1.60202, 1.91362, 0.87183], + rot=[-0.0, 0.70711, 0.70711, 0.0], + ), + ) + + trocar_2 = RigidObjectCfg( + prim_path="/World/envs/env_.*/trocar_2", + spawn=UsdFileCfg( + usd_path=( + f"{USD_ROOT}/Assets/" + "DisposableLaparoscopicPunctureDevice001/" + "DisposableLaparoscopicPunctureDevice005-xform.usd" + ), + rigid_props=sim_utils.RigidBodyPropertiesCfg( + rigid_body_enabled=True, + disable_gravity=False, + ), + ), + init_state=RigidObjectCfg.InitialStateCfg( + rot=[-0.71475, -0.000243, 0.05853, 0.69692], pos=[-1.50635, 1.90997, 0.8631] + ), + ) + tray 
= ArticulationCfg( + prim_path="/World/envs/env_.*/surgical_tray", + spawn=UsdFileCfg( + usd_path=f"{USD_ROOT}/Assets/SurgicalTray001/SurgicalTray001.usd", + ), + init_state=ArticulationCfg.InitialStateCfg(pos=[-1.54919, 2.03365, 0.84554], rot=[0.0, 0.0, -0.70711, 0.70711]), + actuators={}, # Empty dict for passive articulation (no motors) + ) + + # Lights + light = AssetBaseCfg( + prim_path="/World/light", + spawn=sim_utils.DomeLightCfg( + color=(0.75, 0.75, 0.75), + intensity=1000.0, + ), + ) + + +## +# MDP settings +## +@configclass +class ActionsCfg: + """defines the action configuration related to robot control, using direct joint angle control""" + + joint_pos = mdp.JointPositionActionCfg( + asset_name="robot", + joint_names=joint_names, + scale=1.0, + use_default_offset=False, + offset=offset_dict, + preserve_order=True, + ) + + +@configclass +class ObservationsCfg: + """defines all available observation information""" + + @configclass + class PolicyCfg(ObsGroup): + """policy group observation configuration class + defines all state observation values for policy decision + inherit from ObsGroup base class + """ + + # robot joint state observation + robot_joint_state = ObsTerm(func=mdp.get_robot_body_joint_states) + # dex3 hand joint state observation + robot_dex3_joint_state = ObsTerm(func=mdp.get_robot_dex3_joint_states) + + def __post_init__(self): + """post initialization function + set the basic attributes of the observation group + """ + self.enable_corruption = False # disable observation value corruption + self.concatenate_terms = False # disable observation item connection + + @configclass + class CameraImagesCfg(ObsGroup): + """Observations from the robot's cameras.""" + + front_camera = ObsTerm( + func=base_mdp.image, + params={"sensor_cfg": SceneEntityCfg("front_camera"), "data_type": "rgb", "normalize": False}, + ) + left_wrist_camera = ObsTerm( + func=base_mdp.image, + params={"sensor_cfg": SceneEntityCfg("left_wrist_camera"), "data_type": 
"rgb", "normalize": False}, + ) + right_wrist_camera = ObsTerm( + func=base_mdp.image, + params={"sensor_cfg": SceneEntityCfg("right_wrist_camera"), "data_type": "rgb", "normalize": False}, + ) + + def __post_init__(self): + self.concatenate_terms = False + + # observation groups + # create policy observation group instance + policy: PolicyCfg = PolicyCfg() + camera_images: CameraImagesCfg = CameraImagesCfg() + + +@configclass +class TerminationsCfg: + """Termination conditions for the environment.""" + + # Time out termination + time_out = DoneTerm(func=mdp.time_out, time_out=True) + + # Task success termination (all stages completed) + task_success = DoneTerm( + func=mdp.task_success_termination, + time_out=False, # This is a success termination, not a failure + params={ + "print_log": False, + "success_stage": 4, + }, + ) + object_drop = DoneTerm( + func=mdp.object_drop_termination, + time_out=True, # Treat as timeout/failure + params={ + "drop_height_threshold": 0.5, # Objects below this Z height are considered dropped + "asset_cfg1": SceneEntityCfg("trocar_1"), + "asset_cfg2": SceneEntityCfg("trocar_2"), + }, + ) + + +@configclass +class RewardsCfg: + """Reward configuration for sparse reward mode. + + Each stage gives 1.0 reward on completion -> Total reward for full task = 4.0 + This ensures clear reward signal for each stage transition. + + ``update_stage`` runs first (weight=0) to advance the task stage before any + reward term reads it, removing implicit ordering dependencies. 
+ """ + + # Stage machine — weight=0, runs before all reward terms to update task stage + update_stage = RewTerm( + func=mdp.update_task_stage, + weight=0.0, + params={ + "asset_cfg1": SceneEntityCfg("trocar_1"), + "asset_cfg2": SceneEntityCfg("trocar_2"), + "table_height": 0.85483, + "lift_threshold": 0.15, + "tip_align_threshold": 0.015, + "insertion_dist_threshold": 0.05, + "insertion_angle_threshold": 0.15, + "placement_x_min": -1.8, + "placement_x_max": -1.4, + "placement_y_min": 1.5, + "placement_y_max": 1.8, + "print_log": False, + }, + ) + + # Stage 0: Lift trocars + lift_trocars = RewTerm( + func=mdp.lift_trocars_reward, + weight=1.0, + params={ + "table_height": 0.85483, + "lift_threshold": 0.15, + "asset_cfg1": SceneEntityCfg("trocar_1"), + "asset_cfg2": SceneEntityCfg("trocar_2"), + "use_sparse_reward": True, + "print_log": False, + }, + ) + + # Stage 1: Tip alignment (find hole) + tip_alignment = RewTerm( + func=mdp.trocar_tip_alignment_reward, + weight=1.0, # Give 1.0 reward when stage 1->2 completes + params={ + "tip_dist_std": 0.02, # Std for tip distance reward shaping + "asset_cfg1": SceneEntityCfg("trocar_1"), + "asset_cfg2": SceneEntityCfg("trocar_2"), + "use_sparse_reward": True, + "print_log": False, + }, + ) + + # Stage 2: Insertion (push in) + insert_trocars = RewTerm( + func=mdp.trocar_insertion_reward, + weight=1.0, # Give 1.0 reward when stage 2->3 completes + params={ + "angle_std": 0.2, # Std for angle alignment reward + "angle_threshold": 0.10, # ~5.7 degrees tolerance for parallelism + "center_dist_std": 0.05, # Std for center distance reward + "asset_cfg1": SceneEntityCfg("trocar_1"), + "asset_cfg2": SceneEntityCfg("trocar_2"), + "use_sparse_reward": True, + "print_log": False, + }, + ) + + # Stage 3: Placement (place in tray) + placement_trocars = RewTerm( + func=mdp.trocar_placement_reward, + weight=1.0, # Give 1.0 reward when stage 3->4 completes + params={ + "x_min": -1.8, + "x_max": -1.4, + "y_min": 1.5, + "y_max": 1.8, + 
"asset_cfg1": SceneEntityCfg("trocar_1"), + "asset_cfg2": SceneEntityCfg("trocar_2"), + "use_sparse_reward": True, + "print_log": False, + }, + ) + + +@configclass +class EventCfg: + """Event configuration for scene reset.""" + + # Reset scene when episode terminates (timeout or success) + reset_scene = EventTermCfg(func=base_mdp.reset_scene_to_default, mode="reset") + + # Reset task stage tracker when environment resets + reset_task_stage = EventTermCfg(func=mdp.reset_task_stage, mode="reset") + + # Random rotation for tray and trocars + reset_tray_random_rotation = EventTermCfg( + func=mdp.reset_tray_with_random_rotation, + mode="reset", + params={ + "tray_cfg": SceneEntityCfg("tray"), + "trocar_1_cfg": SceneEntityCfg("trocar_1"), + "trocar_2_cfg": SceneEntityCfg("trocar_2"), + "rotation_range": [0, 10], + }, + ) + + +@configclass +class G1AssembleTrocarEnvCfg(ManagerBasedRLEnvCfg): + """Unitree G1 robot assemble trocar environment configuration class + inherits from ManagerBasedRLEnvCfg, defines all configuration parameters for the entire environment + """ + + # scene settings + scene: AssembleTrocarSceneCfg = AssembleTrocarSceneCfg( + num_envs=1, + env_spacing=6.0, + replicate_physics=True, + ) + # viewer settings + viewer: ViewerCfg = ViewerCfg( + eye=(-0.5, 2.4, 1.6), + lookat=(-5.4, 0.2, -1.2), + cam_prim_path="/OmniverseKit_Persp", + ) + # basic settings + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + # MDP settings + terminations: TerminationsCfg = TerminationsCfg() + events: EventCfg = EventCfg() + commands = None + rewards: RewardsCfg = RewardsCfg() + curriculum = None + + num_rerenders_on_reset: int = 1 + + def __post_init__(self): + """Post initialization.""" + # general settings + self.decimation = 4 + self.episode_length_s = 20.0 + # simulation settings + self.sim.dt = 1 / 200 + self.sim.render_interval = self.decimation + self.sim.physics = PhysxCfg(bounce_threshold_velocity=0.01) + 
self.sim.render.enable_translucency = True + self.sim.render.carb_settings = { + "rtx.raytracing.fractionalCutoutOpacity": True, + } + self.sim.render.rendering_mode = "quality" + self.sim.render.antialiasing_mode = "DLAA" + + +@configclass +class EventCfgFixTrayRotation(EventCfg): + """Event configuration with a deterministic-but-different yaw per env index. + + This is useful for eval with many parallel envs: + - env 0..N-1 get different yaw angles, + - for a fixed global seed, the set of N angles is reproducible across runs/resets. + + Notes: + - Determinism is tied to torch's global seed (set by env reset seed in IsaacLab). + - Angle unit is degrees. + """ + + reset_tray_random_rotation = EventTermCfg( + func=mdp.reset_tray_with_random_rotation, + mode="reset", + params={ + "tray_cfg": SceneEntityCfg("tray"), + "trocar_1_cfg": SceneEntityCfg("trocar_1"), + "trocar_2_cfg": SceneEntityCfg("trocar_2"), + "rotation_range": [0, 10], + "deterministic_per_env": True, + # Use torch.initial_seed() by default to follow the env reset seed. + "deterministic_seed": None, + }, + ) + + +@configclass +class G1AssembleTrocarEvalEnvCfg(G1AssembleTrocarEnvCfg): + """Eval-friendly env cfg. + + This mirrors `G1AssembleTrocarEnvCfg` except for the `events` override below, and is registered under a + separate Gym id for compatibility with RLinf configs. + """ + + # Override events to enforce deterministic per-env tray yaw on every reset. + events: EventCfgFixTrayRotation = EventCfgFixTrayRotation() diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/__init__.py new file mode 100644 index 00000000000..d428ed46f7b --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). 
+# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""MDP utilities for the assemble_trocar task.""" + +from isaaclab.utils.module import lazy_export + +lazy_export() diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/events.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/events.py new file mode 100644 index 00000000000..92214471ac0 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/events.py @@ -0,0 +1,253 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Custom event functions for the assemble_trocar environment.""" + +from __future__ import annotations + +import logging +import math +from typing import TYPE_CHECKING + +import torch + +from isaaclab.managers import SceneEntityCfg +from isaaclab.utils.math import quat_apply, quat_mul + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + +logger = logging.getLogger(__name__) + +__all__ = [ + "reset_tray_with_random_rotation", + "reset_robot_to_default_joint_positions", + "reset_task_stage", +] + + +def reset_task_stage( + env: ManagerBasedRLEnv, + env_ids: torch.Tensor, + print_log: bool = False, +) -> None: + """Reset task stage to 0 for specified environments. + + This should be called during environment reset events. + Also resets all locked reward caches to maintain continuity. + + Args: + env: The environment instance + env_ids: Indices of environments to reset + print_log: If True, log debug information. 
+ """ + from .rewards import get_assemble_trocar_state + + s = get_assemble_trocar_state(env) + s.task_stage[env_ids] = 0 + + # Reset dense-reward locked caches + s.lift_reward_locked[env_ids] = 0 + s.tip_reward_locked[env_ids] = 0 + s.insertion_reward_locked[env_ids] = 0 + s.placement_reward_locked[env_ids] = 0 + + # Reset sparse-reward previous-stage trackers + s.prev_stage_lift[env_ids] = 0 + s.prev_stage_tip[env_ids] = 0 + s.prev_stage_insert[env_ids] = 0 + s.prev_stage_place[env_ids] = 0 + + # Reset debug throttle + s.last_debug_print_step = -1 + + if print_log: + logger.debug("Reset task stage for %d environment(s)", len(env_ids)) + + +def reset_tray_with_random_rotation( + env: ManagerBasedRLEnv, + env_ids: torch.Tensor, + tray_cfg: SceneEntityCfg, + trocar_1_cfg: SceneEntityCfg, + trocar_2_cfg: SceneEntityCfg, + rotation_range: tuple[float, float] | float = (-5.0, 5.0), # (min, max) degrees or ±value + deterministic_per_env: bool = False, + deterministic_seed: int | None = None, +): + """Reset tray with random rotation while keeping relative positions of trocars. + + This function: + 1. Applies a random yaw rotation within rotation_range to the tray + 2. Rotates trocar_1 and trocar_2 around the tray center to maintain relative positions + 3. Uses separate pose/velocity writes to ensure instant teleportation (no interpolation) + + Args: + env: The environment instance. + env_ids: The environment indices to reset. + tray_cfg: Scene entity config for the tray. + trocar_1_cfg: Scene entity config for trocar_1. + trocar_2_cfg: Scene entity config for trocar_2. + rotation_range: Rotation angle range in degrees. 
Can be: + - tuple (min, max): Random rotation between min and max degrees + - float value: Random rotation between -value and +value degrees + Examples: (0, 10), (-5, 15), 5.0 (equivalent to (-5, 5)) + """ + if len(env_ids) == 0: + return + + # Parse rotation_range parameter + if isinstance(rotation_range, (tuple, list)): + # User provided (min, max) range + min_angle_deg, max_angle_deg = rotation_range[0], rotation_range[1] + else: + # User provided single value (symmetric range ±value) + min_angle_deg, max_angle_deg = -rotation_range, rotation_range + + # Get assets + tray = env.scene[tray_cfg.name] + trocar_1 = env.scene[trocar_1_cfg.name] + trocar_2 = env.scene[trocar_2_cfg.name] + + # Get default poses and velocities (local coordinates relative to env origin) + tray_default_pose = tray.data.default_root_pose.torch[env_ids].clone() + trocar_1_default_pose = trocar_1.data.default_root_pose.torch[env_ids].clone() + trocar_2_default_pose = trocar_2.data.default_root_pose.torch[env_ids].clone() + + env_origins = env.scene.env_origins[env_ids] # (num_envs, 3) + + # Convert local coordinate to world coordinate + tray_default_pose[:, :3] += env_origins + trocar_1_default_pose[:, :3] += env_origins + trocar_2_default_pose[:, :3] += env_origins + + # Tray center position (pivot point for rotation) - now in world coordinates + tray_center = tray_default_pose[:, :3] # (num_envs, 3) + + # Generate yaw angles (in radians) + # Convert degrees to radians + min_angle_rad = min_angle_deg * math.pi / 180.0 + max_angle_rad = max_angle_deg * math.pi / 180.0 + + # Generate angles uniformly distributed in [min_angle, max_angle] + if deterministic_per_env: + # Derive a stable "random" number per env id, so each env gets a distinct yaw, + # but it is repeatable across resets/runs given the same seed + env_id. + # + # If deterministic_seed is not provided, we tie it to torch's global seed. + # IsaacLab typically seeds torch during env reset with the provided seed. 
+ if deterministic_seed is None: + deterministic_seed = int(torch.initial_seed()) + u = _deterministic_uniform_0_1_from_ids(env, env_ids, deterministic_seed) # (num_envs,) + else: + u = torch.rand(len(env_ids), device=env.device) + random_yaw = u * (max_angle_rad - min_angle_rad) + min_angle_rad # (num_envs,) + + # Create rotation quaternion for yaw (rotation around Z-axis) + # XYZW: quat = [x, y, z, w] = [0, 0, sin(θ/2), cos(θ/2)] + half_angle = random_yaw / 2.0 + delta_quat = torch.zeros(len(env_ids), 4, device=env.device) + delta_quat[:, 2] = torch.sin(half_angle) # z + delta_quat[:, 3] = torch.cos(half_angle) # w + + # Apply rotation to tray quaternion + tray_new_quat = quat_mul(delta_quat, tray_default_pose[:, 3:7]) + + # Update tray pose + tray_new_pose = tray_default_pose.clone() + tray_new_pose[:, 3:7] = tray_new_quat + + # Rotate trocar positions around tray center + trocar_1_relative_pos = trocar_1_default_pose[:, :3] - tray_center + trocar_2_relative_pos = trocar_2_default_pose[:, :3] - tray_center + + # Rotate relative positions using the delta quaternion + trocar_1_new_relative_pos = quat_apply(delta_quat, trocar_1_relative_pos) + trocar_2_new_relative_pos = quat_apply(delta_quat, trocar_2_relative_pos) + + # New absolute poses + trocar_1_new_pose = trocar_1_default_pose.clone() + trocar_2_new_pose = trocar_2_default_pose.clone() + + trocar_1_new_pose[:, :3] = tray_center + trocar_1_new_relative_pos + trocar_2_new_pose[:, :3] = tray_center + trocar_2_new_relative_pos + + # Also rotate trocar orientations + trocar_1_new_pose[:, 3:7] = quat_mul(delta_quat, trocar_1_default_pose[:, 3:7]) + trocar_2_new_pose[:, 3:7] = quat_mul(delta_quat, trocar_2_default_pose[:, 3:7]) + + zero_velocity = torch.zeros(len(env_ids), 6, device=env.device) # [lin_vel(3), ang_vel(3)] + + tray.write_root_pose_to_sim_index(root_pose=tray_new_pose, env_ids=env_ids) + trocar_1.write_root_pose_to_sim_index(root_pose=trocar_1_new_pose, env_ids=env_ids) + 
def _deterministic_uniform_0_1_from_ids(
    env: ManagerBasedRLEnv,
    ids: torch.Tensor,
    seed: int,
) -> torch.Tensor:
    """Return one repeatable uniform sample in ``[0, 1)`` per requested env id.

    A table with ``env.num_envs`` entries is drawn from a local
    ``torch.Generator`` seeded with ``seed`` and stored on ``env`` as
    ``_deterministic_u_table_cache``. The table is rebuilt only when the seed,
    the number of envs, or the device string changes; otherwise the stored
    table is indexed directly.

    Args:
        env: Environment providing ``device`` and ``num_envs``.
        ids: Indices into the table (used as ``table[ids]``).
        seed: Seed for the local generator; masked to 64 bits before use.

    Returns:
        ``table[ids]``, taken from a float32 table created on ``env.device``.
    """
    table_size = int(env.num_envs)
    seed_value = int(seed)
    wanted_key = (seed_value, table_size, str(env.device))

    # Fast path: a table built for exactly this (seed, size, device) already exists.
    cached = getattr(env, "_deterministic_u_table_cache", None)
    if cached is not None and cached.get("key") == wanted_key:
        return cached["u_table"][ids]

    # A private generator keeps the draw independent of torch's global RNG state.
    rng = torch.Generator(device=env.device)
    rng.manual_seed(seed_value & 0xFFFFFFFFFFFFFFFF)
    samples = torch.rand((table_size,), generator=rng, device=env.device, dtype=torch.float32)
    env._deterministic_u_table_cache = {"key": wanted_key, "u_table": samples}
    return samples[ids]


def reset_robot_to_default_joint_positions(
    env: ManagerBasedRLEnv,
    env_ids: torch.Tensor,
    robot_cfg: SceneEntityCfg,
):
    """Write the robot's default joint and root state straight into the simulation.

    The default joint positions/velocities and the default root pose/velocity
    of the selected envs are cloned and written with the asset's
    ``write_*_to_sim_index`` methods. Per the original author's note, writing
    the state directly (instead of commanding targets) avoids the arms swinging
    from zero to the target position after a reset.

    Args:
        env: The environment instance.
        env_ids: Indices of the environments to reset; nothing happens when empty.
        robot_cfg: Scene entity config whose ``name`` selects the robot in ``env.scene``.
    """
    if not len(env_ids):
        return

    robot = env.scene[robot_cfg.name]
    defaults = robot.data

    # Joint state first: snapshot both defaults, then write them.
    joint_pos = defaults.default_joint_pos.torch[env_ids].clone()
    joint_vel = defaults.default_joint_vel.torch[env_ids].clone()
    robot.write_joint_position_to_sim_index(position=joint_pos, env_ids=env_ids)
    robot.write_joint_velocity_to_sim_index(velocity=joint_vel, env_ids=env_ids)

    # Then the root (base) pose and velocity.
    root_pose = defaults.default_root_pose.torch[env_ids].clone()
    root_vel = defaults.default_root_vel.torch[env_ids].clone()
    robot.write_root_pose_to_sim_index(root_pose=root_pose, env_ids=env_ids)
    robot.write_root_velocity_to_sim_index(root_velocity=root_vel, env_ids=env_ids)
# Joint-index tensors keyed by (index tuple, device string). Only the small index
# tensors are cached; outputs are freshly allocated on every call so that a
# tensor handed to a caller is never overwritten by a later call.
_JOINT_INDEX_CACHE: dict[tuple[tuple[int, ...], str], torch.Tensor] = {}


def _joint_index_tensor(indices, device: torch.device) -> torch.Tensor:
    """Return a cached ``torch.long`` tensor holding ``indices`` on ``device``."""
    key = (tuple(indices), str(device))
    index = _JOINT_INDEX_CACHE.get(key)
    if index is None:
        index = torch.tensor(key[0], dtype=torch.long, device=device)
        _JOINT_INDEX_CACHE[key] = index
    return index


def get_robot_body_joint_states(env: ManagerBasedRLEnv) -> torch.Tensor:
    """Return body joint states as a single tensor: ``[pos | vel | torque]``.

    The columns listed in ``G1_29DOF_BODY_JOINT_INDICES`` are selected from the
    robot's ``joint_pos``, ``joint_vel`` and ``applied_torque`` buffers and
    concatenated along dim 1, giving ``3 * len(indices)`` columns per env.

    Unlike a shared preallocated buffer, the result is a new tensor on every
    call, so values returned earlier stay valid after the next call.

    Args:
        env: Environment whose scene contains an entity named ``"robot"``.

    Returns:
        Tensor of shape ``(batch, 3 * len(G1_29DOF_BODY_JOINT_INDICES))``.
    """
    robot_data = env.scene["robot"].data
    joint_pos = robot_data.joint_pos.torch
    joint_vel = robot_data.joint_vel.torch
    joint_torque = robot_data.applied_torque.torch

    index = _joint_index_tensor(G1_29DOF_BODY_JOINT_INDICES, joint_pos.device)
    return torch.cat(
        (
            joint_pos.index_select(1, index),
            joint_vel.index_select(1, index),
            joint_torque.index_select(1, index),
        ),
        dim=1,
    )


def get_robot_dex3_joint_states(env: ManagerBasedRLEnv) -> torch.Tensor:
    """Return the Dex3 hand joint positions.

    Selects the columns listed in ``G1_DEX3_JOINT_INDICES`` from the robot's
    ``joint_pos`` buffer. A new tensor is returned on every call.

    Args:
        env: Environment whose scene contains an entity named ``"robot"``.

    Returns:
        Tensor of shape ``(batch, len(G1_DEX3_JOINT_INDICES))``.
    """
    joint_pos = env.scene["robot"].data.joint_pos.torch
    index = _joint_index_tensor(G1_DEX3_JOINT_INDICES, joint_pos.device)
    return joint_pos.index_select(1, index)
def _empty_tensor() -> torch.Tensor:
    """Build the zero-element placeholder used as default for every tensor field."""
    return torch.empty(0)


@dataclass
class AssembleTrocarState:
    """Per-environment bookkeeping for the assemble-trocar task.

    Groups the stage tracker, the reward caches and the debug throttle in one
    object. :func:`get_assemble_trocar_state` creates it on first use and
    stores it on the env as ``env.assemble_trocar_state``.

    Stage semantics:
        0 - Initial (need to lift)
        1 - Lifted (need to find hole / tip alignment)
        2 - Hole found (need to insert / push in)
        3 - Inserted (need to place)
        4 - Placed (task complete)
    """

    # Current stage index of every env.
    task_stage: torch.Tensor = field(default_factory=_empty_tensor)
    # Stage seen on the previous call of each sparse reward term (one tracker per term).
    prev_stage_lift: torch.Tensor = field(default_factory=_empty_tensor)
    prev_stage_tip: torch.Tensor = field(default_factory=_empty_tensor)
    prev_stage_insert: torch.Tensor = field(default_factory=_empty_tensor)
    prev_stage_place: torch.Tensor = field(default_factory=_empty_tensor)
    # Dense reward value frozen once the corresponding stage is finished.
    lift_reward_locked: torch.Tensor = field(default_factory=_empty_tensor)
    tip_reward_locked: torch.Tensor = field(default_factory=_empty_tensor)
    insertion_reward_locked: torch.Tensor = field(default_factory=_empty_tensor)
    placement_reward_locked: torch.Tensor = field(default_factory=_empty_tensor)
    # Root-to-tip offsets in the asset frame; stay None until they are first computed.
    tip_offset_trocar_1: torch.Tensor | None = None
    tip_offset_trocar_2: torch.Tensor | None = None
    # Step value at which debug output was last allowed (-1: never).
    last_debug_print_step: int = -1


def get_assemble_trocar_state(env: ManagerBasedRLEnv) -> AssembleTrocarState:
    """Return the :class:`AssembleTrocarState` stored on *env*, creating it if missing.

    On creation every stage tracker is a ``torch.long`` zero vector and every
    locked-reward cache a default-dtype zero vector, each of length
    ``env.num_envs`` on ``env.device``.
    """
    try:
        return env.assemble_trocar_state
    except AttributeError:
        pass

    def _per_env_zeros(dtype=None):
        return torch.zeros(env.num_envs, dtype=dtype, device=env.device)

    stage_fields = ("task_stage", "prev_stage_lift", "prev_stage_tip", "prev_stage_insert", "prev_stage_place")
    reward_fields = (
        "lift_reward_locked",
        "tip_reward_locked",
        "insertion_reward_locked",
        "placement_reward_locked",
    )
    # Every field receives its own tensor; nothing is shared between fields.
    initial = {name: _per_env_zeros(torch.long) for name in stage_fields}
    initial.update({name: _per_env_zeros() for name in reward_fields})

    env.assemble_trocar_state = AssembleTrocarState(**initial)
    return env.assemble_trocar_state


def get_task_stage(env: ManagerBasedRLEnv) -> torch.Tensor:
    """Return the per-env task stage tensor held in the env's task state."""
    state = get_assemble_trocar_state(env)
    return state.task_stage


def should_print_debug(env: ManagerBasedRLEnv, print_interval: int = 50, print_log: bool = True) -> bool:
    """Decide whether a caller may emit its periodic debug output now.

    Returns ``True`` only when ``print_log`` is set, the env has an
    ``episode_length_buf``, the step counter of env 0 is a non-zero multiple
    of ``print_interval``, and no earlier call was granted output for that
    same step value.

    NOTE(review): the throttle (``last_debug_print_step``) is shared by all
    callers, so only the first caller per step value gets ``True`` - confirm
    this is intended when several reward terms enable ``print_log``.

    Args:
        env: The environment instance.
        print_interval: Number of steps between two allowed outputs.
        print_log: Master switch; ``False`` disables output entirely.
    """
    if not (print_log and hasattr(env, "episode_length_buf")):
        return False

    # Only env 0's counter is consulted.
    step = env.episode_length_buf[0].item()
    on_interval = step != 0 and step % print_interval == 0
    if not on_interval:
        return False

    state = get_assemble_trocar_state(env)
    already_printed = state.last_debug_print_step == step
    if not already_printed:
        state.last_debug_print_step = step
    return not already_printed
+ """ + state = get_assemble_trocar_state(env) + stage = state.task_stage + + obj1: RigidObject = env.scene[asset_cfg1.name] + obj2: RigidObject = env.scene[asset_cfg2.name] + + pos1 = obj1.data.root_pos_w.torch + pos2 = obj2.data.root_pos_w.torch + quat1 = obj1.data.root_quat_w.torch + quat2 = obj2.data.root_quat_w.torch + # Store old stage to detect changes (BEFORE any stage transitions) + old_stage = stage.clone() + + # Stage 0 -> 1: Check if lifted + target_z = table_height + lift_threshold + is_lifted_1 = pos1[:, 2] > target_z + is_lifted_2 = pos2[:, 2] > target_z + both_lifted = is_lifted_1 & is_lifted_2 + stage = torch.where((stage == 0) & both_lifted, torch.ones_like(stage), stage) + + # Stage 1 -> 2: Check if tips are aligned (hole found) + # Get tip positions + tip_pos1 = get_trocar_tip_position(env, asset_cfg1) + tip_pos2 = get_trocar_tip_position(env, asset_cfg2) + tip_dist = torch.norm(tip_pos1 - tip_pos2, dim=-1) + + # Tip alignment success + tip_aligned = tip_dist < tip_align_threshold + stage = torch.where((stage == 1) & tip_aligned, torch.full_like(stage, 2), stage) + + # Stage 2 -> 3: Check if inserted (parallel + center close) + # Get center distance + center_dist = torch.norm(pos1 - pos2, dim=-1) + + # Check alignment + target_axis1 = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1) + target_axis2 = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1) + axis1 = quat_apply(quat1, target_axis1) + axis2 = quat_apply(quat2, target_axis2) + dot_prod = torch.sum(axis1 * axis2, dim=-1) + abs_dot = torch.clamp(torch.abs(dot_prod), max=1.0) + angle = torch.acos(abs_dot) + + # Insertion success: parallel + center close + is_parallel = angle < insertion_angle_threshold + center_close = center_dist < insertion_dist_threshold + is_inserted = is_parallel & center_close + + stage = torch.where((stage == 2) & is_inserted, torch.full_like(stage, 3), stage) + + # Stage 3 -> 4: Check if placed in target zone + # Get 
environment origins to handle multi-env spatial offsets + env_origins = env.scene.env_origins # shape: (num_envs, 3) + + # Adjust target zone relative to each environment's origin + curr_x_min = env_origins[:, 0] + min(placement_x_min, placement_x_max) # (num_envs,) + curr_x_max = env_origins[:, 0] + max(placement_x_min, placement_x_max) + curr_y_min = env_origins[:, 1] + min(placement_y_min, placement_y_max) + curr_y_max = env_origins[:, 1] + max(placement_y_min, placement_y_max) + + in_zone_1 = ( + (pos1[:, 0] >= curr_x_min) + & (pos1[:, 0] <= curr_x_max) + & (pos1[:, 1] >= curr_y_min) + & (pos1[:, 1] <= curr_y_max) + & (pos1[:, 2] < placement_z_min) + ) + in_zone_2 = ( + (pos2[:, 0] >= curr_x_min) + & (pos2[:, 0] <= curr_x_max) + & (pos2[:, 1] >= curr_y_min) + & (pos2[:, 1] <= curr_y_max) + & (pos2[:, 2] < placement_z_min) + ) + both_in_zone = in_zone_1 & in_zone_2 + stage = torch.where((stage == 3) & both_in_zone, torch.full_like(stage, 4), stage) + + # Print stage transitions (AFTER all stage transitions - always print when stage changes) + if print_log and (stage != old_stage).any(): + for env_id in range(env.num_envs): + if stage[env_id] != old_stage[env_id]: + logger.debug("Env %d: Stage %d → %d", env_id, old_stage[env_id].item(), stage[env_id].item()) + + state.task_stage = stage + return torch.zeros(env.num_envs, device=env.device) + + +def lift_trocars_reward( + env: ManagerBasedRLEnv, + table_height: float = 0.85483, + lift_threshold: float = 0.05, + asset_cfg1: SceneEntityCfg = SceneEntityCfg("trocar_1"), + asset_cfg2: SceneEntityCfg = SceneEntityCfg("trocar_2"), + use_sparse_reward: bool = True, + print_log: bool = False, +) -> torch.Tensor: + """Reward for lifting both trocars above the table. + + Only active in Stage 0. Once completed, this reward is locked at the achieved value. + + Args: + use_sparse_reward: If True, only give reward (1.0) when stage transitions from 0->1. + If False, give continuous reward based on current state. 
def get_trocar_tip_position(
    env: ManagerBasedRLEnv,
    asset_cfg: SceneEntityCfg = SceneEntityCfg("trocar_1"),
) -> torch.Tensor:
    """Compute the world position of a trocar's tip marker for every env.

    The root-to-tip offset in the asset frame is derived once from the USD
    prims of ``env_0`` (tip marker ``White_pos`` for ``trocar_1``, ``Red_pos``
    for ``trocar_2``) and cached on the task state under
    ``tip_offset_<asset name>``. Each call then rotates that offset by the
    asset's current root orientation and adds the current root position.

    Args:
        env: Environment instance.
        asset_cfg: Trocar asset configuration; ``name`` must be ``"trocar_1"``
            or ``"trocar_2"``.

    Returns:
        Tensor of shape ``(num_envs, 3)`` with the tip position in world coordinates.

    Raises:
        ValueError: If no cached offset exists yet and ``asset_cfg.name`` is not
            one of the two supported trocars.
    """
    from pxr import Gf, Usd, UsdGeom

    import isaaclab.utils.math as math_utils

    state = get_assemble_trocar_state(env)
    cache_name = f"tip_offset_{asset_cfg.name}"
    local_offset = getattr(state, cache_name, None)

    if local_offset is None:
        usd_stage = env.scene.stage

        # (tip marker prim, asset root prim) per supported trocar. Only env_0 is
        # inspected; the original author notes the offset is identical across
        # replicated envs.
        prim_paths = {
            "trocar_1": (
                "/World/envs/env_0/trocar_1/Trocar002/White_pos",
                "/World/envs/env_0/trocar_1",
            ),
            "trocar_2": (
                "/World/envs/env_0/trocar_2/DisposableLaparoscopicPunctureDevice001/Red_pos",
                "/World/envs/env_0/trocar_2",
            ),
        }
        if asset_cfg.name not in prim_paths:
            raise ValueError(f"Invalid asset configuration: {asset_cfg.name}")
        tip_path, root_path = prim_paths[asset_cfg.name]

        tip_prim = usd_stage.GetPrimAtPath(tip_path)
        root_prim = usd_stage.GetPrimAtPath(root_path)

        if tip_prim.IsValid():
            tip_to_world = UsdGeom.Xformable(tip_prim).ComputeLocalToWorldTransform(Usd.TimeCode.Default())
            root_to_world = UsdGeom.Xformable(root_prim).ComputeLocalToWorldTransform(Usd.TimeCode.Default())

            tip_translation = tip_to_world.ExtractTranslation()
            root_translation = root_to_world.ExtractTranslation()
            root_quat = root_to_world.ExtractRotationMatrix().ExtractRotation().GetQuat()

            # World-frame offset root -> tip, then rotated back into the root frame.
            offset_world = Gf.Vec3d(*(tip_translation[axis] - root_translation[axis] for axis in range(3)))
            offset_local_gf = root_quat.GetInverse().Transform(offset_world)

            local_offset = torch.tensor(
                [offset_local_gf[axis] for axis in range(3)],
                dtype=torch.float32,
                device=env.device,
            )

            logger.debug("Cached tip offset for %s: %s", asset_cfg.name, local_offset)
        else:
            # Missing marker prim: fall back to the root position itself (zero offset).
            logger.warning("Tip prim not found at %s, using zero offset", tip_path)
            local_offset = torch.zeros(3, dtype=torch.float32, device=env.device)

        setattr(state, cache_name, local_offset)

    trocar: RigidObject = env.scene[asset_cfg.name]
    root_pos_w = trocar.data.root_pos_w.torch  # (num_envs, 3)
    root_quat_w = trocar.data.root_quat_w.torch  # (num_envs, 4); XYZW per the original comment

    # Same local offset for every env, rotated by each env's current orientation.
    per_env_offset = local_offset.repeat(env.num_envs, 1)
    return root_pos_w + math_utils.quat_apply(root_quat_w, per_env_offset)
def trocar_insertion_reward(
    env: ManagerBasedRLEnv,
    angle_std: float = 0.2,  # Std for angle alignment reward
    angle_threshold: float = 0.15,  # Tolerance for parallelism (radians)
    center_dist_std: float = 0.05,  # Std for center distance reward
    asset_cfg1: SceneEntityCfg = SceneEntityCfg("trocar_1"),
    asset_cfg2: SceneEntityCfg = SceneEntityCfg("trocar_2"),
    use_sparse_reward: bool = True,
    print_log: bool = False,
) -> torch.Tensor:
    """Reward for inserting trocar_2 into trocar_1 (Stage 2: pushing in).

    Sparse mode (default): ``1 / env.step_dt`` on the call where this term's
    previous-stage tracker equals 2 and the task stage is already >= 3, zero
    otherwise. The tracker is refreshed on every call.

    Dense mode: a Gaussian on the angle exceeding ``angle_threshold`` times a
    Gaussian on the distance between the two root positions; the distance
    factor is zeroed while the axes are not parallel within the threshold.
    The value is zero before stage 2, live during stage 2, and replaced by a
    locked value once stage >= 3 (captured while the lock still holds 0).

    Args:
        env: Environment instance.
        angle_std: Standard deviation of the angle shaping term.
        angle_threshold: Angle (radians) below which the axes count as parallel.
        center_dist_std: Standard deviation of the center-distance shaping term.
        asset_cfg1: Configuration for trocar 1.
        asset_cfg2: Configuration for trocar 2.
        use_sparse_reward: Select sparse (``True``, default) or dense (``False``) mode.
        print_log: If True, emit throttled debug output while env 0 is in stage 2.

    Returns:
        torch.Tensor: Reward tensor (num_envs,)
    """
    state = get_assemble_trocar_state(env)
    stage = state.task_stage

    trocar_a: RigidObject = env.scene[asset_cfg1.name]
    trocar_b: RigidObject = env.scene[asset_cfg2.name]
    pos_a, quat_a = trocar_a.data.root_pos_w.torch, trocar_a.data.root_quat_w.torch
    pos_b, quat_b = trocar_b.data.root_pos_w.torch, trocar_b.data.root_quat_w.torch

    center_dist = torch.norm(pos_a - pos_b, dim=-1)

    # Rotate the local -Z axis of each trocar into the world frame and compare them.
    local_axis = torch.tensor([0.0, 0.0, -1.0], device=env.device).repeat(env.num_envs, 1)
    axis_a = quat_apply(quat_a, local_axis)
    axis_b = quat_apply(quat_b, local_axis)
    # abs(): anti-parallel axes count as aligned; clamp keeps acos in its domain.
    angle = torch.acos((axis_a * axis_b).sum(dim=-1).abs().clamp(max=1.0))
    is_parallel = angle < angle_threshold

    zeros = torch.zeros(env.num_envs, device=env.device)

    if not use_sparse_reward:
        excess_angle = (angle - angle_threshold).clamp(min=0.0)
        align_term = torch.exp(-torch.square(excess_angle) / (2 * angle_std**2))
        proximity_term = torch.exp(-torch.square(center_dist) / (2 * center_dist_std**2))
        proximity_term = torch.where(is_parallel, proximity_term, torch.zeros_like(proximity_term))
        dense_reward = align_term * proximity_term

        # Freeze the value for envs that are past stage 2 and whose lock is still 0.
        lock_now = (stage >= 3) & (state.insertion_reward_locked == 0)
        state.insertion_reward_locked = torch.where(lock_now, dense_reward, state.insertion_reward_locked)

        live_or_locked = torch.where(stage == 2, dense_reward, state.insertion_reward_locked)
        reward = torch.where(stage < 2, zeros, live_or_locked)
    else:
        just_completed = (state.prev_stage_insert == 2) & (stage >= 3)
        pulse = torch.ones(env.num_envs, device=env.device) / env.step_dt
        reward = torch.where(just_completed, pulse, zeros)
        state.prev_stage_insert = stage.clone()

    # Debug info
    if should_print_debug(env, print_log=print_log) and stage[0].item() == 2:
        logger.debug(
            " Stage 2 (Push In, %s): angle=%.3f | center_d=%.4f | is_parallel=%s | reward=%.3f",
            "Sparse" if use_sparse_reward else "Dense",
            angle[0].item(),
            center_dist[0].item(),
            is_parallel[0].item(),
            reward[0].item(),
        )

    return reward
+ + Returns: + torch.Tensor: Reward tensor (num_envs,) + """ + s = get_assemble_trocar_state(env) + stage = s.task_stage + + obj1: RigidObject = env.scene[asset_cfg1.name] + obj2: RigidObject = env.scene[asset_cfg2.name] + + pos1 = obj1.data.root_pos_w.torch + pos2 = obj2.data.root_pos_w.torch + env_origins = env.scene.env_origins + + curr_x_min = env_origins[:, 0] + min(x_min, x_max) + curr_x_max = env_origins[:, 0] + max(x_min, x_max) + curr_y_min = env_origins[:, 1] + min(y_min, y_max) + curr_y_max = env_origins[:, 1] + max(y_min, y_max) + + in_zone_1 = ( + (pos1[:, 0] >= curr_x_min) + & (pos1[:, 0] <= curr_x_max) + & (pos1[:, 1] >= curr_y_min) + & (pos1[:, 1] <= curr_y_max) + & (pos1[:, 2] < z_min) + ) + in_zone_2 = ( + (pos2[:, 0] >= curr_x_min) + & (pos2[:, 0] <= curr_x_max) + & (pos2[:, 1] >= curr_y_min) + & (pos2[:, 1] <= curr_y_max) + & (pos2[:, 2] < z_min) + ) + both_in_zone = in_zone_1 & in_zone_2 + + if use_sparse_reward: + stage_just_completed = (s.prev_stage_place == 3) & (stage >= 4) + reward = torch.where( + stage_just_completed, + torch.ones(env.num_envs, device=env.device) / env.step_dt, + torch.zeros(env.num_envs, device=env.device), + ) + s.prev_stage_place = stage.clone() + else: + placement_reward = both_in_zone.float() + s.placement_reward_locked = torch.where( + (stage >= 4) & (s.placement_reward_locked == 0), + placement_reward, + s.placement_reward_locked, + ) + reward = torch.where( + stage < 3, + torch.zeros(env.num_envs, device=env.device), + torch.where(stage == 3, placement_reward, s.placement_reward_locked), + ) + + # Debug info + if should_print_debug(env, print_log=print_log) and stage[0].item() == 3: + mode_str = "Sparse" if use_sparse_reward else "Dense" + logger.debug( + " Stage 3 (Placement, %s): in_zone=%s | z1=%.3f | z2=%.3f", + mode_str, + both_in_zone[0].item(), + pos1[0, 2], + pos2[0, 2], + ) + + return reward diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/assemble_trocar/mdp/terminations.py 
def object_drop_termination(
    env: ManagerBasedRLEnv,
    drop_height_threshold: float = 0.5,
    asset_cfg1: SceneEntityCfg = SceneEntityCfg("trocar_1"),
    asset_cfg2: SceneEntityCfg = SceneEntityCfg("trocar_2"),
    print_log: bool = False,
) -> torch.Tensor:
    """Flag environments in which at least one trocar fell below a height limit.

    Intended as a termination term, so the training framework performs the
    reset instead of an in-environment auto-reset.

    Args:
        env: The environment instance.
        drop_height_threshold: World Z value below which an object counts as dropped.
        asset_cfg1: Configuration for the first trocar.
        asset_cfg2: Configuration for the second trocar.
        print_log: If True, log how many environments triggered.

    Returns:
        Boolean tensor indicating which environments should terminate due to drops.
    """
    first: RigidObject = env.scene[asset_cfg1.name]
    second: RigidObject = env.scene[asset_cfg2.name]

    # Only the world-frame Z coordinate of each root matters here.
    height_1 = first.data.root_pos_w.torch[:, 2]
    height_2 = second.data.root_pos_w.torch[:, 2]
    dropped = (height_1 < drop_height_threshold) | (height_2 < drop_height_threshold)

    if print_log and dropped.any():
        logger.debug("Drop termination triggered for %d environment(s)", dropped.sum().item())

    return dropped


def task_success_termination(
    env: ManagerBasedRLEnv,
    success_stage: int = 4,
    print_log: bool = False,
) -> torch.Tensor:
    """Flag environments whose task stage has reached ``success_stage``.

    Args:
        env: The environment instance.
        success_stage: Stage value from which an env counts as complete (default 4).
        print_log: If True, log how many environments are complete.

    Returns:
        torch.Tensor: Boolean tensor indicating which environments should terminate (num_envs,)
    """
    reached_goal = get_task_stage(env) >= success_stage

    if print_log and reached_goal.any():
        logger.info("Task completed in %d environment(s)!", reached_goal.sum().item())

    return reached_goal