diff --git a/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py b/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py index 4433b824235..275ab1ca5ac 100644 --- a/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py +++ b/source/isaaclab_assets/isaaclab_assets/robots/universal_robots.py @@ -113,7 +113,7 @@ "shoulder_lift_joint": -1.5707, "elbow_joint": 1.5707, "wrist_1_joint": -1.5707, - "wrist_2_joint": 1.5707, + "wrist_2_joint": -1.5707, "wrist_3_joint": 0.0, } diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py new file mode 100644 index 00000000000..6c07c69c16e --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/__init__.py @@ -0,0 +1,42 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +import gymnasium as gym +import os + +from . import agents + +## +# Register Gym environments. +## + +## +# Joint Position Control +## + +gym.register( + id="Isaac-Lift-Cube-UR10SG-Short-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10SGCubeLiftEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10LiftCubePPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml", + }, + disable_env_checker=True, +) + +gym.register( + id="Isaac-Lift-Cube-UR10SG-Long-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10SGLongCubeLiftEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10LiftCubePPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml", + }, + disable_env_checker=True, +) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/__init__.py new file mode 100644 index 00000000000..2e924fbf1b1 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rl_games_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rl_games_ppo_cfg.yaml new file mode 100644 index 00000000000..339905b645d --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rl_games_ppo_cfg.yaml @@ -0,0 +1,84 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +params: + seed: 42 + + # environment wrapper clipping + env: + clip_observations: 100.0 + clip_actions: 100.0 + + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + network: + name: actor_critic + separate: False + space: + continuous: + mu_activation: None + sigma_activation: None + + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [256, 128, 64] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + load_checkpoint: False # flag which sets whether to load the checkpoint + load_path: '' # path to the checkpoint to load + + config: + name: UR10SG_lift + env_name: rlgpu + device: 'cuda:0' + device_name: 'cuda:0' + multi_gpu: False + ppo: True + mixed_precision: False + normalize_input: True + normalize_value: True + value_bootstrap: False + num_actors: -1 + reward_shaper: + scale_value: 0.01 + normalize_advantage: True + gamma: 0.99 + tau: 0.95 + learning_rate: 1e-4 + lr_schedule: adaptive + schedule_type: legacy + kl_threshold: 0.01 + score_to_win: 100000000 + max_epochs: 1500 + save_best_after: 100 + save_frequency: 50 + print_stats: True + grad_norm: 1.0 + entropy_coef: 0.001 + truncate_grads: True + e_clip: 0.2 + horizon_length: 24 + minibatch_size: 24576 + mini_epochs: 8 + critic_coef: 4 + clip_value: True + clip_actions: False + seq_len: 4 + bounds_loss_coef: 0.0001 diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 00000000000..f66727c1364 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class UR10LiftCubePPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 24 + max_iterations = 1500 + save_interval = 50 + experiment_name = "franka_lift" + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, + actor_hidden_dims=[256, 128, 64], + critic_hidden_dims=[256, 128, 64], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.006, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-4, + schedule="adaptive", + gamma=0.98, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/sb3_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/sb3_ppo_cfg.yaml new file mode 100644 index 00000000000..91ae4f0d9f0 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/sb3_ppo_cfg.yaml @@ -0,0 +1,34 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +# Reference: https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo.yml#L32 +seed: 42 + +# epoch * n_steps * nenvs: 500×512*8*8 +n_timesteps: 16384000 +policy: 'MlpPolicy' +n_steps: 64 +# mini batch size: num_envs * nsteps / nminibatches 2048×512÷2048 +batch_size: 192 +gae_lambda: 0.95 +gamma: 0.99 +n_epochs: 8 +ent_coef: 0.00 +vf_coef: 0.0001 +learning_rate: !!float 3e-4 +clip_range: 0.2 +policy_kwargs: + activation_fn: 'nn.ELU' + net_arch: + pi: [256, 128, 64] + vf: [256, 128, 64] +target_kl: 0.01 +max_grad_norm: 1.0 + +# # Uses VecNormalize class to normalize obs +# normalize_input: True +# # Uses VecNormalize class to normalize rew +# normalize_value: True +# clip_obs: 5 diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/skrl_ppo_cfg.yaml new file mode 100644 index 00000000000..1a7216cb6b4 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/agents/skrl_ppo_cfg.yaml @@ -0,0 +1,85 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [256, 128, 64] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [256, 128, 64] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html +agent: + class: PPO + rollouts: 24 + learning_epochs: 8 + mini_batches: 4 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 1.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.01 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.001 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 0.01 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "ur10sg_lift" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 36000 + environment_info: log diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/joint_pos_env_cfg.py new file mode 100644 index 00000000000..2d71ac74b15 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/ur10_SG/joint_pos_env_cfg.py @@ -0,0 +1,125 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_assets.robots.universal_robots import UR10_LONG_SUCTION_CFG, UR10_SHORT_SUCTION_CFG + +from isaaclab.assets import RigidObjectCfg, SurfaceGripperCfg +from isaaclab.sensors import FrameTransformerCfg +from isaaclab.sensors.frame_transformer.frame_transformer_cfg import OffsetCfg +from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg +from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg +from isaaclab.utils import configclass +from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR + +from isaaclab_tasks.manager_based.manipulation.lift import mdp +from isaaclab_tasks.manager_based.manipulation.lift.lift_env_cfg import LiftEnvCfg + +## +# Pre-defined configs +## +from isaaclab.markers.config import FRAME_MARKER_CFG # isort: skip + + +@configclass +class UR10SGCubeLiftEnvCfg(LiftEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + + # Set UR10 as robot + self.scene.robot = UR10_SHORT_SUCTION_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + + # define the surface gripper + self.scene.surface_gripper = SurfaceGripperCfg( + prim_path="{ENV_REGEX_NS}/Robot/ee_link/SurfaceGripper", + max_grip_distance=0.05, + shear_force_limit=5000.0, + coaxial_force_limit=5000.0, + retry_interval=0.05, + ) + + # Set actions for the specific robot type (UR10) + self.actions.arm_action = mdp.JointPositionActionCfg( + asset_name="robot", joint_names=[".*_joint"], scale=0.5, use_default_offset=True + ) + self.actions.gripper_action = mdp.SurfaceGripperBinaryActionCfg( + asset_name="surface_gripper", + open_command=-1.0, + close_command=1.0, + ) + # Set the body name for the end effector + self.commands.object_pose.body_name = "ee_link" + + # Set Cube as object + self.scene.object = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/Object", + init_state=RigidObjectCfg.InitialStateCfg(pos=[0.5, 0, 0.055], rot=[1, 0, 0, 0]), + spawn=UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Blocks/DexCube/dex_cube_instanceable.usd", + scale=(0.8, 0.8, 0.8), + rigid_props=RigidBodyPropertiesCfg( + solver_position_iteration_count=16, + solver_velocity_iteration_count=1, + max_angular_velocity=1000.0, + max_linear_velocity=1000.0, + max_depenetration_velocity=5.0, + disable_gravity=False, + ), + ), + ) + + # Listens to the required transforms + marker_cfg = FRAME_MARKER_CFG.copy() + marker_cfg.markers["frame"].scale = (0.1, 0.1, 0.1) + marker_cfg.prim_path = "/Visuals/FrameTransformer" + + self.scene.ee_frame = FrameTransformerCfg( + prim_path="{ENV_REGEX_NS}/Robot/base_link", + debug_vis=True, + visualizer_cfg=marker_cfg, + target_frames=[ + FrameTransformerCfg.FrameCfg( + prim_path="{ENV_REGEX_NS}/Robot/ee_link", + name="end_effector", + offset=OffsetCfg( + pos=[0.1585, 0.0, 0.0], + ), + ), + ], + ) + + +@configclass +class UR10SGCubeLiftEnvCfg_PLAY(UR10SGCubeLiftEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + # make a smaller scene for play + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + # disable randomization for play + self.observations.policy.enable_corruption = False + + +@configclass +class UR10SGLongCubeLiftEnvCfg(UR10SGCubeLiftEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + + # Set UR10 as robot + self.scene.robot = UR10_LONG_SUCTION_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + + +@configclass +class UR10SGLongCubeLiftEnvCfg_PLAY(UR10SGLongCubeLiftEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + # make a smaller scene for play + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + # disable randomization for play + self.observations.policy.enable_corruption = False diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/lift_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/lift_env_cfg.py index 3a4f458854d..0d0b6a735ec 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/lift_env_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/lift_env_cfg.py @@ -89,7 +89,7 @@ class ActionsCfg: # will be set by agent env cfg arm_action: mdp.JointPositionActionCfg | mdp.DifferentialInverseKinematicsActionCfg = MISSING - gripper_action: mdp.BinaryJointPositionActionCfg = MISSING + gripper_action: mdp.BinaryJointPositionActionCfg | mdp.SurfaceGripperBinaryActionCfg = MISSING @configclass