H1 RewardsCfg 설정
Reward Function
Reward Configuration Code
@configclass
class H1Rewards(RewardsCfg):
"""Reward terms for the MDP."""
termination_penalty = RewTerm(func=mdp.is_terminated, weight=-200.0)
lin_vel_z_l2 = None
track_lin_vel_xy_exp = RewTerm(
func=mdp.track_lin_vel_xy_yaw_frame_exp,
weight=1.0,
params={"command_name": "base_velocity", "std": 0.5},
)
track_ang_vel_z_exp = RewTerm(
func=mdp.track_ang_vel_z_world_exp, weight=1.0, params={"command_name": "base_velocity", "std": 0.5}
)
feet_air_time = RewTerm(
func=mdp.feet_air_time_positive_biped,
weight=0.25,
params={
"command_name": "base_velocity",
"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*ankle_link"),
"threshold": 0.4,
},
)
feet_slide = RewTerm(
func=mdp.feet_slide,
weight=-0.25,
params={
"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*ankle_link"),
"asset_cfg": SceneEntityCfg("robot", body_names=".*ankle_link"),
},
)
# Penalize ankle joint limits
dof_pos_limits = RewTerm(
func=mdp.joint_pos_limits, weight=-1.0, params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*_ankle")}
)
# Penalize deviation from default of the joints that are not essential for locomotion
joint_deviation_hip = RewTerm(
func=mdp.joint_deviation_l1,
weight=-0.2,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*_hip_yaw", ".*_hip_roll"])},
)
joint_deviation_arms = RewTerm(
func=mdp.joint_deviation_l1,
weight=-0.2,
params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*_shoulder_.*", ".*_elbow"])},
)
joint_deviation_torso = RewTerm(
func=mdp.joint_deviation_l1, weight=-0.1, params={"asset_cfg": SceneEntityCfg("robot", joint_names="torso")}
Reward Function Code
def feet_air_time(
env: ManagerBasedRLEnv, command_name: str, sensor_cfg: SceneEntityCfg, threshold: float
) -> torch.Tensor:
"""Reward long steps taken by the feet using L2-kernel.
This function rewards the agent for taking steps that are longer than a threshold. This helps ensure
that the robot lifts its feet off the ground and takes steps. The reward is computed as the sum of
the time for which the feet are in the air.
If the commands are small (i.e. the agent is not supposed to take a step), then the reward is zero.
"""
# extract the used quantities (to enable type-hinting)
contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
# compute the reward
first_contact = contact_sensor.compute_first_contact(env.step_dt)[:, sensor_cfg.body_ids]
last_air_time = contact_sensor.data.last_air_time[:, sensor_cfg.body_ids]
reward = torch.sum((last_air_time - threshold) * first_contact, dim=1)
# no reward for zero command
reward *= torch.norm(env.command_manager.get_command(command_name)[:, :2], dim=1) > 0.1
return reward
def feet_air_time_positive_biped(env, command_name: str, threshold: float, sensor_cfg: SceneEntityCfg) -> torch.Tensor:
"""Reward long steps taken by the feet for bipeds.
This function rewards the agent for taking steps up to a specified threshold and also keep one foot at
a time in the air.
If the commands are small (i.e. the agent is not supposed to take a step), then the reward is zero.
"""
contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
# compute the reward
air_time = contact_sensor.data.current_air_time[:, sensor_cfg.body_ids]
contact_time = contact_sensor.data.current_contact_time[:, sensor_cfg.body_ids]
in_contact = contact_time > 0.0
in_mode_time = torch.where(in_contact, contact_time, air_time)
single_stance = torch.sum(in_contact.int(), dim=1) == 1
reward = torch.min(torch.where(single_stance.unsqueeze(-1), in_mode_time, 0.0), dim=1)[0]
reward = torch.clamp(reward, max=threshold)
# no reward for zero command
reward *= torch.norm(env.command_manager.get_command(command_name)[:, :2], dim=1) > 0.1
return reward
def feet_slide(env, sensor_cfg: SceneEntityCfg, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Penalize feet sliding.
This function penalizes the agent for sliding its feet on the ground. The reward is computed as the
norm of the linear velocity of the feet multiplied by a binary contact sensor. This ensures that the
agent is penalized only when the feet are in contact with the ground.
"""
# Penalize feet sliding
contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
contacts = contact_sensor.data.net_forces_w_history[:, :, sensor_cfg.body_ids, :].norm(dim=-1).max(dim=1)[0] > 1.0
asset = env.scene[asset_cfg.name]
body_vel = asset.data.body_lin_vel_w[:, asset_cfg.body_ids, :2]
reward = torch.sum(body_vel.norm(dim=-1) * contacts, dim=1)
return reward
def track_lin_vel_xy_yaw_frame_exp(
env, std: float, command_name: str, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")
) -> torch.Tensor:
"""Reward tracking of linear velocity commands (xy axes) in the gravity aligned robot frame using exponential kernel."""
# extract the used quantities (to enable type-hinting)
asset = env.scene[asset_cfg.name]
vel_yaw = quat_apply_inverse(yaw_quat(asset.data.root_quat_w), asset.data.root_lin_vel_w[:, :3])
lin_vel_error = torch.sum(
torch.square(env.command_manager.get_command(command_name)[:, :2] - vel_yaw[:, :2]), dim=1
)
return torch.exp(-lin_vel_error / std**2)
def track_ang_vel_z_world_exp(
env, command_name: str, std: float, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")
) -> torch.Tensor:
"""Reward tracking of angular velocity commands (yaw) in world frame using exponential kernel."""
# extract the used quantities (to enable type-hinting)
asset = env.scene[asset_cfg.name]
ang_vel_error = torch.square(env.command_manager.get_command(command_name)[:, 2] - asset.data.root_ang_vel_w[:, 2])
return torch.exp(-ang_vel_error / std**2)
def stand_still_joint_deviation_l1(
env, command_name: str, command_threshold: float = 0.06, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")
) -> torch.Tensor:
"""Penalize offsets from the default joint positions when the command is very small."""
command = env.command_manager.get_command(command_name)
# Penalize motion when command is nearly zero.
return mdp.joint_deviation_l1(env, asset_cfg) * (torch.norm(command[:, :2], dim=1) < command_threshold)