However, I get one strange bug (it only happens for pick_and_place, not for reach, push, or slide), so I suspect the problem is related to the gripper. I set the control_type to "end_effector", create the env, and call env.step(ee_pos), where ee_pos is exactly the current pose, but the robot doesn't move at all. I then found that the set_ee_pose() function returned False with the message "ee pose could not be set". Since the pose I pass in is exactly the current pose, I cannot understand why MoveIt cannot plan a path to reach it. Could some collision be happening within the robot?
#!/usr/bin/env python3
"""A small example script that shows how to use the `ros_gazebo_gym`_ package to train a
Soft Actor-Critic (SAC) agent to solve a `Panda`_ environment using `Stable Baselines3`_.
.. _ros_gazebo_gym: https://github.com/rickstaa/ros-gazebo-gym
"""
import os
import re
import time
import gymnasium as gym
import numpy as np
import rospkg
import rospy
import torch
from ros_gazebo_gym.common.helpers import (
list_2_human_text,
to_pascal_case,
to_snake_case,
)
from ros_gazebo_gym.core.helpers import ros_exit_gracefully
from ros_gazebo_gym.task_envs.task_envs_list import ENVS
from stable_baselines3 import SAC
import pickle
if __name__ == "__main__":  # noqa: C901
    # Playground entry point. Creates the requested ros_gazebo_gym Panda task
    # environment, resets it, and sends a single action that is built from the
    # end-effector pose the robot reports right after the reset.
    rospy.init_node(
        "playground",
        anonymous=True,
    )

    # Retrieve input arguments.
    # NOTE: The second argument of 'rospy.get_param' is the value that is returned
    # when the parameter is not set on the parameter server.
    control_type = rospy.get_param("~control_type", "effort")
    try:
        env_type = rospy.get_param("~environment_type")
    except KeyError:
        env_type = "slide"
    else:
        # Only echo the environment type when it was explicitly supplied.
        print("Env Type: ", env_type)

    # Resolve the requested task to the first registered environment whose name
    # starts with 'Panda<EnvType>'.
    env_prefix = f"Panda{to_pascal_case(env_type)}"
    print(ENVS.keys())
    print(env_prefix)
    matching_envs = [env for env in ENVS.keys() if env.startswith(env_prefix)]
    if matching_envs:
        env_id = "ros_gazebo_gym:" + matching_envs[0]
        print("env_id: ", env_id)
    else:
        # No match: list every valid option in the error message and shut down.
        valid_env_ids = list(ENVS.keys())
        valid_env_cmds = [
            re.sub(r"panda_|(-v\d+)", "", to_snake_case(env)) for env in valid_env_ids
        ]
        valid_env_str = [
            f"'{cmd}' (ros_gazebo_gym:{env_name})"
            for cmd, env_name in zip(valid_env_cmds, valid_env_ids)
        ]
        rospy.logerr(
            f"Could not find 'environment_type' '{env_prefix}'. "
            f"Valid options are: {list_2_human_text(valid_env_str)}."
        )
        # NOTE(review): The code below relies on this call not returning, since
        # 'env_id' is undefined on this path - confirm.
        ros_exit_gracefully(
            shutdown_message=f"Shutting down {rospy.get_name()}", exit_code=1
        )

    # Initialize the ros_gazebo_gym Panda environment.
    rospy.loginfo(f"Creating ros_gazebo_gym '{env_id}' gymnasium environment...")
    env = gym.make(
        env_id,
        control_type=control_type,
        action_space_dtype=np.float32,
        observation_space_dtype=np.float32,
    )

    # Initialize the logging system.
    rospack = rospkg.RosPack()
    pkg_path = rospack.get_path("ros_gazebo_gym_examples")
    outdir = os.path.join(pkg_path, "training_results")
    last_time_steps = np.ndarray(0)

    # Load parameters from the ROS param server.
    # NOTE: Parameters are stored in a yaml files inside the config directory and
    # loaded at runtime by the launch file.
    alpha = rospy.get_param("/ros_gazebo_gym_panda_example_params/alpha")
    gamma = rospy.get_param("/ros_gazebo_gym_panda_example_params/gamma")
    n_episodes = rospy.get_param("/ros_gazebo_gym_panda_example_params/n_episodes")
    n_steps = rospy.get_param("/ros_gazebo_gym_panda_example_params/n_steps")
    inference = rospy.get_param("/ros_gazebo_gym_panda_example_params/inference")
    inference_n_episodes = rospy.get_param(
        "/ros_gazebo_gym_panda_example_params/inference_n_episodes"
    )
    total_timesteps = n_steps * n_episodes

    # Reset the environment and command the pose the end effector already has.
    obs, _ = env.reset()
    ee = env.get_ee_pose().pose
    # NOTE(review): The last two entries (0.08 and 5) are presumably the gripper
    # width and the gripper max effort - confirm against the environment's action
    # space before relying on this.
    action = np.array(
        [
            ee.position.x,
            ee.position.y,
            ee.position.z,
            ee.orientation.x,
            ee.orientation.y,
            ee.orientation.z,
            ee.orientation.w,
            0.08,
            5,
        ]
    )
    obs, _, _, _, _ = env.step(action)
<!--
This launch file can be used to train a Soft Actor-Critic (SAC) algorithm on the panda task environments found in the
ros-gazebo-gym package. The training parameters can be set in the
ros-gazebo-gym-examples/config/panda_example_training_params.yaml file.
Control arguments:
- control_type: The control type used for controlling the panda robot (Options: trajectory, position, effort, end_effector).
- environment_type: The panda task environment (Options: Reach, PickAndPlace, Slide, Push).
-->
<launch>
  <!-- Control arguments -->

  <!-- control_type: control type used for the panda robot.
       Options: trajectory, position, effort, end_effector -->
  <arg name="control_type" default="end_effector"/>

  <!-- environment_type: the panda task environment.
       NOTE: Options: Reach, PickAndPlace, Slide, Push
       Valid options are: 'reach' (ros_gazebo_gym:PandaReach-v1), 'pick_and_place' (ros_gazebo_gym:PandaPickAndPlace-v1),
       'push' (ros_gazebo_gym:PandaPush-v1) & 'slide' (ros_gazebo_gym:PandaSlide-v1). -->
  <arg name="environment_type" default="pick_and_place"/>

  <!-- Load the ros_gazebo_gym panda environment training parameters. -->
  <include file="$(find ros_gazebo_gym_examples)/launch/load_panda_example_training_params.launch.xml"/>

  <!-- Start the playground node and pass both arguments to it as node parameters. -->
  <node name="playground" pkg="ros_gazebo_gym_examples" type="playground.py" output="screen">
    <param name="control_type" value="$(arg control_type)"/>
    <param name="environment_type" value="$(arg environment_type)"/>
  </node>
</launch>