Comments (3)
Hi @zhaohubo
Yes, if the ActionType
is one of the following three:
gym-pybullet-drones/gym_pybullet_drones/utils/enums.py
Lines 35 to 39 in 3634125
from gym-pybullet-drones.
Hi @JacopoPan
I have successfully conducted experiments with two drones. Now, I want to expand my experiments by including more drones, for example, three in total. Here is the modified code I have implemented. When the ActionType is set to "one_d_rpm", the leader-follower functionality works successfully. However, when I try to implement 3D flight by changing the ActionType to "rpm/pid/vel", the training results are not satisfactory. I'm wondering if there is an error in my model modifications or if the algorithm parameters need to be adjusted. It would be greatly appreciated if you could assist me with this as it is crucial for my project.
`
import sys
import os
import time
import argparse
from datetime import datetime
import subprocess
import pdb
import math
import numpy as np
import pybullet as p
import pickle
import matplotlib.pyplot as plt
import gym
from gym import error, spaces, utils
from gym.utils import seeding
from gym.spaces import Box, Dict
import torch
import torch.nn as nn
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork
import ray
from ray import tune
from ray.tune.logger import DEFAULT_LOGGERS
from ray.tune import register_env
from ray.rllib.agents import ppo
from ray.rllib.agents.ppo import PPOTrainer, PPOTFPolicy
from ray.rllib.examples.policy.random_policy import RandomPolicy
from ray.rllib.utils.test_utils import check_learning_achieved
from ray.rllib.agents.callbacks import DefaultCallbacks
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models import ModelCatalog
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.env.multi_agent_env import ENV_STATE
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from gym_pybullet_drones.envs.BaseAviary import DroneModel, Physics
from gym_pybullet_drones.envs.multi_agent_rl.FlockAviary import FlockAviary
from gym_pybullet_drones.envs.multi_agent_rl.LeaderFollowerAviary import LeaderFollowerAviary
from gym_pybullet_drones.envs.multi_agent_rl.MeetupAviary import MeetupAviary
from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType
from gym_pybullet_drones.utils.Logger import Logger
import shared_constants
# Length of one drone's own observation vector. Left as None here and assigned
# in the script body once the ObservationType has been parsed.
OWN_OBS_VEC_SIZE = None # Modified at runtime
# Length of one drone's action vector. Left as None here and assigned in the
# script body once the ActionType has been parsed.
ACTION_VEC_SIZE = None # Modified at runtime
class CustomTorchCentralizedCriticModel(TorchModelV2, nn.Module):
    """Torch model with separate action and value networks.

    The action network is fed only the ``own_obs`` entry of the observation,
    whereas the value network is fed the ``obs_flat`` entry of the input dict
    and produces a single output per sample.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the action and value sub-networks.

        The arguments are forwarded unchanged to ``TorchModelV2.__init__``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        # The action branch has its own input space sized by OWN_OBS_VEC_SIZE.
        own_obs_space = Box(low=-1, high=1, shape=(OWN_OBS_VEC_SIZE,))
        self.action_model = FullyConnectedNetwork(
            own_obs_space, action_space, num_outputs, model_config, name + "_action"
        )

        # The value branch uses the model's full observation space and emits
        # one output.
        self.value_model = FullyConnectedNetwork(
            obs_space, action_space, 1, model_config, name + "_vf"
        )

        # Inputs of the latest forward() call, reused by value_function().
        self._model_in = None

    def forward(self, input_dict, state, seq_lens):
        """Run the action network on ``own_obs`` and cache the inputs.

        Returns whatever the action network returns for the given inputs.
        """
        self._model_in = [input_dict["obs_flat"], state, seq_lens]
        own_obs = input_dict["obs"]["own_obs"]
        return self.action_model({"obs": own_obs}, state, seq_lens)

    def value_function(self):
        """Return the value network's output for the cached inputs, flattened to 1-D."""
        flat_obs, cached_state, cached_seq_lens = self._model_in
        value_out, _ = self.value_model({"obs": flat_obs}, cached_state, cached_seq_lens)
        return torch.reshape(value_out, [-1])
class FillInActions(DefaultCallbacks):
    """Callback that writes the other agents' actions into a training batch."""

    def on_postprocess_trajectory(self, worker, episode, agent_id, policy_id, policies, postprocessed_batch,
                                  original_batches, **kwargs):
        """Overwrite the trailing observation columns with the opponents' actions.

        For every agent other than ``agent_id``, the actions stored in its
        batch are clipped to [-1, 1], encoded with the preprocessor of a
        ``Box(-1, 1, (ACTION_VEC_SIZE,))`` space, and laid out side by side so
        that row ``t`` holds every opponent's action at step ``t``. The result
        replaces the last ``(num_agents - 1) * ACTION_VEC_SIZE`` columns of
        ``postprocessed_batch[SampleBatch.CUR_OBS]`` in place.

        Args:
            agent_id: id of the agent whose batch is being post-processed.
            postprocessed_batch: batch whose current observations are updated.
            original_batches: indexable by agent id; each entry unpacks to a
                pair whose second element is that agent's batch. Agent ids are
                assumed to be the integers ``0 .. len(original_batches) - 1``.

        The remaining parameters are accepted for the callback signature and
        are not used.
        """
        to_update = postprocessed_batch[SampleBatch.CUR_OBS]
        action_encoder = ModelCatalog.get_preprocessor_for_space(
            Box(-1, 1, (ACTION_VEC_SIZE,), np.float32)  # Bounded action space
        )

        per_opponent_actions = []
        for other_id in range(len(original_batches)):
            if other_id == agent_id:
                continue
            _, opponent_batch = original_batches[other_id]
            encoded = np.array(
                [action_encoder.transform(np.clip(a, -1, 1)) for a in opponent_batch[SampleBatch.ACTIONS]]
            )
            # One row per timestep, one column per action component.
            per_opponent_actions.append(encoded.reshape(-1, ACTION_VEC_SIZE))

        if not per_opponent_actions:
            # No opponents: there is nothing to fill in.
            return

        # Join along the column axis so each row stays a single timestep.
        # Stacking to (opponents, steps, actions) and reshaping to
        # (-1, opponents * actions) would instead place several timesteps of
        # the same opponent on one row as soon as there are two opponents.
        opponent_actions = np.concatenate(per_opponent_actions, axis=1)

        # NOTE(review): this assumes the flattened observation ends with
        # (num_agents - 1) * ACTION_VEC_SIZE entries reserved for opponent
        # actions -- confirm against the observation space in use.
        to_update[:, -opponent_actions.shape[1]:] = opponent_actions
def central_critic_observer(agent_obs, **kw):
    """Build every agent's centralized-critic observation.

    Works for any number of agents; with exactly three agents (ids 0, 1, 2)
    the result has keys "key1" and "key2" under "opponent_obs" for each agent.

    Args:
        agent_obs: mapping from agent id to that agent's own observation.
        **kw: accepted and ignored.

    Returns:
        A dict keyed by agent id. Each value is a dict with
        "own_obs" (the agent's own observation),
        "opponent_obs" (a dict "key1", "key2", ... holding the other agents'
        observations in ascending agent-id order), and
        "opponent_action" (a zero vector of length ACTION_VEC_SIZE).
    """
    agent_ids = sorted(agent_obs)
    new_obs = {}
    for agent_id in agent_ids:
        other_ids = [other for other in agent_ids if other != agent_id]
        new_obs[agent_id] = {
            "own_obs": agent_obs[agent_id],
            "opponent_obs": {
                "key{}".format(rank): agent_obs[other]
                for rank, other in enumerate(other_ids, start=1)
            },
            "opponent_action": np.zeros(ACTION_VEC_SIZE),  # Filled in by FillInActions
        }
    return new_obs
if __name__ == "__main__":
    # Script body: parse the arguments, build the environment and the
    # centralized-critic PPO configuration, train with Ray Tune, and write
    # the path of the best checkpoint to the results folder.

    #### Define and parse (optional) arguments for the script ##
    parser = argparse.ArgumentParser(description='Multi-agent reinforcement learning experiments script')
    parser.add_argument('--num_drones', default=3, type=int, help='Number of drones (default: 3)', metavar='')
    parser.add_argument('--env', default='leaderfollower', type=str, choices=['leaderfollower', 'flock', 'meetup'],
                        help='Help (default: ..)', metavar='')
    parser.add_argument('--obs', default='kin', type=ObservationType, help='Help (default: ..)', metavar='')
    parser.add_argument('--act', default='pid', type=ActionType, help='Help (default: ..)', metavar='')
    parser.add_argument('--algo', default='cc', type=str, choices=['cc'], help='Help (default: ..)', metavar='')
    parser.add_argument('--workers', default=0, type=int, help='Help (default: ..)', metavar='')
    ARGS = parser.parse_args()

    #### Save directory ########################################
    filename = (os.path.dirname(os.path.abspath(__file__)) + '/results/save-' + ARGS.env + '-'
                + str(ARGS.num_drones) + '-' + ARGS.algo + '-' + ARGS.obs.value + '-' + ARGS.act.value + '-'
                + datetime.now().strftime("%m.%d.%Y_%H.%M.%S"))
    os.makedirs(filename + '/', exist_ok=True)

    #### Print out current git commit hash #####################
    git_commit = subprocess.check_output(["git", "describe", "--tags"]).strip()
    with open(filename + '/git_commit.txt', 'w+') as f:
        f.write(str(git_commit))

    #### Constants, and errors #################################
    if ARGS.obs == ObservationType.KIN:
        OWN_OBS_VEC_SIZE = 12
    elif ARGS.obs == ObservationType.RGB:
        print("[ERROR] ObservationType.RGB for multi-agent systems not yet implemented")
        exit()
    else:
        print("[ERROR] unknown ObservationType")
        exit()
    if ARGS.act in [ActionType.ONE_D_RPM, ActionType.ONE_D_DYN, ActionType.ONE_D_PID]:
        ACTION_VEC_SIZE = 1
    elif ARGS.act in [ActionType.RPM, ActionType.DYN, ActionType.VEL]:
        ACTION_VEC_SIZE = 4
    elif ARGS.act == ActionType.PID:
        ACTION_VEC_SIZE = 3
    else:
        print("[ERROR] unknown ActionType")
        exit()

    #### Uncomment to debug slurm scripts ######################
    # exit()

    #### Initialize Ray Tune ###################################
    ray.shutdown()
    ray.init(ignore_reinit_error=True)

    #### Register the custom centralized critic model ##########
    ModelCatalog.register_custom_model("cc_model", CustomTorchCentralizedCriticModel)

    #### Register the environment ##############################
    temp_env_name = "this-aviary-v0"
    env_classes = {
        'flock': FlockAviary,
        'leaderfollower': LeaderFollowerAviary,
        'meetup': MeetupAviary,
    }
    if ARGS.env not in env_classes:
        print("[ERROR] environment not yet implemented")
        exit()
    env_class = env_classes[ARGS.env]
    # The same constructor arguments are used for the registered environment
    # and for the throwaway instance created below.
    env_kwargs = {
        "num_drones": ARGS.num_drones,
        "aggregate_phy_steps": shared_constants.AGGR_PHY_STEPS,
        "obs": ARGS.obs,
        "act": ARGS.act,
    }
    register_env(temp_env_name, lambda _: env_class(**env_kwargs))

    #### Unused env to extract the act and obs spaces ##########
    temp_env = env_class(**env_kwargs)
    # One "keyN" entry per other drone: "key1" .. "key<num_drones - 1>".
    # NOTE(review): "opponent_action" is as wide as one agent's action, while
    # FillInActions overwrites the last (num_agents - 1) * ACTION_VEC_SIZE
    # observation entries -- TODO confirm the widths agree when there is more
    # than one opponent.
    observer_space = Dict({
        "own_obs": temp_env.observation_space[0],
        "opponent_obs": Dict(
            {
                "key{}".format(k): temp_env.observation_space[0]
                for k in range(1, ARGS.num_drones)
            }
        ),
        "opponent_action": temp_env.action_space[0],
    })
    action_space = temp_env.action_space[0]

    #### Note ##################################################
    # RLlib will create ``num_workers + 1`` copies of the
    # environment since one copy is needed for the driver process.
    # To avoid paying the extra overhead of the driver copy,
    # which is needed to access the env's action and observation spaces,
    # you can defer environment initialization until ``reset()`` is called

    #### Set up the trainer's config ###########################
    # For the default config, see github.com/ray-project/ray/blob/master/rllib/agents/trainer.py
    config = {
        "env": temp_env_name,
        "num_workers": 0 + ARGS.workers,
        "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),  # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0
        "batch_mode": "complete_episodes",
        "callbacks": FillInActions,
        "framework": "torch",
    }

    #### Set up the model parameters of the trainer's config ###
    config["model"] = {
        "custom_model": "cc_model",
    }

    #### Set up the multiagent params of the trainer's config ##
    config["multiagent"] = {
        # One policy per drone: "pol0", "pol1", ...
        "policies": {
            "pol{}".format(i): (None, observer_space, action_space, {"agent_id": i, })
            for i in range(ARGS.num_drones)
        },
        "policy_mapping_fn": lambda x: "pol{}".format(x),  # Function mapping agent ids to policy ids
        "observation_fn": central_critic_observer,  # See rllib/evaluation/observation_function.py for more info
    }

    #### Ray Tune stopping conditions ##########################
    stop = {
        "timesteps_total": 500000,  # 100000 ~= 10'
        # "episode_reward_mean": -50,
        # "training_iteration": 0,
    }

    #### Train #################################################
    results = tune.run(
        "PPO",
        stop=stop,
        config=config,
        verbose=True,
        checkpoint_at_end=True,
        local_dir=filename,
    )
    # check_learning_achieved(results, 1.0)

    #### Save agent ############################################
    best_trial = results.get_best_trial('episode_reward_mean', mode='max')
    checkpoints = results.get_trial_checkpoints_paths(trial=best_trial, metric='episode_reward_mean')
    with open(filename + '/checkpoint.txt', 'w+') as f:
        f.write(checkpoints[0][0])

    #### Shut down Ray #########################################
    ray.shutdown()
from gym-pybullet-drones.
Hi @zhaohubo
I haven't tried a 3 drone example but, as you are doing, of course, you should start by extending the centralized critic.
If it works in the 1D case, I'd think that part is correct. However, the 3D case with `rpm` inputs might be too difficult; what are the results with `pid`? Have you modified the reward function as well?
from gym-pybullet-drones.
Related Issues (20)
- RPM Motor Mapping HOT 6
- Some camera associated issues HOT 1
- What does pycffirmware do HOT 1
- run learn.py
- rgb and GL HOT 2
- path planning algorithms HOT 1
- Ctrl Freq and Simulation Freq Questions HOT 3
- Clarification on Each Dimension's Meaning for ActionType.VEL HOT 3
- Discrete action space implementation based on BaseRLAviary HOT 1
- Visualize drone cameras in explorer HOT 1
- Location of Paper on Dynamics Code HOT 1
- -1 to 1 action space meaning HOT 1
- Pybullet drones
- No module named 'gym_pybullet_drones.envs.VisionAviary'
- Units HOT 1
- High frequency in RPMs when include action buffer in observation space can couse problems in real hardware HOT 2
- Why might my rewards be inversely proportional to the target height in the HoverAviary environment? HOT 2
- Error while running velocity.py and fly.py examples HOT 1
- ray 1.9 error while installing gym-pybullet-drones HOT 1
- Sim2real transfer for betaflight HOT 2
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Something interesting about the web. A new door to the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Something interesting about visualization and data art.
-
Game
Something interesting about games; make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from gym-pybullet-drones.