Comments (10)
@lllllllllaa,
I was facing a different issue while executing the above-mentioned code. Kindly find the gist of it here and provide the required inputs. Thank you!
Hi, here is the code for the agent class:
import gc
import math

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib.pyplot as plt


class Agent:
    def __init__(self, gamma=0.99, gae_lambda=0.95, policy_clip=0.2, batch_size=64, n_epochs=10, chkpt_dir=''):
        self.gamma = gamma
        self.policy_clip = policy_clip
        self.n_epochs = n_epochs
        self.gae_lambda = gae_lambda
        self.chkpt_dir = chkpt_dir
        self.actor = None
        self.critic = None
        self.actor_loss = []
        self.critic_loss = []
        self.entropy = []
        self.value_pred = []
        self.states = np.asarray([])
        self.probs = np.asarray([])
        self.vals = np.asarray([])
        self.actions = np.asarray([])
        self.rewards = np.asarray([])
        self.dones = np.asarray([])
        self.actions_prob = []
        self.holds_prob = []
        self.sells_prob = []
        self.advantage = np.asarray([])
        self.batch_size = batch_size

    def store_transition(self, state, action, probs, vals, reward, done):
        # Buffers are shaped (num_envs, time, ...); each call appends one time step.
        if len(self.states) == 0:
            self.states = np.asarray(np.expand_dims(state.copy(), axis=1))
            self.actions = np.asarray(np.expand_dims(action.copy(), axis=1))
            self.probs = np.asarray(np.expand_dims(probs.copy(), axis=1))
            self.vals = np.asarray(np.expand_dims(vals.copy(), axis=1))
            self.rewards = np.asarray(np.expand_dims(reward.copy(), axis=1))
            self.dones = np.asarray(np.expand_dims(done.copy(), axis=1))
        else:
            self.states = np.concatenate((self.states, np.asarray(np.expand_dims(state.copy(), axis=1))), axis=1)
            self.actions = np.concatenate((self.actions, np.expand_dims(action.copy(), axis=1)), axis=1)
            self.probs = np.concatenate((self.probs, np.expand_dims(probs.copy(), axis=1)), axis=1)
            self.vals = np.concatenate((self.vals, np.expand_dims(vals.copy(), axis=1)), axis=1)
            self.rewards = np.concatenate((self.rewards, np.asarray(np.expand_dims(reward.copy(), axis=1))), axis=1)
            self.dones = np.concatenate((self.dones, np.asarray(np.expand_dims(done.copy(), axis=1))), axis=1)

    def create_model(self, input_shape, n_actions, num_layers, d_model, num_heads, dff, dims, dropout_rate, alpha=0.0003,
                     decay_steps=1000, decay_rate=0.97, staircase=True):
        shape = np.zeros(input_shape)  # dummy batch used only to build the sub-models
        alpha_decay = tf.keras.optimizers.schedules.ExponentialDecay(
            alpha,
            decay_steps=decay_steps,
            decay_rate=decay_rate,
            staircase=staircase)
        self.actor = Actor(n_actions, num_layers, d_model, num_heads, dff, dims, dropout_rate)
        # compile so actor.optimizer / critic.optimizer exist before learn()
        self.actor.compile(optimizer=tf.keras.optimizers.Adam(alpha_decay))
        self.actor(shape)
        self.critic = Critic(num_layers, d_model, num_heads, dff, dims, dropout_rate)
        self.critic.compile(optimizer=tf.keras.optimizers.Adam(alpha_decay))
        self.critic(shape)

    def get_model(self):
        return self.actor, self.critic

    def set_model(self, actor, critic):
        self.actor = actor
        self.critic = critic

    def set_model_weights(self, actor_weights, critic_weights):
        self.actor.set_weights(actor_weights)
        self.critic.set_weights(critic_weights)

    def load_weights(self, actor_name, critic_name):
        print('... loading weights ...')
        self.actor.load_weights(self.chkpt_dir + actor_name)
        # self.actor.compile()
        self.critic.load_weights(self.chkpt_dir + critic_name)
        # self.critic.compile()

    def load_models(self, actor_name, critic_name):
        print('... loading models ...')
        self.actor = tf.keras.models.load_model(self.chkpt_dir + actor_name)
        self.critic = tf.keras.models.load_model(self.chkpt_dir + critic_name)

    def save_models_ckpt(self):
        print('... saving checkpoint models ...')
        self.actor.save(self.chkpt_dir + 'actor.keras')
        self.critic.save(self.chkpt_dir + 'critic.keras')

    def save_models(self, n_steps):
        print('... saving models ...')
        self.actor.save('models/actor' + str(n_steps) + '.keras')
        self.critic.save('models/critic' + str(n_steps) + '.keras')

    def set_lr(self, alpha=0.0003, decay_steps=10, decay_rate=0.97, staircase=False):
        print('... setting learning rate ...')
        alpha_decay = tf.keras.optimizers.schedules.ExponentialDecay(
            alpha,
            decay_steps=decay_steps,
            decay_rate=decay_rate,
            staircase=staircase)
        self.actor.compile(optimizer=tf.keras.optimizers.Adam(alpha_decay))
        self.critic.compile(optimizer=tf.keras.optimizers.Adam(alpha_decay))

    def choose_action(self, observation):
        state = tf.convert_to_tensor(observation)
        probs = self.actor(state)
        dist = tfp.distributions.Categorical(probs=probs)
        action = dist.sample()
        log_prob = dist.log_prob(action)
        value = self.critic(state)
        action = action.numpy()
        value = value.numpy()
        log_prob = log_prob.numpy()
        return action, log_prob, value

    def choose_action_eval(self, observation, th):
        state = tf.convert_to_tensor(observation)
        probs = self.actor.predict(state, verbose=0)
        # dist = tfp.distributions.Categorical(probs=probs)
        action = tf.argmax(probs, axis=-1)  # dist.sample()
        # log_prob = dist.log_prob(action)
        # value = self.critic.predict(state, verbose=None)
        action = action.numpy()
        for i in range(len(action)):
            # fall back to the hold action when the chosen action is not confident enough
            if probs[i, action[i]] < th:
                action[i] = 1
        value = None
        log_prob = None  # log_prob.numpy()
        return action, log_prob, value

    def calc_adv(self):
        # Generalized Advantage Estimation over the (num_envs, time) reward buffer.
        self.advantage = np.zeros(self.rewards.shape, dtype=np.float32)
        last_gae = 0
        for t in reversed(range(self.rewards.shape[1] - 1)):
            nextnonterminal = 1.0 - self.dones[:, t + 1]
            nextvalues = self.vals[:, t + 1, 0]
            delta = self.rewards[:, t] + self.gamma * nextvalues * nextnonterminal - self.vals[:, t, 0]
            self.advantage[:, t] = last_gae = delta + self.gamma * self.gae_lambda * nextnonterminal * last_gae

    def prepare_shape(self):
        # Flatten the (num_envs, time, ...) buffers into a single batch dimension.
        self.states = np.reshape(self.states, (-1, self.states.shape[-2], self.states.shape[-1]))
        self.actions = np.reshape(self.actions, (-1))
        self.probs = np.reshape(self.probs, (-1))
        self.vals = np.reshape(self.vals, (-1, 1))
        self.advantage = np.reshape(self.advantage, (-1))

    def record_action_dist(self):
        n_states = self.states.shape[0]
        self.actions_prob.append(np.asarray(np.where(self.actions == 0)).size / n_states * 100)
        self.holds_prob.append(np.asarray(np.where(self.actions == 1)).size / n_states * 100)
        self.sells_prob.append(np.asarray(np.where(self.actions == 2)).size / n_states * 100)
        plt.plot(self.actions_prob, c='r')
        plt.plot(self.holds_prob, c='k')
        plt.plot(self.sells_prob, c='g')
        plt.savefig('action_dist.jpg')
        plt.cla()

    def learn(self):
        self.calc_adv()
        self.prepare_shape()
        self.record_action_dist()
        episode_actor_loss = []
        episode_critic_loss = []
        episode_entropy = []
        episode_value_pred = []
        for i in range(self.n_epochs):
            shuffled = np.arange(len(self.states))
            np.random.shuffle(shuffled)
            for i_start in range(math.ceil(self.states.shape[0] / self.batch_size)):
                start_index = i_start * self.batch_size
                end_index = min((i_start + 1) * self.batch_size, self.states.shape[0])
                batch = shuffled[start_index:end_index]
                with tf.GradientTape(persistent=True) as tape:
                    states = tf.convert_to_tensor(self.states[batch].copy())
                    old_probs = tf.convert_to_tensor(self.probs[batch].copy())
                    actions = tf.convert_to_tensor(self.actions[batch].copy())
                    probs = self.actor(states)
                    dist = tfp.distributions.Categorical(probs=probs)
                    new_probs = dist.log_prob(actions)
                    critic_value = self.critic(states)
                    critic_value = tf.squeeze(critic_value, 1)
                    prob_ratio = tf.math.exp(new_probs - old_probs)
                    weighted_probs = self.advantage[batch].copy() * prob_ratio
                    clipped_probs = tf.clip_by_value(prob_ratio,
                                                     1 - self.policy_clip,
                                                     1 + self.policy_clip)
                    weighted_clipped_probs = clipped_probs * self.advantage[batch].copy()
                    actor_loss = -tf.math.minimum(weighted_probs,
                                                  weighted_clipped_probs)
                    # actor_loss = tf.math.reduce_mean(actor_loss)
                    # p*log(p) is the negative entropy, so adding it to the loss encourages exploration
                    entropy = tf.math.reduce_mean(probs * tf.math.log(probs), 1)
                    total_actor_loss = actor_loss + entropy
                    # flatten vals to shape (batch,) so returns broadcasts against the advantage
                    returns = self.advantage[batch].copy() + self.vals[batch].copy()[:, 0]
                    # critic_loss = tf.math.reduce_mean(tf.math.pow(
                    #     returns - critic_value, 2))
                    critic_loss = tf.keras.losses.MSE(returns, critic_value)
                actor_params = self.actor.trainable_variables
                actor_grads = tape.gradient(total_actor_loss, actor_params)
                critic_params = self.critic.trainable_variables
                critic_grads = tape.gradient(critic_loss, critic_params)
                self.actor.optimizer.apply_gradients(
                    zip(actor_grads, actor_params))
                self.critic.optimizer.apply_gradients(
                    zip(critic_grads, critic_params))
                episode_actor_loss.append(tf.math.reduce_mean(actor_loss.numpy()))
                episode_critic_loss.append(tf.math.reduce_mean(critic_loss.numpy()))
                episode_entropy.append(tf.math.reduce_mean(entropy))
                episode_value_pred.append(tf.math.reduce_mean(critic_value.numpy()))
        self.actor_loss.append(tf.math.reduce_mean(episode_actor_loss))
        self.critic_loss.append(tf.math.reduce_mean(episode_critic_loss))
        self.entropy.append(tf.math.reduce_mean(episode_entropy))
        self.value_pred.append(tf.math.reduce_mean(episode_value_pred))
        plt.plot(self.actor_loss)
        plt.savefig('actor_loss.jpg')
        plt.cla()
        plt.plot(self.critic_loss)
        plt.savefig('critic_loss.jpg')
        plt.cla()
        plt.plot(self.entropy)
        plt.savefig('entropy.jpg')
        plt.cla()
        plt.plot(self.value_pred)
        plt.savefig('value_pred.jpg')
        plt.cla()
        self.clear_memory()

    def get_shapes(self):
        print(self.states.shape)
        print(self.actions.shape)
        print(self.probs.shape)
        print(self.vals.shape)
        print(self.rewards.shape)
        print(self.dones.shape)
        print(self.advantage.shape)

    def clear_memory(self):
        self.states = []
        self.probs = []
        self.actions = []
        self.rewards = []
        self.dones = []
        self.vals = []
        self.advantage = []
        gc.collect()
This issue is stale because it has been open for 14 days with no activity. It will be closed if no further activity occurs. Thank you.
@lllllllllaa,
Apologies for the delay.
I tried to execute the above-mentioned code, and it failed with an error. Also, in the given code snippet you define the class and its methods but never call them anywhere. Kindly find the gist of it here. Thank you!
Hi tila,
Sorry for the late reply. Please try this code:
#@title PPO
# Imports assumed by the cells below (the original post did not include them).
import gc
import math

import gym
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib.pyplot as plt


def save(model):
    # Mount Google Drive as a folder and copy the saved model into it (Colab only).
    from google.colab import drive
    drive.mount('/content/drive')
    cmd = '/content/' + model + '.keras /content/drive/My\ Drive/'
    !cp $cmd


def envs_init(filepath, sl, num_envs, min_len, offset, plot_index):
    envs = []
    count = 0
    while count < num_envs:
        envs.append(DayTrade(name='a', day=np.zeros((1000, 24)), start_offset=0, plotting=0, plotting_index=100))
        count += 1
    return envs


def get_obs(envs, agent, eval=False, th=0.5):
    observations_ = [envs[j].reset() for j in range(len(envs))]
    num_obs = 0
    score = 0
    n_steps = 1
    dones = np.zeros(len(envs))
    while not np.any(dones):
        observations = observations_
        if not eval:
            actions, probs, vals = agent.choose_action(np.asarray(observations))
        else:
            actions, probs, vals = agent.choose_action_eval(np.asarray(observations), th)
        returns = [envs[j].step(actions[j]) for j in range(len(envs))]
        observations_ = [returns[j][0] for j in range(len(envs))]
        rewards = [returns[j][1] for j in range(len(envs))]
        dones = [returns[j][2] for j in range(len(envs))]
        n_steps += 1
        num_obs += 1
        score += np.average(rewards)
        if not eval:
            agent.store_transition(observations, actions, probs, vals, rewards, dones)
    return n_steps, score, num_obs


if __name__ == '__main__':
    # `sl` and `val_envs` are not defined in the posted snippet; they are assumed
    # to come from elsewhere in the notebook.
    envs = envs_init('processed_A', sl, 1, 650, offset=0, plot_index=300)
    batch_size = 512
    n_epochs = 2
    alpha = 0.0001
    agent = Agent(gamma=0.99, gae_lambda=0.95, policy_clip=0.2, batch_size=batch_size, n_epochs=n_epochs)
    # Agent.__init__ does not accept the model arguments; they belong to create_model.
    # The input shape below is assumed from the (90, 24) observations returned by DayTrade.
    agent.create_model(input_shape=(1, 90, 24), n_actions=3, num_layers=4, d_model=25,
                       num_heads=[8, 8, 8, 8], dff=512, dims=[512, 256],
                       dropout_rate=[0.2, 0.5], alpha=0.0003)
    # agent.load_models('actor.keras', 'critic.keras')
    # agent.set_lr(alpha=0.00005, decay_steps=2000, decay_rate=0.97, staircase=False)
    n_games = 2000
    best_score = -10e8
    score_history = []
    val_score_hist = []
    learn_iters = 0
    n_steps = 0
    for i in range(n_games):
        steps, score, num_obs = get_obs(envs, agent, eval=False)
        n_steps += steps
        score /= num_obs
        score_history.append(score)
        plt.plot(score_history)
        plt.savefig('score.jpg')
        plt.cla()
        _, val_score, val_num_obs = get_obs(val_envs, agent, eval=True)
        val_score /= val_num_obs
        val_score_hist.append(val_score)
        plt.plot(val_score_hist)
        plt.savefig('val_score.jpg')
        plt.cla()
        if val_score > best_score:
            best_score = val_score
            agent.save_models_ckpt()
            # save('actor')
            # save('critic')
        # tf.summary.scalar('reward summary', data=avg_score, step=learn_iters)
        print('episode', i, 'score %.4f' % score, 'val_score %.4f' % val_score,
              'time_steps', n_steps, 'learning_steps', learn_iters)
        agent.learn()
        agent.save_models(n_steps)
        learn_iters += 1
#@title Agent
# (Agent class identical to the one posted in the earlier comment above.)
#@title Models
@tf.keras.saving.register_keras_serializable()
class SelfAttention(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads):
        super().__init__()
        self.mha = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        self.layernorm = tf.keras.layers.LayerNormalization()
        self.add = tf.keras.layers.Add()

    def call(self, x):
        attn_output = self.mha(
            query=x,
            value=x,
            key=x)
        x = self.add([x, attn_output])
        x = self.layernorm(x)
        return x


@tf.keras.saving.register_keras_serializable()
class FeedForward(tf.keras.layers.Layer):
    def __init__(self, d_model, dff, dropout_rate=0.2):
        super().__init__()
        self.seq = tf.keras.Sequential([
            tf.keras.layers.Dense(dff),
            tf.keras.layers.PReLU(shared_axes=[1, 2]),
            tf.keras.layers.Dense(d_model),
            tf.keras.layers.Dropout(dropout_rate)
        ])
        self.add = tf.keras.layers.Add()
        self.layer_norm = tf.keras.layers.LayerNormalization()

    def call(self, x):
        x = self.add([x, self.seq(x)])
        x = self.layer_norm(x)
        return x


@tf.keras.saving.register_keras_serializable()
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, *, d_model, num_heads, dff, dropout_rate=0.2):
        super().__init__()
        # keyword arguments so d_model and num_heads are not swapped
        self.self_attention = SelfAttention(d_model=d_model, num_heads=num_heads)
        self.ffn = FeedForward(d_model=d_model, dff=dff, dropout_rate=dropout_rate)

    def call(self, x):
        x = self.self_attention(x)
        x = self.ffn(x)
        return x


@tf.keras.saving.register_keras_serializable()
class FF_layer(tf.keras.layers.Layer):
    def __init__(self, dim, dropout_rate):
        super().__init__()
        self.ff = tf.keras.layers.Dense(dim)
        self.activation = tf.keras.layers.PReLU(shared_axes=[1])
        self.dropout = tf.keras.layers.Dropout(dropout_rate)

    def call(self, x):
        x = self.ff(x)
        x = self.activation(x)
        x = self.dropout(x)
        return x


@tf.keras.saving.register_keras_serializable()
class Encoder(tf.keras.Model):
    def __init__(self, num_layers=4, d_model=24, num_heads=[8, 8, 8, 8, 8, 8],
                 dff=512, dims=[512, 256], dropout_rate=[0.2, 0.5], name='Encoder'):
        super().__init__(name=name)
        self.d_model = d_model
        self.dims = dims
        self.num_layers = num_layers
        self.flatten = tf.keras.layers.Flatten()
        self.enc_layers = [
            EncoderLayer(d_model=self.d_model,
                         num_heads=num_heads[i],
                         dff=dff,
                         dropout_rate=dropout_rate[0])
            for i in range(num_layers)]
        self.ff = [FF_layer(dims[i], dropout_rate[1]) for i in range(len(dims))]

    def call(self, x):
        for i in range(self.num_layers):
            x = self.enc_layers[i](x)
        x = self.flatten(x)
        for i in range(len(self.dims)):
            x = self.ff[i](x)
        return x  # shape (batch_size, dims[-1])


@tf.keras.saving.register_keras_serializable()
class PPO_model(tf.keras.Model):
    def __init__(self, n_actions, num_layers=4, d_model=24, num_heads=[8, 8, 8, 8, 8, 8],
                 dff=512, dims=[512, 256], dropout_rate=[0.2, 0.5]):
        super(PPO_model, self).__init__()
        self.encoder = Encoder(num_layers, d_model, num_heads, dff, dims, dropout_rate)
        self.actor_output_layer = tf.keras.layers.Dense(n_actions)
        self.actor_activation = tf.keras.layers.Activation('softmax')
        self.critic_output_layer = tf.keras.layers.Dense(1)
        self.critic_activation = tf.keras.layers.Activation('linear')

    def call(self, x):
        x = self.encoder(x)
        action = self.actor_output_layer(x)
        action = self.actor_activation(action)
        value = self.critic_output_layer(x)
        value = self.critic_activation(value)
        return action, value


@tf.keras.saving.register_keras_serializable()
class Critic(tf.keras.Model):
    def __init__(self, num_layers=6, d_model=24, num_heads=[8, 8, 8, 8, 8, 8],
                 dff=512, dims=[512, 512], dropout_rate=[0.2, 0.5]):
        super(Critic, self).__init__()
        self.encoder = Encoder(num_layers, d_model, num_heads, dff, dims, dropout_rate)
        self.output_layer = tf.keras.layers.Dense(1)

    def call(self, x):
        x = self.encoder(x)
        x = self.output_layer(x)
        return x
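Note: Agent.create_model above references an Actor model that is not defined anywhere in the posted code. Below is a minimal sketch consistent with the Critic and with the actor head of PPO_model; the exact architecture is an assumption, not part of the original post.

@tf.keras.saving.register_keras_serializable()
class Actor(tf.keras.Model):
    def __init__(self, n_actions, num_layers=4, d_model=24, num_heads=[8, 8, 8, 8],
                 dff=512, dims=[512, 256], dropout_rate=[0.2, 0.5]):
        super(Actor, self).__init__()
        # Same transformer encoder as the Critic, followed by a softmax policy head.
        self.encoder = Encoder(num_layers, d_model, num_heads, dff, dims, dropout_rate)
        self.output_layer = tf.keras.layers.Dense(n_actions)
        self.activation = tf.keras.layers.Activation('softmax')

    def call(self, x):
        x = self.encoder(x)
        x = self.output_layer(x)
        return self.activation(x)  # action probabilities, shape (batch_size, n_actions)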
#@title Env
class DayTrade(gym.Env):
    def __init__(self, name, day, window_size=90, open_index=13, close_index=16, start_offset=130, plotting=False, plotting_index=300):
        assert len(day.shape) == 2
        # attributes assumed by the rest of the stripped-down snippet
        self.day_shape = (window_size, day.shape[1])  # not defined in the original post
        self._total_reward = 0
        self._end = False  # this dummy env never sets it to True
        # spaces
        self.action_space = gym.spaces.Box(low=0, high=1, shape=(3,), dtype=np.float32)
        num_TH = 1000
        self.observation_space = gym.spaces.Box(low=-num_TH, high=num_TH, shape=self.day_shape, dtype=np.float32)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed, options=options)
        self._end = False
        observation = np.zeros((90, 24))
        return observation

    def step(self, action):
        # print(np.argmax(action))
        reward = 0
        observation = np.zeros((90, 24))
        # info = self._get_info()
        return observation, reward, self._end  # , info

    def _get_info(self):
        return dict(
            total_reward=self._total_reward
        )

    def close(self):
        plt.close()
This issue is stale because it has been open for 14 days with no activity. It will be closed if no further activity occurs. Thank you.
@lllllllllaa,
Please try the mentioned code on the latest TensorFlow 2.15 with Keras 3.0. Since the migration, many changes have been made; the error you are getting is due to one such change, see cff6ac9.
Thank you!
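For anyone who instead needs to keep this snippet on the legacy Keras 2 code path after that migration, here is a minimal sketch; it assumes the tf-keras shim package is installed (pip install tf-keras) and is not part of the original report.

# Keep tf.keras pointing at legacy Keras 2 on TF 2.16+ (TF 2.15 still ships Keras 2 by default).
# The environment variable must be set before TensorFlow is imported anywhere in the process.
import os
os.environ["TF_USE_LEGACY_KERAS"] = "1"
import tensorflow as tf

# Alternatively, import the legacy implementation directly:
import tf_keras as keras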
This issue is stale because it has been open for 14 days with no activity. It will be closed if no further activity occurs. Thank you.
This issue was closed because it has been inactive for 28 days. Please reopen if you'd like to work on this further.