The question is: in both cases the reward list and the action list are the same, but the observation lists in the stacked and unstacked cases are different. I have attached the first observation from the stacked case and from the unstacked case. What is the reason for this difference? Could you please explain it? Thanks in advance.
import gym
import d4rl_atari
import pickle
import numpy as np
import matplotlib.pyplot as plt
def test_stack():
    """Compare the stacked and unstacked 'ms-pacman-expert-v0' datasets.

    Loads the dataset twice (``stack=True`` and ``stack=False``), prints the
    number of positions where the reward and action sequences differ, then
    displays the first observation of each dataset and prints how many
    pixels differ between the two displayed frames.

    The stacked frame that is compared is the LAST channel of the stacked
    observation, not the first; see the NOTE(review) comment below.
    """
    # -v{0, 1, 2, 3, 4} for datasets with the other random seeds
    env_s = gym.make('ms-pacman-expert-v0', stack=True)
    env_s.reset()
    dataset_s = env_s.get_dataset()

    env = gym.make('ms-pacman-expert-v0', stack=False)
    env.reset()
    dataset = env.get_dataset()

    # Rewards: the original notes report shape (1m,) and 0 mismatches.
    re_s = dataset_s['rewards']
    re = dataset['rewards']
    print(np.sum(re != re_s))

    # Actions: the original notes report 0 mismatches as well.
    a_s = dataset_s['actions']
    a = dataset['actions']
    print(np.sum(a_s != a))

    # First observation of each dataset. Per the original notes a stacked
    # observation is (4, 84, 84); the unstacked one is indexed as
    # (1, 84, 84) here -- TODO confirm both shapes.
    ob_s = dataset_s['observations'][0]
    ob = dataset['observations'][0]

    # NOTE(review): d4rl_atari appears to zero-pad the beginning of every
    # episode and to place the newest frame LAST in the stack, so channel 0
    # of the very first stacked observation is padding rather than a game
    # frame. Comparing channel 0 (as this code originally did) therefore
    # shows a difference even though the underlying data agree. Confirm
    # against the library's frame-stacking code.
    o_s = ob_s[-1]
    plt.imshow(o_s)
    plt.show()

    o = ob[0]
    plt.imshow(o)
    plt.show()

    # Number of differing pixels between the two displayed frames
    # (expected to be 0 if the assumption above holds).
    print(np.sum(o_s != o))
# Run the comparison only when this file is executed as a script.
if __name__ == '__main__':
    test_stack()