I was trying to run the DPO training using the provided command. I installed the muffin library as instructed, but I am getting the following error. Updating to the latest transformers library did not help. I was wondering if you ran into any issue like this during your experiments. I am using 4 A100 GPUs with 80 GB of memory each.
Traceback (most recent call last):
File "/home/ubuntu/muffin/./muffin/train/train_mem_muffin.py", line 13, in
train()
File "/home/ubuntu/muffin/muffin/train/train_muffin.py", line 473, in train
trainer.train()
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/trainer.py", line 1555, in train
return inner_training_loop(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/trainer.py", line 2725, in training_step
loss = self.compute_loss(model, inputs)
File "/home/ubuntu/muffin/muffin/train/trainers.py", line 189, in compute_loss
concatenated_logp = forward_DPO(model,
File "/home/ubuntu/muffin/muffin/train/trainers.py", line 123, in forward_DPO
output = model(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/accelerate/utils/operations.py", line 581, in forward
return model_forward(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/accelerate/utils/operations.py", line 569, in call
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 14, in decorate_autocast
return func(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 748, in forward
output = self._fsdp_wrapped_module(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
Traceback (most recent call last):
File "/home/ubuntu/muffin/./muffin/train/train_mem_muffin.py", line 13, in
return forward_call(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/accelerate/utils/operations.py", line 581, in forward
train()return model_forward(*args, **kwargs)
File "/home/ubuntu/muffin/muffin/train/train_muffin.py", line 473, in train
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/accelerate/utils/operations.py", line 569, in call
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 14, in decorate_autocast
trainer.train()return func(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/trainer.py", line 1555, in train
File "/home/ubuntu/muffin/muffin/model/muffin.py", line 338, in forward
outputs = self.model(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/muffin/muffin/model/muffin.py", line 298, in forward
return inner_training_loop(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop
return super(Beit3LlavaLlamaModel, self).forward(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 912, in forward
layer_outputs = self._gradient_checkpointing_func(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 249, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/autograd/function.py", line 506, in apply
tr_loss_step = self.training_step(model, inputs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/trainer.py", line 2725, in training_step
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 107, in forward
outputs = run_function(*args)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 748, in forward
output = self._fsdp_wrapped_module(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
loss = self.compute_loss(model, inputs)
File "/home/ubuntu/muffin/muffin/train/trainers.py", line 189, in compute_loss
concatenated_logp = forward_DPO(model,
File "/home/ubuntu/muffin/muffin/train/trainers.py", line 123, in forward_DPO
output = model(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 672, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/accelerate/utils/operations.py", line 581, in forward
return model_forward(*args, **kwargs)return forward_call(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/accelerate/utils/operations.py", line 569, in call
TypeError: forward() got an unexpected keyword argument 'position_ids'
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 14, in decorate_autocast
return func(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 748, in forward
output = self._fsdp_wrapped_module(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/accelerate/utils/operations.py", line 581, in forward
return model_forward(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/accelerate/utils/operations.py", line 569, in call
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 14, in decorate_autocast
return func(*args, **kwargs)
File "/home/ubuntu/muffin/muffin/model/muffin.py", line 338, in forward
outputs = self.model(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/muffin/muffin/model/muffin.py", line 298, in forward
return super(Beit3LlavaLlamaModel, self).forward(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 912, in forward
layer_outputs = self._gradient_checkpointing_func(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 249, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/autograd/function.py", line 506, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 107, in forward
outputs = run_function(*args)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 748, in forward
output = self._fsdp_wrapped_module(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 672, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
TypeError: forward() got an unexpected keyword argument 'position_ids'
Traceback (most recent call last):
File "/home/ubuntu/muffin/./muffin/train/train_mem_muffin.py", line 13, in
train()
File "/home/ubuntu/muffin/muffin/train/train_muffin.py", line 473, in train
trainer.train()
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/trainer.py", line 1555, in train
return inner_training_loop(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/trainer.py", line 2725, in training_step
loss = self.compute_loss(model, inputs)
File "/home/ubuntu/muffin/muffin/train/trainers.py", line 189, in compute_loss
concatenated_logp = forward_DPO(model,
File "/home/ubuntu/muffin/muffin/train/trainers.py", line 123, in forward_DPO
output = model(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/accelerate/utils/operations.py", line 581, in forward
return model_forward(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/accelerate/utils/operations.py", line 569, in call
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 14, in decorate_autocast
return func(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 748, in forward
output = self._fsdp_wrapped_module(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/accelerate/utils/operations.py", line 581, in forward
return model_forward(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/accelerate/utils/operations.py", line 569, in call
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 14, in decorate_autocast
return func(*args, **kwargs)
File "/home/ubuntu/muffin/muffin/model/muffin.py", line 338, in forward
outputs = self.model(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/muffin/muffin/model/muffin.py", line 298, in forward
return super(Beit3LlavaLlamaModel, self).forward(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 912, in forward
layer_outputs = self._gradient_checkpointing_func(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 249, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/autograd/function.py", line 506, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 107, in forward
outputs = run_function(*args)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 748, in forward
output = self._fsdp_wrapped_module(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 672, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
TypeError: forward() got an unexpected keyword argument 'position_ids'
Traceback (most recent call last):
File "/home/ubuntu/muffin/./muffin/train/train_mem_muffin.py", line 13, in
train()
File "/home/ubuntu/muffin/muffin/train/train_muffin.py", line 473, in train
trainer.train()
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/trainer.py", line 1555, in train
return inner_training_loop(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/trainer.py", line 2725, in training_step
loss = self.compute_loss(model, inputs)
File "/home/ubuntu/muffin/muffin/train/trainers.py", line 189, in compute_loss
concatenated_logp = forward_DPO(model,
File "/home/ubuntu/muffin/muffin/train/trainers.py", line 123, in forward_DPO
output = model(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/accelerate/utils/operations.py", line 581, in forward
return model_forward(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/accelerate/utils/operations.py", line 569, in call
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 14, in decorate_autocast
return func(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 748, in forward
output = self._fsdp_wrapped_module(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/accelerate/utils/operations.py", line 581, in forward
return model_forward(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/accelerate/utils/operations.py", line 569, in call
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 14, in decorate_autocast
return func(*args, **kwargs)
File "/home/ubuntu/muffin/muffin/model/muffin.py", line 338, in forward
outputs = self.model(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/muffin/muffin/model/muffin.py", line 298, in forward
return super(Beit3LlavaLlamaModel, self).forward(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 912, in forward
layer_outputs = self._gradient_checkpointing_func(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 249, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/autograd/function.py", line 506, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 107, in forward
outputs = run_function(*args)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 748, in forward
output = self._fsdp_wrapped_module(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 672, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/home/ubuntu/mambaforge-pypy3/envs/muffin/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
TypeError: forward() got an unexpected keyword argument 'position_ids'