Which version of DeepSpeed are you using? I am using version 0.7.7, but it raises the following error inside the Trainer:
Traceback (most recent call last):
File "run_ner.py", line 411, in <module>
main()
File "run_ner.py", line 346, in main
trainer.train(Traceback (most recent call last):
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/transformers/trainer.py", line 1113, in train
File "run_ner.py", line 411, in <module>
Traceback (most recent call last):
File "run_ner.py", line 411, in <module>
Traceback (most recent call last):
File "run_ner.py", line 411, in <module>
deepspeed_engine, optimizer, lr_scheduler = deepspeed_init(
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/transformers/integrations.py", line 517, in deepspeed_init
main()
File "run_ner.py", line 346, in main
main()
File "run_ner.py", line 346, in main
model, optimizer, _, lr_scheduler = deepspeed.initialize(
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/__init__.py", line 125, in initialize
trainer.train(
trainer.train(
engine = DeepSpeedEngine(args=args,
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 290, in __init__
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/transformers/trainer.py", line 1113, in train
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/transformers/trainer.py", line 1113, in train
main()
File "run_ner.py", line 346, in main
self._configure_distributed_model(model)
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 1106, in _configure_distributed_model
trainer.train(
deepspeed_engine, optimizer, lr_scheduler = deepspeed_init( File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/transformers/trainer.py", line 1113, in train
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/transformers/integrations.py", line 517, in deepspeed_init
deepspeed_engine, optimizer, lr_scheduler = deepspeed_init(
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/transformers/integrations.py", line 517, in deepspeed_init
self._broadcast_model() model, optimizer, _, lr_scheduler = deepspeed.initialize(
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 1022, in _broadcast_model
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/__init__.py", line 125, in initialize
model, optimizer, _, lr_scheduler = deepspeed.initialize(
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/__init__.py", line 125, in initialize
engine = DeepSpeedEngine(args=args,
engine = DeepSpeedEngine(args=args,deepspeed_engine, optimizer, lr_scheduler = deepspeed_init( File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 290, in __init__
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 290, in __init__
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/transformers/integrations.py", line 517, in deepspeed_init
groups._get_broadcast_src_rank(),
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/utils/groups.py", line 338, in _get_broadcast_src_rank
self._configure_distributed_model(model)
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 1106, in _configure_distributed_model
self._configure_distributed_model(model)
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 1106, in _configure_distributed_model
model, optimizer, _, lr_scheduler = deepspeed.initialize(
return dist.get_global_rank(_get_data_parallel_group(), 0) File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/__init__.py", line 125, in initialize
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/comm/__init__.py", line 22, in get_global_rank
engine = DeepSpeedEngine(args=args,if hasattr(torch.distributed.distributed_c10d, "get_global_rank"): self._broadcast_model()
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 1022, in _broadcast_model
AttributeError File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 290, in __init__
: module 'deepspeed.comm.torch' has no attribute 'distributed'
self._broadcast_model()
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 1022, in _broadcast_model
self._configure_distributed_model(model)
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 1106, in _configure_distributed_model
groups._get_broadcast_src_rank(),
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/utils/groups.py", line 338, in _get_broadcast_src_rank
groups._get_broadcast_src_rank(),
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/utils/groups.py", line 338, in _get_broadcast_src_rank
return dist.get_global_rank(_get_data_parallel_group(), 0)self._broadcast_model()
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/comm/__init__.py", line 22, in get_global_rank
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 1022, in _broadcast_model
return dist.get_global_rank(_get_data_parallel_group(), 0)if hasattr(torch.distributed.distributed_c10d, "get_global_rank"):
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/comm/__init__.py", line 22, in get_global_rank
AttributeError: module 'deepspeed.comm.torch' has no attribute 'distributed'
if hasattr(torch.distributed.distributed_c10d, "get_global_rank"):
groups._get_broadcast_src_rank(),
AttributeError File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/utils/groups.py", line 338, in _get_broadcast_src_rank
: module 'deepspeed.comm.torch' has no attribute 'distributed'
return dist.get_global_rank(_get_data_parallel_group(), 0)
File "/home/songyiping/anaconda3/envs/diverse/lib/python3.8/site-packages/deepspeed/comm/__init__.py", line 22, in get_global_rank
if hasattr(torch.distributed.distributed_c10d, "get_global_rank"):
AttributeError: module 'deepspeed.comm.torch' has no attribute 'distributed'
I have not made any changes to the code, so I suspect the problem is an incompatibility between the versions of the installed libraries. I hope you can help me.