All the weights of T5ForConditionalGeneration were initialized from the model checkpoint at t5-small.
If your task is similar to the task the model of the checkpoint was trained on, you can already use T5ForConditionalGeneration for predictions without further training.
1% 1/78 [00:09<11:42, 9.13s/ba]
Traceback (most recent call last):
File "/content/drive/MyDrive/Finetune/run.py", line 619, in <module>
main()
File "/content/drive/MyDrive/Finetune/run.py", line 401, in main
load_from_cache_file=not data_args.overwrite_cache,
File "/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py", line 2053, in map
desc=desc,
File "/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py", line 503, in wrapper
out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py", line 470, in wrapper
out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/datasets/fingerprint.py", line 406, in wrapper
out = func(self, *args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py", line 2408, in _map_single
offset=offset,
File "/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py", line 2291, in apply_function_on_filtered_inputs
function(*fn_args, effective_indices, **fn_kwargs) if with_indices else function(*fn_args, **fn_kwargs)
File "/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py", line 1991, in decorated
result = f(decorated_item, *args, **kwargs)
File "/content/drive/MyDrive/Finetune/run.py", line 380, in preprocess_function
labels = tokenizer(targets, max_length=max_target_length, padding=padding, truncation=True)
File "/content/drive/MyDrive/Finetune/transformers/tokenization_utils_base.py", line 2335, in __call__
**kwargs,
File "/content/drive/MyDrive/Finetune/transformers/tokenization_utils_base.py", line 2520, in batch_encode_plus
**kwargs,
File "/content/drive/MyDrive/Finetune/transformers/tokenization_utils_fast.py", line 378, in _batch_encode_plus
is_pretokenized=is_split_into_words,
TypeError: TextEncodeInput must be Union[TextInputSequence, Tuple[InputSequence, InputSequence]]