This is a great repository which provides the fine-tuning feature for ChatGLM3. But when I followed the process in the README to run the tokenize_dataset_rows.py script, it reported these errors:
`python tokenize_dataset_rows.py --jsonl_path ./alpaca_data.jsonl --save_path ./alpaca --max_seq_length 200
Downloading and preparing dataset generator/default to C:/Users/yt758/.cache/huggingface/datasets/generator/default-10116cbfdb8a1e8b/0.0.0...
HF google storage unreachable. Downloading and preparing it from source
Generating train split: 0 examples [00:00, ? examples/s]'(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /model/resolve/main/tokenizer_config.json (Caused by ProxyError('Unable to connect to proxy', SSLError(SSLZeroReturnError(6, 'TLS/SSL connection has been closed (EOF) (_ssl.c:1131)'))))"), '(Request ID: 09963958-bc38-4941-bfac-92e4491eae09)')' thrown while requesting HEAD https://huggingface.co/model/resolve/main/tokenizer_config.json
Generating train split: 0 examples [00:02, ? examples/s]urllib3.exceptions.SSLError: TLS/SSL connection has been closed (EOF) (_ssl.c:1131)
The above exception was the direct cause of the following exception:
urllib3.exceptions.ProxyError: ('Unable to connect to proxy', SSLError(SSLZeroReturnError(6, 'TLS/SSL connection has been closed (EOF) (_ssl.c:1131)')))
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\software\anaconda3\envs\t001\lib\site-packages\requests\adapters.py", line 486, in send
resp = conn.urlopen(
File "C:\software\anaconda3\envs\t001\lib\site-packages\urllib3\connectionpool.py", line 845, in urlopen
retries = retries.increment(
File "C:\software\anaconda3\envs\t001\lib\site-packages\urllib3\util\retry.py", line 515, in increment
raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /model/resolve/main/tokenizer_config.json (Caused by ProxyError('Unable to connect to proxy', SSLError(SSLZeroReturnError(6, 'TLS/SSL connection has been closed (EOF) (_ssl.c:1131)'))))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\software\anaconda3\envs\t001\lib\site-packages\datasets\builder.py", line 1608, in _prepare_split_single
for key, record in generator:
File "C:\software\anaconda3\envs\t001\lib\site-packages\datasets\packaged_modules\generator\generator.py", line 30, in _generate_examples
for idx, ex in enumerate(self.config.generator(**gen_kwargs)):
File "tokenize_dataset_rows.py", line 21, in read_jsonl
tokenizer = transformers.AutoTokenizer.from_pretrained(
File "C:\software\anaconda3\envs\t001\lib\site-packages\transformers\models\auto\tokenization_auto.py", line 643, in from_pretrained
tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)
File "C:\software\anaconda3\envs\t001\lib\site-packages\transformers\models\auto\tokenization_auto.py", line 487, in get_tokenizer_config
resolved_config_file = cached_file(
File "C:\software\anaconda3\envs\t001\lib\site-packages\transformers\utils\hub.py", line 417, in cached_file
resolved_file = hf_hub_download(
File "C:\software\anaconda3\envs\t001\lib\site-packages\huggingface_hub\utils_validators.py", line 118, in _inner_fn
return fn(*args, **kwargs)
File "C:\software\anaconda3\envs\t001\lib\site-packages\huggingface_hub\file_download.py", line 1233, in hf_hub_download
metadata = get_hf_file_metadata(
File "C:\software\anaconda3\envs\t001\lib\site-packages\huggingface_hub\utils_validators.py", line 118, in _inner_fn
return fn(*args, **kwargs)
File "C:\software\anaconda3\envs\t001\lib\site-packages\huggingface_hub\file_download.py", line 1613, in get_hf_file_metadata
r = _request_wrapper(
File "C:\software\anaconda3\envs\t001\lib\site-packages\huggingface_hub\file_download.py", line 418, in _request_wrapper
response = _request_wrapper(
File "C:\software\anaconda3\envs\t001\lib\site-packages\huggingface_hub\file_download.py", line 453, in _request_wrapper
return http_backoff(
File "C:\software\anaconda3\envs\t001\lib\site-packages\huggingface_hub\utils_http.py", line 274, in http_backoff
raise err
File "C:\software\anaconda3\envs\t001\lib\site-packages\huggingface_hub\utils_http.py", line 258, in http_backoff
response = session.request(method=method, url=url, **kwargs)
File "C:\software\anaconda3\envs\t001\lib\site-packages\requests\sessions.py", line 589, in request
resp = self.send(prep, **send_kwargs)
File "C:\software\anaconda3\envs\t001\lib\site-packages\requests\sessions.py", line 703, in send
r = adapter.send(request, **kwargs)
File "C:\software\anaconda3\envs\t001\lib\site-packages\huggingface_hub\utils_http.py", line 63, in send
return super().send(request, *args, **kwargs)
File "C:\software\anaconda3\envs\t001\lib\site-packages\requests\adapters.py", line 513, in send
raise ProxyError(e, request=request)
requests.exceptions.ProxyError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /model/resolve/main/tokenizer_config.json (Caused by ProxyError('Unable to connect to proxy', SSLError(SSLZeroReturnError(6, 'TLS/SSL connection has been closed (EOF) (_ssl.c:1131)'))))"), '(Request ID: 09963958-bc38-4941-bfac-92e4491eae09)')
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "tokenize_dataset_rows.py", line 48, in
main()
File "tokenize_dataset_rows.py", line 42, in main
dataset = datasets.Dataset.from_generator(
File "C:\software\anaconda3\envs\t001\lib\site-packages\datasets\arrow_dataset.py", line 1012, in from_generator
return GeneratorDatasetInputStream(
File "C:\software\anaconda3\envs\t001\lib\site-packages\datasets\io\generator.py", line 47, in read
self.builder.download_and_prepare(
File "C:\software\anaconda3\envs\t001\lib\site-packages\datasets\builder.py", line 872, in download_and_prepare
self._download_and_prepare(
File "C:\software\anaconda3\envs\t001\lib\site-packages\datasets\builder.py", line 1649, in _download_and_prepare
super()._download_and_prepare(
File "C:\software\anaconda3\envs\t001\lib\site-packages\datasets\builder.py", line 967, in _download_and_prepare
self._prepare_split(split_generator, **prepare_split_kwargs)
File "C:\software\anaconda3\envs\t001\lib\site-packages\datasets\builder.py", line 1488, in _prepare_split
for job_id, done, content in self._prepare_split_single(
File "C:\software\anaconda3\envs\t001\lib\site-packages\datasets\builder.py", line 1644, in _prepare_split_single
raise DatasetGenerationError("An error occurred while generating the dataset") from e
datasets.builder.DatasetGenerationError: An error occurred while generating the dataset`