I have a script that builds a clustering model, and when I try to write the resulting data to BigQuery with the function write_to_model,
it fails after 10 minutes (the 600-second retry deadline) with the following log:
---------------------------------------------------------------------------
ConnectionResetError Traceback (most recent call last)
File ~\anaconda3\envs\dbt-venv\lib\site-packages\urllib3\connectionpool.py:703, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
702 # Make the request on the httplib connection object.
--> 703 httplib_response = self._make_request(
704 conn,
705 method,
706 url,
707 timeout=timeout_obj,
708 body=body,
709 headers=headers,
710 chunked=chunked,
711 )
713 # If we're going to release the connection in ``finally:``, then
714 # the response doesn't need to know about the connection. Otherwise
715 # it will also try to release it and we'll have a double-release
716 # mess.
File ~\anaconda3\envs\dbt-venv\lib\site-packages\urllib3\connectionpool.py:398, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
397 else:
--> 398 conn.request(method, url, **httplib_request_kw)
400 # We are swallowing BrokenPipeError (errno.EPIPE) since the server is
401 # legitimately able to close the connection after sending a valid response.
402 # With this behaviour, the received response is still readable.
File ~\anaconda3\envs\dbt-venv\lib\site-packages\urllib3\connection.py:239, in HTTPConnection.request(self, method, url, body, headers)
238 headers["User-Agent"] = _get_default_user_agent()
--> 239 super(HTTPConnection, self).request(method, url, body=body, headers=headers)
File ~\anaconda3\envs\dbt-venv\lib\http\client.py:1285, in HTTPConnection.request(self, method, url, body, headers, encode_chunked)
1284 """Send a complete request to the server."""
-> 1285 self._send_request(method, url, body, headers, encode_chunked)
File ~\anaconda3\envs\dbt-venv\lib\http\client.py:1331, in HTTPConnection._send_request(self, method, url, body, headers, encode_chunked)
1330 body = _encode(body, 'body')
-> 1331 self.endheaders(body, encode_chunked=encode_chunked)
File ~\anaconda3\envs\dbt-venv\lib\http\client.py:1280, in HTTPConnection.endheaders(self, message_body, encode_chunked)
1279 raise CannotSendHeader()
-> 1280 self._send_output(message_body, encode_chunked=encode_chunked)
File ~\anaconda3\envs\dbt-venv\lib\http\client.py:1079, in HTTPConnection._send_output(self, message_body, encode_chunked)
1077 chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
1078 + b'\r\n'
-> 1079 self.send(chunk)
1081 if encode_chunked and self._http_vsn == 11:
1082 # end chunked transfer
File ~\anaconda3\envs\dbt-venv\lib\http\client.py:1001, in HTTPConnection.send(self, data)
1000 try:
-> 1001 self.sock.sendall(data)
1002 except TypeError:
File ~\anaconda3\envs\dbt-venv\lib\ssl.py:1204, in SSLSocket.sendall(self, data, flags)
1203 while count < amount:
-> 1204 v = self.send(byte_view[count:])
1205 count += v
File ~\anaconda3\envs\dbt-venv\lib\ssl.py:1173, in SSLSocket.send(self, data, flags)
1170 raise ValueError(
1171 "non-zero flags not allowed in calls to send() on %s" %
1172 self.__class__)
-> 1173 return self._sslobj.write(data)
1174 else:
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
File ~\anaconda3\envs\dbt-venv\lib\site-packages\requests\adapters.py:440, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
439 if not chunked:
--> 440 resp = conn.urlopen(
441 method=request.method,
442 url=url,
443 body=request.body,
444 headers=request.headers,
445 redirect=False,
446 assert_same_host=False,
447 preload_content=False,
448 decode_content=False,
449 retries=self.max_retries,
450 timeout=timeout
451 )
453 # Send the request.
454 else:
File ~\anaconda3\envs\dbt-venv\lib\site-packages\urllib3\connectionpool.py:785, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
783 e = ProtocolError("Connection aborted.", e)
--> 785 retries = retries.increment(
786 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
787 )
788 retries.sleep()
File ~\anaconda3\envs\dbt-venv\lib\site-packages\urllib3\util\retry.py:550, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
549 if read is False or not self._is_method_retryable(method):
--> 550 raise six.reraise(type(error), error, _stacktrace)
551 elif read is not None:
File ~\anaconda3\envs\dbt-venv\lib\site-packages\urllib3\packages\six.py:769, in reraise(tp, value, tb)
768 if value.__traceback__ is not tb:
--> 769 raise value.with_traceback(tb)
770 raise value
File ~\anaconda3\envs\dbt-venv\lib\site-packages\urllib3\connectionpool.py:703, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
702 # Make the request on the httplib connection object.
--> 703 httplib_response = self._make_request(
704 conn,
705 method,
706 url,
707 timeout=timeout_obj,
708 body=body,
709 headers=headers,
710 chunked=chunked,
711 )
713 # If we're going to release the connection in ``finally:``, then
714 # the response doesn't need to know about the connection. Otherwise
715 # it will also try to release it and we'll have a double-release
716 # mess.
File ~\anaconda3\envs\dbt-venv\lib\site-packages\urllib3\connectionpool.py:398, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
397 else:
--> 398 conn.request(method, url, **httplib_request_kw)
400 # We are swallowing BrokenPipeError (errno.EPIPE) since the server is
401 # legitimately able to close the connection after sending a valid response.
402 # With this behaviour, the received response is still readable.
File ~\anaconda3\envs\dbt-venv\lib\site-packages\urllib3\connection.py:239, in HTTPConnection.request(self, method, url, body, headers)
238 headers["User-Agent"] = _get_default_user_agent()
--> 239 super(HTTPConnection, self).request(method, url, body=body, headers=headers)
File ~\anaconda3\envs\dbt-venv\lib\http\client.py:1285, in HTTPConnection.request(self, method, url, body, headers, encode_chunked)
1284 """Send a complete request to the server."""
-> 1285 self._send_request(method, url, body, headers, encode_chunked)
File ~\anaconda3\envs\dbt-venv\lib\http\client.py:1331, in HTTPConnection._send_request(self, method, url, body, headers, encode_chunked)
1330 body = _encode(body, 'body')
-> 1331 self.endheaders(body, encode_chunked=encode_chunked)
File ~\anaconda3\envs\dbt-venv\lib\http\client.py:1280, in HTTPConnection.endheaders(self, message_body, encode_chunked)
1279 raise CannotSendHeader()
-> 1280 self._send_output(message_body, encode_chunked=encode_chunked)
File ~\anaconda3\envs\dbt-venv\lib\http\client.py:1079, in HTTPConnection._send_output(self, message_body, encode_chunked)
1077 chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
1078 + b'\r\n'
-> 1079 self.send(chunk)
1081 if encode_chunked and self._http_vsn == 11:
1082 # end chunked transfer
File ~\anaconda3\envs\dbt-venv\lib\http\client.py:1001, in HTTPConnection.send(self, data)
1000 try:
-> 1001 self.sock.sendall(data)
1002 except TypeError:
File ~\anaconda3\envs\dbt-venv\lib\ssl.py:1204, in SSLSocket.sendall(self, data, flags)
1203 while count < amount:
-> 1204 v = self.send(byte_view[count:])
1205 count += v
File ~\anaconda3\envs\dbt-venv\lib\ssl.py:1173, in SSLSocket.send(self, data, flags)
1170 raise ValueError(
1171 "non-zero flags not allowed in calls to send() on %s" %
1172 self.__class__)
-> 1173 return self._sslobj.write(data)
1174 else:
ProtocolError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
File ~\anaconda3\envs\dbt-venv\lib\site-packages\google\api_core\retry.py:190, in retry_target(target, predicate, sleep_generator, deadline, on_error)
189 try:
--> 190 return target()
192 # pylint: disable=broad-except
193 # This function explicitly must deal with broad exceptions.
File ~\anaconda3\envs\dbt-venv\lib\site-packages\google\cloud\_http\__init__.py:482, in JSONConnection.api_request(self, method, path, query_params, data, content_type, headers, api_base_url, api_version, expect_json, _target_object, timeout, extra_api_info)
480 content_type = "application/json"
--> 482 response = self._make_request(
483 method=method,
484 url=url,
485 data=data,
486 content_type=content_type,
487 headers=headers,
488 target_object=_target_object,
489 timeout=timeout,
490 extra_api_info=extra_api_info,
491 )
493 if not 200 <= response.status_code < 300:
File ~\anaconda3\envs\dbt-venv\lib\site-packages\google\cloud\_http\__init__.py:341, in JSONConnection._make_request(self, method, url, data, content_type, headers, target_object, timeout, extra_api_info)
339 headers["User-Agent"] = self.user_agent
--> 341 return self._do_request(
342 method, url, headers, data, target_object, timeout=timeout
343 )
File ~\anaconda3\envs\dbt-venv\lib\site-packages\google\cloud\_http\__init__.py:379, in JSONConnection._do_request(self, method, url, headers, data, target_object, timeout)
348 """Low-level helper: perform the actual API request over HTTP.
349
350 Allows batch context managers to override and defer a request.
(...)
377 :returns: The HTTP response.
378 """
--> 379 return self.http.request(
380 url=url, method=method, headers=headers, data=data, timeout=timeout
381 )
File ~\anaconda3\envs\dbt-venv\lib\site-packages\google\auth\transport\requests.py:484, in AuthorizedSession.request(self, method, url, data, headers, max_allowed_time, timeout, **kwargs)
483 with TimeoutGuard(remaining_time) as guard:
--> 484 response = super(AuthorizedSession, self).request(
485 method,
486 url,
487 data=data,
488 headers=request_headers,
489 timeout=timeout,
490 **kwargs
491 )
492 remaining_time = guard.remaining_timeout
File ~\anaconda3\envs\dbt-venv\lib\site-packages\requests\sessions.py:529, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
528 send_kwargs.update(settings)
--> 529 resp = self.send(prep, **send_kwargs)
531 return resp
File ~\anaconda3\envs\dbt-venv\lib\site-packages\requests\sessions.py:645, in Session.send(self, request, **kwargs)
644 # Send the request
--> 645 r = adapter.send(request, **kwargs)
647 # Total elapsed time of the request (approximately)
File ~\anaconda3\envs\dbt-venv\lib\site-packages\requests\adapters.py:501, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
500 except (ProtocolError, socket.error) as err:
--> 501 raise ConnectionError(err, request=request)
503 except MaxRetryError as e:
ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))
The above exception was the direct cause of the following exception:
RetryError Traceback (most recent call last)
File ~\anaconda3\envs\dbt-venv\lib\site-packages\dbt\adapters\bigquery\connections.py:174, in BigQueryConnectionManager.exception_handler(self, sql)
173 try:
--> 174 yield
176 except google.cloud.exceptions.BadRequest as e:
File ~\anaconda3\envs\dbt-venv\lib\site-packages\dbt\adapters\bigquery\connections.py:549, in BigQueryConnectionManager._retry_and_handle(self, msg, conn, fn)
548 with self.exception_handler(msg):
--> 549 return retry.retry_target(
550 target=fn,
551 predicate=_ErrorCounter(self.get_retries(conn)).count_error,
552 sleep_generator=self._retry_generator(),
553 deadline=None,
554 on_error=reopen_conn_on_error)
File ~\anaconda3\envs\dbt-venv\lib\site-packages\google\api_core\retry.py:190, in retry_target(target, predicate, sleep_generator, deadline, on_error)
189 try:
--> 190 return target()
192 # pylint: disable=broad-except
193 # This function explicitly must deal with broad exceptions.
File ~\anaconda3\envs\dbt-venv\lib\site-packages\dbt\adapters\bigquery\connections.py:378, in BigQueryConnectionManager.raw_execute.<locals>.fn()
377 def fn():
--> 378 return self._query_and_results(client, sql, conn, job_params)
File ~\anaconda3\envs\dbt-venv\lib\site-packages\dbt\adapters\bigquery\connections.py:534, in BigQueryConnectionManager._query_and_results(self, client, sql, conn, job_params, timeout)
533 job_config = google.cloud.bigquery.QueryJobConfig(**job_params)
--> 534 query_job = client.query(sql, job_config=job_config)
535 iterator = query_job.result(timeout=timeout)
File ~\anaconda3\envs\dbt-venv\lib\site-packages\google\cloud\bigquery\client.py:3390, in Client.query(self, query, job_config, job_id, job_id_prefix, location, project, retry, timeout, job_retry)
3388 return query_job
-> 3390 future = do_query()
3391 # The future might be in a failed state now, but if it's
3392 # unrecoverable, we'll find out when we ask for it's result, at which
3393 # point, we may retry.
File ~\anaconda3\envs\dbt-venv\lib\site-packages\google\cloud\bigquery\client.py:3367, in Client.query.<locals>.do_query()
3366 try:
-> 3367 query_job._begin(retry=retry, timeout=timeout)
3368 except core_exceptions.Conflict as create_exc:
3369 # The thought is if someone is providing their own job IDs and they get
3370 # their job ID generation wrong, this could end up returning results for
3371 # the wrong query. We thus only try to recover if job ID was not given.
File ~\anaconda3\envs\dbt-venv\lib\site-packages\google\cloud\bigquery\job\query.py:1298, in QueryJob._begin(self, client, retry, timeout)
1297 try:
-> 1298 super(QueryJob, self)._begin(client=client, retry=retry, timeout=timeout)
1299 except exceptions.GoogleAPICallError as exc:
File ~\anaconda3\envs\dbt-venv\lib\site-packages\google\cloud\bigquery\job\base.py:510, in _AsyncJob._begin(self, client, retry, timeout)
509 span_attributes = {"path": path}
--> 510 api_response = client._call_api(
511 retry,
512 span_name="BigQuery.job.begin",
513 span_attributes=span_attributes,
514 job_ref=self,
515 method="POST",
516 path=path,
517 data=self.to_api_repr(),
518 timeout=timeout,
519 )
520 self._set_properties(api_response)
File ~\anaconda3\envs\dbt-venv\lib\site-packages\google\cloud\bigquery\client.py:782, in Client._call_api(self, retry, span_name, span_attributes, job_ref, headers, **kwargs)
779 with create_span(
780 name=span_name, attributes=span_attributes, client=self, job_ref=job_ref
781 ):
--> 782 return call()
784 return call()
File ~\anaconda3\envs\dbt-venv\lib\site-packages\google\api_core\retry.py:283, in Retry.__call__.<locals>.retry_wrapped_func(*args, **kwargs)
280 sleep_generator = exponential_sleep_generator(
281 self._initial, self._maximum, multiplier=self._multiplier
282 )
--> 283 return retry_target(
284 target,
285 self._predicate,
286 sleep_generator,
287 self._deadline,
288 on_error=on_error,
289 )
File ~\anaconda3\envs\dbt-venv\lib\site-packages\google\api_core\retry.py:205, in retry_target(target, predicate, sleep_generator, deadline, on_error)
204 if deadline_datetime <= now:
--> 205 raise exceptions.RetryError(
206 "Deadline of {:.1f}s exceeded while calling target function".format(
207 deadline
208 ),
209 last_exc,
210 ) from last_exc
211 else:
RetryError: Deadline of 600.0s exceeded while calling target function, last exception: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))
During handling of the above exception, another exception occurred:
RuntimeException Traceback (most recent call last)
c:\Users\new user\Documents\dbt\dbt-models\fal_scripts\clustering.ipynb Cell 3' in <cell line: 1>()
----> 1 faldbt.write_to_model(df,'fct_superapp_clustering', mode='overwrite')
File ~\anaconda3\envs\dbt-venv\lib\site-packages\fal\telemetry\telemetry.py:338, in log_call.<locals>._log_call.<locals>.wrapper(*func_args, **func_kwargs)
335 start = datetime.datetime.now()
337 try:
--> 338 result = func(*func_args, **func_kwargs)
339 except Exception as e:
340 log_api(
341 action=f"{action}_error",
342 total_runtime=str(datetime.datetime.now() - start),
(...)
347 },
348 )
File ~\anaconda3\envs\dbt-venv\lib\site-packages\faldbt\project.py:506, in FalDbt.write_to_model(self, data, target_model_name, target_package_name, dtype, mode)
496 lib.write_target(
497 data,
498 self.project_dir,
(...)
502 profile_target=self._profile_target,
503 )
505 elif mode.lower().strip() == WriteToSourceModeEnum.OVERWRITE.value:
--> 506 lib.overwrite_target(
507 data,
508 self.project_dir,
509 self.profiles_dir,
510 target_model,
511 dtype,
512 profile_target=self._profile_target,
513 )
515 else:
516 raise Exception(f"write_to_model mode `{mode}` not supported")
File ~\anaconda3\envs\dbt-venv\lib\site-packages\faldbt\lib.py:185, in overwrite_target(data, project_dir, profiles_dir, target, dtype, profile_target)
179 relation = _build_table_from_target(target)
181 temporal_relation = _build_table_from_parts(
182 relation.database, relation.schema, f"{relation.identifier}__f__"
183 )
--> 185 results = _write_relation(
186 data,
187 project_dir,
188 profiles_dir,
189 temporal_relation,
190 dtype,
191 profile_target=profile_target,
192 )
193 try:
194 _replace_relation(
195 project_dir,
196 profiles_dir,
(...)
199 profile_target=profile_target,
200 )
File ~\anaconda3\envs\dbt-venv\lib\site-packages\faldbt\lib.py:265, in _write_relation(data, project_dir, profiles_dir, relation, dtype, profile_target)
259 _clean_cache(project_dir, profiles_dir, profile_target=profile_target)
261 insert_stmt = Insert(alchemy_table, values=row_dicts).compile(
262 bind=engine, compile_kwargs={"literal_binds": True}
263 )
--> 265 _, result = _execute_sql(
266 project_dir,
267 profiles_dir,
268 six.text_type(insert_stmt).strip(),
269 profile_target=profile_target,
270 )
271 return result
File ~\anaconda3\envs\dbt-venv\lib\site-packages\faldbt\lib.py:88, in _execute_sql(project_dir, profiles_dir, sql, profile_target)
86 result = None
87 with adapter.connection_named(name):
---> 88 response, execute_result = adapter.execute(sql, auto_begin=True, fetch=True)
90 table = ResultTable(
91 column_names=list(execute_result.column_names),
92 rows=[list(row) for row in execute_result],
93 )
95 result = RemoteRunResult(
96 raw_sql=sql,
97 compiled_sql=sql,
(...)
102 generated_at=datetime.utcnow(),
103 )
File ~\anaconda3\envs\dbt-venv\lib\site-packages\dbt\adapters\base\impl.py:225, in BaseAdapter.execute(self, sql, auto_begin, fetch)
211 @available.parse(lambda *a, **k: ('', empty_table()))
212 def execute(
213 self, sql: str, auto_begin: bool = False, fetch: bool = False
214 ) -> Tuple[Union[str, AdapterResponse], agate.Table]:
215 """Execute the given SQL. This is a thin wrapper around
216 ConnectionManager.execute.
217
(...)
223 :rtype: Tuple[Union[str, AdapterResponse], agate.Table]
224 """
--> 225 return self.connections.execute(
226 sql=sql,
227 auto_begin=auto_begin,
228 fetch=fetch
229 )
File ~\anaconda3\envs\dbt-venv\lib\site-packages\dbt\adapters\bigquery\connections.py:389, in BigQueryConnectionManager.execute(self, sql, auto_begin, fetch)
387 sql = self._add_query_comment(sql)
388 # auto_begin is ignored on bigquery, and only included for consistency
--> 389 query_job, iterator = self.raw_execute(sql, fetch=fetch)
391 if fetch:
392 table = self.get_table_from_response(iterator)
File ~\anaconda3\envs\dbt-venv\lib\site-packages\dbt\adapters\bigquery\connections.py:380, in BigQueryConnectionManager.raw_execute(self, sql, fetch, use_legacy_sql)
377 def fn():
378 return self._query_and_results(client, sql, conn, job_params)
--> 380 query_job, iterator = self._retry_and_handle(msg=sql, conn=conn, fn=fn)
382 return query_job, iterator
File ~\anaconda3\envs\dbt-venv\lib\site-packages\dbt\adapters\bigquery\connections.py:549, in BigQueryConnectionManager._retry_and_handle(self, msg, conn, fn)
546 return
548 with self.exception_handler(msg):
--> 549 return retry.retry_target(
550 target=fn,
551 predicate=_ErrorCounter(self.get_retries(conn)).count_error,
552 sleep_generator=self._retry_generator(),
553 deadline=None,
554 on_error=reopen_conn_on_error)
File ~\anaconda3\envs\dbt-venv\lib\contextlib.py:137, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
135 value = typ()
136 try:
--> 137 self.gen.throw(typ, value, traceback)
138 except StopIteration as exc:
139 # Suppress StopIteration *unless* it's the same exception that
140 # was passed to throw(). This prevents a StopIteration
141 # raised inside the "with" statement from being suppressed.
142 return exc is not value
File ~\anaconda3\envs\dbt-venv\lib\site-packages\dbt\adapters\bigquery\connections.py:206, in BigQueryConnectionManager.exception_handler(self, sql)
204 if BQ_QUERY_JOB_SPLIT in exc_message:
205 exc_message = exc_message.split(BQ_QUERY_JOB_SPLIT)[0].strip()
--> 206 raise RuntimeException(exc_message)
RuntimeException: Runtime Error
Deadline of 600.0s exceeded while calling target function, last exception: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))