I'm trying to read a .hyper file using `pantab.frames_from_hyper`, but it raises an OutOfBoundsDatetime error.

<div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clip

OutOfBoundsDatetime Error about pantab HOT 5 CLOSED

nejohnson2 commented on June 10, 2024

OutOfBoundsDatetime Error

from pantab.

Comments (5)

WillAyd commented on June 10, 2024

Can you post the full traceback? I don't think that issue is coming from the pantab codebase. It would be good to see the root cause.

from pantab.

nejohnson2 commented on June 10, 2024

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/arrays/datetimes.py:2236, in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object, allow_mixed)
   2235 try:
-> 2236     values, tz_parsed = conversion.datetime_to_datetime64(data.ravel("K"))
   2237     # If tzaware, these values represent unix timestamps, so we
   2238     #  return them as i8 to distinguish from wall times

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/_libs/tslibs/conversion.pyx:360, in pandas._libs.tslibs.conversion.datetime_to_datetime64()

TypeError: Unrecognized value type: <class 'datetime.date'>

During handling of the above exception, another exception occurred:

OutOfBoundsDatetime                       Traceback (most recent call last)
Input In [3], in <cell line: 3>()
      1 filename = "/Users/fiore/Downloads/DIS ENT - OU Activity Indicator Results (All Data)_09062022.hyper"
      2 #filename = "/Users/fiore/Downloads/DIS ENT - OU Activity Indicator Results (All Data)_11012022.hyper"
----> 3 df = pantab.frames_from_hyper(filename)

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pantab/_reader.py:136, in frames_from_hyper(source, hyper_process, use_float_na)
    134             for schema in connection.catalog.get_schema_names():
    135                 for table in connection.catalog.get_table_names(schema=schema):
--> 136                     result[table] = _read_table(
    137                         connection=connection,
    138                         table=table,
    139                         use_float_na=use_float_na,
    140                     )
    142 return result

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pantab/_reader.py:85, in _read_table(connection, table, use_float_na)
     83 query = f"SELECT * from {table}"
     84 with connection.execute_query(query) as result:
---> 85     return _read_query_result(result, dtypes, use_float_na)

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pantab/_reader.py:57, in _read_query_result(result, dtypes, use_float_na)
     54     if v == "date":
     55         dtypes[k] = "datetime64[ns]"
---> 57 df = df.astype(dtypes)
     58 df = df.fillna(value=np.nan)  # Replace any appearances of None
     60 return df

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/generic.py:5898, in NDFrame.astype(self, dtype, copy, errors)
   5896             res_col = col.copy() if copy else col
   5897         else:
-> 5898             res_col = col.astype(dtype=cdt, copy=copy, errors=errors)
   5899         results.append(res_col)
   5901 elif is_extension_array_dtype(dtype) and self.ndim > 1:
   5902     # GH 18099/22869: columnwise conversion to extension dtype
   5903     # GH 24704: use iloc to handle duplicate column names
   5904     # TODO(EA2D): special case not needed with 2D EAs

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/generic.py:5912, in NDFrame.astype(self, dtype, copy, errors)
   5905     results = [
   5906         self.iloc[:, i].astype(dtype, copy=copy)
   5907         for i in range(len(self.columns))
   5908     ]
   5910 else:
   5911     # else, only a single dtype is given
-> 5912     new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
   5913     return self._constructor(new_data).__finalize__(self, method="astype")
   5915 # GH 33113: handle empty frame or series

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/internals/managers.py:419, in BaseBlockManager.astype(self, dtype, copy, errors)
    418 def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
--> 419     return self.apply("astype", dtype=dtype, copy=copy, errors=errors)

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/internals/managers.py:304, in BaseBlockManager.apply(self, f, align_keys, ignore_failures, **kwargs)
    302         applied = b.apply(f, **kwargs)
    303     else:
--> 304         applied = getattr(b, f)(**kwargs)
    305 except (TypeError, NotImplementedError):
    306     if not ignore_failures:

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/internals/blocks.py:580, in Block.astype(self, dtype, copy, errors)
    562 """
    563 Coerce to the new dtype.
    564 
   (...)
    576 Block
    577 """
    578 values = self.values
--> 580 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
    582 new_values = maybe_coerce_values(new_values)
    583 newb = self.make_block(new_values)

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:1292, in astype_array_safe(values, dtype, copy, errors)
   1289     dtype = dtype.numpy_dtype
   1291 try:
-> 1292     new_values = astype_array(values, dtype, copy=copy)
   1293 except (ValueError, TypeError):
   1294     # e.g. astype_nansafe can fail on object-dtype of strings
   1295     #  trying to convert to float
   1296     if errors == "ignore":

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:1237, in astype_array(values, dtype, copy)
   1234     values = values.astype(dtype, copy=copy)
   1236 else:
-> 1237     values = astype_nansafe(values, dtype, copy=copy)
   1239 # in pandas we don't store numpy str dtypes, so convert to object
   1240 if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str):

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:1163, in astype_nansafe(arr, dtype, copy, skipna)
   1159 elif is_datetime64_dtype(dtype):
   1160     from pandas import to_datetime
   1162     return astype_nansafe(
-> 1163         to_datetime(arr).values,
   1164         dtype,
   1165         copy=copy,
   1166     )
   1167 elif is_timedelta64_dtype(dtype):
   1168     from pandas import to_timedelta

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/tools/datetimes.py:1063, in to_datetime(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache)
   1061 elif is_list_like(arg):
   1062     try:
-> 1063         cache_array = _maybe_cache(arg, format, cache, convert_listlike)
   1064     except OutOfBoundsDatetime:
   1065         # caching attempts to create a DatetimeIndex, which may raise
   1066         # an OOB. If that's the desired behavior, then just reraise...
   1067         if errors == "raise":

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/tools/datetimes.py:197, in _maybe_cache(arg, format, cache, convert_listlike)
    195 unique_dates = unique(arg)
    196 if len(unique_dates) < len(arg):
--> 197     cache_dates = convert_listlike(unique_dates, format)
    198     cache_array = Series(cache_dates, index=unique_dates)
    199     # GH#39882 and GH#35888 in case of None and NaT we get duplicates

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/tools/datetimes.py:402, in _convert_listlike_datetimes(arg, format, name, tz, unit, errors, infer_datetime_format, dayfirst, yearfirst, exact)
    400 assert format is None or infer_datetime_format
    401 utc = tz == "utc"
--> 402 result, tz_parsed = objects_to_datetime64ns(
    403     arg,
    404     dayfirst=dayfirst,
    405     yearfirst=yearfirst,
    406     utc=utc,
    407     errors=errors,
    408     require_iso8601=require_iso8601,
    409     allow_object=True,
    410 )
    412 if tz_parsed is not None:
    413     # We can take a shortcut since the datetime64 numpy array
    414     # is in UTC
    415     dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/arrays/datetimes.py:2242, in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object, allow_mixed)
   2240         return values.view("i8"), tz_parsed
   2241     except (ValueError, TypeError):
-> 2242         raise err
   2244 if tz_parsed is not None:
   2245     # We can take a shortcut since the datetime64 numpy array
   2246     #  is in UTC
   2247     # Return i8 values to denote unix timestamps
   2248     return result.view("i8"), tz_parsed

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/arrays/datetimes.py:2224, in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object, allow_mixed)
   2222 order: Literal["F", "C"] = "F" if flags.f_contiguous else "C"
   2223 try:
-> 2224     result, tz_parsed = tslib.array_to_datetime(
   2225         data.ravel("K"),
   2226         errors=errors,
   2227         utc=utc,
   2228         dayfirst=dayfirst,
   2229         yearfirst=yearfirst,
   2230         require_iso8601=require_iso8601,
   2231         allow_mixed=allow_mixed,
   2232     )
   2233     result = result.reshape(data.shape, order=order)
   2234 except ValueError as err:

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/_libs/tslib.pyx:381, in pandas._libs.tslib.array_to_datetime()

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/_libs/tslib.pyx:608, in pandas._libs.tslib.array_to_datetime()

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/_libs/tslib.pyx:604, in pandas._libs.tslib.array_to_datetime()

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/_libs/tslib.pyx:481, in pandas._libs.tslib.array_to_datetime()

File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/_libs/tslibs/np_datetime.pyx:120, in pandas._libs.tslibs.np_datetime.check_dts_bounds()

OutOfBoundsDatetime: Out of bounds nanosecond timestamp: 9999-12-31 00:00:00

from pantab.

WillAyd commented on June 10, 2024

Great thanks. So looks like pantab/_reader.py:57 is where this all happens internally.

I think we could add a new keyword like datetime_errors that gets passed to pandas at that point. Want to try a PR to add this?

from pantab.

pdillens commented on June 10, 2024

Hi,
Is anyone by chance working on this topic? I have the same issue.

Cheers Pascal

from pantab.

WillAyd commented on June 10, 2024

This should be resolved with the pantab 4.0 release, which uses the Arrow array format to store dates and therefore does not have the same limitations on date ranges that pandas historically had.

from pantab.

OutOfBoundsDatetime Error about pantab HOT 5 CLOSED

Comments (5)

Related Issues (20)

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent

Jobs