Comments (5)
Can you post the full traceback? I don't think that issue is coming from the pantab codebase. It would be good to see the root cause.
from pantab.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/arrays/datetimes.py:2236, in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object, allow_mixed)
2235 try:
-> 2236 values, tz_parsed = conversion.datetime_to_datetime64(data.ravel("K"))
2237 # If tzaware, these values represent unix timestamps, so we
2238 # return them as i8 to distinguish from wall times
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/_libs/tslibs/conversion.pyx:360, in pandas._libs.tslibs.conversion.datetime_to_datetime64()
TypeError: Unrecognized value type: <class 'datetime.date'>
During handling of the above exception, another exception occurred:
OutOfBoundsDatetime Traceback (most recent call last)
Input In [3], in <cell line: 3>()
1 filename = "/Users/fiore/Downloads/DIS ENT - OU Activity Indicator Results (All Data)_09062022.hyper"
2 #filename = "/Users/fiore/Downloads/DIS ENT - OU Activity Indicator Results (All Data)_11012022.hyper"
----> 3 df = pantab.frames_from_hyper(filename)
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pantab/_reader.py:136, in frames_from_hyper(source, hyper_process, use_float_na)
134 for schema in connection.catalog.get_schema_names():
135 for table in connection.catalog.get_table_names(schema=schema):
--> 136 result[table] = _read_table(
137 connection=connection,
138 table=table,
139 use_float_na=use_float_na,
140 )
142 return result
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pantab/_reader.py:85, in _read_table(connection, table, use_float_na)
83 query = f"SELECT * from {table}"
84 with connection.execute_query(query) as result:
---> 85 return _read_query_result(result, dtypes, use_float_na)
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pantab/_reader.py:57, in _read_query_result(result, dtypes, use_float_na)
54 if v == "date":
55 dtypes[k] = "datetime64[ns]"
---> 57 df = df.astype(dtypes)
58 df = df.fillna(value=np.nan) # Replace any appearances of None
60 return df
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/generic.py:5898, in NDFrame.astype(self, dtype, copy, errors)
5896 res_col = col.copy() if copy else col
5897 else:
-> 5898 res_col = col.astype(dtype=cdt, copy=copy, errors=errors)
5899 results.append(res_col)
5901 elif is_extension_array_dtype(dtype) and self.ndim > 1:
5902 # GH 18099/22869: columnwise conversion to extension dtype
5903 # GH 24704: use iloc to handle duplicate column names
5904 # TODO(EA2D): special case not needed with 2D EAs
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/generic.py:5912, in NDFrame.astype(self, dtype, copy, errors)
5905 results = [
5906 self.iloc[:, i].astype(dtype, copy=copy)
5907 for i in range(len(self.columns))
5908 ]
5910 else:
5911 # else, only a single dtype is given
-> 5912 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
5913 return self._constructor(new_data).__finalize__(self, method="astype")
5915 # GH 33113: handle empty frame or series
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/internals/managers.py:419, in BaseBlockManager.astype(self, dtype, copy, errors)
418 def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
--> 419 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/internals/managers.py:304, in BaseBlockManager.apply(self, f, align_keys, ignore_failures, **kwargs)
302 applied = b.apply(f, **kwargs)
303 else:
--> 304 applied = getattr(b, f)(**kwargs)
305 except (TypeError, NotImplementedError):
306 if not ignore_failures:
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/internals/blocks.py:580, in Block.astype(self, dtype, copy, errors)
562 """
563 Coerce to the new dtype.
564
(...)
576 Block
577 """
578 values = self.values
--> 580 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
582 new_values = maybe_coerce_values(new_values)
583 newb = self.make_block(new_values)
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:1292, in astype_array_safe(values, dtype, copy, errors)
1289 dtype = dtype.numpy_dtype
1291 try:
-> 1292 new_values = astype_array(values, dtype, copy=copy)
1293 except (ValueError, TypeError):
1294 # e.g. astype_nansafe can fail on object-dtype of strings
1295 # trying to convert to float
1296 if errors == "ignore":
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:1237, in astype_array(values, dtype, copy)
1234 values = values.astype(dtype, copy=copy)
1236 else:
-> 1237 values = astype_nansafe(values, dtype, copy=copy)
1239 # in pandas we don't store numpy str dtypes, so convert to object
1240 if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str):
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:1163, in astype_nansafe(arr, dtype, copy, skipna)
1159 elif is_datetime64_dtype(dtype):
1160 from pandas import to_datetime
1162 return astype_nansafe(
-> 1163 to_datetime(arr).values,
1164 dtype,
1165 copy=copy,
1166 )
1167 elif is_timedelta64_dtype(dtype):
1168 from pandas import to_timedelta
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/tools/datetimes.py:1063, in to_datetime(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache)
1061 elif is_list_like(arg):
1062 try:
-> 1063 cache_array = _maybe_cache(arg, format, cache, convert_listlike)
1064 except OutOfBoundsDatetime:
1065 # caching attempts to create a DatetimeIndex, which may raise
1066 # an OOB. If that's the desired behavior, then just reraise...
1067 if errors == "raise":
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/tools/datetimes.py:197, in _maybe_cache(arg, format, cache, convert_listlike)
195 unique_dates = unique(arg)
196 if len(unique_dates) < len(arg):
--> 197 cache_dates = convert_listlike(unique_dates, format)
198 cache_array = Series(cache_dates, index=unique_dates)
199 # GH#39882 and GH#35888 in case of None and NaT we get duplicates
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/tools/datetimes.py:402, in _convert_listlike_datetimes(arg, format, name, tz, unit, errors, infer_datetime_format, dayfirst, yearfirst, exact)
400 assert format is None or infer_datetime_format
401 utc = tz == "utc"
--> 402 result, tz_parsed = objects_to_datetime64ns(
403 arg,
404 dayfirst=dayfirst,
405 yearfirst=yearfirst,
406 utc=utc,
407 errors=errors,
408 require_iso8601=require_iso8601,
409 allow_object=True,
410 )
412 if tz_parsed is not None:
413 # We can take a shortcut since the datetime64 numpy array
414 # is in UTC
415 dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/arrays/datetimes.py:2242, in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object, allow_mixed)
2240 return values.view("i8"), tz_parsed
2241 except (ValueError, TypeError):
-> 2242 raise err
2244 if tz_parsed is not None:
2245 # We can take a shortcut since the datetime64 numpy array
2246 # is in UTC
2247 # Return i8 values to denote unix timestamps
2248 return result.view("i8"), tz_parsed
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/core/arrays/datetimes.py:2224, in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object, allow_mixed)
2222 order: Literal["F", "C"] = "F" if flags.f_contiguous else "C"
2223 try:
-> 2224 result, tz_parsed = tslib.array_to_datetime(
2225 data.ravel("K"),
2226 errors=errors,
2227 utc=utc,
2228 dayfirst=dayfirst,
2229 yearfirst=yearfirst,
2230 require_iso8601=require_iso8601,
2231 allow_mixed=allow_mixed,
2232 )
2233 result = result.reshape(data.shape, order=order)
2234 except ValueError as err:
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/_libs/tslib.pyx:381, in pandas._libs.tslib.array_to_datetime()
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/_libs/tslib.pyx:608, in pandas._libs.tslib.array_to_datetime()
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/_libs/tslib.pyx:604, in pandas._libs.tslib.array_to_datetime()
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/_libs/tslib.pyx:481, in pandas._libs.tslib.array_to_datetime()
File ~/opt/anaconda3/envs/main/lib/python3.9/site-packages/pandas/_libs/tslibs/np_datetime.pyx:120, in pandas._libs.tslibs.np_datetime.check_dts_bounds()
OutOfBoundsDatetime: Out of bounds nanosecond timestamp: 9999-12-31 00:00:00
from pantab.
Great, thanks. So it looks like pantab/_reader.py:57 is where this all happens internally.
I think we could add a new keyword like datetime_errors
that gets passed to pandas at that point. Want to try a PR to add this?
from pantab.
Hi,
Is anyone by chance working on this topic? I have the same issue.
Cheers Pascal
from pantab.
This should be resolved with the pantab 4.0 release, which uses the Arrow array format to store dates and therefore does not have the same limitations on date ranges that pandas historically had.
from pantab.
Related Issues (20)
- RuntimeError: module compiled against API version 0x10 but this version of numpy is 0xe .
- Use the DataFrame Protocol instead of pandas internals HOT 1
- Remove pandas 1.2 compat code HOT 2
- Unsupported Nullable Data Type VARCHAR(500) when Reading From Hyper HOT 6
- ModuleNotFoundError: No module named 'pantab' with version 3.0.3 HOT 4
- pantab 4.0 checklist HOT 2
- 4.X reader regressions HOT 2
- Segmentation Fault Error with frame_to_hyper Method HOT 4
- Polars Time Conversion Fails HOT 1
- Segfault on writes with pandas 2.2
- Remove Dependency on tableauhyperapi HOT 2
- Free memory after inserting data HOT 8
- Add support for writing INT8 values HOT 1
- RecordBatchReader to_hyper yields Process finished with exit code HOT 1
- `Date32` dates are off when written to hyper file HOT 2
- Use Arrow Name instead of enum value
- Failure to import 4.x HOT 7
- 4.1 release
- Make frames_from_hyper not generate hyperd.log HOT 1
- Old version installed HOT 1
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Something interesting about the web. A new door to the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Something interesting about visualization: using data as art.
-
Game
Something interesting about games, to make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
Tencent's open source team in China.
from pantab.