Description
Pandas version checks
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
import pandas as pd
# Six consecutive daily timestamps: even positions are used as interval
# starts, odd positions as interval stops.
vals = list(
    map(
        pd.Timestamp,
        [
            "2020-12-07",
            "2020-12-08",
            "2020-12-09",
            "2020-12-10",
            "2020-12-11",
            "2020-12-12",
        ],
    )
)

example = pd.DataFrame({"start": vals[::2], "stop": vals[1::2]})

# Combine each row's start/stop pair into a single pd.Interval value.
example["interval"] = example.apply(
    lambda row: pd.Interval(row["start"], row["stop"]),
    axis=1,
)

# Round-trip through parquet: per this report the write succeeds and the
# read back is what raises.
example.to_parquet("example.prqt", engine="pyarrow")
df = pd.read_parquet("example.prqt")
Issue Description
pandas successfully writes a DataFrame with a column of dtype interval[datetime64]
either to a parquet file or to a pyarrow table, but cannot read it back.
When a numeric dtype is used instead (for example, `vals = list(range(6))`), `to_parquet` and `read_parquet` work as intended.
Probably related to #43689 and #40652.
Obtained error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/tmp/ipykernel_71061/1505077241.py in <module>
----> 1 df = pd.read_parquet('example.prqt')
~/anaconda3/envs/pandas_issue/lib/python3.8/site-packages/pandas/io/parquet.py in read_parquet(path, engine, columns, storage_options, use_nullable_dtypes, **kwargs)
491 impl = get_engine(engine)
492
--> 493 return impl.read(
494 path,
495 columns=columns,
~/anaconda3/envs/pandas_issue/lib/python3.8/site-packages/pandas/io/parquet.py in read(self, path, columns, use_nullable_dtypes, storage_options, **kwargs)
238 )
239 try:
--> 240 result = self.api.parquet.read_table(
241 path_or_handle, columns=columns, **kwargs
242 ).to_pandas(**to_pandas_kwargs)
~/anaconda3/envs/pandas_issue/lib/python3.8/site-packages/pyarrow/array.pxi in pyarrow.lib._PandasConvertible.to_pandas()
~/anaconda3/envs/pandas_issue/lib/python3.8/site-packages/pyarrow/table.pxi in pyarrow.lib.Table._to_pandas()
~/anaconda3/envs/pandas_issue/lib/python3.8/site-packages/pyarrow/pandas_compat.py in table_to_blockmanager(options, table, categories, ignore_metadata, types_mapper)
787 _check_data_column_metadata_consistency(all_columns)
788 columns = _deserialize_column_index(table, all_columns, column_indexes)
--> 789 blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
790
791 axes = [columns, index]
~/anaconda3/envs/pandas_issue/lib/python3.8/site-packages/pyarrow/pandas_compat.py in _table_to_blocks(options, block_table, categories, extension_columns)
1135 result = pa.lib.table_to_blocks(options, block_table, categories,
1136 list(extension_columns.keys()))
-> 1137 return [_reconstruct_block(item, columns, extension_columns)
1138 for item in result]
1139
~/anaconda3/envs/pandas_issue/lib/python3.8/site-packages/pyarrow/pandas_compat.py in <listcomp>(.0)
1135 result = pa.lib.table_to_blocks(options, block_table, categories,
1136 list(extension_columns.keys()))
-> 1137 return [_reconstruct_block(item, columns, extension_columns)
1138 for item in result]
1139
~/anaconda3/envs/pandas_issue/lib/python3.8/site-packages/pyarrow/pandas_compat.py in _reconstruct_block(item, columns, extension_columns)
747 raise ValueError("This column does not support to be converted "
748 "to a pandas ExtensionArray")
--> 749 pd_ext_arr = pandas_dtype.__from_arrow__(arr)
750 block = _int.make_block(pd_ext_arr, placement=placement)
751 else:
~/anaconda3/envs/pandas_issue/lib/python3.8/site-packages/pandas/core/dtypes/dtypes.py in __from_arrow__(self, array)
1240 results = []
1241 for arr in chunks:
-> 1242 left = np.asarray(arr.storage.field("left"), dtype=self.subtype)
1243 right = np.asarray(arr.storage.field("right"), dtype=self.subtype)
1244 iarr = IntervalArray.from_arrays(left, right, closed=array.type.closed)
AttributeError: 'pyarrow.lib.StructArray' object has no attribute 'storage'
Expected Behavior
It should be possible to read columns of dtype interval[datetime64]
either from a parquet file or from a pyarrow table that were created with pandas in the first place.
Installed Versions
INSTALLED VERSIONS
commit : bb1f651
python : 3.8.11.final.0
python-bits : 64
OS : Linux
OS-release : 5.13.0-27-generic
Version : #29~20.04.1-Ubuntu SMP Fri Jan 14 00:32:30 UTC 2022
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : en_US.UTF-8
LOCALE : en_US.UTF-8
pandas : 1.4.0
numpy : 1.22.2
pytz : 2021.3
dateutil : 2.8.2
pip : 21.2.4
setuptools : 58.0.4
Cython : None
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : None
IPython : 7.31.1
pandas_datareader: None
bs4 : None
bottleneck : None
fastparquet : None
fsspec : None
gcsfs : None
matplotlib : None
numba : None
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : 7.0.0
pyreadstat : None
pyxlsb : None
s3fs : None
scipy : None
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : None
xlwt : None
zstandard : None