Closed
Description
Pandas version checks
-
I have checked that this issue has not already been reported.
-
I have confirmed this bug exists on the latest version of pandas.
-
I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
import pandas as pd
import numpy as np
from scipy.spatial import Delaunay
n_verts = 10
pts = np.random.randint(1, n_verts, (n_verts, 2))
tris = Delaunay(pts)
A = pd.DataFrame(tris.simplices)
B = pd.DataFrame(pts)
pd.merge(A, B, left_on=[0], right_on=[0])
Issue Description
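The example raises a KeyError on Windows with pandas 2.0.0: the key column built from tris.simplices has dtype numpy.intc, which is missing from the _factorizers lookup used by merge (see the traceback below).
The example works if I cast the array to int first: A = pd.DataFrame(tris.simplices.astype(int))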
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[26], line 3
1 A = pd.DataFrame(tris.simplices)
2 B = pd.DataFrame(pts)
----> 3 pd.merge(A, B, left_on=[0], right_on=[0])
File ~\AppData\Local\mambaforge\envs\tmp\Lib\site-packages\pandas\core\reshape\merge.py:156, in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
125 @Substitution("\nleft : DataFrame or named Series")
126 @Appender(_merge_doc, indents=0)
127 def merge(
(...)
140 validate: str | None = None,
141 ) -> DataFrame:
142 op = _MergeOperation(
143 left,
144 right,
(...)
154 validate=validate,
155 )
--> 156 return op.get_result(copy=copy)
File ~\AppData\Local\mambaforge\envs\tmp\Lib\site-packages\pandas\core\reshape\merge.py:803, in _MergeOperation.get_result(self, copy)
800 if self.indicator:
801 self.left, self.right = self._indicator_pre_merge(self.left, self.right)
--> 803 join_index, left_indexer, right_indexer = self._get_join_info()
805 result = self._reindex_and_concat(
806 join_index, left_indexer, right_indexer, copy=copy
807 )
808 result = result.__finalize__(self, method=self._merge_type)
File ~\AppData\Local\mambaforge\envs\tmp\Lib\site-packages\pandas\core\reshape\merge.py:1051, in _MergeOperation._get_join_info(self)
1047 join_index, right_indexer, left_indexer = _left_join_on_index(
1048 right_ax, left_ax, self.right_join_keys, sort=self.sort
1049 )
1050 else:
-> 1051 (left_indexer, right_indexer) = self._get_join_indexers()
1053 if self.right_index:
1054 if len(self.left) > 0:
File ~\AppData\Local\mambaforge\envs\tmp\Lib\site-packages\pandas\core\reshape\merge.py:1024, in _MergeOperation._get_join_indexers(self)
1022 def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
1023 """return the join indexers"""
-> 1024 return get_join_indexers(
1025 self.left_join_keys, self.right_join_keys, sort=self.sort, how=self.how
1026 )
File ~\AppData\Local\mambaforge\envs\tmp\Lib\site-packages\pandas\core\reshape\merge.py:1645, in get_join_indexers(left_keys, right_keys, sort, how, **kwargs)
1640 # get left & right join labels and num. of levels at each location
1641 mapped = (
1642 _factorize_keys(left_keys[n], right_keys[n], sort=sort, how=how)
1643 for n in range(len(left_keys))
1644 )
-> 1645 zipped = zip(*mapped)
1646 llab, rlab, shape = (list(x) for x in zipped)
1648 # get flat i8 keys from label lists
File ~\AppData\Local\mambaforge\envs\tmp\Lib\site-packages\pandas\core\reshape\merge.py:1642, in <genexpr>(.0)
1638 return _get_no_sort_one_missing_indexer(left_n, False)
1640 # get left & right join labels and num. of levels at each location
1641 mapped = (
-> 1642 _factorize_keys(left_keys[n], right_keys[n], sort=sort, how=how)
1643 for n in range(len(left_keys))
1644 )
1645 zipped = zip(*mapped)
1646 llab, rlab, shape = (list(x) for x in zipped)
File ~\AppData\Local\mambaforge\envs\tmp\Lib\site-packages\pandas\core\reshape\merge.py:2382, in _factorize_keys(lk, rk, sort, how)
2378 # error: Item "ndarray" of "Union[Any, ndarray]" has no attribute
2379 # "_values_for_factorize"
2380 rk, _ = rk._values_for_factorize() # type: ignore[union-attr]
-> 2382 klass, lk, rk = _convert_arrays_and_get_rizer_klass(lk, rk)
2384 rizer = klass(max(len(lk), len(rk)))
2386 if isinstance(lk, BaseMaskedArray):
File ~\AppData\Local\mambaforge\envs\tmp\Lib\site-packages\pandas\core\reshape\merge.py:2449, in _convert_arrays_and_get_rizer_klass(lk, rk)
2447 klass = _factorizers[lk.dtype.type] # type: ignore[index]
2448 else:
-> 2449 klass = _factorizers[lk.dtype.type]
2451 else:
2452 klass = libhashtable.ObjectFactorizer
KeyError: <class 'numpy.intc'>
Expected Behavior
No KeyError is raised and the merge completes successfully.
Installed Versions
INSTALLED VERSIONS
------------------
commit : 478d340667831908b5b4bf09a2787a11a14560c9
python : 3.11.2.final.0
python-bits : 64
OS : Windows
OS-release : 10
Version : 10.0.19045
machine : AMD64
processor : Intel64 Family 6 Model 140 Stepping 1, GenuineIntel
byteorder : little
LC_ALL : None
LANG : None
LOCALE : English_United Kingdom.1252
pandas : 2.0.0
numpy : 1.24.2
pytz : 2023.3
dateutil : 2.8.2
setuptools : 67.6.1
pip : 23.0.1
Cython : None
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : 3.1.2
IPython : 8.12.0
pandas_datareader: None
bs4 : 4.12.0
bottleneck : None
brotli :
fastparquet : None
fsspec : None
gcsfs : None
matplotlib : 3.7.1
numba : None
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : None
pyreadstat : None
pyxlsb : None
s3fs : None
scipy : 1.10.1
snappy : None
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : None
zstandard : None
tzdata : 2023.3
qtpy : None
pyqt5 : None