Skip to content

ENH: Include column for ea comparison in asserters #50323

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ Other enhancements
- :func:`timedelta_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49824`)
- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`)
- Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`)
- Improved exception message when using :func:`assert_frame_equal` on a :class:`DataFrame` to include the column being compared (:issue:`50323`)
- Improved error message for :func:`merge_asof` when join-columns were duplicated (:issue:`50102`)
- Added :meth:`Index.infer_objects` analogous to :meth:`Series.infer_objects` (:issue:`50034`)
- Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`, passing ``False`` will avoid making copies for series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`)
Expand Down
23 changes: 18 additions & 5 deletions pandas/_testing/asserters.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,7 @@ def assert_extension_array_equal(
check_exact: bool = False,
rtol: float = 1.0e-5,
atol: float = 1.0e-8,
obj: str = "ExtensionArray",
) -> None:
"""
Check that left and right ExtensionArrays are equal.
Expand All @@ -702,6 +703,11 @@ def assert_extension_array_equal(
Absolute tolerance. Only used when check_exact is False.

.. versionadded:: 1.1.0
obj : str, default 'ExtensionArray'
Specify object name being compared, internally used to show appropriate
assertion message.

.. versionadded:: 2.0.0

Notes
-----
Expand All @@ -719,7 +725,7 @@ def assert_extension_array_equal(
assert isinstance(left, ExtensionArray), "left is not an ExtensionArray"
assert isinstance(right, ExtensionArray), "right is not an ExtensionArray"
if check_dtype:
assert_attr_equal("dtype", left, right, obj="ExtensionArray")
assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")

if (
isinstance(left, DatetimeLikeArrayMixin)
Expand All @@ -729,21 +735,24 @@ def assert_extension_array_equal(
# Avoid slow object-dtype comparisons
# np.asarray for case where we have a np.MaskedArray
assert_numpy_array_equal(
np.asarray(left.asi8), np.asarray(right.asi8), index_values=index_values
np.asarray(left.asi8),
np.asarray(right.asi8),
index_values=index_values,
obj=obj,
)
return

left_na = np.asarray(left.isna())
right_na = np.asarray(right.isna())
assert_numpy_array_equal(
left_na, right_na, obj="ExtensionArray NA mask", index_values=index_values
left_na, right_na, obj=f"{obj} NA mask", index_values=index_values
)

left_valid = left[~left_na].to_numpy(dtype=object)
right_valid = right[~right_na].to_numpy(dtype=object)
if check_exact:
assert_numpy_array_equal(
left_valid, right_valid, obj="ExtensionArray", index_values=index_values
left_valid, right_valid, obj=obj, index_values=index_values
)
else:
_testing.assert_almost_equal(
Expand All @@ -752,7 +761,7 @@ def assert_extension_array_equal(
check_dtype=bool(check_dtype),
rtol=rtol,
atol=atol,
obj="ExtensionArray",
obj=obj,
index_values=index_values,
)

Expand Down Expand Up @@ -909,6 +918,7 @@ def assert_series_equal(
right_values,
check_dtype=check_dtype,
index_values=np.asarray(left.index),
obj=str(obj),
)
else:
assert_numpy_array_equal(
Expand Down Expand Up @@ -955,6 +965,7 @@ def assert_series_equal(
atol=atol,
check_dtype=check_dtype,
index_values=np.asarray(left.index),
obj=str(obj),
)
elif is_extension_array_dtype_and_needs_i8_conversion(
left.dtype, right.dtype
Expand All @@ -964,6 +975,7 @@ def assert_series_equal(
right._values,
check_dtype=check_dtype,
index_values=np.asarray(left.index),
obj=str(obj),
)
elif needs_i8_conversion(left.dtype) and needs_i8_conversion(right.dtype):
# DatetimeArray or TimedeltaArray
Expand All @@ -972,6 +984,7 @@ def assert_series_equal(
right._values,
check_dtype=check_dtype,
index_values=np.asarray(left.index),
obj=str(obj),
)
else:
_testing.assert_almost_equal(
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/json/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def test_custom_asserts(self):
self.assert_frame_equal(a.to_frame(), a.to_frame())

b = pd.Series(data.take([0, 0, 1]))
msg = r"ExtensionArray are different"
msg = r"Series are different"
with pytest.raises(AssertionError, match=msg):
self.assert_series_equal(a, b)

Expand Down
33 changes: 33 additions & 0 deletions pandas/tests/util/test_assert_frame_equal.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,3 +366,36 @@ def test_assert_frame_equal_check_like_categorical_midx():
),
)
tm.assert_frame_equal(left, right, check_like=True)


def test_assert_frame_equal_ea_column_definition_in_exception_mask():
    """NA-mask mismatch in an Int64 column must name the column in the error."""
    # GH#50323
    # The frames differ only in where the missing value sits (row 0).
    with_na = DataFrame({"a": pd.Series([pd.NA, 1], dtype="Int64")})
    without_na = DataFrame({"a": pd.Series([1, 1], dtype="Int64")})

    regex = r'DataFrame.iloc\[:, 0\] \(column name="a"\) NA mask values are different'
    with pytest.raises(AssertionError, match=regex):
        tm.assert_frame_equal(with_na, without_na)


def test_assert_frame_equal_ea_column_definition_in_exception():
    """Value mismatch in an Int64 column must name the column in the error.

    The same message is expected for the default comparison and for
    ``check_exact=True``.
    """
    # GH#50323
    # Same NA mask on both sides; only the valid value in row 1 differs.
    left = DataFrame({"a": pd.Series([pd.NA, 1], dtype="Int64")})
    right = DataFrame({"a": pd.Series([pd.NA, 2], dtype="Int64")})

    regex = r'DataFrame.iloc\[:, 0\] \(column name="a"\) values are different'

    # First the default comparison, then the exact one.
    for extra_kwargs in ({}, {"check_exact": True}):
        with pytest.raises(AssertionError, match=regex):
            tm.assert_frame_equal(left, right, **extra_kwargs)


def test_assert_frame_equal_ts_column():
    """Value mismatch in a Timestamp column must name the column in the error."""
    # GH#50323
    end_2019 = pd.Timestamp("2019-12-31")
    end_2020 = pd.Timestamp("2020-12-31")

    # Only the first row differs between the two frames.
    left = DataFrame({"a": [end_2019, end_2020]})
    right = DataFrame({"a": [end_2020, end_2020]})

    regex = r'DataFrame.iloc\[:, 0\] \(column name="a"\) values are different'
    with pytest.raises(AssertionError, match=regex):
        tm.assert_frame_equal(left, right)
11 changes: 3 additions & 8 deletions pandas/tests/util/test_assert_series_equal.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas.core.dtypes.common import is_extension_array_dtype

import pandas as pd
from pandas import (
Categorical,
Expand Down Expand Up @@ -116,10 +114,7 @@ def test_less_precise(data1, data2, dtype, decimals):
s2 = Series([data2], dtype=dtype)

if decimals in (5, 10) or (decimals >= 3 and abs(data1 - data2) >= 0.0005):
if is_extension_array_dtype(dtype):
msg = "ExtensionArray are different"
else:
msg = "Series values are different"
msg = "Series values are different"
with pytest.raises(AssertionError, match=msg):
tm.assert_series_equal(s1, s2, rtol=rtol)
else:
Expand Down Expand Up @@ -237,9 +232,9 @@ def test_series_equal_categorical_values_mismatch(rtol):


def test_series_equal_datetime_values_mismatch(rtol):
msg = """numpy array are different
msg = """Series are different

numpy array values are different \\(100.0 %\\)
Series values are different \\(100.0 %\\)
\\[index\\]: \\[0, 1, 2\\]
\\[left\\]: \\[1514764800000000000, 1514851200000000000, 1514937600000000000\\]
\\[right\\]: \\[1549065600000000000, 1549152000000000000, 1549238400000000000\\]"""
Expand Down