Skip to content

Commit 8045c2d

Browse files
authored
DEPS: Clean unused dependencies (#61331)
* DEPS: Clean unused dependencies
* Sync remove upper pin on xarray
* Remove other unused dependencies
* Fix xarray failures post xarray unpin
* Fix downstream test
* xfail based on version
1 parent 2e141aa commit 8045c2d

17 files changed

+39
-60
lines changed

‎ci/deps/actions-310-minimum_versions.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ dependencies:
2626

2727
# optional dependencies
2828
- beautifulsoup4=4.12.3
29-
- blosc=1.21.3
3029
- bottleneck=1.3.6
3130
- fastparquet=2024.2.0
3231
- fsspec=2024.2.0

‎ci/deps/actions-310.yaml

+1-2
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ dependencies:
2424

2525
# optional dependencies
2626
- beautifulsoup4>=4.12.3
27-
- blosc>=1.21.3
2827
- bottleneck>=1.3.6
2928
- fastparquet>=2024.2.0
3029
- fsspec>=2024.2.0
@@ -52,7 +51,7 @@ dependencies:
5251
- scipy>=1.12.0
5352
- sqlalchemy>=2.0.0
5453
- tabulate>=0.9.0
55-
- xarray>=2024.1.1, <=2024.9.0
54+
- xarray>=2024.1.1
5655
- xlrd>=2.0.1
5756
- xlsxwriter>=3.2.0
5857
- zstandard>=0.22.0

‎ci/deps/actions-311-downstream_compat.yaml

+1-4
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ dependencies:
2525

2626
# optional dependencies
2727
- beautifulsoup4>=4.12.3
28-
- blosc>=1.21.3
2928
- bottleneck>=1.3.6
3029
- fastparquet>=2024.2.0
3130
- fsspec>=2024.2.0
@@ -53,7 +52,7 @@ dependencies:
5352
- scipy>=1.12.0
5453
- sqlalchemy>=2.0.0
5554
- tabulate>=0.9.0
56-
- xarray>=2024.1.1, <=2024.9.0
55+
- xarray>=2024.1.1
5756
- xlrd>=2.0.1
5857
- xlsxwriter>=3.2.0
5958
- zstandard>=0.22.0
@@ -63,14 +62,12 @@ dependencies:
6362
- cftime
6463
- dask
6564
- ipython
66-
- geopandas-base
6765
- seaborn
6866
- scikit-learn
6967
- statsmodels
7068
- coverage
7169
- pandas-datareader
7270
- pyyaml
73-
- py
7471
- pip:
7572
- adbc-driver-postgresql>=0.10.0
7673
- adbc-driver-sqlite>=0.8.0

‎ci/deps/actions-311.yaml

+1-2
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ dependencies:
2424

2525
# optional dependencies
2626
- beautifulsoup4>=4.12.3
27-
- blosc>=1.21.3
2827
- bottleneck>=1.3.6
2928
- fastparquet>=2024.2.0
3029
- fsspec>=2024.2.0
@@ -52,7 +51,7 @@ dependencies:
5251
- scipy>=1.12.0
5352
- sqlalchemy>=2.0.0
5453
- tabulate>=0.9.0
55-
- xarray>=2024.1.1, <=2024.9.0
54+
- xarray>=2024.1.1
5655
- xlrd>=2.0.1
5756
- xlsxwriter>=3.2.0
5857
- zstandard>=0.22.0

‎ci/deps/actions-312.yaml

+1-2
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ dependencies:
2424

2525
# optional dependencies
2626
- beautifulsoup4>=4.12.3
27-
- blosc>=1.21.3
2827
- bottleneck>=1.3.6
2928
- fastparquet>=2024.2.0
3029
- fsspec>=2024.2.0
@@ -52,7 +51,7 @@ dependencies:
5251
- scipy>=1.12.0
5352
- sqlalchemy>=2.0.0
5453
- tabulate>=0.9.0
55-
- xarray>=2024.1.1, <=2024.9.0
54+
- xarray>=2024.1.1
5655
- xlrd>=2.0.1
5756
- xlsxwriter>=3.2.0
5857
- zstandard>=0.22.0

‎ci/deps/actions-313.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ dependencies:
5252
- scipy>=1.12.0
5353
- sqlalchemy>=2.0.0
5454
- tabulate>=0.9.0
55-
- xarray>=2024.1.1, <=2024.9.0
55+
- xarray>=2024.1.1
5656
- xlrd>=2.0.1
5757
- xlsxwriter>=3.2.0
5858
- zstandard>=0.22.0

‎doc/source/getting_started/install.rst

-1
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,6 @@ Installable with ``pip install "pandas[hdf5, parquet, feather, spss, excel]"``
305305
Dependency Minimum Version pip extra Notes
306306
====================================================== ================== ================ ==========================================================
307307
`PyTables <https://github.com/PyTables/PyTables>`__ 3.8.0 hdf5 HDF5-based reading / writing
308-
`blosc <https://github.com/Blosc/c-blosc>`__ 1.21.3 hdf5 Compression for HDF5; only available on ``conda``
309308
`zlib <https://github.com/madler/zlib>`__ hdf5 Compression for HDF5
310309
`fastparquet <https://github.com/dask/fastparquet>`__ 2024.2.0 - Parquet reading / writing (pyarrow is default)
311310
`pyarrow <https://github.com/apache/arrow>`__ 10.0.1 parquet, feather Parquet, ORC, and feather reading / writing

‎environment.yml

+1-4
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ dependencies:
2727

2828
# optional dependencies
2929
- beautifulsoup4>=4.12.3
30-
- blosc
3130
- bottleneck>=1.3.6
3231
- fastparquet>=2024.2.0
3332
- fsspec>=2024.2.0
@@ -55,7 +54,7 @@ dependencies:
5554
- scipy>=1.12.0
5655
- sqlalchemy>=2.0.0
5756
- tabulate>=0.9.0
58-
- xarray>=2024.1.1, <=2024.9.0
57+
- xarray>=2024.1.1
5958
- xlrd>=2.0.1
6059
- xlsxwriter>=3.2.0
6160
- zstandard>=0.22.0
@@ -83,8 +82,6 @@ dependencies:
8382

8483
# documentation
8584
- gitpython # obtain contributors from git for whatsnew
86-
- gitdb
87-
- google-auth
8885
- natsort # DataFrame.sort_values doctest
8986
- numpydoc
9087
- pydata-sphinx-theme=0.16

‎pandas/compat/_optional.py

-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
"adbc-driver-postgresql": "0.10.0",
2424
"adbc-driver-sqlite": "0.8.0",
2525
"bs4": "4.12.3",
26-
"blosc": "1.21.3",
2726
"bottleneck": "1.3.6",
2827
"fastparquet": "2024.2.0",
2928
"fsspec": "2024.2.0",

‎pandas/tests/generic/test_to_xarray.py

+28-24
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@
66
DataFrame,
77
MultiIndex,
88
Series,
9+
StringDtype,
910
date_range,
1011
)
1112
import pandas._testing as tm
13+
from pandas.util.version import Version
1214

13-
pytest.importorskip("xarray")
15+
xarray = pytest.importorskip("xarray")
1416

1517

1618
class TestDataFrameToXArray:
@@ -29,13 +31,17 @@ def df(self):
2931
}
3032
)
3133

32-
def test_to_xarray_index_types(self, index_flat, df, using_infer_string):
34+
def test_to_xarray_index_types(self, index_flat, df, request):
3335
index = index_flat
3436
# MultiIndex is tested in test_to_xarray_with_multiindex
3537
if len(index) == 0:
3638
pytest.skip("Test doesn't make sense for empty index")
37-
38-
from xarray import Dataset
39+
elif Version(xarray.__version__) <= Version("2024.9.0"):
40+
request.applymarker(
41+
pytest.mark.xfail(
42+
reason="Categorical column not preserved.",
43+
)
44+
)
3945

4046
df.index = index[:4]
4147
df.index.name = "foo"
@@ -45,29 +51,22 @@ def test_to_xarray_index_types(self, index_flat, df, using_infer_string):
4551
assert len(result.coords) == 1
4652
assert len(result.data_vars) == 8
4753
tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
48-
assert isinstance(result, Dataset)
54+
assert isinstance(result, xarray.Dataset)
4955

5056
# idempotency
5157
# datetimes w/tz are preserved
5258
# column names are lost
5359
expected = df.copy()
54-
expected["f"] = expected["f"].astype(
55-
object if not using_infer_string else "str"
56-
)
5760
expected.columns.name = None
5861
tm.assert_frame_equal(result.to_dataframe(), expected)
5962

6063
def test_to_xarray_empty(self, df):
61-
from xarray import Dataset
62-
6364
df.index.name = "foo"
6465
result = df[0:0].to_xarray()
6566
assert result.sizes["foo"] == 0
66-
assert isinstance(result, Dataset)
67+
assert isinstance(result, xarray.Dataset)
6768

6869
def test_to_xarray_with_multiindex(self, df, using_infer_string):
69-
from xarray import Dataset
70-
7170
# MultiIndex
7271
df.index = MultiIndex.from_product([["a"], range(4)], names=["one", "two"])
7372
result = df.to_xarray()
@@ -76,7 +75,7 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string):
7675
assert len(result.coords) == 2
7776
assert len(result.data_vars) == 8
7877
tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"])
79-
assert isinstance(result, Dataset)
78+
assert isinstance(result, xarray.Dataset)
8079

8180
result = result.to_dataframe()
8281
expected = df.copy()
@@ -88,43 +87,48 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string):
8887

8988

9089
class TestSeriesToXArray:
91-
def test_to_xarray_index_types(self, index_flat):
90+
def test_to_xarray_index_types(self, index_flat, request):
9291
index = index_flat
92+
if (
93+
isinstance(index.dtype, StringDtype)
94+
and index.dtype.storage == "pyarrow"
95+
and Version(xarray.__version__) > Version("2024.9.0")
96+
):
97+
request.applymarker(
98+
pytest.mark.xfail(
99+
reason="xarray calling reshape of ArrowExtensionArray",
100+
raises=NotImplementedError,
101+
)
102+
)
93103
# MultiIndex is tested in test_to_xarray_with_multiindex
94104

95-
from xarray import DataArray
96-
97105
ser = Series(range(len(index)), index=index, dtype="int64")
98106
ser.index.name = "foo"
99107
result = ser.to_xarray()
100108
repr(result)
101109
assert len(result) == len(index)
102110
assert len(result.coords) == 1
103111
tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
104-
assert isinstance(result, DataArray)
112+
assert isinstance(result, xarray.DataArray)
105113

106114
# idempotency
107115
tm.assert_series_equal(result.to_series(), ser)
108116

109117
def test_to_xarray_empty(self):
110-
from xarray import DataArray
111-
112118
ser = Series([], dtype=object)
113119
ser.index.name = "foo"
114120
result = ser.to_xarray()
115121
assert len(result) == 0
116122
assert len(result.coords) == 1
117123
tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
118-
assert isinstance(result, DataArray)
124+
assert isinstance(result, xarray.DataArray)
119125

120126
def test_to_xarray_with_multiindex(self):
121-
from xarray import DataArray
122-
123127
mi = MultiIndex.from_product([["a", "b"], range(3)], names=["one", "two"])
124128
ser = Series(range(6), dtype="int64", index=mi)
125129
result = ser.to_xarray()
126130
assert len(result) == 2
127131
tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"])
128-
assert isinstance(result, DataArray)
132+
assert isinstance(result, xarray.DataArray)
129133
res = result.to_series()
130134
tm.assert_series_equal(res, ser)

‎pandas/tests/test_downstream.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def test_xarray_cftimeindex_nearest():
103103
cftime = pytest.importorskip("cftime")
104104
xarray = pytest.importorskip("xarray")
105105

106-
times = xarray.cftime_range("0001", periods=2)
106+
times = xarray.date_range("0001", periods=2, use_cftime=True)
107107
key = cftime.DatetimeGregorian(2000, 1, 1)
108108
result = times.get_indexer([key], method="nearest")
109109
expected = 1

‎pyproject.toml

+1-5
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,7 @@ gcp = ['gcsfs>=2024.2.0']
6868
excel = ['odfpy>=1.4.1', 'openpyxl>=3.1.2', 'python-calamine>=0.1.7', 'pyxlsb>=1.0.10', 'xlrd>=2.0.1', 'xlsxwriter>=3.2.0']
6969
parquet = ['pyarrow>=10.0.1']
7070
feather = ['pyarrow>=10.0.1']
71-
hdf5 = [# blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297)
72-
#'blosc>=1.20.1',
73-
'tables>=3.8.0']
71+
hdf5 = ['tables>=3.8.0']
7472
spss = ['pyreadstat>=1.2.6']
7573
postgresql = ['SQLAlchemy>=2.0.0', 'psycopg2>=2.9.6', 'adbc-driver-postgresql>=0.10.0']
7674
mysql = ['SQLAlchemy>=2.0.0', 'pymysql>=1.1.0']
@@ -85,8 +83,6 @@ timezone = ['pytz>=2023.4']
8583
all = ['adbc-driver-postgresql>=0.10.0',
8684
'adbc-driver-sqlite>=0.8.0',
8785
'beautifulsoup4>=4.12.3',
88-
# blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297)
89-
#'blosc>=1.21.3',
9086
'bottleneck>=1.3.6',
9187
'fastparquet>=2024.2.0',
9288
'fsspec>=2024.2.0',

‎requirements-dev.txt

+1-4
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ coverage
1616
python-dateutil
1717
numpy<3
1818
beautifulsoup4>=4.12.3
19-
blosc
2019
bottleneck>=1.3.6
2120
fastparquet>=2024.2.0
2221
fsspec>=2024.2.0
@@ -44,7 +43,7 @@ s3fs>=2024.2.0
4443
scipy>=1.12.0
4544
SQLAlchemy>=2.0.0
4645
tabulate>=0.9.0
47-
xarray>=2024.1.1, <=2024.9.0
46+
xarray>=2024.1.1
4847
xlrd>=2.0.1
4948
xlsxwriter>=3.2.0
5049
zstandard>=0.22.0
@@ -58,8 +57,6 @@ mypy==1.13.0
5857
tokenize-rt
5958
pre-commit>=4.2.0
6059
gitpython
61-
gitdb
62-
google-auth
6360
natsort
6461
numpydoc
6562
pydata-sphinx-theme==0.16

‎scripts/tests/data/deps_expected_random.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ dependencies:
2323

2424
# optional dependencies
2525
- beautifulsoup4>=5.9.3
26-
- blosc
2726
- bottleneck>=1.3.2
2827
- fastparquet>=0.6.3
2928
- fsspec>=2021.07.0

‎scripts/tests/data/deps_minimum.toml

+1-5
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,7 @@ gcp = ['gcsfs>=2021.07.0']
6363
excel = ['odfpy>=1.4.1', 'openpyxl>=3.0.7', 'python-calamine>=0.1.7', 'pyxlsb>=1.0.8', 'xlrd>=2.0.1', 'xlsxwriter>=1.4.3']
6464
parquet = ['pyarrow>=7.0.0']
6565
feather = ['pyarrow>=7.0.0']
66-
hdf5 = [# blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297)
67-
#'blosc>=1.20.1',
68-
'tables>=3.6.1']
66+
hdf5 = ['tables>=3.6.1']
6967
spss = ['pyreadstat>=1.1.2']
7068
postgresql = ['SQLAlchemy>=1.4.16', 'psycopg2>=2.8.6']
7169
mysql = ['SQLAlchemy>=1.4.16', 'pymysql>=1.1.0']
@@ -77,8 +75,6 @@ output_formatting = ['jinja2>=3.0.0', 'tabulate>=0.8.9']
7775
clipboard = ['PyQt5>=5.15.1', 'qtpy>=2.3.0']
7876
compression = ['zstandard>=0.15.2']
7977
all = ['beautifulsoup4>=5.9.3',
80-
# blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297)
81-
#'blosc>=1.21.0',
8278
'bottleneck>=1.3.2',
8379
'fastparquet>=0.6.3',
8480
'fsspec>=2021.07.0',

‎scripts/tests/data/deps_unmodified_random.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ dependencies:
2323

2424
# optional dependencies
2525
- beautifulsoup4
26-
- blosc
2726
- bottleneck>=1.3.2
2827
- fastparquet>=0.6.3
2928
- fsspec>=2021.07.0

‎scripts/validate_min_versions_in_sync.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
SETUP_PATH = pathlib.Path("pyproject.toml").resolve()
3737
YAML_PATH = pathlib.Path("ci/deps")
3838
ENV_PATH = pathlib.Path("environment.yml")
39-
EXCLUDE_DEPS = {"tzdata", "blosc", "pyqt", "pyqt5"}
39+
EXCLUDE_DEPS = {"tzdata", "pyqt", "pyqt5"}
4040
EXCLUSION_LIST = frozenset(["python=3.8[build=*_pypy]"])
4141
# pandas package is not available
4242
# in pre-commit environment

0 commit comments

Comments
 (0)