diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml index 187308ad..c0e3f375 100644 --- a/.github/workflows/test-python.yml +++ b/.github/workflows/test-python.yml @@ -38,9 +38,6 @@ jobs: matrix: os: ["ubuntu-latest", "macos-latest", "windows-latest"] python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t"] - exclude: - - os: "windows-latest" - python-version: "3.13t" fail-fast: false name: CPython ${{ matrix.python-version }}-${{ matrix.os }} steps: @@ -89,10 +86,14 @@ jobs: - name: Ensure imports with no test deps run: just import-check - name: Run the tests + if: ${{ ! endsWith(matrix.python-version, 't') }} env: UV_PYTHON: ${{matrix.python-version}} run: just test - + - name: Run the tests with no optional deps + env: + UV_PYTHON: ${{matrix.python-version}} + run: just test-no-optional docs: runs-on: ubuntu-latest steps: diff --git a/bindings/python/CHANGELOG.md b/bindings/python/CHANGELOG.md index ceb95434..4aef730e 100644 --- a/bindings/python/CHANGELOG.md +++ b/bindings/python/CHANGELOG.md @@ -3,6 +3,11 @@ --- +# Changes in Version 1.10.0 (2025/xx/yy) + +- Make `pandas` an optional dependency. +- Add support for free-threaded python on Windows. + # Changes in Version 1.9.0 (2025/05/27) - Providing a schema now enforces strict type adherence for data. diff --git a/bindings/python/justfile b/bindings/python/justfile index c9936665..ed8c5908 100644 --- a/bindings/python/justfile +++ b/bindings/python/justfile @@ -14,7 +14,7 @@ import-check: uv run python -c "from pymongoarrow.lib import libbson_version" benchmark *args: - uv sync --dev --extra test --extra test-polars + uv sync --dev --extra test --extra test-polars --extra test-pandas uv run asv run -e --python=$(uv run python -c "import sys;print(sys.executable)") {{args}} install: @@ -23,8 +23,10 @@ install: uv run pre-commit install test *args: - uv sync --extra test --extra test-polars || uv sync --extra test - uv run pytest {{args}} + uv run --no-dev --extra test --extra test-polars --extra test-pandas pytest {{args}} + +test-no-optional *args: + uv run --no-dev --extra test pytest {{args}} lint: uv sync --no-install-project --dev --frozen diff --git a/bindings/python/pymongoarrow/api.py b/bindings/python/pymongoarrow/api.py index ecb4a794..60b32d29 100644 --- a/bindings/python/pymongoarrow/api.py +++ b/bindings/python/pymongoarrow/api.py @@ -15,7 +15,11 @@ from decimal import Decimal import numpy as np -import pandas as pd + +try: + import pandas as pd +except ImportError: + pd = None try: import polars as pl @@ -170,6 +174,9 @@ def _arrow_to_pandas(arrow_table): See https://arrow.apache.org/docs/python/pandas.html#reducing-memory-use-in-table-to-pandas for details. """ + if pd is None: + msg = "pandas is not installed. Try pip install pandas." + raise ValueError(msg) return arrow_table.to_pandas(split_blocks=True, self_destruct=True) @@ -238,10 +245,10 @@ def _arrow_to_numpy(arrow_table, schema=None): for fname in schema: dtype = get_numpy_type(schema[fname]) + container[fname] = arrow_table[fname].to_numpy() if dtype == np.str_: - container[fname] = arrow_table[fname].to_pandas().to_numpy(dtype=dtype) - else: - container[fname] = arrow_table[fname].to_numpy() + container[fname] = container[fname].astype(np.str_) + return container @@ -427,7 +434,7 @@ def _tabular_generator(tabular, *, exclude_none=False): yield {k: v for k, v in row.items() if v is not None} else: yield row - elif isinstance(tabular, pd.DataFrame): + elif pd is not None and isinstance(tabular, pd.DataFrame): for row in tabular.to_dict("records"): if exclude_none: yield {k: v for k, v in row.items() if not np.isnan(v)} @@ -498,7 +505,7 @@ def write(collection, tabular, *, exclude_none: bool = False): cols = [tabular.column(i).cast(new_types[i]) for i in range(tabular.num_columns)] tabular = Table.from_arrays(cols, names=tabular.column_names) _validate_schema(tabular.schema.types) - elif isinstance(tabular, pd.DataFrame): + elif pd is not None and isinstance(tabular, pd.DataFrame): _validate_schema(ArrowSchema.from_pandas(tabular).types) elif pl is not None and isinstance(tabular, pl.DataFrame): tabular = tabular.to_arrow() # zero-copy in most cases and done in tabular_gen anyway @@ -523,7 +530,10 @@ def write(collection, tabular, *, exclude_none: bool = False): # Add handling for special case types. codec_options = collection.codec_options - type_registry = TypeRegistry([_PandasNACodec(), _DecimalCodec()]) + if pd is not None: + type_registry = TypeRegistry([_PandasNACodec(), _DecimalCodec()]) + else: + type_registry = TypeRegistry([_DecimalCodec()]) codec_options = codec_options.with_options(type_registry=type_registry) while cur_offset < tab_size: diff --git a/bindings/python/pymongoarrow/pandas_types.py b/bindings/python/pymongoarrow/pandas_types.py index 061ee0fe..7617bc70 100644 --- a/bindings/python/pymongoarrow/pandas_types.py +++ b/bindings/python/pymongoarrow/pandas_types.py @@ -19,14 +19,22 @@ import re import numpy as np -import pandas as pd import pyarrow as pa from bson import Binary, Code, Decimal128, ObjectId -from pandas.api.extensions import ( - ExtensionArray, - ExtensionDtype, - register_extension_dtype, -) + +try: + import pandas as pd + from pandas.api.extensions import ( + ExtensionArray, + ExtensionDtype, + register_extension_dtype, + ) +except ImportError: + ExtensionDtype = object + ExtensionArray = object + + def register_extension_dtype(func): + return func class PandasBSONDtype(ExtensionDtype): diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml index 3501358d..9f7d0048 100644 --- a/bindings/python/pyproject.toml +++ b/bindings/python/pyproject.toml @@ -39,7 +39,7 @@ dependencies = [ # Must be kept in sync with "build_sytem.requires" above. "pyarrow >=20.0,<20.1", "pymongo >=4.4,<5", - "pandas >=1.3.5,<3", + "numpy>=2.0.1", "packaging >=23.2", ] dynamic = ["version"] @@ -53,6 +53,7 @@ Tracker = "https://jira.mongodb.org/projects/INTPYTHON/issues" [project.optional-dependencies] test = ["pytz", "pytest"] test-polars = ["polars"] +test-pandas = ["pandas>=1.3.5,<3"] [tool.setuptools] zip-safe = false diff --git a/bindings/python/test/conftest.py b/bindings/python/test/conftest.py index 9c6fb679..2d42df06 100644 --- a/bindings/python/test/conftest.py +++ b/bindings/python/test/conftest.py @@ -15,9 +15,14 @@ import pytest -pytest_plugins = [ - "pandas.tests.extension.conftest", -] +try: + import pandas as pd # noqa: F401 + + pytest_plugins = [ + "pandas.tests.extension.conftest", + ] +except ImportError: + pass @pytest.fixture(autouse=True, scope="session") diff --git a/bindings/python/test/pandas_types/test_binary.py b/bindings/python/test/pandas_types/test_binary.py index 7d1c7ee5..91dcff2e 100644 --- a/bindings/python/test/pandas_types/test_binary.py +++ b/bindings/python/test/pandas_types/test_binary.py @@ -14,10 +14,14 @@ import numpy as np import pytest from bson import Binary -from pandas.tests.extension import base from pymongoarrow.pandas_types import PandasBinary, PandasBinaryArray +try: + from pandas.tests.extension import base +except ImportError: + pytest.skip("skipping pandas tests", allow_module_level=True) + try: base.BaseIndexTests except AttributeError: diff --git a/bindings/python/test/pandas_types/test_code.py b/bindings/python/test/pandas_types/test_code.py index 700131d6..809e5a36 100644 --- a/bindings/python/test/pandas_types/test_code.py +++ b/bindings/python/test/pandas_types/test_code.py @@ -14,10 +14,15 @@ import numpy as np import pytest from bson import Code -from pandas.tests.extension import base from pymongoarrow.pandas_types import PandasCode, PandasCodeArray +try: + from pandas.tests.extension import base +except ImportError: + pytest.skip("skipping pandas tests", allow_module_level=True) + + try: base.BaseIndexTests except AttributeError: diff --git a/bindings/python/test/pandas_types/test_decimal128.py b/bindings/python/test/pandas_types/test_decimal128.py index 3861c3de..32aac515 100644 --- a/bindings/python/test/pandas_types/test_decimal128.py +++ b/bindings/python/test/pandas_types/test_decimal128.py @@ -14,10 +14,15 @@ import numpy as np import pytest from bson import Decimal128 -from pandas.tests.extension import base from pymongoarrow.pandas_types import PandasDecimal128, PandasDecimal128Array +try: + from pandas.tests.extension import base +except ImportError: + pytest.skip("skipping pandas tests", allow_module_level=True) + + try: base.BaseIndexTests except AttributeError: diff --git a/bindings/python/test/pandas_types/test_objectid.py b/bindings/python/test/pandas_types/test_objectid.py index 75ef6eaf..6a0b361d 100644 --- a/bindings/python/test/pandas_types/test_objectid.py +++ b/bindings/python/test/pandas_types/test_objectid.py @@ -14,10 +14,15 @@ import numpy as np import pytest from bson import ObjectId -from pandas.tests.extension import base from pymongoarrow.pandas_types import PandasObjectId, PandasObjectIdArray +try: + from pandas.tests.extension import base +except ImportError: + pytest.skip("skipping pandas tests", allow_module_level=True) + + try: base.BaseIndexTests except AttributeError: diff --git a/bindings/python/test/test_numpy.py b/bindings/python/test/test_numpy.py index 284659e4..bfdfe1d0 100644 --- a/bindings/python/test/test_numpy.py +++ b/bindings/python/test/test_numpy.py @@ -154,6 +154,10 @@ def test_write_error(self): raise awe def test_write_schema_validation(self): + try: + import pandas as pd # noqa: F401 + except ImportError: + self.skipTest("Test requires pandas") arrow_schema = {k.__name__: v(True) for k, v in _TYPE_NORMALIZER_FACTORY.items()} schema = {k: v.to_pandas_dtype() for k, v in arrow_schema.items()} schema["str"] = "str" diff --git a/bindings/python/test/test_pandas.py b/bindings/python/test/test_pandas.py index 7998b137..3c2c58f1 100644 --- a/bindings/python/test/test_pandas.py +++ b/bindings/python/test/test_pandas.py @@ -23,9 +23,8 @@ from test.utils import AllowListEventListener, NullsTestMixin import numpy as np -import pandas as pd -import pandas.testing import pyarrow as pa +import pytest from bson import Binary, Code, CodecOptions, Decimal128, ObjectId from pyarrow import decimal256, int32, int64 from pymongo import DESCENDING, WriteConcern @@ -37,6 +36,12 @@ from pymongoarrow.pandas_types import PandasBSONDtype, PandasDecimal128, PandasObjectId from pymongoarrow.types import _TYPE_NORMALIZER_FACTORY, Decimal128Type, ObjectIdType +try: + import pandas as pd + import pandas.testing +except ImportError: + pytest.skip("skipping pandas tests", allow_module_level=True) + class PandasTestBase(unittest.TestCase): @classmethod diff --git a/bindings/python/test/test_pymongoarrow.py b/bindings/python/test/test_pymongoarrow.py index f7453d12..318a5f15 100644 --- a/bindings/python/test/test_pymongoarrow.py +++ b/bindings/python/test/test_pymongoarrow.py @@ -14,7 +14,7 @@ import unittest from test import client_context -from pymongoarrow.api import find_arrow_all +from pymongoarrow.api import find_arrow_all, find_pandas_all, find_polars_all from pymongoarrow.schema import Schema from pymongoarrow.version import __version__ @@ -34,6 +34,35 @@ def test_version(self): self.assertIsNotNone(__version__) self.assertIsInstance(__version__, str) + def test_no_pandas(self): + try: + import pandas as pd # noqa: F401 + + self.skipTest("Requires no pandas") + except ImportError: + pass + self.client.test.drop_collection("test") + schema = Schema({"data": bool}) + data = [{"data": False} for _ in range(1000)] + self.client.test.test.insert_many(data) + + with self.assertRaises(ValueError): + find_pandas_all(self.client.test.test, {}, schema=schema) + + def test_no_polars(self): + try: + import polars as pl # noqa: F401 + + self.skipTest("Requires no polars") + except ImportError: + pass + self.client.test.drop_collection("test") + schema = Schema({"data": bool}) + data = [{"data": False} for _ in range(1000)] + self.client.test.test.insert_many(data) + with self.assertRaises(ValueError): + find_polars_all(self.client.test.test, {}, schema=schema) + def test_capped_collection(self): self.client.test.drop_collection("test") self.client.test.create_collection("test", capped=True, size=5000) diff --git a/bindings/python/test/utils.py b/bindings/python/test/utils.py index 8bfe9e1d..9b1f419a 100644 --- a/bindings/python/test/utils.py +++ b/bindings/python/test/utils.py @@ -19,7 +19,6 @@ import numpy as np import pyarrow as pa from bson import Decimal128, ObjectId -from pandas import isna from pyarrow import bool_, float64, int64, string, timestamp from pymongo import WriteConcern, monitoring @@ -33,6 +32,24 @@ ) +def isnan(inp): + if isinstance(inp, (pa.Array, pa.ChunkedArray)): + inp = inp.to_pylist() + + def isnan_inner(value): + if value is None: + return True + # pandas na values + if str(value) in ["", "NaT"]: + return True + try: + return np.isnan(value) + except TypeError: + return False + + return [isnan_inner(v) for v in inp] + + class EventListener(monitoring.CommandListener): def __init__(self): self.results = defaultdict(list) @@ -176,7 +193,7 @@ def test_int_handling(self): self.assertType(table["int64"], atype) # Does it contain NAs where we expect? - self.assertTrue(np.all(np.equal(isna(int64_arr), isna(table["int64"])))) + self.assertTrue(np.all(np.equal(isnan(int64_arr), isnan(table["int64"])))) # Write self.coll.drop() @@ -217,7 +234,7 @@ def test_other_handling(self): self.assertType(table["other"], con_type) self.assertEqual( self.na_safe(con_type), - np.all(np.equal(isna(others), isna(table["other"]))), + np.all(np.equal(isnan(others), isnan(table["other"]))), ) def writeback(): @@ -262,4 +279,4 @@ def test_bool_handling(self): self.assertType(table["bool_"], atype) # Does it contain Nones where expected? - self.assertTrue(np.all(np.equal(isna(bools), isna(table["bool_"])))) + self.assertTrue(np.all(np.equal(isnan(bools), isnan(table["bool_"])))) diff --git a/bindings/python/uv.lock b/bindings/python/uv.lock index 9de08526..447fe463 100644 --- a/bindings/python/uv.lock +++ b/bindings/python/uv.lock @@ -884,8 +884,9 @@ wheels = [ name = "pymongoarrow" source = { editable = "." } dependencies = [ + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "numpy", version = "2.2.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "packaging" }, - { name = "pandas" }, { name = "pyarrow" }, { name = "pymongo" }, ] @@ -895,6 +896,9 @@ test = [ { name = "pytest" }, { name = "pytz" }, ] +test-pandas = [ + { name = "pandas" }, +] test-polars = [ { name = "polars" }, ] @@ -919,15 +923,16 @@ docs = [ [package.metadata] requires-dist = [ + { name = "numpy", specifier = ">=2.0.1" }, { name = "packaging", specifier = ">=23.2" }, - { name = "pandas", specifier = ">=1.3.5,<3" }, + { name = "pandas", marker = "extra == 'test-pandas'", specifier = ">=1.3.5,<3" }, { name = "polars", marker = "extra == 'test-polars'" }, { name = "pyarrow", specifier = ">=20.0,<20.1" }, { name = "pymongo", specifier = ">=4.4,<5" }, { name = "pytest", marker = "extra == 'test'" }, { name = "pytz", marker = "extra == 'test'" }, ] -provides-extras = ["test", "test-polars"] +provides-extras = ["test", "test-pandas", "test-polars"] [package.metadata.requires-dev] dev = [