Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions .github/workflows/test-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,6 @@ jobs:
matrix:
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t"]
exclude:
- os: "windows-latest"
python-version: "3.13t"
fail-fast: false
name: CPython ${{ matrix.python-version }}-${{ matrix.os }}
steps:
Expand Down Expand Up @@ -89,10 +86,14 @@ jobs:
- name: Ensure imports with no test deps
run: just import-check
- name: Run the tests
if: ${{ ! endsWith(matrix.python-version, 't') }}
env:
UV_PYTHON: ${{matrix.python-version}}
run: just test

- name: Run the tests with no optional deps
env:
UV_PYTHON: ${{matrix.python-version}}
run: just test-no-optional
docs:
runs-on: ubuntu-latest
steps:
Expand Down
5 changes: 5 additions & 0 deletions bindings/python/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@

---

# Changes in Version 1.10.0 (2025/xx/yy)

- Make `pandas` an optional dependency.
- Add support for free-threaded python on Windows.

# Changes in Version 1.9.0 (2025/05/27)

- Providing a schema now enforces strict type adherence for data.
Expand Down
8 changes: 5 additions & 3 deletions bindings/python/justfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import-check:
uv run python -c "from pymongoarrow.lib import libbson_version"

benchmark *args:
uv sync --dev --extra test --extra test-polars
uv sync --dev --extra test --extra test-polars --extra test-pandas
uv run asv run -e --python=$(uv run python -c "import sys;print(sys.executable)") {{args}}

install:
Expand All @@ -23,8 +23,10 @@ install:
uv run pre-commit install

test *args:
uv sync --extra test --extra test-polars || uv sync --extra test
uv run pytest {{args}}
uv run --no-dev --extra test --extra test-polars --extra test-pandas pytest {{args}}

test-no-optional *args:
uv run --no-dev --extra test pytest {{args}}

lint:
uv sync --no-install-project --dev --frozen
Expand Down
24 changes: 17 additions & 7 deletions bindings/python/pymongoarrow/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
from decimal import Decimal

import numpy as np
import pandas as pd

# pandas is an optional dependency: bind ``pd`` to ``None`` when it is not
# installed so downstream code can feature-detect it (``if pd is not None``)
# instead of failing at import time.
try:
    import pandas as pd
except ImportError:
    pd = None

try:
import polars as pl
Expand Down Expand Up @@ -170,6 +174,9 @@ def _arrow_to_pandas(arrow_table):
See https://arrow.apache.org/docs/python/pandas.html#reducing-memory-use-in-table-to-pandas
for details.
"""
if pd is None:
msg = "pandas is not installed. Try pip install pandas."
raise ValueError(msg)
return arrow_table.to_pandas(split_blocks=True, self_destruct=True)


Expand Down Expand Up @@ -238,10 +245,10 @@ def _arrow_to_numpy(arrow_table, schema=None):

for fname in schema:
dtype = get_numpy_type(schema[fname])
container[fname] = arrow_table[fname].to_numpy()
if dtype == np.str_:
container[fname] = arrow_table[fname].to_pandas().to_numpy(dtype=dtype)
else:
container[fname] = arrow_table[fname].to_numpy()
container[fname] = container[fname].astype(np.str_)

return container


Expand Down Expand Up @@ -427,7 +434,7 @@ def _tabular_generator(tabular, *, exclude_none=False):
yield {k: v for k, v in row.items() if v is not None}
else:
yield row
elif isinstance(tabular, pd.DataFrame):
elif pd is not None and isinstance(tabular, pd.DataFrame):
for row in tabular.to_dict("records"):
if exclude_none:
yield {k: v for k, v in row.items() if not np.isnan(v)}
Expand Down Expand Up @@ -498,7 +505,7 @@ def write(collection, tabular, *, exclude_none: bool = False):
cols = [tabular.column(i).cast(new_types[i]) for i in range(tabular.num_columns)]
tabular = Table.from_arrays(cols, names=tabular.column_names)
_validate_schema(tabular.schema.types)
elif isinstance(tabular, pd.DataFrame):
elif pd is not None and isinstance(tabular, pd.DataFrame):
_validate_schema(ArrowSchema.from_pandas(tabular).types)
elif pl is not None and isinstance(tabular, pl.DataFrame):
tabular = tabular.to_arrow() # zero-copy in most cases and done in tabular_gen anyway
Expand All @@ -523,7 +530,10 @@ def write(collection, tabular, *, exclude_none: bool = False):

# Add handling for special case types.
codec_options = collection.codec_options
type_registry = TypeRegistry([_PandasNACodec(), _DecimalCodec()])
if pd is not None:
type_registry = TypeRegistry([_PandasNACodec(), _DecimalCodec()])
else:
type_registry = TypeRegistry([_DecimalCodec()])
codec_options = codec_options.with_options(type_registry=type_registry)

while cur_offset < tab_size:
Expand Down
20 changes: 14 additions & 6 deletions bindings/python/pymongoarrow/pandas_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,22 @@
import re

import numpy as np
import pandas as pd
import pyarrow as pa
from bson import Binary, Code, Decimal128, ObjectId
from pandas.api.extensions import (
ExtensionArray,
ExtensionDtype,
register_extension_dtype,
)

# pandas is an optional dependency.  When it is absent, install minimal
# stand-ins so the extension-dtype classes in this module can still be
# *defined*: the base classes degrade to plain ``object`` and the
# ``@register_extension_dtype`` decorator becomes a no-op.
try:
    import pandas as pd
    from pandas.api.extensions import (
        ExtensionArray,
        ExtensionDtype,
        register_extension_dtype,
    )
except ImportError:
    ExtensionDtype = object
    ExtensionArray = object

    def register_extension_dtype(func):
        # No-op stand-in: without pandas there is no dtype registry to
        # register with, so the decorated class is returned unchanged.
        return func


class PandasBSONDtype(ExtensionDtype):
Expand Down
3 changes: 2 additions & 1 deletion bindings/python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ dependencies = [
# Must be kept in sync with "build-system.requires" above.
"pyarrow >=20.0,<20.1",
"pymongo >=4.4,<5",
"pandas >=1.3.5,<3",
"numpy>=2.0.1",
"packaging >=23.2",
]
dynamic = ["version"]
Expand All @@ -53,6 +53,7 @@ Tracker = "https://jira.mongodb.org/projects/INTPYTHON/issues"
[project.optional-dependencies]
test = ["pytz", "pytest"]
test-polars = ["polars"]
test-pandas = ["pandas>=1.3.5,<3"]

[tool.setuptools]
zip-safe = false
Expand Down
11 changes: 8 additions & 3 deletions bindings/python/test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,14 @@

import pytest

pytest_plugins = [
"pandas.tests.extension.conftest",
]
# pandas ships reusable extension-array test fixtures; expose them to pytest
# only when pandas is importable (it is an optional dependency).
try:
    import pandas as pd  # noqa: F401
except ImportError:
    pass
else:
    pytest_plugins = [
        "pandas.tests.extension.conftest",
    ]


@pytest.fixture(autouse=True, scope="session")
Expand Down
6 changes: 5 additions & 1 deletion bindings/python/test/pandas_types/test_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,14 @@
import numpy as np
import pytest
from bson import Binary
from pandas.tests.extension import base

from pymongoarrow.pandas_types import PandasBinary, PandasBinaryArray

try:
from pandas.tests.extension import base
except ImportError:
pytest.skip("skipping pandas tests", allow_module_level=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add something like "To include, create an environment like so: uv sync --dev --extra test --extra test-polars --extra test-pandas"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's already part of just test, that seems redundant

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. As long as we are steering contributors to the justfile and they've opened it, we're good.


try:
base.BaseIndexTests
except AttributeError:
Expand Down
7 changes: 6 additions & 1 deletion bindings/python/test/pandas_types/test_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,15 @@
import numpy as np
import pytest
from bson import Code
from pandas.tests.extension import base

from pymongoarrow.pandas_types import PandasCode, PandasCodeArray

try:
from pandas.tests.extension import base
except ImportError:
pytest.skip("skipping pandas tests", allow_module_level=True)


try:
base.BaseIndexTests
except AttributeError:
Expand Down
7 changes: 6 additions & 1 deletion bindings/python/test/pandas_types/test_decimal128.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,15 @@
import numpy as np
import pytest
from bson import Decimal128
from pandas.tests.extension import base

from pymongoarrow.pandas_types import PandasDecimal128, PandasDecimal128Array

try:
from pandas.tests.extension import base
except ImportError:
pytest.skip("skipping pandas tests", allow_module_level=True)


try:
base.BaseIndexTests
except AttributeError:
Expand Down
7 changes: 6 additions & 1 deletion bindings/python/test/pandas_types/test_objectid.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,15 @@
import numpy as np
import pytest
from bson import ObjectId
from pandas.tests.extension import base

from pymongoarrow.pandas_types import PandasObjectId, PandasObjectIdArray

try:
from pandas.tests.extension import base
except ImportError:
pytest.skip("skipping pandas tests", allow_module_level=True)


try:
base.BaseIndexTests
except AttributeError:
Expand Down
4 changes: 4 additions & 0 deletions bindings/python/test/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,10 @@ def test_write_error(self):
raise awe

def test_write_schema_validation(self):
try:
import pandas as pd # noqa: F401
except ImportError:
self.skipTest("Test requires pandas")
arrow_schema = {k.__name__: v(True) for k, v in _TYPE_NORMALIZER_FACTORY.items()}
schema = {k: v.to_pandas_dtype() for k, v in arrow_schema.items()}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use pandas to go from arrow to numpy?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just for this test, to get the right data types

schema["str"] = "str"
Expand Down
9 changes: 7 additions & 2 deletions bindings/python/test/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@
from test.utils import AllowListEventListener, NullsTestMixin

import numpy as np
import pandas as pd
import pandas.testing
import pyarrow as pa
import pytest
from bson import Binary, Code, CodecOptions, Decimal128, ObjectId
from pyarrow import decimal256, int32, int64
from pymongo import DESCENDING, WriteConcern
Expand All @@ -37,6 +36,12 @@
from pymongoarrow.pandas_types import PandasBSONDtype, PandasDecimal128, PandasObjectId
from pymongoarrow.types import _TYPE_NORMALIZER_FACTORY, Decimal128Type, ObjectIdType

try:
import pandas as pd
import pandas.testing
except ImportError:
pytest.skip("skipping pandas tests", allow_module_level=True)


class PandasTestBase(unittest.TestCase):
@classmethod
Expand Down
31 changes: 30 additions & 1 deletion bindings/python/test/test_pymongoarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import unittest
from test import client_context

from pymongoarrow.api import find_arrow_all
from pymongoarrow.api import find_arrow_all, find_pandas_all, find_polars_all
from pymongoarrow.schema import Schema
from pymongoarrow.version import __version__

Expand All @@ -34,6 +34,35 @@ def test_version(self):
self.assertIsNotNone(__version__)
self.assertIsInstance(__version__, str)

def test_no_pandas(self):
    """``find_pandas_all`` must raise ``ValueError`` when pandas is absent."""
    # This test is only meaningful in an environment without pandas.
    try:
        import pandas as pd  # noqa: F401
    except ImportError:
        pass
    else:
        self.skipTest("Requires no pandas")
    self.client.test.drop_collection("test")
    coll = self.client.test.test
    coll.insert_many([{"data": False} for _ in range(1000)])
    with self.assertRaises(ValueError):
        find_pandas_all(coll, {}, schema=Schema({"data": bool}))

def test_no_polars(self):
    """``find_polars_all`` must raise ``ValueError`` when polars is absent."""
    # This test is only meaningful in an environment without polars.
    try:
        import polars as pl  # noqa: F401
    except ImportError:
        pass
    else:
        self.skipTest("Requires no polars")
    self.client.test.drop_collection("test")
    coll = self.client.test.test
    coll.insert_many([{"data": False} for _ in range(1000)])
    with self.assertRaises(ValueError):
        find_polars_all(coll, {}, schema=Schema({"data": bool}))

def test_capped_collection(self):
self.client.test.drop_collection("test")
self.client.test.create_collection("test", capped=True, size=5000)
Expand Down
25 changes: 21 additions & 4 deletions bindings/python/test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import numpy as np
import pyarrow as pa
from bson import Decimal128, ObjectId
from pandas import isna
from pyarrow import bool_, float64, int64, string, timestamp
from pymongo import WriteConcern, monitoring

Expand All @@ -33,6 +32,24 @@
)


def isnan(inp):
    """Return a list of booleans flagging the missing entries of *inp*.

    Accepts a pyarrow ``Array``/``ChunkedArray`` or any iterable.  ``None``
    and values whose string form matches pandas' missing-value sentinels
    ("<NA>", "NaT") are treated as missing, alongside genuine float NaNs.
    Non-numeric values (e.g. ObjectId, str) are never considered NaN.
    """
    values = inp.to_pylist() if isinstance(inp, (pa.Array, pa.ChunkedArray)) else inp

    def _missing(item):
        # pandas missing-value sentinels stringify to these markers.
        if item is None or str(item) in ("<NA>", "NaT"):
            return True
        try:
            return np.isnan(item)
        except TypeError:
            # np.isnan rejects non-numeric input: not a NaN.
            return False

    return [_missing(v) for v in values]


class EventListener(monitoring.CommandListener):
def __init__(self):
self.results = defaultdict(list)
Expand Down Expand Up @@ -176,7 +193,7 @@ def test_int_handling(self):
self.assertType(table["int64"], atype)

# Does it contain NAs where we expect?
self.assertTrue(np.all(np.equal(isna(int64_arr), isna(table["int64"]))))
self.assertTrue(np.all(np.equal(isnan(int64_arr), isnan(table["int64"]))))

# Write
self.coll.drop()
Expand Down Expand Up @@ -217,7 +234,7 @@ def test_other_handling(self):
self.assertType(table["other"], con_type)
self.assertEqual(
self.na_safe(con_type),
np.all(np.equal(isna(others), isna(table["other"]))),
np.all(np.equal(isnan(others), isnan(table["other"]))),
)

def writeback():
Expand Down Expand Up @@ -262,4 +279,4 @@ def test_bool_handling(self):
self.assertType(table["bool_"], atype)

# Does it contain Nones where expected?
self.assertTrue(np.all(np.equal(isna(bools), isna(table["bool_"]))))
self.assertTrue(np.all(np.equal(isnan(bools), isnan(table["bool_"]))))
Loading
Loading