diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index 99415b6fc6ec8..6749dc9634142 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -635,6 +635,16 @@ def is_string_dtype(arr_or_dtype) -> bool:
     >>> is_string_dtype(pd.Series([1, 2], dtype=object))
     False
     """
+    # Handle Categorical series and CategoricalDtype consistently
+    # - both should return False
+    if hasattr(arr_or_dtype, "dtype") and isinstance(
+        arr_or_dtype.dtype, CategoricalDtype
+    ):
+        return False
+
+    if isinstance(arr_or_dtype, CategoricalDtype):
+        return False
+
     if hasattr(arr_or_dtype, "dtype") and _get_dtype(arr_or_dtype).kind == "O":
         return is_all_strings(arr_or_dtype)
 
diff --git a/pandas/tests/dtypes/test_categorical_string_dtype.py b/pandas/tests/dtypes/test_categorical_string_dtype.py
new file mode 100644
index 0000000000000..a4b0152062d4d
--- /dev/null
+++ b/pandas/tests/dtypes/test_categorical_string_dtype.py
@@ -0,0 +1,50 @@
+import numpy as np
+
+from pandas.core.dtypes.common import is_string_dtype
+from pandas.core.dtypes.dtypes import CategoricalDtype
+
+import pandas as pd
+
+
+def test_is_string_dtype_categorical_consistency():
+    """Test that is_string_dtype returns consistent results for
+    Categorical series and dtype."""
+    # Test with CategoricalDtype directly
+    categorical_dtype = CategoricalDtype()
+    assert not is_string_dtype(categorical_dtype)
+
+    # Test with Series containing Categorical
+    categorical_series = pd.Series(pd.Categorical(["a", "b", "c"]))
+    assert not is_string_dtype(categorical_series)
+
+    # Test with ordered CategoricalDtype
+    ordered_categorical_dtype = CategoricalDtype(ordered=True)
+    assert not is_string_dtype(ordered_categorical_dtype)
+
+    # Test with Series containing ordered Categorical
+    ordered_categorical_series = pd.Series(
+        pd.Categorical(["a", "b", "c"], ordered=True)
+    )
+    assert not is_string_dtype(ordered_categorical_series)
+
+    # Test with CategoricalDtype with specific categories
+    specific_categorical_dtype = CategoricalDtype(categories=["x", "y", "z"])
+    assert not is_string_dtype(specific_categorical_dtype)
+
+    # Test with Series containing Categorical with specific categories
+    specific_categorical_series = pd.Series(
+        pd.Categorical(["x", "y", "z"], categories=["x", "y", "z"])
+    )
+    assert not is_string_dtype(specific_categorical_series)
+
+    # Test with empty Categorical
+    empty_categorical = pd.Series(pd.Categorical([]))
+    assert not is_string_dtype(empty_categorical)
+
+    # Test with Categorical containing NaN values
+    nan_categorical = pd.Series(pd.Categorical([np.nan, "a", "b"]))
+    assert not is_string_dtype(nan_categorical)
+
+    # Test with numeric Categorical
+    numeric_categorical = pd.Series(pd.Categorical([1, 2, 3]))
+    assert not is_string_dtype(numeric_categorical)
diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
index f93105498ac79..b599be5d042fe 100644
--- a/pandas/tests/frame/test_query_eval.py
+++ b/pandas/tests/frame/test_query_eval.py
@@ -168,7 +168,7 @@ def test_query_duplicate_column_name(self, engine, parser):
             }
         ).rename(columns={"B": "A"})
 
-        res = df.query('C == 1', engine=engine, parser=parser)
+        res = df.query("C == 1", engine=engine, parser=parser)
 
         expect = DataFrame(
             [[1, 1, 1]],
@@ -1411,7 +1411,7 @@ def test_expr_with_column_name_with_backtick_and_hash(self):
     def test_expr_with_column_name_with_backtick(self):
         # GH 59285
         df = DataFrame({"a`b": (1, 2, 3), "ab": (4, 5, 6)})
-        result = df.query("`a``b` < 2")  # noqa
+        result = df.query("`a``b` < 2")
         # Note: Formatting checks may wrongly consider the above ``inline code``.
         expected = df[df["a`b"] < 2]
         tm.assert_frame_equal(result, expected)