Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,16 @@ def is_string_dtype(arr_or_dtype) -> bool:
>>> is_string_dtype(pd.Series([1, 2], dtype=object))
False
"""
# Handle Categorical series and CategoricalDtype consistently
# - both should return False
if hasattr(arr_or_dtype, "dtype") and isinstance(
arr_or_dtype.dtype, CategoricalDtype
):
return False

if isinstance(arr_or_dtype, CategoricalDtype):
return False

if hasattr(arr_or_dtype, "dtype") and _get_dtype(arr_or_dtype).kind == "O":
return is_all_strings(arr_or_dtype)

Expand Down
50 changes: 50 additions & 0 deletions pandas/tests/dtypes/test_categorical_string_dtype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import numpy as np

from pandas.core.dtypes.common import is_string_dtype
from pandas.core.dtypes.dtypes import CategoricalDtype

import pandas as pd


def test_is_string_dtype_categorical_consistency():
    """Check that ``is_string_dtype`` is False for categorical dtypes and Series.

    Each ``CategoricalDtype`` variant is listed next to a Series backed by a
    matching ``Categorical`` so that the dtype and the Series holding it are
    checked side by side; the empty, NaN-containing and numeric Series cover
    the remaining edge cases.
    """
    non_string_inputs = [
        # Default dtype, then a Series wrapping a default Categorical.
        CategoricalDtype(),
        pd.Series(pd.Categorical(["a", "b", "c"])),
        # Ordered dtype, then a Series wrapping an ordered Categorical.
        CategoricalDtype(ordered=True),
        pd.Series(pd.Categorical(["a", "b", "c"], ordered=True)),
        # Dtype with explicit categories, then the matching Series.
        CategoricalDtype(categories=["x", "y", "z"]),
        pd.Series(pd.Categorical(["x", "y", "z"], categories=["x", "y", "z"])),
        # Edge cases: empty data, missing values, numeric categories.
        pd.Series(pd.Categorical([])),
        pd.Series(pd.Categorical([np.nan, "a", "b"])),
        pd.Series(pd.Categorical([1, 2, 3])),
    ]

    # Assertions run in the listed order; none of the inputs is a string dtype.
    for candidate in non_string_inputs:
        assert not is_string_dtype(candidate)
4 changes: 2 additions & 2 deletions pandas/tests/frame/test_query_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def test_query_duplicate_column_name(self, engine, parser):
}
).rename(columns={"B": "A"})

res = df.query('C == 1', engine=engine, parser=parser)
res = df.query("C == 1", engine=engine, parser=parser)

expect = DataFrame(
[[1, 1, 1]],
Expand Down Expand Up @@ -1411,7 +1411,7 @@ def test_expr_with_column_name_with_backtick_and_hash(self):
def test_expr_with_column_name_with_backtick(self):
# GH 59285
df = DataFrame({"a`b": (1, 2, 3), "ab": (4, 5, 6)})
result = df.query("`a``b` < 2") # noqa
result = df.query("`a``b` < 2")
# Note: Formatting checks may wrongly consider the above ``inline code``.
expected = df[df["a`b"] < 2]
tm.assert_frame_equal(result, expected)
Expand Down
Loading