Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/release/upcoming_changes/24868.new_feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Add ``np.core.umath.find`` and ``np.core.umath.rfind`` UFuncs
-------------------------------------------------------------
Add two UFuncs, ``find`` and ``rfind``, that operate on unicode or byte
strings and are used in ``np.char``. They operate similarly to ``str.find``
and ``str.rfind``.
27 changes: 26 additions & 1 deletion numpy/_core/code_generators/generate_umath.py
Original file line number Diff line number Diff line change
Expand Up @@ -1141,7 +1141,32 @@ def english_upper(s):
Ufunc(1, 1, False_,
docstrings.get('numpy._core.umath.isalpha'),
None,
)
),
'find':
Ufunc(4, 1, AllOnes,
docstrings.get('numpy.core.umath.find'),
"PyUFunc_FindTypeResolver",
),
'rfind':
Ufunc(4, 1, AllOnes,
docstrings.get('numpy.core.umath.rfind'),
"PyUFunc_FindTypeResolver",
),
'count':
Ufunc(4, 1, Zero,
docstrings.get('numpy.core.umath.count'),
"PyUFunc_FindTypeResolver",
),
'__replace_impl':
Ufunc(4, 1, None,
docstrings.get('numpy.core.umath.__replace_impl'),
"PyUFunc_ReplaceTypeResolver",
),
'str_len':
Ufunc(1, 1, Zero,
docstrings.get('numpy.core.umath.str_len'),
None,
),
}

def indent(st, spaces):
Expand Down
153 changes: 153 additions & 0 deletions numpy/_core/code_generators/ufunc_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4276,3 +4276,156 @@ def add_newdoc(place, name, doc):
--------
str.isalpha
""")

add_newdoc('numpy.core.umath', 'find',
    """
    For each element of `x1`, return the lowest index in the string where
    substring `x2` is found, such that `x2` is contained in the
    range [`x3`, `x4`).

    Parameters
    ----------
    x1 : array_like, with ``bytes_`` or ``unicode_`` dtype

    x2 : array_like, with ``bytes_`` or ``unicode_`` dtype

    x3 : array_like, with ``int_`` dtype

    x4 : array_like, with ``int_`` dtype
    $PARAMS

    `x3` and `x4` are interpreted as in slice notation.

    Returns
    -------
    y : ndarray
        Output array of ints
        $OUT_SCALAR_2

    See Also
    --------
    str.find

    Examples
    --------
    >>> a = np.array(["NumPy is a Python library"])
    >>> np.core.umath.find(a, "Python", 0, None)
    array([11])

    """)

add_newdoc('numpy.core.umath', 'rfind',
    """
    For each element of `x1`, return the highest index in the string where
    substring `x2` is found, such that `x2` is contained in the
    range [`x3`, `x4`).

    Parameters
    ----------
    x1 : array_like, with ``bytes_`` or ``unicode_`` dtype

    x2 : array_like, with ``bytes_`` or ``unicode_`` dtype

    x3 : array_like, with ``int_`` dtype

    x4 : array_like, with ``int_`` dtype
    $PARAMS

    `x3` and `x4` are interpreted as in slice notation.

    Returns
    -------
    y : ndarray
        Output array of ints
        $OUT_SCALAR_2

    See Also
    --------
    str.rfind

    Examples
    --------
    >>> a = np.array(["NumPy is a Python library"])
    >>> np.core.umath.rfind(a, "Python", 0, None)
    array([11])

    """)

add_newdoc('numpy.core.umath', 'count',
    """
    Returns an array with the number of non-overlapping occurrences of
    substring `x2` in `x1`, in the range [`x3`, `x4`).

    Parameters
    ----------
    x1 : array_like, with ``bytes_`` or ``unicode_`` dtype

    x2 : array_like, with ``bytes_`` or ``unicode_`` dtype

    x3 : array_like, with ``int_`` dtype

    x4 : array_like, with ``int_`` dtype
    $PARAMS

    `x3` and `x4` are interpreted as in slice notation.

    Returns
    -------
    y : ndarray
        Output array of ints
        $OUT_SCALAR_2

    See Also
    --------
    str.count

    Examples
    --------
    >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
    >>> c
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
    >>> end = np.iinfo(np.int_).max
    >>> np.core.umath.count(c, 'A', 0, end)
    array([3, 1, 1])
    >>> np.core.umath.count(c, 'aA', 0, end)
    array([3, 1, 0])
    >>> np.core.umath.count(c, 'A', 1, 4)
    array([2, 1, 1])
    >>> np.core.umath.count(c, 'A', 1, 3)
    array([1, 0, 0])

    """)

# Registered with an empty docstring; presumably intentional because the
# double-underscore name marks an internal helper ufunc -- TODO confirm.
add_newdoc('numpy.core.umath', '__replace_impl', '')

add_newdoc('numpy.core.umath', 'str_len',
    """
    Return length of ``x`` element-wise.

    Parameters
    ----------
    x : array_like, with ``bytes_`` or ``unicode_`` dtype
    $PARAMS

    Returns
    -------
    y : ndarray
        Output array of ints
        $OUT_SCALAR_1

    See Also
    --------
    len

    Examples
    --------
    >>> a = np.array(['Grace Hopper Conference', 'Open Source Day'])
    >>> np.core.umath.str_len(a)
    array([23, 15])
    >>> a = np.array([u'\u0420', u'\u043e'])
    >>> np.core.umath.str_len(a)
    array([1, 1])
    >>> a = np.array([['hello', 'world'], [u'\u0420', u'\u043e']])
    >>> np.core.umath.str_len(a)
    array([[5, 5], [1, 1]])

    """)
23 changes: 10 additions & 13 deletions numpy/_core/defchararray.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,10 +292,7 @@ def str_len(a):
>>> np.char.str_len(a)
array([[5, 5], [1, 1]])
"""
# Note: __len__, etc. currently return ints, which are not C-integers.
# Generally intp would be expected for lengths, although int is sufficient
# due to the dtype itemsize limitation.
return _vec_string(a, int_, '__len__')
return numpy._core.umath.str_len(a)


@array_function_dispatch(_binary_op_dispatcher)
Expand Down Expand Up @@ -558,7 +555,8 @@ def count(a, sub, start=0, end=None):
array([1, 0, 0])

"""
return _vec_string(a, int_, 'count', [sub, start] + _clean_args(end))
end = end or numpy.iinfo(numpy.int_).max
return numpy._core.umath.count(a, sub, start, end)


def _code_dispatcher(a, encoding=None, errors=None):
Expand Down Expand Up @@ -773,9 +771,8 @@ def find(a, sub, start=0, end=None):
array([11])

"""
return _vec_string(
a, int_, 'find', [sub, start] + _clean_args(end))

end = end or numpy.iinfo(numpy.int_).max
return numpy._core.umath.find(a, sub, start, end)

@array_function_dispatch(_count_dispatcher)
def index(a, sub, start=0, end=None):
Expand Down Expand Up @@ -1270,8 +1267,8 @@ def replace(a, old, new, count=None):
>>> np.char.replace(a, 'is', 'was')
array(['The dwash was fresh', 'Thwas was it'], dtype='<U19')
"""
return _to_bytes_or_str_array(
_vec_string(a, object_, 'replace', [old, new] + _clean_args(count)), a)
count = count or numpy.iinfo(numpy.int_).max
return numpy._core.umath.replace(a, old, new, count)


@array_function_dispatch(_count_dispatcher)
Expand Down Expand Up @@ -1303,8 +1300,8 @@ def rfind(a, sub, start=0, end=None):
str.rfind

"""
return _vec_string(
a, int_, 'rfind', [sub, start] + _clean_args(end))
end = end or numpy.iinfo(numpy.int_).max
return numpy._core.umath.rfind(a, sub, start, end)


@array_function_dispatch(_count_dispatcher)
Expand Down Expand Up @@ -2533,7 +2530,7 @@ def replace(self, old, new, count=None):
char.replace

"""
return asarray(replace(self, old, new, count))
return replace(self, old, new, count)

def rfind(self, sub, start=0, end=None):
"""
Expand Down
Loading