Skip to content

Commit a1aaa3b

Browse files
committed
Implement split() using split_left(); deactivate class Delimiter-related code
1 parent 88279b2 commit a1aaa3b

File tree

3 files changed

+155
-43
lines changed

3 files changed

+155
-43
lines changed

README.md

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,7 @@ The following table presents types, values and simplified, short prototypes of t
100100

101101
| Kind | Type or function | Notes |
102102
| ----------------- | ----------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- |
103-
| **Type** | **literal_delimiter** | a string, `literal_delimiter("exact_text")` |
104-
|   | **any_of_delimiter** | any of given characters, `any_of_delimiter("+-")` |
105-
|   | **fixed_delimiter** | fixed length, `fixed_delimiter(length)` |
106-
|   | **limit_delimiter** | apply given delimiter a limited number of times (not implemented) |
107-
|   | **regex_delimiter** | regular expression, `regex_delimiter("regexp")` |
108-
|   | **char_delimiter** | a character, `char_delimiter('&')` |
103+
| **Type** | None |   |
109104
|   |   |   |
110105
| **Value** | size_t **string::npos** | not-found position value, in nonstd::string namespace |
111106
|   |   |   |
@@ -173,7 +168,7 @@ The following table presents types, values and simplified, short prototypes of t
173168
| **Combining** | string **append**(string_view head, string_view tail) | string with tail appended to head |
174169
| &nbsp; | string **join**(collection\<string_view\> vec, string_view sep) | string with elements of collection joined with given separator string |
175170
| &nbsp; | &nbsp; | &nbsp; |
176-
| **Separating** | vector\<string_view\> **split**(string_view sv, string_view set) | vector of string_view with elements of string separated by characters from given set |
171+
| **Separating** | vector\<string_view\> **split**(string_view sv, string_view set \[, size_t Nsplit\]) | vector of string_view with elements of string separated by characters from given set, limited to at most Nsplit elements if given |
177172
| &nbsp; | tuple\<string_view, string_view\> **split_left**(string_view sv, string_view set \[, size_t count\]) | tuple with head and tail string_view on given string as split at left by characters in given set, default all in set |
178173
| &nbsp; | tuple\<string_view, string_view\> **split_right**(string_view sv, string_view set \[, size_t count\]) | tuple with head and tail string_view on given string as split at right by characters in given set, default all in set |
179174

@@ -281,7 +276,7 @@ strip_left: string with characters in set removed from left of string [" \t\n"]
281276
strip_right: string with characters in set removed from right of string [" \t\n"]
282277
strip: string with characters in set removed from left and right of string [" \t\n"]
283278
join: string with strings from collection joined separated by given separator
284-
split: split string into vector of string_view given delimiter - literal_delimiter
279+
split: split string into vector of string_view given set of delimiter characters
285280
split_left: split string into two-element tuple given set of delimiter characters - forward
286281
split_right: split string into two-element tuple given set of delimiter characters - reverse
287282
compare: negative, zero or positive for lhs is less than, equal to or greater than rhs

include/nonstd/string.hpp

Lines changed: 115 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1565,6 +1565,8 @@ replace_all(
15651565
// - regex_delimiter - regular expression delimiter
15661566
// - char_delimiter - single-char delimiter
15671567

1568+
#if 0
1569+
15681570
template< typename CharT >
15691571
string_nodiscard std17::basic_string_view<CharT>
15701572
basic_delimiter_end(std17::basic_string_view<CharT> sv) string_noexcept
@@ -1923,12 +1925,16 @@ typedef basic_regex_delimiter< char32_t> u32regex_delimiter;
19231925
# endif
19241926
#endif
19251927

1928+
#endif // 0 Delimiters
1929+
19261930
// split():
19271931

19281932
namespace string {
19291933
namespace detail {
19301934

1931-
// free function length(), for delimiter:
1935+
// // free function length(), for delimiter:
1936+
1937+
#if 0
19321938

19331939
template< typename Coll >
19341940
string_nodiscard inline std::size_t length( Coll const & coll )
@@ -1938,7 +1944,7 @@ string_nodiscard inline std::size_t length( Coll const & coll )
19381944

19391945
template< typename CharT, typename Delimiter >
19401946
string_nodiscard std::vector< std17::basic_string_view<CharT> >
1941-
split( std17::basic_string_view<CharT> text, Delimiter delimiter, std::size_t Nsplit )
1947+
split_delim( std17::basic_string_view<CharT> text, Delimiter delimiter, std::size_t Nsplit )
19421948
{
19431949
std::vector< std17::basic_string_view<CharT> > result;
19441950

@@ -1960,11 +1966,85 @@ split( std17::basic_string_view<CharT> text, Delimiter delimiter, std::size_t Ns
19601966
return result;
19611967
}
19621968

1969+
#endif // 0
1970+
1971+
template< typename CharT >
1972+
string_nodiscard inline auto
1973+
split_left(
1974+
std17::basic_string_view<CharT> text
1975+
, std17::basic_string_view<CharT> set
1976+
, std::size_t count = std::numeric_limits<std::size_t>::max() )
1977+
-> std::tuple<std17::basic_string_view<CharT>, std17::basic_string_view<CharT>>
1978+
{
1979+
auto const pos = text.find_first_of( set );
1980+
1981+
if ( pos == npos )
1982+
return { text, text };
1983+
1984+
auto const n = (std::min)( count, text.substr( pos ).find_first_not_of( set ) );
1985+
1986+
return { text.substr( 0, pos ), n != npos ? text.substr( pos + n ) : text.substr( text.size(), 0 ) };
1987+
1988+
// Note: `text.substr( text.size(), 0 )` indicates empty and end of text, see `lhs.cend() == text.cend()` in detail::split().
1989+
}
1990+
1991+
template< typename CharT >
1992+
string_nodiscard inline auto
1993+
split_right(
1994+
std17::basic_string_view<CharT> text
1995+
, std17::basic_string_view<CharT> set
1996+
, std::size_t count = std::numeric_limits<std::size_t>::max() )
1997+
-> std::tuple<std17::basic_string_view<CharT>, std17::basic_string_view<CharT>>
1998+
{
1999+
auto const pos = text.find_last_of( set );
2000+
2001+
if ( pos == npos )
2002+
return { text, text };
2003+
2004+
auto const n = (std::min)( count, pos - text.substr( 0, pos ).find_last_not_of( set ) );
2005+
2006+
return { text.substr( 0, pos - n + 1 ), text.substr( pos + 1 ) };
2007+
}
2008+
2009+
template< typename CharT >
2010+
string_nodiscard std::vector< std17::basic_string_view<CharT> >
2011+
split( std17::basic_string_view<CharT> text
2012+
, std17::basic_string_view<CharT> set
2013+
, std::size_t Nsplit )
2014+
{
2015+
std::vector< std17::basic_string_view<CharT> > result;
2016+
2017+
std17::basic_string_view<CharT> lhs = text;
2018+
std17::basic_string_view<CharT> rhs;
2019+
2020+
for( std::size_t cnt = 1; ; ++cnt )
2021+
{
2022+
if ( cnt >= Nsplit )
2023+
{
2024+
result.push_back( lhs ); // push tail:
2025+
break;
2026+
}
2027+
2028+
std::tie(lhs, rhs) = split_left( lhs, set /*, Nset*/ );
2029+
2030+
result.push_back( lhs );
2031+
2032+
if ( lhs.cend() == text.cend() )
2033+
break;
2034+
2035+
lhs = rhs;
2036+
}
2037+
2038+
return std::move( result );
2039+
}
2040+
19632041
} // namespace detail
19642042
} // namespace string
19652043

19662044
// split() -> vector
19672045

2046+
#if 0
2047+
19682048
#define string_MK_SPLIT_DELIM(CharT) \
19692049
template< typename Delimiter \
19702050
, typename std::enable_if< \
@@ -1977,18 +2057,30 @@ split( std17::basic_string_view<CharT> text, Delimiter delimiter, std::size_t Ns
19772057
std17::basic_string_view<CharT> text, Delimiter delimiter \
19782058
, std::size_t count = std::numeric_limits<std::size_t>::max() ) \
19792059
{ \
1980-
return detail::split(text, delimiter, count ); \
2060+
return detail::split_delim(text, delimiter, count ); \
19812061
}
19822062

19832063

19842064
#define string_MK_SPLIT_STRING(CharT) \
19852065
string_nodiscard inline std::vector<std17::basic_string_view<CharT>> \
1986-
split( \
2066+
split_string( \
19872067
std17::basic_string_view<CharT> text \
19882068
, std17::basic_string_view<CharT> set \
19892069
, std::size_t count = std::numeric_limits<std::size_t>::max() ) \
19902070
{ \
1991-
return detail::split(text, basic_literal_delimiter<CharT>(set), count ); \
2071+
return detail::split_delim(text, basic_literal_delimiter<CharT>(set), count ); \
2072+
}
2073+
2074+
#endif // 0
2075+
2076+
// string_MK_SPLIT(CharT): define the public split() overload for character type CharT.
//
// The generated function forwards to detail::split(); Nsplit defaults to the
// largest std::size_t value.

#define string_MK_SPLIT(CharT)                                                  \
    string_nodiscard inline std::vector< std17::basic_string_view<CharT> >      \
    split(  std17::basic_string_view<CharT> text                                \
          , std17::basic_string_view<CharT> set                                 \
          , std::size_t Nsplit = std::numeric_limits<std::size_t>::max() )      \
    {                                                                           \
        return detail::split( text, set, Nsplit );                              \
    }
19932085

19942086
#if string_CONFIG_PROVIDE_CHAR_T
@@ -2003,14 +2095,7 @@ split_left(
20032095
, std::size_t count = std::numeric_limits<std::size_t>::max() ) \
20042096
-> std::tuple<std17::basic_string_view<CharT>, std17::basic_string_view<CharT>> \
20052097
{ \
2006-
auto const pos = text.find_first_of( set ); \
2007-
\
2008-
if ( pos == npos ) \
2009-
return { text, text }; \
2010-
\
2011-
auto const n = (std::min)( count, text.substr( pos ).find_first_not_of( set ) ); \
2012-
\
2013-
return { text.substr( 0, pos ), n != npos ? text.substr( pos + n ) : text.substr( 0, 0 ) }; \
2098+
return detail::split_left( text, set, count ); \
20142099
}
20152100

20162101
// split_right() -> tuple
@@ -2023,14 +2108,7 @@ split_right(
20232108
, std::size_t count = std::numeric_limits<std::size_t>::max() ) \
20242109
-> std::tuple<std17::basic_string_view<CharT>, std17::basic_string_view<CharT>> \
20252110
{ \
2026-
auto const pos = text.find_last_of( set ); \
2027-
\
2028-
if ( pos == npos ) \
2029-
return { text, text }; \
2030-
\
2031-
auto const n = (std::min)( count, pos - text.substr( 0, pos ).find_last_not_of( set ) ); \
2032-
\
2033-
return { text.substr( 0, pos - n + 1 ), text.substr( pos + 1 ) }; \
2111+
return detail::split_right( text, set, count ); \
20342112
}
20352113

20362114
#endif // string_CONFIG_PROVIDE_CHAR_T
@@ -2147,8 +2225,9 @@ string_MK_TO_CASE_STRING ( char, lowercase )
21472225
string_MK_TO_CASE_STRING ( char, uppercase )
21482226
string_MK_CAPITALIZE ( char )
21492227
string_MK_JOIN ( char )
2150-
string_MK_SPLIT_DELIM ( char )
2151-
string_MK_SPLIT_STRING ( char )
2228+
// string_MK_SPLIT_DELIM ( char )
2229+
// string_MK_SPLIT_STRING ( char )
2230+
string_MK_SPLIT ( char )
21522231
string_MK_SPLIT_LEFT ( char )
21532232
string_MK_SPLIT_RIGHT ( char )
21542233

@@ -2216,8 +2295,9 @@ string_MK_TO_CASE_STRING ( wchar_t, lowercase )
22162295
string_MK_TO_CASE_STRING ( wchar_t, uppercase )
22172296
string_MK_CAPITALIZE ( wchar_t )
22182297
string_MK_JOIN ( wchar_t )
2219-
string_MK_SPLIT_DELIM ( wchar_t )
2220-
string_MK_SPLIT_STRING ( wchar_t )
2298+
// string_MK_SPLIT_DELIM ( wchar_t )
2299+
// string_MK_SPLIT_STRING ( wchar_t )
2300+
string_MK_SPLIT ( wchar_t )
22212301
string_MK_SPLIT_LEFT ( wchar_t )
22222302
string_MK_SPLIT_RIGHT ( wchar_t )
22232303
// ...
@@ -2285,8 +2365,9 @@ string_MK_TO_CASE_STRING ( char8_t, lowercase )
22852365
string_MK_TO_CASE_STRING ( char8_t, uppercase )
22862366
string_MK_CAPITALIZE ( char8_t )
22872367
string_MK_JOIN ( char8_t )
2288-
string_MK_SPLIT_DELIM ( char8_t )
2289-
string_MK_SPLIT_STRING ( char8_t )
2368+
// string_MK_SPLIT_DELIM ( char8_t )
2369+
// string_MK_SPLIT_STRING ( char8_t )
2370+
string_MK_SPLIT ( char8_t )
22902371
string_MK_SPLIT_LEFT ( char8_t )
22912372
string_MK_SPLIT_RIGHT ( char8_t )
22922373
// ...
@@ -2354,8 +2435,9 @@ string_MK_TO_CASE_STRING ( char16_t, lowercase )
23542435
string_MK_TO_CASE_STRING ( char16_t, uppercase )
23552436
string_MK_CAPITALIZE ( char16_t )
23562437
string_MK_JOIN ( char16_t )
2357-
string_MK_SPLIT_DELIM ( char16_t )
2358-
string_MK_SPLIT_STRING ( char16_t )
2438+
// string_MK_SPLIT_DELIM ( char16_t )
2439+
// string_MK_SPLIT_STRING ( char16_t )
2440+
string_MK_SPLIT ( char16_t )
23592441
string_MK_SPLIT_LEFT ( char16_t )
23602442
string_MK_SPLIT_RIGHT ( char16_t )
23612443
// ...
@@ -2423,8 +2505,9 @@ string_MK_TO_CASE_STRING ( char32_t, lowercase )
24232505
string_MK_TO_CASE_STRING ( char32_t, uppercase )
24242506
string_MK_CAPITALIZE ( char32_t )
24252507
string_MK_JOIN ( char32_t )
2426-
string_MK_SPLIT_DELIM ( char32_t )
2427-
string_MK_SPLIT_STRING ( char32_t )
2508+
// string_MK_SPLIT_DELIM ( char32_t )
2509+
// string_MK_SPLIT_STRING ( char32_t )
2510+
string_MK_SPLIT ( char32_t )
24282511
string_MK_SPLIT_LEFT ( char32_t )
24292512
string_MK_SPLIT_RIGHT ( char32_t )
24302513
// ...
@@ -2494,6 +2577,7 @@ string_MK_COMPARE_GT ( char32_t )
24942577
#undef string_MK_JOIN
24952578
#undef string_MK_SPLIT_DELIM
24962579
#undef string_MK_SPLIT_STRING
2580+
#undef string_MK_SPLIT
24972581
#undef string_MK_SPLIT_LEFT
24982582
#undef string_MK_SPLIT_RIGHT
24992583
#undef string_MK_COMPARE

test/string.t.cpp

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -674,23 +674,56 @@ CASE( "join: string with strings from collection joined separated by given separ
674674
// - char_delimiter - single-char delimiter
675675
// - above as empty delimiters
676676

677+
#if 0
678+
677679
CASE( "split: split string into vector of string_view given delimiter - literal_delimiter" )
678680
{
679681
std::vector<std::string> golden( make_vec_of_strings() );
680682

681-
EXPECT( split("abc..def..ghi", "..") == golden );
683+
// EXPECT( split("abc..def..ghi", "..") == golden );
682684
EXPECT( split("abc..def..ghi", literal_delimiter("..")) == golden );
683685
}
684686

685-
#if 0
686-
687687
CASE( "split: split string into vector of string_view given delimiter - literal_delimiter" )
688688
{
689689
std::vector<std::string> golden( make_vec_of_strings("", "abc", "def") );
690690

691-
EXPECT( split("-abc-def", "-") == golden );
691+
EXPECT( split_string("-abc-def", "-") == golden );
692+
}
693+
694+
#endif // 0
695+
696+
// Checks split() with a set of delimiter characters: delimiters between elements,
// a delimiter at the start and/or end of the text (expected to give an empty
// first/last element), a run of several delimiter characters, and a limit on
// the number of resulting elements.
CASE( "split: split string into vector of string_view given set of delimiter characters" )
697+
{
698+
// single separator, single character set - in between:
699+
{
700+
std::vector<std::string> golden( make_vec_of_strings("abc", "def", "ghi") );
701+
EXPECT( split("abc-def-ghi", "-") == golden );
702+
}
703+
// single separator, single character set - at start and in between:
704+
{
705+
std::vector<std::string> golden( make_vec_of_strings("", "abc", "def") );
706+
EXPECT( split("-abc-def", "-") == golden );
707+
}
708+
// single separator, single character set - at start and at end:
709+
{
710+
std::vector<std::string> golden( make_vec_of_strings("", "abc", "") );
711+
EXPECT( split("-abc-", "-") == golden );
712+
}
713+
// multiple and single separator, multiple characters in set - in between:
714+
// (the run ",;:" is expected to act as one separator, giving no empty elements)
{
715+
std::vector<std::string> golden( make_vec_of_strings("abc", "123", "xyz") );
716+
EXPECT( split("abc,;:123;xyz", ":;,") == golden );
717+
}
718+
// single separator, single character set - max number of split results (3):
719+
// (the last element is expected to hold the remaining text, unsplit)
{
720+
std::vector<std::string> golden( make_vec_of_strings("abc", "123", "xyz-789") );
721+
EXPECT( split("abc-123-xyz-789", "-", 3) == golden );
722+
}
692723
}
693724

725+
#if 0
726+
694727
CASE( "split: split string into vector of string_view given delimiter - literal_delimiter" )
695728
{
696729
std::vector<std::string> golden( make_vec_of_strings("abc", "", "def") );

0 commit comments

Comments
 (0)