diff --git a/include/sparrow/types/data_type.hpp b/include/sparrow/types/data_type.hpp index bda2088f6..e34fa8aa1 100644 --- a/include/sparrow/types/data_type.hpp +++ b/include/sparrow/types/data_type.hpp @@ -955,18 +955,4 @@ namespace sparrow } } -template <> -struct std::formatter -{ - constexpr auto parse(std::format_parse_context& ctx) - { - return ctx.begin(); // Simple implementation - } - - auto format(const std::byte& b, std::format_context& ctx) const - { - return std::format_to(ctx.out(), "{}", static_cast(b)); - } -}; - #endif diff --git a/include/sparrow/utils/format.hpp b/include/sparrow/utils/format.hpp index 43edef173..b695bba7b 100644 --- a/include/sparrow/utils/format.hpp +++ b/include/sparrow/utils/format.hpp @@ -17,15 +17,122 @@ #include #include +#include #include #include #include #include +#include #include #include #include "sparrow/utils/contracts.hpp" +namespace sparrow::detail +{ + struct sequence_format_spec + { + char fill = ' '; + char align = '>'; // '<', '>', '^' + std::size_t width = 0; + + // Parse: [[fill]align] [width] + // Grammar subset: (fill? align?) width? + template + constexpr It parse(It it, It end) + { + if (it == end || *it == '}') + { + return it; + } + + // Detect [fill][align] or [align] + auto next = it; + if (next != end) + { + ++next; + if (next != end && (*next == '<' || *next == '>' || *next == '^') && *it != '<' && *it != '>' + && *it != '^') + { + fill = *it; + align = *next; + it = ++next; + } + else if (*it == '<' || *it == '>' || *it == '^') + { + align = *it; + ++it; + } + } + + // Parse width + std::size_t w = 0; + bool has_w = false; + while (it != end && *it >= '0' && *it <= '9') + { + has_w = true; + w = w * 10 + static_cast(*it - '0'); + ++it; + } + if (has_w) + { + width = w; + } + + // Ignore (silently) everything until '}' (keeps constexpr friendliness) + while (it != end && *it != '}') + { + ++it; + } + + return it; + } + + std::string apply_alignment(std::string inner) const + { + if (width <= inner.size()) + { + return inner; + } + + const std::size_t pad = width - inner.size(); + switch (align) + { + case '<': + return inner + std::string(pad, fill); + case '^': + { + std::size_t left = pad / 2; + std::size_t right = pad - left; + return std::string(left, fill) + inner + std::string(right, fill); + } + case '>': + default: + return std::string(pad, fill) + inner; + } + } + + template + std::string build_core(const Seq& seq) const + { + std::string core; + core.push_back('<'); + bool first = true; + for (auto&& elem : seq) + { + if (!first) + { + core.append(", "); + } + std::format_to(std::back_inserter(core), "{}", elem); + first = false; + } + core.push_back('>'); + return core; + } + }; +} // namespace sparrow::detail + namespace std { template @@ -56,6 +163,25 @@ namespace std std::string m_format_string = "{:"; }; + + template <> + struct formatter + { + constexpr auto parse(format_parse_context& ctx) + { + return m_underlying_formatter.parse(ctx); + } + + auto format(std::byte b, std::format_context& ctx) const + { + return std::format_to(ctx.out(), "{:#04x}", std::to_integer(b)); + } + + private: + + // Store the parsed format specification + std::formatter m_underlying_formatter; + }; } namespace sparrow @@ -69,12 +195,35 @@ namespace sparrow template concept RangeOfFormats = std::ranges::range && Format>; + constexpr size_t size_of_utf8(const std::string_view str) + { + size_t size = 0; + for (const char c : str) + { + if ((c & 0xC0) != 0x80) + { + ++size; + } + } + return size; + } + constexpr size_t max_width(const std::ranges::input_range auto& data) { size_t max_width = 0; for (const auto& value : data) { - max_width = std::max(max_width, std::format("{}", value).size()); + if constexpr (std::is_same_v>, std::string> + || std::is_same_v>, std::string_view> + || std::is_same_v>, const char*> + || std::is_same_v>, char*>) + { + max_width = std::max(max_width, size_of_utf8(value)); + } + else + { + max_width = std::max(max_width, std::format("{}", value).size()); + } } return max_width; } diff --git a/include/sparrow/utils/nullable.hpp b/include/sparrow/utils/nullable.hpp index b14a785a3..5e40c4f7a 100644 --- a/include/sparrow/utils/nullable.hpp +++ b/include/sparrow/utils/nullable.hpp @@ -1492,7 +1492,7 @@ struct std::formatter> } else { - return std::format_to(ctx.out(), "{}", "null"); + return std::vformat_to(ctx.out(), m_format_string, std::make_format_args("null")); } } diff --git a/include/sparrow/utils/ranges.hpp b/include/sparrow/utils/ranges.hpp index ff812c004..ed038e592 100644 --- a/include/sparrow/utils/ranges.hpp +++ b/include/sparrow/utils/ranges.hpp @@ -20,6 +20,8 @@ #if defined(__cpp_lib_format) # include + +# include "sparrow/utils/format.hpp" #endif #include "sparrow/utils/mp_utils.hpp" @@ -135,29 +137,19 @@ struct std::formatter> { constexpr auto parse(std::format_parse_context& ctx) { - return ctx.begin(); // Simple implementation + return m_spec.parse(ctx.begin(), ctx.end()); } auto format(const std::array& array, std::format_context& ctx) const { - auto out = ctx.out(); - *out++ = '<'; + std::string core = m_spec.build_core(array); + std::string out_str = m_spec.apply_alignment(std::move(core)); + return std::ranges::copy(out_str, ctx.out()).out; + } - bool first = true; - for (const auto& elem : array) - { - if (!first) - { - *out++ = ','; - *out++ = ' '; - } - out = std::format_to(out, "{}", elem); - first = false; - } +private: - *out++ = '>'; - return out; - } + sparrow::detail::sequence_format_spec m_spec; }; template @@ -165,29 +157,19 @@ struct std::formatter> { constexpr auto parse(std::format_parse_context& ctx) { - return ctx.begin(); // Simple implementation + return m_spec.parse(ctx.begin(), ctx.end()); } auto format(const std::vector& vector, std::format_context& ctx) const { - auto out = ctx.out(); - *out++ = '<'; + std::string core = m_spec.build_core(vector); + std::string out_str = m_spec.apply_alignment(std::move(core)); + return std::ranges::copy(out_str, ctx.out()).out; + } - bool first = true; - for (const auto& elem : vector) - { - if (!first) - { - *out++ = ','; - *out++ = ' '; - } - out = std::format_to(out, "{}", elem); - first = false; - } +private: - *out++ = '>'; - return out; - } + sparrow::detail::sequence_format_spec m_spec; }; template @@ -195,29 +177,19 @@ struct std::formatter> { constexpr auto parse(std::format_parse_context& ctx) { - return ctx.begin(); // Simple implementation + return m_spec.parse(ctx.begin(), ctx.end()); } auto format(const std::bitset& vector, std::format_context& ctx) const { - auto out = ctx.out(); - *out++ = '<'; + std::string core = m_spec.build_core(vector); + std::string out_str = m_spec.apply_alignment(std::move(core)); + return std::ranges::copy(out_str, ctx.out()).out; + } - bool first = true; - for (const auto& elem : vector) - { - if (!first) - { - *out++ = ','; - *out++ = ' '; - } - out = std::format_to(out, "{}", elem); - first = false; - } +private: - *out++ = '>'; - return out; - } + sparrow::detail::sequence_format_spec m_spec; }; #endif diff --git a/include/sparrow/utils/sequence_view.hpp b/include/sparrow/utils/sequence_view.hpp index c2007dcfb..13e54579f 100644 --- a/include/sparrow/utils/sequence_view.hpp +++ b/include/sparrow/utils/sequence_view.hpp @@ -20,7 +20,7 @@ #include #if defined(__cpp_lib_format) -# include +# include "sparrow/utils/format.hpp" #endif #include "sparrow/utils/mp_utils.hpp" @@ -132,21 +132,19 @@ struct std::formatter> { constexpr auto parse(std::format_parse_context& ctx) { - return ctx.begin(); // Simple implementation + return m_spec.parse(ctx.begin(), ctx.end()); } auto format(const sparrow::sequence_view& vec, std::format_context& ctx) const { - std::format_to(ctx.out(), "<"); - if (!vec.empty()) - { - for (std::size_t i = 0; i < vec.size() - 1; ++i) - { - std::format_to(ctx.out(), "{}, ", vec[i]); - } - } - return std::format_to(ctx.out(), "{}>", vec.back()); + std::string core = m_spec.build_core(vec); + std::string out_str = m_spec.apply_alignment(std::move(core)); + return std::ranges::copy(out_str, ctx.out()).out; } + +private: + + sparrow::detail::sequence_format_spec m_spec; }; namespace sparrow diff --git a/test/test_binary_array.cpp b/test/test_binary_array.cpp index de27702f1..968d61b50 100644 --- a/test/test_binary_array.cpp +++ b/test/test_binary_array.cpp @@ -13,30 +13,22 @@ // limitations under the License. #include -#include #include -#include "sparrow/arrow_interface/arrow_array.hpp" #include "sparrow/arrow_interface/arrow_array_schema_proxy.hpp" -#include "sparrow/arrow_interface/arrow_schema.hpp" #include "sparrow/c_interface.hpp" #include "sparrow/utils/nullable.hpp" #include "sparrow/variable_size_binary_array.hpp" #include "../test/external_array_data_creation.hpp" -#include "../test/metadata_sample.hpp" #include "doctest/doctest.h" #include "test_utils.hpp" namespace sparrow { - // Type list for testing both binary_array and big_binary_array - using binary_array_types = std::tuple; - - template struct binary_array_fixture { - using layout_type = T; + using layout_type = binary_array; binary_array_fixture() : m_arrow_proxy(create_arrow_proxy()) @@ -53,105 +45,25 @@ namespace sparrow private: - static_assert(is_binary_array_v || is_big_binary_array_v); - static_assert(std::same_as); + static_assert(is_binary_array_v); + static_assert(std::same_as); static_assert( - std::same_as> + std::same_as> ); - static_assert(std::same_as); - using const_value_iterator = typename layout_type::const_value_iterator; - static_assert(std::same_as); + static_assert(std::same_as); + using const_value_iterator = layout_type::const_value_iterator; + static_assert(std::same_as); - static_assert(std::same_as); + static_assert(std::same_as); arrow_proxy create_arrow_proxy() { ArrowSchema schema{}; ArrowArray array{}; const std::vector false_bitmap{m_false_bitmap.begin(), m_false_bitmap.end()}; - - if constexpr (std::same_as) - { - test::fill_schema_and_array>(schema, array, m_length, m_offset, false_bitmap); - } - else if constexpr (std::same_as) - { - fill_big_binary_schema_and_array(schema, array, m_length, m_offset, false_bitmap); - } - + test::fill_schema_and_array>(schema, array, m_length, m_offset, false_bitmap); return arrow_proxy{std::move(array), std::move(schema)}; } - - private: - - void fill_big_binary_schema_and_array( - ArrowSchema& schema, - ArrowArray& arr, - size_t size, - size_t offset, - const std::vector& false_bitmap - ) - { - const repeat_view children_ownership(true, 0); - - sparrow::fill_arrow_schema( - schema, - std::string_view("Z"), // Large binary format - "test", - metadata_sample_opt, - std::nullopt, - nullptr, - children_ownership, - nullptr, - true - ); - - using buffer_type = sparrow::buffer; - - auto bytes = test::make_testing_bytes(size); - std::size_t value_size = std::accumulate( - bytes.cbegin(), - bytes.cbegin() + std::ptrdiff_t(size), - std::size_t(0), - [](std::size_t res, const auto& s) - { - return res + s.size(); - } - ); - - buffer_type offset_buf(sizeof(std::int64_t) * (size + 1)); // Use int64_t for big binary - buffer_type value_buf(sizeof(char) * value_size); - { - std::int64_t* offset_data = offset_buf.data(); // Use int64_t - offset_data[0] = 0; - byte_t* ptr = value_buf.data(); - for (std::size_t i = 0; i < size; ++i) - { - offset_data[i + 1] = offset_data[i] + static_cast(bytes[i].size()); // Use - // int64_t - sparrow::ranges::copy(bytes[i], ptr); - ptr += bytes[i].size(); - } - } - - std::vector arr_buffs = { - sparrow::test::make_bitmap_buffer(size, false_bitmap), - std::move(offset_buf), - std::move(value_buf) - }; - - sparrow::fill_arrow_array( - arr, - static_cast(size - offset), - static_cast(false_bitmap.size()), - static_cast(offset), - std::move(arr_buffs), - nullptr, - children_ownership, - nullptr, - true - ); - } }; template @@ -194,208 +106,187 @@ namespace sparrow } } - TEST_CASE_TEMPLATE_DEFINE("constructor", T, constructor_id) + TEST_CASE_FIXTURE(binary_array_fixture, "constructor") { - binary_array_fixture fixture; - using layout_type = T; - SUBCASE("copy arrow_proxy") { - CHECK_NOTHROW(layout_type(fixture.m_arrow_proxy)); + CHECK_NOTHROW(layout_type(m_arrow_proxy)); } SUBCASE("move arrow_proxy") { - CHECK_NOTHROW(layout_type(std::move(fixture.m_arrow_proxy))); + CHECK_NOTHROW(layout_type(std::move(m_arrow_proxy))); } } - TEST_CASE_TEMPLATE_DEFINE("copy", T, copy_id) + TEST_CASE_FIXTURE(binary_array_fixture, "copy") { - binary_array_fixture fixture; - using layout_type = T; - - layout_type ar(fixture.m_arrow_proxy); + layout_type ar(m_arrow_proxy); layout_type ar2(ar); CHECK_EQ(ar, ar2); - layout_type ar3(std::move(fixture.m_arrow_proxy)); + layout_type ar3(std::move(m_arrow_proxy)); ar3 = ar2; CHECK_EQ(ar2, ar3); } - TEST_CASE_TEMPLATE_DEFINE("move", T, move_id) + TEST_CASE_FIXTURE(binary_array_fixture, "move") { - binary_array_fixture fixture; - using layout_type = T; - - layout_type ar(fixture.m_arrow_proxy); + layout_type ar(m_arrow_proxy); layout_type ar2(ar); layout_type ar3(std::move(ar)); CHECK_EQ(ar2, ar3); - layout_type ar4(std::move(fixture.m_arrow_proxy)); + layout_type ar4(std::move(m_arrow_proxy)); ar4 = std::move(ar3); CHECK_EQ(ar2, ar4); } - TEST_CASE_TEMPLATE_DEFINE("size", T, size_id) + TEST_CASE_FIXTURE(binary_array_fixture, "size") { - binary_array_fixture fixture; - using layout_type = T; - - const layout_type array(std::move(fixture.m_arrow_proxy)); - CHECK_EQ(array.size(), fixture.m_length - fixture.m_offset); + const layout_type array(std::move(m_arrow_proxy)); + CHECK_EQ(array.size(), m_length - m_offset); } - TEST_CASE_TEMPLATE_DEFINE("operator[]", T, operator_bracket_id) + TEST_CASE_FIXTURE(binary_array_fixture, "operator[]") { - binary_array_fixture fixture; - using layout_type = T; - - std::vector> words = test::make_testing_bytes(fixture.m_length); + std::vector> words = test::make_testing_bytes(m_length); SUBCASE("const") { - const layout_type array(std::move(fixture.m_arrow_proxy)); - REQUIRE_EQ(array.size(), fixture.m_length - fixture.m_offset); + const layout_type array(std::move(m_arrow_proxy)); + REQUIRE_EQ(array.size(), m_length - m_offset); const auto cref0 = array[0]; REQUIRE(cref0.has_value()); - CHECK_EQ(cref0.get(), words[fixture.m_offset]); + CHECK_EQ(cref0.get(), words[m_offset]); const auto cref1 = array[1]; REQUIRE_FALSE(cref1.has_value()); const auto cref2 = array[2]; REQUIRE(cref2.has_value()); - CHECK_EQ(cref2.get(), words[fixture.m_offset + 2]); + CHECK_EQ(cref2.get(), words[m_offset + 2]); const auto cref3 = array[3]; REQUIRE(cref3.has_value()); - CHECK_EQ(cref3.get(), words[fixture.m_offset + 3]); + CHECK_EQ(cref3.get(), words[m_offset + 3]); const auto cref4 = array[4]; REQUIRE_FALSE(cref4.has_value()); const auto cref5 = array[5]; REQUIRE(cref5.has_value()); - CHECK_EQ(cref5.get(), words[fixture.m_offset + 5]); + CHECK_EQ(cref5.get(), words[m_offset + 5]); const auto cref6 = array[6]; REQUIRE(cref6.has_value()); - CHECK_EQ(cref6.get(), words[fixture.m_offset + 6]); + CHECK_EQ(cref6.get(), words[m_offset + 6]); const auto cref7 = array[7]; REQUIRE(cref7.has_value()); - CHECK_EQ(cref7.get(), words[fixture.m_offset + 7]); + CHECK_EQ(cref7.get(), words[m_offset + 7]); const auto cref8 = array[8]; REQUIRE(cref8.has_value()); - CHECK_EQ(cref8.get(), words[fixture.m_offset + 8]); + CHECK_EQ(cref8.get(), words[m_offset + 8]); } SUBCASE("mutable") { - layout_type array(std::move(fixture.m_arrow_proxy)); - REQUIRE_EQ(array.size(), fixture.m_length - fixture.m_offset); + layout_type array(std::move(m_arrow_proxy)); + REQUIRE_EQ(array.size(), m_length - m_offset); auto ref0 = array[0]; REQUIRE(ref0.has_value()); - CHECK_EQ(ref0.get(), words[fixture.m_offset]); + CHECK_EQ(ref0.get(), words[m_offset]); auto ref1 = array[1]; REQUIRE_FALSE(ref1.has_value()); auto ref2 = array[2]; REQUIRE(ref2.has_value()); - CHECK_EQ(ref2.get(), words[fixture.m_offset + 2]); + CHECK_EQ(ref2.get(), words[m_offset + 2]); auto ref3 = array[3]; REQUIRE(ref3.has_value()); - CHECK_EQ(ref3.get(), words[fixture.m_offset + 3]); + CHECK_EQ(ref3.get(), words[m_offset + 3]); auto ref4 = array[4]; REQUIRE_FALSE(ref4.has_value()); auto ref5 = array[5]; REQUIRE(ref5.has_value()); - CHECK_EQ(ref5.get(), words[fixture.m_offset + 5]); + CHECK_EQ(ref5.get(), words[m_offset + 5]); auto ref6 = array[6]; REQUIRE(ref6.has_value()); - CHECK_EQ(ref6.get(), words[fixture.m_offset + 6]); + CHECK_EQ(ref6.get(), words[m_offset + 6]); auto ref7 = array[7]; REQUIRE(ref7.has_value()); - CHECK_EQ(ref7.get(), words[fixture.m_offset + 7]); + CHECK_EQ(ref7.get(), words[m_offset + 7]); auto ref8 = array[8]; REQUIRE(ref8.has_value()); - CHECK_EQ(ref8.get(), words[fixture.m_offset + 8]); + CHECK_EQ(ref8.get(), words[m_offset + 8]); using bytes_type = std::vector; bytes_type word61 = {byte_t(14), byte_t(15)}; array[6] = make_nullable(bytes_type(word61)); CHECK_EQ(ref6.get(), word61); - CHECK_EQ(ref7.get(), words[fixture.m_offset + 7]); - CHECK_EQ(ref8.get(), words[fixture.m_offset + 8]); + CHECK_EQ(ref7.get(), words[m_offset + 7]); + CHECK_EQ(ref8.get(), words[m_offset + 8]); bytes_type word62 = {byte_t(17)}; array[6] = make_nullable(bytes_type(word62)); CHECK_EQ(ref6.get(), word62); - CHECK_EQ(ref7.get(), words[fixture.m_offset + 7]); - CHECK_EQ(ref8.get(), words[fixture.m_offset + 8]); + CHECK_EQ(ref7.get(), words[m_offset + 7]); + CHECK_EQ(ref8.get(), words[m_offset + 8]); } } - TEST_CASE_TEMPLATE_DEFINE("value", T, value_id) + TEST_CASE_FIXTURE(binary_array_fixture, "value") { - binary_array_fixture fixture; - using layout_type = T; - - std::vector> words = test::make_testing_bytes(fixture.m_length); + std::vector> words = test::make_testing_bytes(m_length); SUBCASE("const") { - const layout_type array(std::move(fixture.m_arrow_proxy)); - CHECK_EQ(array.value(0), words[fixture.m_offset]); - CHECK_EQ(array.value(1), words[fixture.m_offset + 1]); - CHECK_EQ(array.value(2), words[fixture.m_offset + 2]); - CHECK_EQ(array.value(3), words[fixture.m_offset + 3]); - CHECK_EQ(array.value(4), words[fixture.m_offset + 4]); - CHECK_EQ(array.value(5), words[fixture.m_offset + 5]); - CHECK_EQ(array.value(6), words[fixture.m_offset + 6]); + const layout_type array(std::move(m_arrow_proxy)); + CHECK_EQ(array.value(0), words[m_offset]); + CHECK_EQ(array.value(1), words[m_offset + 1]); + CHECK_EQ(array.value(2), words[m_offset + 2]); + CHECK_EQ(array.value(3), words[m_offset + 3]); + CHECK_EQ(array.value(4), words[m_offset + 4]); + CHECK_EQ(array.value(5), words[m_offset + 5]); + CHECK_EQ(array.value(6), words[m_offset + 6]); } SUBCASE("mutable") { - layout_type array(std::move(fixture.m_arrow_proxy)); - CHECK_EQ(array.value(0), words[fixture.m_offset]); - CHECK_EQ(array.value(1), words[fixture.m_offset + 1]); - CHECK_EQ(array.value(2), words[fixture.m_offset + 2]); - CHECK_EQ(array.value(3), words[fixture.m_offset + 3]); - CHECK_EQ(array.value(4), words[fixture.m_offset + 4]); - CHECK_EQ(array.value(5), words[fixture.m_offset + 5]); - CHECK_EQ(array.value(6), words[fixture.m_offset + 6]); - CHECK_EQ(array.value(7), words[fixture.m_offset + 7]); - CHECK_EQ(array.value(8), words[fixture.m_offset + 8]); + layout_type array(std::move(m_arrow_proxy)); + CHECK_EQ(array.value(0), words[m_offset]); + CHECK_EQ(array.value(1), words[m_offset + 1]); + CHECK_EQ(array.value(2), words[m_offset + 2]); + CHECK_EQ(array.value(3), words[m_offset + 3]); + CHECK_EQ(array.value(4), words[m_offset + 4]); + CHECK_EQ(array.value(5), words[m_offset + 5]); + CHECK_EQ(array.value(6), words[m_offset + 6]); + CHECK_EQ(array.value(7), words[m_offset + 7]); + CHECK_EQ(array.value(8), words[m_offset + 8]); using bytes_type = std::vector; bytes_type word61 = {byte_t(14), byte_t(15)}; array.value(6) = word61; CHECK_EQ(array.value(6), word61); - CHECK_EQ(array.value(7), words[fixture.m_offset + 7]); - CHECK_EQ(array.value(8), words[fixture.m_offset + 8]); + CHECK_EQ(array.value(7), words[m_offset + 7]); + CHECK_EQ(array.value(8), words[m_offset + 8]); bytes_type word62 = {byte_t(17)}; array.value(6) = word62; CHECK_EQ(array.value(6), word62); - CHECK_EQ(array.value(7), words[fixture.m_offset + 7]); - CHECK_EQ(array.value(8), words[fixture.m_offset + 8]); + CHECK_EQ(array.value(7), words[m_offset + 7]); + CHECK_EQ(array.value(8), words[m_offset + 8]); } } - TEST_CASE_TEMPLATE_DEFINE("const_bitmap_iterator", T, const_bitmap_iterator_id) + TEST_CASE_FIXTURE(binary_array_fixture, "const_bitmap_iterator") { - binary_array_fixture fixture; - using layout_type = T; - SUBCASE("ordering") { - const layout_type array(std::move(fixture.m_arrow_proxy)); + const layout_type array(std::move(m_arrow_proxy)); const auto array_bitmap = array.bitmap(); CHECK(array_bitmap.begin() < array_bitmap.end()); } SUBCASE("equality") { - const layout_type array(std::move(fixture.m_arrow_proxy)); + const layout_type array(std::move(m_arrow_proxy)); const auto array_bitmap = array.bitmap(); - typename layout_type::const_bitmap_iterator citer = array_bitmap.begin(); + layout_type::const_bitmap_iterator citer = array_bitmap.begin(); CHECK(*citer); CHECK_FALSE(*(++citer)); CHECK(*(++citer)); @@ -408,53 +299,50 @@ namespace sparrow } } - TEST_CASE_TEMPLATE_DEFINE("iterator", T, iterator_id) + TEST_CASE_FIXTURE(binary_array_fixture, "iterator") { - binary_array_fixture fixture; - using layout_type = T; - - std::vector> words = test::make_testing_bytes(fixture.m_length); + std::vector> words = test::make_testing_bytes(m_length); SUBCASE("const") { - const layout_type array(std::move(fixture.m_arrow_proxy)); + const layout_type array(std::move(m_arrow_proxy)); auto it = array.cbegin(); REQUIRE(it->has_value()); - CHECK_EQ(it->value(), words[fixture.m_offset]); + CHECK_EQ(it->value(), words[m_offset]); CHECK_EQ(*it, make_nullable(array[0].value())); ++it; CHECK_FALSE(it->has_value()); - CHECK_EQ(it->get(), words[fixture.m_offset + 1]); + CHECK_EQ(it->get(), words[m_offset + 1]); ++it; REQUIRE(it->has_value()); - CHECK_EQ(it->get(), words[fixture.m_offset + 2]); + CHECK_EQ(it->get(), words[m_offset + 2]); ++it; REQUIRE(it->has_value()); - CHECK_EQ(it->get(), words[fixture.m_offset + 3]); + CHECK_EQ(it->get(), words[m_offset + 3]); ++it; CHECK_FALSE(it->has_value()); - CHECK_EQ(it->get(), words[fixture.m_offset + 4]); + CHECK_EQ(it->get(), words[m_offset + 4]); ++it; REQUIRE(it->has_value()); - CHECK_EQ(it->get(), words[fixture.m_offset + 5]); + CHECK_EQ(it->get(), words[m_offset + 5]); ++it; REQUIRE(it->has_value()); - CHECK_EQ(it->get(), words[fixture.m_offset + 6]); + CHECK_EQ(it->get(), words[m_offset + 6]); ++it; REQUIRE(it->has_value()); - CHECK_EQ(it->get(), words[fixture.m_offset + 7]); + CHECK_EQ(it->get(), words[m_offset + 7]); ++it; REQUIRE(it->has_value()); - CHECK_EQ(it->get(), words[fixture.m_offset + 8]); + CHECK_EQ(it->get(), words[m_offset + 8]); ++it; CHECK_EQ(it, array.end()); @@ -462,44 +350,44 @@ namespace sparrow SUBCASE("non const") { - layout_type array(std::move(fixture.m_arrow_proxy)); + layout_type array(std::move(m_arrow_proxy)); auto it = array.begin(); REQUIRE(it->has_value()); - CHECK_EQ(it->value(), words[fixture.m_offset]); + CHECK_EQ(it->value(), words[m_offset]); CHECK_EQ(*it, make_nullable(array[0].value())); ++it; CHECK_FALSE(it->has_value()); - CHECK_EQ(it->get(), words[fixture.m_offset + 1]); + CHECK_EQ(it->get(), words[m_offset + 1]); ++it; REQUIRE(it->has_value()); - CHECK_EQ(it->get(), words[fixture.m_offset + 2]); + CHECK_EQ(it->get(), words[m_offset + 2]); ++it; REQUIRE(it->has_value()); - CHECK_EQ(it->get(), words[fixture.m_offset + 3]); + CHECK_EQ(it->get(), words[m_offset + 3]); ++it; CHECK_FALSE(it->has_value()); - CHECK_EQ(it->get(), words[fixture.m_offset + 4]); + CHECK_EQ(it->get(), words[m_offset + 4]); ++it; REQUIRE(it->has_value()); - CHECK_EQ(it->get(), words[fixture.m_offset + 5]); + CHECK_EQ(it->get(), words[m_offset + 5]); ++it; REQUIRE(it->has_value()); - CHECK_EQ(it->get(), words[fixture.m_offset + 6]); + CHECK_EQ(it->get(), words[m_offset + 6]); ++it; REQUIRE(it->has_value()); - CHECK_EQ(it->get(), words[fixture.m_offset + 7]); + CHECK_EQ(it->get(), words[m_offset + 7]); ++it; REQUIRE(it->has_value()); - CHECK_EQ(it->get(), words[fixture.m_offset + 8]); + CHECK_EQ(it->get(), words[m_offset + 8]); ++it; CHECK_EQ(it, array.end()); @@ -513,16 +401,13 @@ namespace sparrow CHECK_EQ(it->get(), word61); ++it; REQUIRE(it->has_value()); - CHECK_EQ(it->get(), words[fixture.m_offset + 8]); + CHECK_EQ(it->get(), words[m_offset + 8]); } } - TEST_CASE_TEMPLATE_DEFINE("zero_null_values", T, zero_null_values_id) + TEST_CASE_FIXTURE(binary_array_fixture, "zero_null_values") { - binary_array_fixture fixture; - using layout_type = T; - - layout_type array(std::move(fixture.m_arrow_proxy)); + layout_type array(std::move(m_arrow_proxy)); array.zero_null_values(); // CHECK that all null values are set to empty vector for (auto&& i : array) @@ -534,41 +419,14 @@ namespace sparrow } } #if defined(__cpp_lib_format) - TEST_CASE_TEMPLATE_DEFINE("formatting", T, formatting_id) + TEST_CASE_FIXTURE(binary_array_fixture, "formatting") { - binary_array_fixture fixture; - using layout_type = T; - - const layout_type array(std::move(fixture.m_arrow_proxy)); + const layout_type array(std::move(m_arrow_proxy)); const std::string formatted = std::format("{}", array); - - if constexpr (std::same_as) - { - constexpr std::string_view - expected = "Binary [name=test | size=9] <<1, 1, 255, 0>, null, <2, 3>, <3, 5, 255>, null, <8, 13>, <13, 21, 251, 8>, <21, 34, 248>, <34, 55>>"; - CHECK_EQ(formatted, expected); - } - else if constexpr (std::same_as) - { - constexpr std::string_view - expected = "Large binary [name=test | size=9] <<1, 1, 255, 0>, null, <2, 3>, <3, 5, 255>, null, <8, 13>, <13, 21, 251, 8>, <21, 34, 248>, <34, 55>>"; - CHECK_EQ(formatted, expected); - } + constexpr std::string_view + expected = "Binary [name=test | size=9] <<0x01, 0x01, 0xff, 0x00>, null, <0x02, 0x03>, <0x03, 0x05, 0xff>, null, <0x08, 0x0d>, <0x0d, 0x15, 0xfb, 0x08>, <0x15, 0x22, 0xf8>, <0x22, 0x37>>"; + CHECK_EQ(formatted, expected); } #endif - - // Apply the template tests to both binary_array and big_binary_array - TEST_CASE_TEMPLATE_APPLY(constructor_id, binary_array_types); - TEST_CASE_TEMPLATE_APPLY(copy_id, binary_array_types); - TEST_CASE_TEMPLATE_APPLY(move_id, binary_array_types); - TEST_CASE_TEMPLATE_APPLY(size_id, binary_array_types); - TEST_CASE_TEMPLATE_APPLY(operator_bracket_id, binary_array_types); - TEST_CASE_TEMPLATE_APPLY(value_id, binary_array_types); - TEST_CASE_TEMPLATE_APPLY(const_bitmap_iterator_id, binary_array_types); - TEST_CASE_TEMPLATE_APPLY(iterator_id, binary_array_types); - TEST_CASE_TEMPLATE_APPLY(zero_null_values_id, binary_array_types); -#if defined(__cpp_lib_format) - TEST_CASE_TEMPLATE_APPLY(formatting_id, binary_array_types); -#endif } } diff --git a/test/test_fixed_width_binary_array.cpp b/test/test_fixed_width_binary_array.cpp index c86e5baa8..728f05e2f 100644 --- a/test/test_fixed_width_binary_array.cpp +++ b/test/test_fixed_width_binary_array.cpp @@ -798,7 +798,7 @@ namespace sparrow }; const std::string formatted = std::format("{}", arr); constexpr std::string_view - expected = "Fixed width binary [name=nullptr | size=3] <<1, 2, 3>, null, <7, 8, 9>>"; + expected = "Fixed width binary [name=nullptr | size=3] <<0x01, 0x02, 0x03>, null, <0x07, 0x08, 0x09>>"; CHECK_EQ(formatted, expected); } #endif diff --git a/test/test_format.cpp b/test/test_format.cpp index 5ae63fe89..1e6a7408a 100644 --- a/test/test_format.cpp +++ b/test/test_format.cpp @@ -12,11 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #if defined(__cpp_lib_format) # include "sparrow/utils/format.hpp" +# include "sparrow/utils/ranges.hpp" # include "doctest/doctest.h" @@ -25,6 +27,44 @@ using namespace sparrow; TEST_SUITE("format") { + TEST_CASE("size_of_utf8") + { + SUBCASE("empty") + { + CHECK_EQ(size_of_utf8(""), 0); + } + + SUBCASE("ascii") + { + CHECK_EQ(size_of_utf8("hello"), 5); + } + + SUBCASE("multibyte characters") + { + CHECK_EQ(size_of_utf8("こんにちは"), 5); + } + + SUBCASE("mixed characters") + { + CHECK_EQ(size_of_utf8("hello こんにちは"), 11); + } + + SUBCASE("emoji") + { + CHECK_EQ(size_of_utf8("😀😃😄😁"), 4); + } + + SUBCASE("greek letters") + { + CHECK_EQ(size_of_utf8("αβγδε"), 5); + } + + SUBCASE("p├┤r4┬Á3i") + { + CHECK_EQ(size_of_utf8("p├┤r4┬Á3i"), 9); + } + } + TEST_CASE("max_width") { SUBCASE("empty") @@ -45,6 +85,18 @@ TEST_SUITE("format") CHECK_EQ(max_width(data), 3); } + SUBCASE("mixed") + { + const std::vector data{"a", "bb", "こんにちは"}; + CHECK_EQ(max_width(data), 5); + } + + SUBCASE("mixed 2") + { + const std::vector data{"a", "bb", "p├┤r4┬Á3i"}; + CHECK_EQ(max_width(data), 9); + } + SUBCASE("floating points") { const std::vector data{1.0, 2.0, 3.456}; @@ -198,6 +250,21 @@ TEST_SUITE("format") CHECK_EQ(out, expected); } } + + TEST_CASE("std::byte") + { + std::vector data(10, std::byte{0x1}); + std::string out = std::format("{}", data); + CHECK_EQ(out, "<0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01>"); + } + + TEST_CASE("sequence_view") + { + std::vector data(10, std::byte{0x1}); + sequence_view view(data); + std::string out = std::format("{}", view); + CHECK_EQ(out, "<0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01>"); + } } #endif diff --git a/test/test_record_batch.cpp b/test/test_record_batch.cpp index cc193bd72..56745a219 100644 --- a/test/test_record_batch.cpp +++ b/test/test_record_batch.cpp @@ -14,8 +14,10 @@ #include +#include "sparrow/fixed_width_binary_array.hpp" #include "sparrow/primitive_array.hpp" #include "sparrow/record_batch.hpp" +#include "sparrow/variable_size_binary_array.hpp" #include "doctest/doctest.h" @@ -334,22 +336,91 @@ namespace sparrow #if defined(__cpp_lib_format) TEST_CASE("formatter") { - const auto record = make_record_batch(col_size); - const std::string formatted = std::format("{}", record); - constexpr std::string_view expected = "|first|second|third|\n" - "--------------------\n" - "| 0| 4| 2|\n" - "| 1| 5| 3|\n" - "| 2| 6| 4|\n" - "| 3| 7| 5|\n" - "| 4| 8| 6|\n" - "| 5| 9| 7|\n" - "| 6| 10| 8|\n" - "| 7| 11| 9|\n" - "| 8| 12| 10|\n" - "| 9| 13| 11|\n" - "--------------------"; - CHECK_EQ(formatted, expected); + SUBCASE("simple") + { + const auto record = make_record_batch(col_size); + const std::string formatted = std::format("{}", record); + constexpr std::string_view expected = "|first|second|third|\n" + "--------------------\n" + "| 0| 4| 2|\n" + "| 1| 5| 3|\n" + "| 2| 6| 4|\n" + "| 3| 7| 5|\n" + "| 4| 8| 6|\n" + "| 5| 9| 7|\n" + "| 6| 10| 8|\n" + "| 7| 11| 9|\n" + "| 8| 12| 10|\n" + "| 9| 13| 11|\n" + "--------------------"; + CHECK_EQ(formatted, expected); + } + + SUBCASE("complex") + { + sparrow::validity_bitmap vb( + std::vector{true, false, true, true, true, false, true, true, true, true} + ); + sparrow::fixed_width_binary_array col( + std::vector>{ + {std::byte{1}, std::byte{2}, std::byte{3}}, + {std::byte{4}, std::byte{5}, std::byte{6}}, + {std::byte{7}, std::byte{8}, std::byte{9}}, + {std::byte{10}, std::byte{11}, std::byte{12}}, + {std::byte{13}, std::byte{14}, std::byte{15}}, + {std::byte{16}, std::byte{17}, std::byte{18}}, + {std::byte{19}, std::byte{20}, std::byte{21}}, + {std::byte{22}, std::byte{23}, std::byte{24}}, + {std::byte{25}, std::byte{26}, std::byte{27}}, + {std::byte{28}, std::byte{29}, std::byte{30}} + }, + vb, + "column fixed_width_binary_array" + ); + + sparrow::validity_bitmap vb2( + std::vector{true, true, true, false, true, false, true, true, true, true} + ); + sparrow::string_array col2( + std::vector{ + "こんにちは", + "this", + "is", + "a", + "test", + "of", + "the", + "string", + "array", + "formatting" + }, + vb2, + "column string" + ); + + std::vector arr_list; + arr_list.emplace_back(std::move(col)); + arr_list.emplace_back(std::move(col2)); + + + sparrow::record_batch record_batch(std::move(arr_list)); + const std::string formatted = std::format("{}", record_batch); + constexpr std::string_view expected = "|column fixed_width_binary_array|column string|\n" + "---------------------------------------------------\n" + "| <0x01, 0x02, 0x03>| こんにちは|\n" + "| null| this|\n" + "| <0x07, 0x08, 0x09>| is|\n" + "| <0x0a, 0x0b, 0x0c>| null|\n" + "| <0x0d, 0x0e, 0x0f>| test|\n" + "| null| null|\n" + "| <0x13, 0x14, 0x15>| the|\n" + "| <0x16, 0x17, 0x18>| string|\n" + "| <0x19, 0x1a, 0x1b>| array|\n" + "| <0x1c, 0x1d, 0x1e>| formatting|\n" + "---------------------------------------------------"; + + CHECK_EQ(formatted, expected); + } } #endif } diff --git a/test/test_string_array.cpp b/test/test_string_array.cpp index 4e9048e2b..8674032ec 100644 --- a/test/test_string_array.cpp +++ b/test/test_string_array.cpp @@ -975,6 +975,24 @@ namespace sparrow expected = "String [name=test | size=9] "; CHECK_EQ(formatted, expected); } + + TEST_CASE("formatting with utf8") + { + const std::vector> values = { + make_nullable("こんにちは"), // "Hello" in Japanese + make_nullable("😊"), // Smiling face emoji + make_nullable("Café"), // Word with accent + make_nullable("naïve"), // Word with diaeresis + make_nullable("Σὲ γνωρίζω ἀπὸ τὴν κόψη"), // Greek phrase + make_nullable("coöperate") // Word with diaeresis + }; + + string_array array(values); + const std::string formatted = std::format("{}", array); + constexpr std::string_view + expected = "String [name=nullptr | size=6] <こんにちは, 😊, Café, naïve, Σὲ γνωρίζω ἀπὸ τὴν κόψη, coöperate>"; + CHECK_EQ(formatted, expected); + } #endif } }