diff --git a/dwio/nimble/velox/RawSizeContext.h b/dwio/nimble/velox/RawSizeContext.h index 640d1609..b40743b8 100644 --- a/dwio/nimble/velox/RawSizeContext.h +++ b/dwio/nimble/velox/RawSizeContext.h @@ -16,6 +16,7 @@ #pragma once +#include "dwio/nimble/common/Exceptions.h" #include "dwio/nimble/velox/DecodedVectorManager.h" namespace facebook::nimble { @@ -28,8 +29,65 @@ class RawSizeContext { return decodedVectorManager_; } + void appendSize(uint64_t size) { + columnSizes_.push_back(size); + } + + uint64_t sizeAt(uint64_t columnIndex) const { + NIMBLE_ASSERT( + columnIndex < columnSizes_.size(), + fmt::format( + "Column index {} is out of range. Total number of columns is {}", + columnIndex, + columnSizes_.size())); + return columnSizes_.at(columnIndex); + } + + void setSizeAt(uint64_t columnIndex, uint64_t size) { + NIMBLE_ASSERT( + columnIndex < columnSizes_.size(), + fmt::format( + "Column index {} is out of range. Total number of columns is {}", + columnIndex, + columnSizes_.size())); + columnSizes_[columnIndex] = size; + } + + uint64_t columnCount() const { + return columnSizes_.size(); + } + + void appendNullCount(uint64_t nulls) { + columnNullCounts_.push_back(nulls); + } + + uint64_t nullsAt(uint64_t columnIndex) const { + NIMBLE_ASSERT( + columnIndex < columnNullCounts_.size(), + fmt::format( + "Column index {} is out of range. Total number of columns is {}", + columnIndex, + columnNullCounts_.size())); + return columnNullCounts_.at(columnIndex); + } + + void setNullsAt(uint64_t columnIndex, uint64_t nulls) { + NIMBLE_ASSERT( + columnIndex < columnNullCounts_.size(), + fmt::format( + "Column index {} is out of range. Total number of columns is {}", + columnIndex, + columnNullCounts_.size())); + columnNullCounts_[columnIndex] = nulls; + } + + // Number of nulls in last visited node + uint64_t nullCount{0}; + private: DecodedVectorManager decodedVectorManager_; + std::vector columnSizes_; + std::vector columnNullCounts_; }; } // namespace facebook::nimble diff --git a/dwio/nimble/velox/RawSizeUtils.cpp b/dwio/nimble/velox/RawSizeUtils.cpp index 6bc0acd2..a3f3756a 100644 --- a/dwio/nimble/velox/RawSizeUtils.cpp +++ b/dwio/nimble/velox/RawSizeUtils.cpp @@ -56,6 +56,7 @@ uint64_t getRawSizeFromFixedWidthVector( } } + context.nullCount = nullCount; return ((ranges.size() - nullCount) * sizeof(T)) + (nullCount * NULL_SIZE); } @@ -67,6 +68,7 @@ uint64_t getRawSizeFromFixedWidthVector( encoding, vector->typeKind()); + context.nullCount = constVector->mayHaveNulls() ? ranges.size() : 0; return constVector->mayHaveNulls() ? ranges.size() * NULL_SIZE : ranges.size() * sizeof(T); } @@ -92,6 +94,7 @@ uint64_t getRawSizeFromFixedWidthVector( } } + context.nullCount = nullCount; return ((ranges.size() - nullCount) * sizeof(T)) + (nullCount * NULL_SIZE); } @@ -132,6 +135,7 @@ uint64_t getRawSizeFromStringVector( } } + context.nullCount = nullCount; return rawSize + (nullCount * NULL_SIZE); } case velox::VectorEncoding::Simple::CONSTANT: { @@ -143,6 +147,7 @@ uint64_t getRawSizeFromStringVector( encoding, vector->typeKind()); + context.nullCount = constVector->mayHaveNulls() ? ranges.size() : 0; return constVector->mayHaveNulls() ? ranges.size() * NULL_SIZE : ranges.size() * constVector->value().size(); @@ -179,6 +184,7 @@ uint64_t getRawSizeFromStringVector( } } + context.nullCount = nullCount; return rawSize + (nullCount * NULL_SIZE); } default: { @@ -190,7 +196,8 @@ uint64_t getRawSizeFromStringVector( uint64_t getRawSizeFromConstantComplexVector( const velox::VectorPtr& vector, const velox::common::Ranges& ranges, - RawSizeContext& context) { + RawSizeContext& context, + bool topLevelRow = false) { VELOX_CHECK_NOT_NULL(vector); VELOX_DCHECK( velox::VectorEncoding::Simple::CONSTANT == vector->encoding(), @@ -199,7 +206,7 @@ uint64_t getRawSizeFromConstantComplexVector( const auto* constantVector = vector->as>(); VELOX_CHECK_NOT_NULL( - vector, + constantVector, "Encoding mismatch on ConstantVector. Encoding: {}. TypeKind: {}.", vector->encoding(), vector->typeKind()); @@ -209,8 +216,22 @@ uint64_t getRawSizeFromConstantComplexVector( velox::common::Ranges childRanges; childRanges.add(index, index + 1); - uint64_t rawSize = getRawSizeFromVector(valueVector, childRanges, context); - + uint64_t rawSize = 0; + if (topLevelRow) { + VELOX_CHECK_EQ( + velox::TypeKind::ROW, + valueVector->typeKind(), + "Value vector should be a RowVector"); + rawSize = getRawSizeFromRowVector( + valueVector, childRanges, context, /*topLevel=*/true); + for (int idx = 0; idx < context.columnCount(); ++idx) { + context.setSizeAt(idx, context.sizeAt(idx) * ranges.size()); + context.setNullsAt(idx, context.nullsAt(idx) * ranges.size()); + } + } else { + rawSize = getRawSizeFromVector(valueVector, childRanges, context); + } + context.nullCount = constantVector->mayHaveNulls() ? ranges.size() : 0; return rawSize * ranges.size(); } @@ -319,6 +340,7 @@ uint64_t getRawSizeFromArrayVector( getRawSizeFromVector(arrayVector->elements(), childRanges, context); } + context.nullCount = nullCount; if (nullCount) { rawSize += nullCount * NULL_SIZE; } @@ -432,6 +454,7 @@ uint64_t getRawSizeFromMapVector( getRawSizeFromVector(mapVector->mapValues(), childRanges, context); } + context.nullCount = nullCount; if (nullCount) { rawSize += nullCount * NULL_SIZE; } @@ -442,7 +465,8 @@ uint64_t getRawSizeFromMapVector( uint64_t getRawSizeFromRowVector( const velox::VectorPtr& vector, const velox::common::Ranges& ranges, - RawSizeContext& context) { + RawSizeContext& context, + const bool topLevel) { VELOX_CHECK_NOT_NULL(vector); const auto& encoding = vector->encoding(); const velox::RowVector* rowVector; @@ -477,7 +501,8 @@ uint64_t getRawSizeFromRowVector( break; } case velox::VectorEncoding::Simple::CONSTANT: { - return getRawSizeFromConstantComplexVector(vector, ranges, context); + return getRawSizeFromConstantComplexVector( + vector, ranges, context, topLevel); } case velox::VectorEncoding::Simple::DICTIONARY: { const auto* dictionaryRowVector = @@ -528,11 +553,22 @@ uint64_t getRawSizeFromRowVector( if ((*childRangesPtr).size()) { const auto childrenSize = rowVector->childrenSize(); for (size_t i = 0; i < childrenSize; ++i) { - rawSize += + auto childRawSize = getRawSizeFromVector(rowVector->childAt(i), *childRangesPtr, context); + rawSize += childRawSize; + if (topLevel) { + context.appendSize(childRawSize); + context.appendNullCount(context.nullCount); + } + } + } else if (topLevel) { + for (size_t i = 0; i < rowVector->childrenSize(); ++i) { + context.appendSize(0); + context.appendNullCount(0); } } + context.nullCount = nullCount; if (nullCount) { rawSize += nullCount * NULL_SIZE; } diff --git a/dwio/nimble/velox/RawSizeUtils.h b/dwio/nimble/velox/RawSizeUtils.h index ef64829f..ed75bd60 100644 --- a/dwio/nimble/velox/RawSizeUtils.h +++ b/dwio/nimble/velox/RawSizeUtils.h @@ -32,4 +32,9 @@ uint64_t getRawSizeFromVector( const velox::VectorPtr& vector, const velox::common::Ranges& ranges); +uint64_t getRawSizeFromRowVector( + const velox::VectorPtr& vector, + const velox::common::Ranges& ranges, + RawSizeContext& context, + const bool topLevel = false); } // namespace facebook::nimble diff --git a/dwio/nimble/velox/StreamLabels.cpp b/dwio/nimble/velox/StreamLabels.cpp index e54027e4..bd70dea9 100644 --- a/dwio/nimble/velox/StreamLabels.cpp +++ b/dwio/nimble/velox/StreamLabels.cpp @@ -81,7 +81,7 @@ void addLabels( const auto offset = row.nullsDescriptor().offset(); NIMBLE_DASSERT(labelIndex < labels.size(), "Unexpected label index."); NIMBLE_DASSERT(offsetToLabel.size() > offset, "Unexpected offset."); - labels.push_back(labels[labelIndex] + "/"); + labels.push_back(labels[labelIndex] + name + "/"); labelIndex = labels.size() - 1; offsetToLabel[offset] = labelIndex; for (auto i = 0; i < row.childrenCount(); ++i) { diff --git a/dwio/nimble/velox/VeloxWriter.cpp b/dwio/nimble/velox/VeloxWriter.cpp index 087e0c84..f6ad13be 100644 --- a/dwio/nimble/velox/VeloxWriter.cpp +++ b/dwio/nimble/velox/VeloxWriter.cpp @@ -36,6 +36,7 @@ #include "dwio/nimble/velox/SchemaSerialization.h" #include "dwio/nimble/velox/SchemaTypes.h" #include "dwio/nimble/velox/StatsGenerated.h" +#include "dwio/nimble/velox/StreamLabels.h" #include "folly/ScopeGuard.h" #include "velox/common/time/CpuWallTimer.h" #include "velox/dwio/common/ExecutorBarrier.h" @@ -47,6 +48,11 @@ namespace detail { class WriterContext : public FieldWriterContext { public: + struct ColumnStats { + uint64_t logicalSize{0}; + uint64_t nullCount{0}; + }; + const VeloxWriterOptions options; std::unique_ptr flushPolicy; velox::CpuWallTiming totalFlushTiming; @@ -63,6 +69,8 @@ class WriterContext : public FieldWriterContext { uint64_t stripeSize{0}; uint64_t rawSize{0}; std::vector rowsPerStripe; + std::unordered_map> streamPhysicalSize; + std::vector columnStats; WriterContext( velox::memory::MemoryPool& memoryPool, @@ -516,10 +524,20 @@ bool VeloxWriter::write(const velox::VectorPtr& vector) { auto size = vector->size(); // Calculate raw size. - auto rawSize = nimble::getRawSizeFromVector( - vector, velox::common::Ranges::of(0, size)); + RawSizeContext context; + auto rawSize = nimble::getRawSizeFromRowVector( + vector, velox::common::Ranges::of(0, size), context, /*topLevel=*/true); DWIO_ENSURE_GE(rawSize, 0, "Invalid raw size"); context_->rawSize += rawSize; + auto columnCount = context.columnCount(); + if (context_->columnStats.empty()) { + context_->columnStats = + std::vector(columnCount); + } + for (auto i = 0; i < columnCount; ++i) { + context_->columnStats[i].logicalSize += context.sizeAt(i); + context_->columnStats[i].nullCount += context.nullsAt(i); + } if (context_->options.writeExecutor) { velox::dwio::common::ExecutorBarrier barrier{ @@ -580,6 +598,34 @@ void VeloxWriter::close() { builder.GetSize()}); } + { + // Accumulate column physical size. + std::vector columnPhysicalSize( + context_->columnStats.size(), 0); + nimble::StreamLabels streamLabels{nimble::SchemaReader::getSchema( + context_->schemaBuilder.getSchemaNodes())}; + for (const auto& [offset, streamSize] : context_->streamPhysicalSize) { + if (offset == 0) { + continue; + } + std::vector streamLabel; + folly::split( + '/', + streamLabels.streamLabel(offset), + streamLabel, + /*ignoreEmpty=*/true); + NIMBLE_DASSERT(!streamLabel.empty(), "Invalid stream label"); + auto column = std::stoi(streamLabel[0]); + NIMBLE_DASSERT( + column < columnPhysicalSize.size(), + fmt::format( + "Index {} is out of range for physical size vector of size {}", + column, + columnPhysicalSize.size())); + columnPhysicalSize[column] += streamSize; + } + } + { flatbuffers::FlatBufferBuilder builder; builder.Finish(serialization::CreateStats(builder, context_->rawSize)); @@ -691,7 +737,8 @@ void VeloxWriter::writeChunk(bool lastChunk) { StreamData& streamData_; }; - auto encode = [&](StreamData& streamData) { + auto encode = [&](StreamData& streamData, + std::atomic& streamSize) { const auto offset = streamData.descriptor().offset(); auto encoded = encodeStream(*context_, *encodingBuffer_, streamData); if (!encoded.empty()) { @@ -699,6 +746,7 @@ void VeloxWriter::writeChunk(bool lastChunk) { NIMBLE_DASSERT(offset < streams_.size(), "Stream offset out of range."); auto& stream = streams_[offset]; for (auto& buffer : chunkWriter.encode(encoded)) { + streamSize += buffer.size(); chunkSize += buffer.size(); stream.content.push_back(std::move(buffer)); } @@ -739,29 +787,35 @@ void VeloxWriter::writeChunk(bool lastChunk) { velox::dwio::common::ExecutorBarrier barrier{ context_->options.encodingExecutor}; for (auto& streamData : context_->streams()) { + auto& streamSize = + context_->streamPhysicalSize[streamData->descriptor().offset()]; processStream( *streamData, [&](StreamData& innerStreamData, bool isNullStream) { - barrier.add([&innerStreamData, isNullStream, &encode]() { - if (isNullStream) { - NullsAsDataStreamData nullsStreamData{innerStreamData}; - encode(nullsStreamData); - } else { - encode(innerStreamData); - } - }); + barrier.add( + [&innerStreamData, isNullStream, &encode, &streamSize]() { + if (isNullStream) { + NullsAsDataStreamData nullsStreamData{innerStreamData}; + encode(nullsStreamData, streamSize); + } else { + encode(innerStreamData, streamSize); + } + }); }); } barrier.waitAll(); } else { for (auto& streamData : context_->streams()) { + auto& streamSize = + context_->streamPhysicalSize[streamData->descriptor().offset()]; processStream( *streamData, - [&encode](StreamData& innerStreamData, bool isNullStream) { + [&encode, &streamSize]( + StreamData& innerStreamData, bool isNullStream) { if (isNullStream) { NullsAsDataStreamData nullsStreamData{innerStreamData}; - encode(nullsStreamData); + encode(nullsStreamData, streamSize); } else { - encode(innerStreamData); + encode(innerStreamData, streamSize); } }); } diff --git a/dwio/nimble/velox/tests/RawSizeTests.cpp b/dwio/nimble/velox/tests/RawSizeTests.cpp index 41d20414..986f512a 100644 --- a/dwio/nimble/velox/tests/RawSizeTests.cpp +++ b/dwio/nimble/velox/tests/RawSizeTests.cpp @@ -92,6 +92,7 @@ class RawSizeBaseTestFixture : public ::testing::Test { std::shared_ptr pool_; velox::common::Ranges ranges_; + nimble::RawSizeContext context_; std::unique_ptr vectorMaker_; }; @@ -1381,9 +1382,16 @@ TEST_F(RawSizeTestFixture, RowSameTypes) { auto rowVector = vectorMaker_->rowVector( {"1", "2", "3"}, {childVector1, childVector2, childVector3}); this->ranges_.add(0, rowVector->size()); - auto rawSize = nimble::getRawSizeFromVector(rowVector, this->ranges_); + auto rawSize = nimble::getRawSizeFromRowVector( + rowVector, this->ranges_, context_, /*topLevel=*/true); ASSERT_EQ(sizeof(int64_t) * 18, rawSize); + size_t expectedChildCount = 3; + ASSERT_EQ(expectedChildCount, context_.columnCount()); + for (size_t i = 0; i < expectedChildCount; ++i) { + ASSERT_EQ(sizeof(int64_t) * 6, context_.sizeAt(i)); + ASSERT_EQ(0, context_.nullsAt(i)); + } } TEST_F(RawSizeTestFixture, RowDifferentTypes) { @@ -1393,11 +1401,20 @@ TEST_F(RawSizeTestFixture, RowDifferentTypes) { auto rowVector = vectorMaker_->rowVector( {"1", "2", "3"}, {childVector1, childVector2, childVector3}); this->ranges_.add(0, rowVector->size()); - auto rawSize = nimble::getRawSizeFromVector(rowVector, this->ranges_); + auto rawSize = nimble::getRawSizeFromRowVector( + rowVector, this->ranges_, context_, /*topLevel=*/true); + constexpr auto expectedRawSize = sizeof(int64_t) * 6 + sizeof(bool) * 6 + sizeof(int16_t) * 6; - ASSERT_EQ(expectedRawSize, rawSize); + ASSERT_EQ(3, context_.columnCount()); + ASSERT_EQ(sizeof(int64_t) * 6, context_.sizeAt(0)); + ASSERT_EQ(sizeof(bool) * 6, context_.sizeAt(1)); + ASSERT_EQ(sizeof(int16_t) * 6, context_.sizeAt(2)); + + for (size_t i = 0; i < 3; ++i) { + ASSERT_EQ(0, context_.nullsAt(i)); + } } TEST_F(RawSizeTestFixture, RowDifferentTypes2) { @@ -1408,11 +1425,20 @@ TEST_F(RawSizeTestFixture, RowDifferentTypes2) { auto rowVector = vectorMaker_->rowVector( {"1", "2", "3"}, {childVector1, childVector2, childVector3}); this->ranges_.add(0, rowVector->size()); - auto rawSize = nimble::getRawSizeFromVector(rowVector, this->ranges_); + auto rawSize = nimble::getRawSizeFromRowVector( + rowVector, this->ranges_, context_, /*topLevel=*/true); + constexpr auto expectedRawSize = sizeof(int64_t) * 6 + sizeof(int16_t) * 6 + 21; - ASSERT_EQ(expectedRawSize, rawSize); + ASSERT_EQ(3, context_.columnCount()); + ASSERT_EQ(sizeof(int64_t) * 6, context_.sizeAt(0)); + ASSERT_EQ(21, context_.sizeAt(1)); + ASSERT_EQ(sizeof(int16_t) * 6, context_.sizeAt(2)); + + for (size_t i = 0; i < 3; ++i) { + ASSERT_EQ(0, context_.nullsAt(i)); + } } TEST_F(RawSizeTestFixture, RowNulls) { @@ -1432,11 +1458,48 @@ TEST_F(RawSizeTestFixture, RowNulls) { 6, children); this->ranges_.add(0, rowVector->size()); - auto rawSize = nimble::getRawSizeFromVector(rowVector, this->ranges_); + auto rawSize = nimble::getRawSizeFromRowVector( + rowVector, this->ranges_, context_, /*topLevel=*/true); + constexpr auto expectedRawSize = sizeof(int64_t) * 5 + sizeof(bool) * 5 + sizeof(int16_t) * 5 + nimble::NULL_SIZE * 1; + ASSERT_EQ(expectedRawSize, rawSize); + ASSERT_EQ(3, context_.columnCount()); + ASSERT_EQ(sizeof(int64_t) * 5, context_.sizeAt(0)); + ASSERT_EQ(sizeof(bool) * 5, context_.sizeAt(1)); + ASSERT_EQ(sizeof(int16_t) * 5, context_.sizeAt(2)); + + ASSERT_EQ(1, context_.nullCount); + for (size_t i = 0; i < 3; ++i) { + ASSERT_EQ(0, context_.nullsAt(i)); + } +} +TEST_F(RawSizeTestFixture, RowAllNulls) { + constexpr velox::vector_size_t VECTOR_TEST_SIZE = 6; + auto childVector1 = vectorMaker_->flatVector({0, 0, 0, 1, 1, 1}); + auto childVector2 = vectorMaker_->flatVector({0, 1, 0, 1, 0, 1}); + auto childVector3 = vectorMaker_->flatVector({0, 1, 0, 1, 0, 1}); + velox::BufferPtr nulls = velox::AlignedBuffer::allocate( + VECTOR_TEST_SIZE, this->pool_.get(), velox::bits::kNull); + const std::vector& children = { + childVector1, childVector2, childVector3}; + auto rowVector = std::make_shared( + pool_.get(), + velox::ROW({velox::BIGINT(), velox::BOOLEAN(), velox::SMALLINT()}), + nulls, + VECTOR_TEST_SIZE, + children); + this->ranges_.add(0, rowVector->size()); + auto rawSize = nimble::getRawSizeFromRowVector( + rowVector, this->ranges_, context_, /*topLevel=*/true); + + constexpr auto expectedRawSize = nimble::NULL_SIZE * VECTOR_TEST_SIZE; ASSERT_EQ(expectedRawSize, rawSize); + ASSERT_EQ(3, context_.columnCount()); + for (size_t i = 0; i < 3; ++i) { + ASSERT_EQ(0, context_.sizeAt(i)); + } } TEST_F(RawSizeTestFixture, RowNestedNull) { @@ -1449,11 +1512,21 @@ TEST_F(RawSizeTestFixture, RowNestedNull) { auto rowVector = vectorMaker_->rowVector( {"1", "2", "3"}, {childVector1, childVector2, childVector3}); this->ranges_.add(0, rowVector->size()); - auto rawSize = nimble::getRawSizeFromVector(rowVector, this->ranges_); + auto rawSize = nimble::getRawSizeFromRowVector( + rowVector, this->ranges_, context_, /*topLevel=*/true); + constexpr auto expectedRawSize = sizeof(int64_t) * 5 + (1 + 2 + 3 + 4 + 5) + sizeof(int16_t) * 5 + nimble::NULL_SIZE * 3; - ASSERT_EQ(expectedRawSize, rawSize); + ASSERT_EQ(3, context_.columnCount()); + ASSERT_EQ(sizeof(int64_t) * 5 + nimble::NULL_SIZE, context_.sizeAt(0)); + ASSERT_EQ(15 + nimble::NULL_SIZE, context_.sizeAt(1)); + ASSERT_EQ(sizeof(int16_t) * 5 + nimble::NULL_SIZE, context_.sizeAt(2)); + + ASSERT_EQ(0, context_.nullCount); + for (size_t i = 0; i < 3; ++i) { + ASSERT_EQ(1, context_.nullsAt(i)); + } } TEST_F(RawSizeTestFixture, RowDictionaryChildren) { @@ -1505,10 +1578,19 @@ TEST_F(RawSizeTestFixture, RowDictionaryChildren) { vectorMaker_->rowVector({"1", "2"}, {dictArrayVector, dictMapVector}); this->ranges_.add(0, rowVector->size()); - auto rawSize = nimble::getRawSizeFromVector(rowVector, this->ranges_); + auto rawSize = nimble::getRawSizeFromRowVector( + rowVector, this->ranges_, context_, /*topLevel=*/true); const uint64_t expectedSize = expectedArrayRawSize + expectedMapRawSize; ASSERT_EQ(expectedSize, rawSize); + ASSERT_EQ(2, context_.columnCount()); + ASSERT_EQ(expectedArrayRawSize, context_.sizeAt(0)); + ASSERT_EQ(expectedMapRawSize, context_.sizeAt(1)); + + ASSERT_EQ(0, context_.nullCount); + for (size_t i = 0; i < 2; ++i) { + ASSERT_EQ(0, context_.nullsAt(i)); + } } TEST_F(RawSizeTestFixture, ConstRow) { @@ -1518,12 +1600,25 @@ TEST_F(RawSizeTestFixture, ConstRow) { auto childVector3 = vectorMaker_->flatVector({0, 1, 0, 1, 0, 1}); auto rowVector = vectorMaker_->rowVector( {"1", "2", "3"}, {childVector1, childVector2, childVector3}); - auto constVector = velox::BaseVector::wrapInConstant(10, 5, rowVector); - this->ranges_.add(0, constVector->size()); - auto rawSize = nimble::getRawSizeFromVector(constVector, this->ranges_); - constexpr auto expectedRawSize = (sizeof(int64_t) + 6 + sizeof(int16_t)) * 10; + const velox::vector_size_t CONST_VECTOR_SIZE = 10; + auto constVector = + velox::BaseVector::wrapInConstant(CONST_VECTOR_SIZE, 5, rowVector); + this->ranges_.add(0, CONST_VECTOR_SIZE); + auto rawSize = nimble::getRawSizeFromRowVector( + constVector, this->ranges_, context_, /*topLevel=*/true); + constexpr auto expectedRawSize = + (sizeof(int64_t) + 6 + sizeof(int16_t)) * CONST_VECTOR_SIZE; ASSERT_EQ(expectedRawSize, rawSize); + ASSERT_EQ(3, context_.columnCount()); + ASSERT_EQ(sizeof(int64_t) * CONST_VECTOR_SIZE, context_.sizeAt(0)); + ASSERT_EQ(6 * CONST_VECTOR_SIZE, context_.sizeAt(1)); + ASSERT_EQ(sizeof(int16_t) * CONST_VECTOR_SIZE, context_.sizeAt(2)); + + ASSERT_EQ(0, context_.nullCount); + for (size_t i = 0; i < 3; ++i) { + ASSERT_EQ(0, context_.nullsAt(i)); + } } TEST_F(RawSizeTestFixture, ConstRowNestedNull) { @@ -1535,13 +1630,25 @@ TEST_F(RawSizeTestFixture, ConstRowNestedNull) { vectorMaker_->flatVectorNullable({0, 1, 0, 1, 0, std::nullopt}); auto rowVector = vectorMaker_->rowVector( {"1", "2", "3"}, {childVector1, childVector2, childVector3}); - auto constVector = velox::BaseVector::wrapInConstant(10, 5, rowVector); + const velox::vector_size_t CONST_VECTOR_SIZE = 10; + auto constVector = + velox::BaseVector::wrapInConstant(CONST_VECTOR_SIZE, 5, rowVector); this->ranges_.add(0, constVector->size()); - auto rawSize = nimble::getRawSizeFromVector(constVector, this->ranges_); + auto rawSize = nimble::getRawSizeFromRowVector( + constVector, this->ranges_, context_, /*topLevel=*/true); constexpr auto expectedRawSize = - (sizeof(int64_t) + nimble::NULL_SIZE * 2) * 10; + (sizeof(int64_t) + nimble::NULL_SIZE * 2) * CONST_VECTOR_SIZE; ASSERT_EQ(expectedRawSize, rawSize); + ASSERT_EQ(3, context_.columnCount()); + ASSERT_EQ(sizeof(int64_t) * CONST_VECTOR_SIZE, context_.sizeAt(0)); + ASSERT_EQ(nimble::NULL_SIZE * CONST_VECTOR_SIZE, context_.sizeAt(1)); + ASSERT_EQ(nimble::NULL_SIZE * CONST_VECTOR_SIZE, context_.sizeAt(2)); + + ASSERT_EQ(0, context_.nullCount); + ASSERT_EQ(0, context_.nullsAt(0)); + ASSERT_EQ(CONST_VECTOR_SIZE, context_.nullsAt(1)); + ASSERT_EQ(CONST_VECTOR_SIZE, context_.nullsAt(2)); } TEST_F(RawSizeTestFixture, DictRow) { @@ -1567,11 +1674,21 @@ TEST_F(RawSizeTestFixture, DictRow) { auto dictVector = velox::BaseVector::wrapInDictionary( velox::BufferPtr(nullptr), indices, VECTOR_TEST_SIZE, rowVector); this->ranges_.add(0, dictVector->size()); - auto rawSize = nimble::getRawSizeFromVector(dictVector, this->ranges_); + auto rawSize = nimble::getRawSizeFromRowVector( + dictVector, this->ranges_, context_, /*topLevel=*/true); constexpr auto expectedRawSize = sizeof(int64_t) * 5 + sizeof(int16_t) * 5 + 11; ASSERT_EQ(expectedRawSize, rawSize); + ASSERT_EQ(3, context_.columnCount()); + ASSERT_EQ(sizeof(int64_t) * VECTOR_TEST_SIZE, context_.sizeAt(0)); + ASSERT_EQ(11, context_.sizeAt(1)); + ASSERT_EQ(sizeof(int16_t) * VECTOR_TEST_SIZE, context_.sizeAt(2)); + + ASSERT_EQ(0, context_.nullCount); + for (size_t i = 0; i < 3; ++i) { + ASSERT_EQ(0, context_.nullsAt(i)); + } } TEST_F(RawSizeTestFixture, DictRowNull) { @@ -1598,11 +1715,21 @@ TEST_F(RawSizeTestFixture, DictRowNull) { auto dictVector = velox::BaseVector::wrapInDictionary( velox::BufferPtr(nullptr), indices, VECTOR_TEST_SIZE, rowVector); this->ranges_.add(0, dictVector->size()); - auto rawSize = nimble::getRawSizeFromVector(dictVector, this->ranges_); + auto rawSize = nimble::getRawSizeFromRowVector( + dictVector, this->ranges_, context_, /*topLevel=*/true); constexpr auto expectedRawSize = sizeof(int64_t) * 3 + sizeof(int16_t) * 5 + 11 + nimble::NULL_SIZE * 2; ASSERT_EQ(expectedRawSize, rawSize); + ASSERT_EQ(3, context_.columnCount()); + ASSERT_EQ(nimble::NULL_SIZE * 2 + sizeof(int64_t) * 3, context_.sizeAt(0)); + ASSERT_EQ(11, context_.sizeAt(1)); + ASSERT_EQ(sizeof(int16_t) * VECTOR_TEST_SIZE, context_.sizeAt(2)); + + ASSERT_EQ(0, context_.nullCount); + ASSERT_EQ(2, context_.nullsAt(0)); + ASSERT_EQ(0, context_.nullsAt(1)); + ASSERT_EQ(0, context_.nullsAt(2)); } TEST_F(RawSizeTestFixture, DictRowNullTopLevel) { @@ -1633,11 +1760,20 @@ TEST_F(RawSizeTestFixture, DictRowNullTopLevel) { auto dictVector = velox::BaseVector::wrapInDictionary( nulls, indices, VECTOR_TEST_SIZE, rowVector); this->ranges_.add(0, dictVector->size()); - auto rawSize = nimble::getRawSizeFromVector(dictVector, this->ranges_); + auto rawSize = nimble::getRawSizeFromRowVector( + dictVector, this->ranges_, context_, /*topLevel=*/true); constexpr auto expectedRawSize = sizeof(int64_t) * 4 + sizeof(int16_t) * 4 + 9 + nimble::NULL_SIZE * 1; ASSERT_EQ(expectedRawSize, rawSize); + ASSERT_EQ(sizeof(int64_t) * (VECTOR_TEST_SIZE - 1), context_.sizeAt(0)); + ASSERT_EQ(9, context_.sizeAt(1)); + ASSERT_EQ(sizeof(int16_t) * (VECTOR_TEST_SIZE - 1), context_.sizeAt(2)); + + ASSERT_EQ(1, context_.nullCount); + for (size_t i = 0; i < 3; ++i) { + ASSERT_EQ(0, context_.nullsAt(i)); + } } TEST_F(RawSizeTestFixture, ThrowOnDefaultType) { @@ -1671,11 +1807,9 @@ TEST_F(RawSizeTestFixture, ThrowOnDefaultEncodingVariableWidth) { } TEST_F(RawSizeTestFixture, LocalDecodedVectorMoveConstructor) { - facebook::nimble::RawSizeContext context; - auto localDecodedVector1 = facebook::nimble::DecodedVectorManager::LocalDecodedVector( - context.getDecodedVectorManager()); + context_.getDecodedVectorManager()); // Constuct LocalDecodedVector by LocalDecodedVector ctr auto localDecodedVector2 = std::move(localDecodedVector1); diff --git a/dwio/nimble/velox/tests/SchemaTests.cpp b/dwio/nimble/velox/tests/SchemaTests.cpp index 0be9086d..9dd4b003 100644 --- a/dwio/nimble/velox/tests/SchemaTests.cpp +++ b/dwio/nimble/velox/tests/SchemaTests.cpp @@ -65,13 +65,14 @@ TEST(SchemaTests, SchemaUtils) { {"c13", BINARY()}, {"c14", OFFSETARRAY(INTEGER())}, {"c15", SLIDINGWINDOWMAP(INTEGER(), INTEGER())}, + {"c16", ROW({{"d1", TINYINT()}, {"d2", ARRAY(TINYINT())}})}, })); auto nodes = builder.getSchemaNodes(); nimble::test::verifySchemaNodes( nodes, { - {nimble::Kind::Row, 23, nimble::ScalarKind::Bool, std::nullopt, 15}, + {nimble::Kind::Row, 27, nimble::ScalarKind::Bool, std::nullopt, 16}, {nimble::Kind::Scalar, 0, nimble::ScalarKind::Int8, "c1"}, {nimble::Kind::Array, 2, nimble::ScalarKind::UInt32, "c2"}, {nimble::Kind::Scalar, 1, nimble::ScalarKind::Int8}, @@ -101,11 +102,16 @@ TEST(SchemaTests, SchemaUtils) { {nimble::Kind::Scalar, 22, nimble::ScalarKind::UInt32}, {nimble::Kind::Scalar, 19, nimble::ScalarKind::Int32}, {nimble::Kind::Scalar, 20, nimble::ScalarKind::Int32}, + {nimble::Kind::Row, 26, nimble::ScalarKind::Bool, "c16", 2}, + {nimble::Kind::Scalar, 23, nimble::ScalarKind::Int8, "d1"}, + {nimble::Kind::Array, 25, nimble::ScalarKind::UInt32, "d2"}, + {nimble::Kind::Scalar, 24, nimble::ScalarKind::Int8}, }); - verifyLabels(nodes, {"/", "/0", "/1", "/1", "/2", "/3", "/3", "/3", - "/4", "/5", "/6", "/7", "/8", "/9", "/10", "/11", - "/12", "/13", "/13", "/13", "/14", "/14", "/14", "/14"}); + verifyLabels(nodes, {"/", "/0", "/1", "/1", "/2", "/3", "/3", + "/3", "/4", "/5", "/6", "/7", "/8", "/9", + "/10", "/11", "/12", "/13", "/13", "/13", "/14", + "/14", "/14", "/14", "/15/", "/15/0", "/15/1", "/15/1"}); fm2.addChild("f1"); @@ -113,7 +119,7 @@ TEST(SchemaTests, SchemaUtils) { nimble::test::verifySchemaNodes( nodes, { - {nimble::Kind::Row, 23, nimble::ScalarKind::Bool, std::nullopt, 15}, + {nimble::Kind::Row, 27, nimble::ScalarKind::Bool, std::nullopt, 16}, {nimble::Kind::Scalar, 0, nimble::ScalarKind::Int8, "c1"}, {nimble::Kind::Array, 2, nimble::ScalarKind::UInt32, "c2"}, {nimble::Kind::Scalar, 1, nimble::ScalarKind::Int8}, @@ -122,9 +128,9 @@ TEST(SchemaTests, SchemaUtils) { {nimble::Kind::Scalar, 4, nimble::ScalarKind::Int8}, {nimble::Kind::Scalar, 5, nimble::ScalarKind::Int8}, {nimble::Kind::FlatMap, 7, nimble::ScalarKind::Float, "c5", 1}, - {nimble::Kind::Scalar, 26, nimble::ScalarKind::Bool, "f1"}, - {nimble::Kind::Array, 25, nimble::ScalarKind::UInt32}, - {nimble::Kind::Scalar, 24, nimble::ScalarKind::Int64}, + {nimble::Kind::Scalar, 30, nimble::ScalarKind::Bool, "f1"}, + {nimble::Kind::Array, 29, nimble::ScalarKind::UInt32}, + {nimble::Kind::Scalar, 28, nimble::ScalarKind::Int64}, {nimble::Kind::Scalar, 8, nimble::ScalarKind::Int16, "c6"}, {nimble::Kind::Scalar, 9, nimble::ScalarKind::Int32, "c7"}, {nimble::Kind::Scalar, 10, nimble::ScalarKind::Int64, "c8"}, @@ -146,12 +152,17 @@ TEST(SchemaTests, SchemaUtils) { {nimble::Kind::Scalar, 22, nimble::ScalarKind::UInt32}, {nimble::Kind::Scalar, 19, nimble::ScalarKind::Int32}, {nimble::Kind::Scalar, 20, nimble::ScalarKind::Int32}, + {nimble::Kind::Row, 26, nimble::ScalarKind::Bool, "c16", 2}, + {nimble::Kind::Scalar, 23, nimble::ScalarKind::Int8, "d1"}, + {nimble::Kind::Array, 25, nimble::ScalarKind::UInt32, "d2"}, + {nimble::Kind::Scalar, 24, nimble::ScalarKind::Int8}, }); - verifyLabels(nodes, {"/", "/0", "/1", "/1", "/2", "/3", "/3", - "/3", "/4", "/4/f1", "/4/f1", "/4/f1", "/5", "/6", - "/7", "/8", "/9", "/10", "/11", "/12", "/13", - "/13", "/13", "/14", "/14", "/14", "/14"}); + verifyLabels( + nodes, {"/", "/0", "/1", "/1", "/2", "/3", "/3", "/3", + "/4", "/4/f1", "/4/f1", "/4/f1", "/5", "/6", "/7", "/8", + "/9", "/10", "/11", "/12", "/13", "/13", "/13", "/14", + "/14", "/14", "/14", "/15/", "/15/0", "/15/1", "/15/1"}); fm1.addChild("f1"); fm1.addChild("f2"); @@ -162,28 +173,28 @@ TEST(SchemaTests, SchemaUtils) { nimble::test::verifySchemaNodes( nodes, { - {nimble::Kind::Row, 23, nimble::ScalarKind::Bool, std::nullopt, 15}, + {nimble::Kind::Row, 27, nimble::ScalarKind::Bool, std::nullopt, 16}, {nimble::Kind::Scalar, 0, nimble::ScalarKind::Int8, "c1"}, {nimble::Kind::Array, 2, nimble::ScalarKind::UInt32, "c2"}, {nimble::Kind::Scalar, 1, nimble::ScalarKind::Int8}, {nimble::Kind::FlatMap, 3, nimble::ScalarKind::Int8, "c3", 2}, - {nimble::Kind::Scalar, 28, nimble::ScalarKind::Bool, "f1"}, - {nimble::Kind::Scalar, 27, nimble::ScalarKind::Int8}, - {nimble::Kind::Scalar, 30, nimble::ScalarKind::Bool, "f2"}, - {nimble::Kind::Scalar, 29, nimble::ScalarKind::Int8}, + {nimble::Kind::Scalar, 32, nimble::ScalarKind::Bool, "f1"}, + {nimble::Kind::Scalar, 31, nimble::ScalarKind::Int8}, + {nimble::Kind::Scalar, 34, nimble::ScalarKind::Bool, "f2"}, + {nimble::Kind::Scalar, 33, nimble::ScalarKind::Int8}, {nimble::Kind::Map, 6, nimble::ScalarKind::UInt32, "c4"}, {nimble::Kind::Scalar, 4, nimble::ScalarKind::Int8}, {nimble::Kind::Scalar, 5, nimble::ScalarKind::Int8}, {nimble::Kind::FlatMap, 7, nimble::ScalarKind::Float, "c5", 3}, - {nimble::Kind::Scalar, 26, nimble::ScalarKind::Bool, "f1"}, - {nimble::Kind::Array, 25, nimble::ScalarKind::UInt32}, - {nimble::Kind::Scalar, 24, nimble::ScalarKind::Int64}, - {nimble::Kind::Scalar, 33, nimble::ScalarKind::Bool, "f2"}, - {nimble::Kind::Array, 32, nimble::ScalarKind::UInt32}, - {nimble::Kind::Scalar, 31, nimble::ScalarKind::Int64}, - {nimble::Kind::Scalar, 36, nimble::ScalarKind::Bool, "f3"}, - {nimble::Kind::Array, 35, nimble::ScalarKind::UInt32}, - {nimble::Kind::Scalar, 34, nimble::ScalarKind::Int64}, + {nimble::Kind::Scalar, 30, nimble::ScalarKind::Bool, "f1"}, + {nimble::Kind::Array, 29, nimble::ScalarKind::UInt32}, + {nimble::Kind::Scalar, 28, nimble::ScalarKind::Int64}, + {nimble::Kind::Scalar, 37, nimble::ScalarKind::Bool, "f2"}, + {nimble::Kind::Array, 36, nimble::ScalarKind::UInt32}, + {nimble::Kind::Scalar, 35, nimble::ScalarKind::Int64}, + {nimble::Kind::Scalar, 40, nimble::ScalarKind::Bool, "f3"}, + {nimble::Kind::Array, 39, nimble::ScalarKind::UInt32}, + {nimble::Kind::Scalar, 38, nimble::ScalarKind::Int64}, {nimble::Kind::Scalar, 8, nimble::ScalarKind::Int16, "c6"}, {nimble::Kind::Scalar, 9, nimble::ScalarKind::Int32, "c7"}, {nimble::Kind::Scalar, 10, nimble::ScalarKind::Int64, "c8"}, @@ -205,15 +216,19 @@ TEST(SchemaTests, SchemaUtils) { {nimble::Kind::Scalar, 22, nimble::ScalarKind::UInt32}, {nimble::Kind::Scalar, 19, nimble::ScalarKind::Int32}, {nimble::Kind::Scalar, 20, nimble::ScalarKind::Int32}, + {nimble::Kind::Row, 26, nimble::ScalarKind::Bool, "c16", 2}, + {nimble::Kind::Scalar, 23, nimble::ScalarKind::Int8, "d1"}, + {nimble::Kind::Array, 25, nimble::ScalarKind::UInt32, "d2"}, + {nimble::Kind::Scalar, 24, nimble::ScalarKind::Int8}, }); verifyLabels( - nodes, - {"/", "/0", "/1", "/1", "/2", "/2/f1", "/2/f1", "/2/f2", - "/2/f2", "/3", "/3", "/3", "/4", "/4/f1", "/4/f1", "/4/f1", - "/4/f2", "/4/f2", "/4/f2", "/4/f3", "/4/f3", "/4/f3", "/5", "/6", - "/7", "/8", "/9", "/10", "/11", "/12", "/13", "/13", - "/13", "/14", "/14", "/14", "/14"}); + nodes, {"/", "/0", "/1", "/1", "/2", "/2/f1", "/2/f1", + "/2/f2", "/2/f2", "/3", "/3", "/3", "/4", "/4/f1", + "/4/f1", "/4/f1", "/4/f2", "/4/f2", "/4/f2", "/4/f3", "/4/f3", + "/4/f3", "/5", "/6", "/7", "/8", "/9", "/10", + "/11", "/12", "/13", "/13", "/13", "/14", "/14", + "/14", "/14", "/15/", "/15/0", "/15/1", "/15/1"}); } TEST(SchemaTests, RoundTrip) { @@ -226,8 +241,12 @@ TEST(SchemaTests, RoundTrip) { // c5:BOOL, // c6:OFFSETARRAY, // c7:SLIDINGWINDOWMAP) + // c8:ROW( + // d1:TINYINT, + // d2:ARRAY) + // ) - auto row = builder.createRowTypeBuilder(7); + auto row = builder.createRowTypeBuilder(8); { auto scalar = builder.createScalarTypeBuilder(nimble::ScalarKind::Int32); row->addChild("c1", scalar); @@ -268,11 +287,24 @@ TEST(SchemaTests, RoundTrip) { row->addChild("c7", slidingWindowMap); } + { + auto row2 = builder.createRowTypeBuilder(2); + auto scalar1 = builder.createScalarTypeBuilder(nimble::ScalarKind::Int8); + row2->addChild("d1", scalar1); + + auto array = builder.createArrayTypeBuilder(); + auto elements = builder.createScalarTypeBuilder(nimble::ScalarKind::UInt32); + array->setChildren(elements); + row2->addChild("d2", array); + + row->addChild("c8", row2); + } + auto nodes = builder.getSchemaNodes(); nimble::test::verifySchemaNodes( nodes, { - {nimble::Kind::Row, 0, nimble::ScalarKind::Bool, std::nullopt, 7}, + {nimble::Kind::Row, 0, nimble::ScalarKind::Bool, std::nullopt, 8}, {nimble::Kind::Scalar, 1, nimble::ScalarKind::Int32, "c1", 0}, {nimble::Kind::FlatMap, 2, nimble::ScalarKind::Int8, "c2", 0}, {nimble::Kind::Map, 3, nimble::ScalarKind::UInt32, "c3"}, @@ -295,6 +327,10 @@ TEST(SchemaTests, RoundTrip) { {nimble::Kind::Scalar, 12, nimble::ScalarKind::UInt32, std::nullopt}, {nimble::Kind::Scalar, 13, nimble::ScalarKind::Int32, std::nullopt}, {nimble::Kind::Scalar, 14, nimble::ScalarKind::Int32, std::nullopt}, + {nimble::Kind::Row, 15, nimble::ScalarKind::Bool, "c8", 2}, + {nimble::Kind::Scalar, 16, nimble::ScalarKind::Int8, "d1"}, + {nimble::Kind::Array, 17, nimble::ScalarKind::UInt32, "d2"}, + {nimble::Kind::Scalar, 18, nimble::ScalarKind::UInt32}, }); verifyLabels( @@ -313,7 +349,11 @@ TEST(SchemaTests, RoundTrip) { "/6", "/6", "/6", - "/6"}); + "/6", + "/7/", + "/7/0", + "/7/1", + "/7/1"}); { auto array = builder.createArrayTypeBuilder(); @@ -339,21 +379,21 @@ TEST(SchemaTests, RoundTrip) { nimble::test::verifySchemaNodes( nodes, { - {nimble::Kind::Row, 0, nimble::ScalarKind::Bool, std::nullopt, 7}, + {nimble::Kind::Row, 0, nimble::ScalarKind::Bool, std::nullopt, 8}, {nimble::Kind::Scalar, 1, nimble::ScalarKind::Int32, "c1", 0}, {nimble::Kind::FlatMap, 2, nimble::ScalarKind::Int8, "c2", 2}, - {nimble::Kind::Scalar, 17, nimble::ScalarKind::Bool, "f1"}, - {nimble::Kind::Array, 15, nimble::ScalarKind::UInt32}, - {nimble::Kind::Scalar, 16, nimble::ScalarKind::Double}, - {nimble::Kind::Scalar, 20, nimble::ScalarKind::Bool, "f2"}, - {nimble::Kind::Array, 18, nimble::ScalarKind::UInt32}, - {nimble::Kind::Scalar, 19, nimble::ScalarKind::Double}, + {nimble::Kind::Scalar, 21, nimble::ScalarKind::Bool, "f1"}, + {nimble::Kind::Array, 19, nimble::ScalarKind::UInt32}, + {nimble::Kind::Scalar, 20, nimble::ScalarKind::Double}, + {nimble::Kind::Scalar, 24, nimble::ScalarKind::Bool, "f2"}, + {nimble::Kind::Array, 22, nimble::ScalarKind::UInt32}, + {nimble::Kind::Scalar, 23, nimble::ScalarKind::Double}, {nimble::Kind::Map, 3, nimble::ScalarKind::UInt32, "c3"}, {nimble::Kind::Scalar, 4, nimble::ScalarKind::String}, {nimble::Kind::Scalar, 5, nimble::ScalarKind::Float}, {nimble::Kind::FlatMap, 6, nimble::ScalarKind::Int64, "c4", 1}, - {nimble::Kind::Scalar, 22, nimble::ScalarKind::Bool, "f1"}, - {nimble::Kind::Scalar, 21, nimble::ScalarKind::Int32}, + {nimble::Kind::Scalar, 26, nimble::ScalarKind::Bool, "f1"}, + {nimble::Kind::Scalar, 25, nimble::ScalarKind::Int32}, {nimble::Kind::Scalar, 7, nimble::ScalarKind::Bool, "c5"}, {nimble::Kind::ArrayWithOffsets, 9, nimble::ScalarKind::UInt32, "c6"}, {nimble::Kind::Scalar, 8, nimble::ScalarKind::UInt32}, @@ -365,12 +405,17 @@ TEST(SchemaTests, RoundTrip) { {nimble::Kind::Scalar, 12, nimble::ScalarKind::UInt32, std::nullopt}, {nimble::Kind::Scalar, 13, nimble::ScalarKind::Int32, std::nullopt}, {nimble::Kind::Scalar, 14, nimble::ScalarKind::Int32, std::nullopt}, + {nimble::Kind::Row, 15, nimble::ScalarKind::Bool, "c8", 2}, + {nimble::Kind::Scalar, 16, nimble::ScalarKind::Int8, "d1"}, + {nimble::Kind::Array, 17, nimble::ScalarKind::UInt32, "d2"}, + {nimble::Kind::Scalar, 18, nimble::ScalarKind::UInt32}, }); verifyLabels( - nodes, {"/", "/0", "/1", "/1/f1", "/1/f1", "/1/f1", "/1/f2", "/1/f2", - "/1/f2", "/2", "/2", "/2", "/3", "/3/f1", "/3/f1", "/4", - "/5", "/5", "/5", "/6", "/6", "/6", "/6"}); + nodes, + {"/", "/0", "/1", "/1/f1", "/1/f1", "/1/f1", "/1/f2", "/1/f2", "/1/f2", + "/2", "/2", "/2", "/3", "/3/f1", "/3/f1", "/4", "/5", "/5", + "/5", "/6", "/6", "/6", "/6", "/7/", "/7/0", "/7/1", "/7/1"}); { auto array = builder.createArrayTypeBuilder(); @@ -389,26 +434,26 @@ TEST(SchemaTests, RoundTrip) { nimble::test::verifySchemaNodes( nodes, { - {nimble::Kind::Row, 0, nimble::ScalarKind::Bool, std::nullopt, 7}, + {nimble::Kind::Row, 0, nimble::ScalarKind::Bool, std::nullopt, 8}, {nimble::Kind::Scalar, 1, nimble::ScalarKind::Int32, "c1", 0}, {nimble::Kind::FlatMap, 2, nimble::ScalarKind::Int8, "c2", 3}, - {nimble::Kind::Scalar, 17, nimble::ScalarKind::Bool, "f1"}, - {nimble::Kind::Array, 15, nimble::ScalarKind::UInt32}, - {nimble::Kind::Scalar, 16, nimble::ScalarKind::Double}, - {nimble::Kind::Scalar, 20, nimble::ScalarKind::Bool, "f2"}, - {nimble::Kind::Array, 18, nimble::ScalarKind::UInt32}, - {nimble::Kind::Scalar, 19, nimble::ScalarKind::Double}, - {nimble::Kind::Scalar, 25, nimble::ScalarKind::Bool, "f3"}, - {nimble::Kind::Array, 23, nimble::ScalarKind::UInt32}, - {nimble::Kind::Scalar, 24, nimble::ScalarKind::Double}, + {nimble::Kind::Scalar, 21, nimble::ScalarKind::Bool, "f1"}, + {nimble::Kind::Array, 19, nimble::ScalarKind::UInt32}, + {nimble::Kind::Scalar, 20, nimble::ScalarKind::Double}, + {nimble::Kind::Scalar, 24, nimble::ScalarKind::Bool, "f2"}, + {nimble::Kind::Array, 22, nimble::ScalarKind::UInt32}, + {nimble::Kind::Scalar, 23, nimble::ScalarKind::Double}, + {nimble::Kind::Scalar, 29, nimble::ScalarKind::Bool, "f3"}, + {nimble::Kind::Array, 27, nimble::ScalarKind::UInt32}, + {nimble::Kind::Scalar, 28, nimble::ScalarKind::Double}, {nimble::Kind::Map, 3, nimble::ScalarKind::UInt32, "c3"}, {nimble::Kind::Scalar, 4, nimble::ScalarKind::String}, {nimble::Kind::Scalar, 5, nimble::ScalarKind::Float}, {nimble::Kind::FlatMap, 6, nimble::ScalarKind::Int64, "c4", 2}, - {nimble::Kind::Scalar, 22, nimble::ScalarKind::Bool, "f1"}, - {nimble::Kind::Scalar, 21, nimble::ScalarKind::Int32}, - {nimble::Kind::Scalar, 27, nimble::ScalarKind::Bool, "f2"}, - {nimble::Kind::Scalar, 26, nimble::ScalarKind::Int32}, + {nimble::Kind::Scalar, 26, nimble::ScalarKind::Bool, "f1"}, + {nimble::Kind::Scalar, 25, nimble::ScalarKind::Int32}, + {nimble::Kind::Scalar, 31, nimble::ScalarKind::Bool, "f2"}, + {nimble::Kind::Scalar, 30, nimble::ScalarKind::Int32}, {nimble::Kind::Scalar, 7, nimble::ScalarKind::Bool, "c5"}, {nimble::Kind::ArrayWithOffsets, 9, nimble::ScalarKind::UInt32, "c6"}, {nimble::Kind::Scalar, 8, nimble::ScalarKind::UInt32}, @@ -420,13 +465,18 @@ TEST(SchemaTests, RoundTrip) { {nimble::Kind::Scalar, 12, nimble::ScalarKind::UInt32, std::nullopt}, {nimble::Kind::Scalar, 13, nimble::ScalarKind::Int32, std::nullopt}, {nimble::Kind::Scalar, 14, nimble::ScalarKind::Int32, std::nullopt}, + {nimble::Kind::Row, 15, nimble::ScalarKind::Bool, "c8", 2}, + {nimble::Kind::Scalar, 16, nimble::ScalarKind::Int8, "d1"}, + {nimble::Kind::Array, 17, nimble::ScalarKind::UInt32, "d2"}, + {nimble::Kind::Scalar, 18, nimble::ScalarKind::UInt32}, }); verifyLabels( - nodes, {"/", "/0", "/1", "/1/f1", "/1/f1", "/1/f1", "/1/f2", - "/1/f2", "/1/f2", "/1/f3", "/1/f3", "/1/f3", "/2", "/2", - "/2", "/3", "/3/f1", "/3/f1", "/3/f2", "/3/f2", "/4", - "/5", "/5", "/5", "/6", "/6", "/6", "/6"}); + nodes, + {"/", "/0", "/1", "/1/f1", "/1/f1", "/1/f1", "/1/f2", "/1/f2", + "/1/f2", "/1/f3", "/1/f3", "/1/f3", "/2", "/2", "/2", "/3", + "/3/f1", "/3/f1", "/3/f2", "/3/f2", "/4", "/5", "/5", "/5", + "/6", "/6", "/6", "/6", "/7/", "/7/0", "/7/1", "/7/1"}); auto result = nimble::SchemaReader::getSchema(nodes); nimble::test::compareSchema(nodes, result);