Skip to content

Commit f4a490d

Browse files
macvincent authored and facebook-github-bot committed
Extract Null Count from Nimble Files (#177)
Summary: Rollback Plan: Differential Revision: D75928862
1 parent 4e7efdd commit f4a490d

File tree

2 files changed

+29
-1
lines changed

2 files changed

+29
-1
lines changed

dwio/nimble/velox/RawSizeContext.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,27 @@ class RawSizeContext {
5757
return columnSizes_.size();
5858
}
5959

60+
// Appends nullCount to the end of columnNullCounts_, the list of per-column
// null counts that nullsAt() reads back by index.
void appendNullCount(uint64_t nullCount) {
61+
columnNullCounts_.push_back(nullCount);
62+
}
63+
64+
// Returns the null count stored at position columnIndex in columnNullCounts_.
// NIMBLE_ASSERT first checks columnIndex < columnNullCounts_.size(), with a
// message that reports the requested index and the current number of entries;
// the read itself then goes through the bounds-checked vector::at().
uint64_t nullsAt(uint64_t columnIndex) const {
65+
NIMBLE_ASSERT(
66+
columnIndex < columnNullCounts_.size(),
67+
fmt::format(
68+
"Column index {} is out of range. Total number of columns is {}",
69+
columnIndex,
70+
columnNullCounts_.size()));
71+
return columnNullCounts_.at(columnIndex);
72+
}
73+
74+
// Number of nulls in last visited node
75+
uint64_t nulls{0};
76+
6077
private:
6178
DecodedVectorManager decodedVectorManager_;
6279
std::vector<uint64_t> columnSizes_;
80+
std::vector<uint64_t> columnNullCounts_;
6381
};
6482

6583
} // namespace facebook::nimble

dwio/nimble/velox/RawSizeUtils.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ uint64_t getRawSizeFromFixedWidthVector(
5656
}
5757
}
5858

59+
context.nulls = nullCount;
5960
return ((ranges.size() - nullCount) * sizeof(T)) +
6061
(nullCount * NULL_SIZE);
6162
}
@@ -67,6 +68,7 @@ uint64_t getRawSizeFromFixedWidthVector(
6768
encoding,
6869
vector->typeKind());
6970

71+
context.nulls = constVector->mayHaveNulls() ? ranges.size() : 0;
7072
return constVector->mayHaveNulls() ? ranges.size() * NULL_SIZE
7173
: ranges.size() * sizeof(T);
7274
}
@@ -92,6 +94,7 @@ uint64_t getRawSizeFromFixedWidthVector(
9294
}
9395
}
9496

97+
context.nulls = nullCount;
9598
return ((ranges.size() - nullCount) * sizeof(T)) +
9699
(nullCount * NULL_SIZE);
97100
}
@@ -132,6 +135,7 @@ uint64_t getRawSizeFromStringVector(
132135
}
133136
}
134137

138+
context.nulls = nullCount;
135139
return rawSize + (nullCount * NULL_SIZE);
136140
}
137141
case velox::VectorEncoding::Simple::CONSTANT: {
@@ -143,6 +147,7 @@ uint64_t getRawSizeFromStringVector(
143147
encoding,
144148
vector->typeKind());
145149

150+
context.nulls = constVector->mayHaveNulls() ? ranges.size() : 0;
146151
return constVector->mayHaveNulls()
147152
? ranges.size() * NULL_SIZE
148153
: ranges.size() * constVector->value().size();
@@ -179,6 +184,7 @@ uint64_t getRawSizeFromStringVector(
179184
}
180185
}
181186

187+
context.nulls = nullCount;
182188
return rawSize + (nullCount * NULL_SIZE);
183189
}
184190
default: {
@@ -224,7 +230,7 @@ uint64_t getRawSizeFromConstantComplexVector(
224230
} else {
225231
rawSize = getRawSizeFromVector(valueVector, childRanges, context);
226232
}
227-
233+
context.nulls = constantVector->mayHaveNulls() ? ranges.size() : 0;
228234
return rawSize * ranges.size();
229235
}
230236

@@ -333,6 +339,7 @@ uint64_t getRawSizeFromArrayVector(
333339
getRawSizeFromVector(arrayVector->elements(), childRanges, context);
334340
}
335341

342+
context.nulls = nullCount;
336343
if (nullCount) {
337344
rawSize += nullCount * NULL_SIZE;
338345
}
@@ -446,6 +453,7 @@ uint64_t getRawSizeFromMapVector(
446453
getRawSizeFromVector(mapVector->mapValues(), childRanges, context);
447454
}
448455

456+
context.nulls = nullCount;
449457
if (nullCount) {
450458
rawSize += nullCount * NULL_SIZE;
451459
}
@@ -549,10 +557,12 @@ uint64_t getRawSizeFromRowVector(
549557
rawSize += childRawSize;
550558
if (topLevel) {
551559
context.appendSize(childRawSize);
560+
context.appendNullCount(context.nulls);
552561
}
553562
}
554563
}
555564

565+
context.nulls = nullCount;
556566
if (nullCount) {
557567
rawSize += nullCount * NULL_SIZE;
558568
}

0 commit comments

Comments
 (0)