Skip to content

Commit 2afc37d

Browse files
committed
GH-34375: [C++][Parquet] Ignore page header stats when page index enabled
1 parent 6d3d2fc commit 2afc37d

File tree

2 files changed

+29
-4
lines changed

2 files changed

+29
-4
lines changed

cpp/src/parquet/arrow/arrow_reader_writer_test.cc

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5252,17 +5252,34 @@ class ParquetPageIndexRoundTripTest : public ::testing::Test {
52525252
auto row_group_index_reader = page_index_reader->RowGroup(rg);
52535253
ASSERT_NE(row_group_index_reader, nullptr);
52545254

5255+
auto row_group_reader = reader->RowGroup(rg);
5256+
ASSERT_NE(row_group_reader, nullptr);
5257+
52555258
for (int col = 0; col < metadata->num_columns(); ++col) {
52565259
auto column_index = row_group_index_reader->GetColumnIndex(col);
52575260
column_indexes_.emplace_back(column_index.get());
52585261

5262+
bool expect_no_page_index =
5263+
expect_columns_without_index.find(col) != expect_columns_without_index.cend();
5264+
52595265
auto offset_index = row_group_index_reader->GetOffsetIndex(col);
5260-
if (expect_columns_without_index.find(col) !=
5261-
expect_columns_without_index.cend()) {
5266+
if (expect_no_page_index) {
52625267
ASSERT_EQ(offset_index, nullptr);
52635268
} else {
52645269
CheckOffsetIndex(offset_index.get(), expect_num_pages, &offset_lower_bound);
52655270
}
5271+
5272+
// Verify page stats are not written to page header if page index is enabled.
5273+
auto page_reader = row_group_reader->GetColumnPageReader(col);
5274+
ASSERT_NE(page_reader, nullptr);
5275+
std::shared_ptr<Page> page = nullptr;
5276+
while ((page = page_reader->NextPage()) != nullptr) {
5277+
if (page->type() == PageType::DATA_PAGE ||
5278+
page->type() == PageType::DATA_PAGE_V2) {
5279+
ASSERT_EQ(std::static_pointer_cast<DataPage>(page)->statistics().is_set(),
5280+
expect_no_page_index);
5281+
}
5282+
}
52665283
}
52675284
}
52685285
}

cpp/src/parquet/column_writer.cc

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,11 @@ class SerializedPageWriter : public PageWriter {
460460
ToThrift(page.definition_level_encoding()));
461461
data_page_header.__set_repetition_level_encoding(
462462
ToThrift(page.repetition_level_encoding()));
463-
data_page_header.__set_statistics(ToThrift(page.statistics()));
463+
464+
// Write page statistics only when page index is not enabled.
465+
if (column_index_builder_ == nullptr) {
466+
data_page_header.__set_statistics(ToThrift(page.statistics()));
467+
}
464468

465469
page_header.__set_type(format::PageType::DATA_PAGE);
466470
page_header.__set_data_page_header(data_page_header);
@@ -479,7 +483,11 @@ class SerializedPageWriter : public PageWriter {
479483
page.repetition_levels_byte_length());
480484

481485
data_page_header.__set_is_compressed(page.is_compressed());
482-
data_page_header.__set_statistics(ToThrift(page.statistics()));
486+
487+
// Write page statistics only when page index is not enabled.
488+
if (column_index_builder_ == nullptr) {
489+
data_page_header.__set_statistics(ToThrift(page.statistics()));
490+
}
483491

484492
page_header.__set_type(format::PageType::DATA_PAGE_V2);
485493
page_header.__set_data_page_header_v2(data_page_header);

0 commit comments

Comments
 (0)