19
19
20
20
#include " manifest_reader_internal.h"
21
21
22
- #include < array>
23
-
24
22
#include < nanoarrow/nanoarrow.h>
25
23
26
24
#include " iceberg/arrow_c_data_guard_internal.h"
29
27
#include " iceberg/manifest_list.h"
30
28
#include " iceberg/schema.h"
31
29
#include " iceberg/type.h"
30
+ #include " iceberg/util/checked_cast.h"
32
31
#include " iceberg/util/macros.h"
33
32
34
33
namespace iceberg {
@@ -39,7 +38,7 @@ namespace iceberg {
39
38
}
40
39
41
40
#define PARSE_PRIMITIVE_FIELD (item, array_view, type ) \
42
- for (size_t row_idx = 0 ; row_idx < array_view->length; row_idx++) { \
41
+ for (int64_t row_idx = 0 ; row_idx < array_view->length; row_idx++) { \
43
42
if (!ArrowArrayViewIsNull (array_view, row_idx)) { \
44
43
auto value = ArrowArrayViewGetIntUnsafe (array_view, row_idx); \
45
44
item = static_cast <type>(value); \
@@ -50,7 +49,7 @@ namespace iceberg {
50
49
}
51
50
52
51
#define PARSE_STRING_FIELD (item, array_view ) \
53
- for (size_t row_idx = 0 ; row_idx < array_view->length; row_idx++) { \
52
+ for (int64_t row_idx = 0 ; row_idx < array_view->length; row_idx++) { \
54
53
if (!ArrowArrayViewIsNull (array_view, row_idx)) { \
55
54
auto value = ArrowArrayViewGetStringUnsafe (array_view, row_idx); \
56
55
item = std::string (value.data , value.size_bytes ); \
@@ -61,7 +60,7 @@ namespace iceberg {
61
60
}
62
61
63
62
#define PARSE_BINARY_FIELD (item, array_view ) \
64
- for (size_t row_idx = 0 ; row_idx < array_view->length; row_idx++) { \
63
+ for (int64_t row_idx = 0 ; row_idx < array_view->length; row_idx++) { \
65
64
if (!ArrowArrayViewIsNull (view_of_column, row_idx)) { \
66
65
item = ArrowArrayViewGetInt8Vector (array_view, row_idx); \
67
66
} else if (required) { \
@@ -227,66 +226,67 @@ Result<std::vector<ManifestFile>> ParseManifestList(ArrowSchema* schema,
227
226
auto field_name = field.value ().get ().name ();
228
227
bool required = !field.value ().get ().optional ();
229
228
auto view_of_column = array_view.children [idx];
230
- switch (idx) {
231
- case 0 :
229
+ ICEBERG_ASSIGN_OR_RAISE (auto manifest_file_field, ManifestFileFieldFromIndex (idx));
230
+ switch (manifest_file_field) {
231
+ case ManifestFileField::kManifestPath :
232
232
PARSE_STRING_FIELD (manifest_files[row_idx].manifest_path , view_of_column);
233
233
break ;
234
- case 1 :
234
+ case ManifestFileField:: kManifestLength :
235
235
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].manifest_length , view_of_column,
236
236
int64_t );
237
237
break ;
238
- case 2 :
238
+ case ManifestFileField:: kPartitionSpecId :
239
239
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].partition_spec_id , view_of_column,
240
240
int32_t );
241
241
break ;
242
- case 3 :
242
+ case ManifestFileField:: kContent :
243
243
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].content , view_of_column,
244
244
ManifestFile::Content);
245
245
break ;
246
- case 4 :
246
+ case ManifestFileField:: kSequenceNumber :
247
247
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].sequence_number , view_of_column,
248
248
int64_t );
249
249
break ;
250
- case 5 :
250
+ case ManifestFileField:: kMinSequenceNumber :
251
251
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].min_sequence_number , view_of_column,
252
252
int64_t );
253
253
break ;
254
- case 6 :
254
+ case ManifestFileField:: kAddedSnapshotId :
255
255
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].added_snapshot_id , view_of_column,
256
256
int64_t );
257
257
break ;
258
- case 7 :
258
+ case ManifestFileField:: kAddedFilesCount :
259
259
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].added_files_count , view_of_column,
260
260
int32_t );
261
261
break ;
262
- case 8 :
262
+ case ManifestFileField:: kExistingFilesCount :
263
263
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].existing_files_count ,
264
264
view_of_column, int32_t );
265
265
break ;
266
- case 9 :
266
+ case ManifestFileField:: kDeletedFilesCount :
267
267
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].deleted_files_count , view_of_column,
268
268
int32_t );
269
269
break ;
270
- case 10 :
270
+ case ManifestFileField:: kAddedRowsCount :
271
271
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].added_rows_count , view_of_column,
272
272
int64_t );
273
273
break ;
274
- case 11 :
274
+ case ManifestFileField:: kExistingRowsCount :
275
275
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].existing_rows_count , view_of_column,
276
276
int64_t );
277
277
break ;
278
- case 12 :
278
+ case ManifestFileField:: kDeletedRowsCount :
279
279
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].deleted_rows_count , view_of_column,
280
280
int64_t );
281
281
break ;
282
- case 13 :
282
+ case ManifestFileField:: kPartitionFieldSummary :
283
283
ICEBERG_RETURN_UNEXPECTED (
284
284
ParsePartitionFieldSummaryList (view_of_column, manifest_files));
285
285
break ;
286
- case 14 :
286
+ case ManifestFileField:: kKeyMetadata :
287
287
PARSE_BINARY_FIELD (manifest_files[row_idx].key_metadata , view_of_column);
288
288
break ;
289
- case 15 :
289
+ case ManifestFileField:: kFirstRowId :
290
290
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].first_row_id , view_of_column,
291
291
int64_t );
292
292
break ;
@@ -297,7 +297,7 @@ Result<std::vector<ManifestFile>> ParseManifestList(ArrowSchema* schema,
297
297
return manifest_files;
298
298
}
299
299
300
- Status ParseLiteral (ArrowArrayView* view_of_partition, size_t row_idx,
300
+ Status ParseLiteral (ArrowArrayView* view_of_partition, int64_t row_idx,
301
301
std::vector<ManifestEntry>& manifest_entries) {
302
302
if (view_of_partition->storage_type == ArrowType::NANOARROW_TYPE_BOOL) {
303
303
auto value = ArrowArrayViewGetUIntUnsafe (view_of_partition, row_idx);
@@ -357,7 +357,7 @@ Status ParseDataFile(const std::shared_ptr<StructType>& data_file_schema,
357
357
view_of_file_field);
358
358
break ;
359
359
case 2 :
360
- for (size_t row_idx = 0 ; row_idx < view_of_file_field->length ; row_idx++) {
360
+ for (int64_t row_idx = 0 ; row_idx < view_of_file_field->length ; row_idx++) {
361
361
if (!ArrowArrayViewIsNull (view_of_file_field, row_idx)) {
362
362
auto value = ArrowArrayViewGetStringUnsafe (view_of_file_field, row_idx);
363
363
std::string_view path_str (value.data , value.size_bytes );
@@ -512,7 +512,7 @@ Result<std::vector<ManifestEntry>> ParseManifestEntry(ArrowSchema* schema,
512
512
break ;
513
513
case 4 : {
514
514
auto data_file_schema =
515
- dynamic_pointer_cast <StructType>(field.value ().get ().type ());
515
+ internal::checked_pointer_cast <StructType>(field.value ().get ().type ());
516
516
ICEBERG_RETURN_UNEXPECTED (
517
517
ParseDataFile (data_file_schema, view_of_column, manifest_entries));
518
518
break ;
@@ -567,4 +567,11 @@ Result<std::vector<ManifestFile>> ManifestListReaderImpl::Files() const {
567
567
return manifest_files;
568
568
}
569
569
570
+ Result<ManifestFileField> ManifestFileFieldFromIndex (int32_t index) {
571
+ if (index >= 0 && index < static_cast <int32_t >(ManifestFileField::kNextId )) {
572
+ return static_cast <ManifestFileField>(index);
573
+ }
574
+ return InvalidArgument (" Invalid manifest file field index: {}" , index);
575
+ }
576
+
570
577
} // namespace iceberg
0 commit comments