@@ -330,8 +330,8 @@ fn get_indices(
330
330
fields : & ArrowFields ,
331
331
mask_indices : & mut Vec < usize > ,
332
332
) -> DeltaResult < ( usize , Vec < ReorderIndex > ) > {
333
- let mut found_fields = HashSet :: with_capacity ( requested_schema. fields . len ( ) ) ;
334
- let mut reorder_indices = Vec :: with_capacity ( requested_schema. fields . len ( ) ) ;
333
+ let mut found_fields = HashSet :: with_capacity ( requested_schema. num_fields ( ) ) ;
334
+ let mut reorder_indices = Vec :: with_capacity ( requested_schema. num_fields ( ) ) ;
335
335
let mut parquet_offset = start_parquet_offset;
336
336
// for each field, get its position in the parquet (via enumerate), a reference to the arrow
337
337
// field, and info about where it appears in the requested_schema, or None if the field is not
@@ -507,10 +507,17 @@ fn get_indices(
507
507
}
508
508
}
509
509
510
- if found_fields. len ( ) != requested_schema. fields . len ( ) {
510
+ if found_fields. len ( ) != requested_schema. num_fields ( ) {
511
511
// some fields are missing, but they might be nullable, need to insert them into the reorder_indices
512
512
for ( requested_position, field) in requested_schema. fields ( ) . enumerate ( ) {
513
513
if !found_fields. contains ( field. name ( ) ) {
514
+ if let Some ( metadata_spec) = field. get_metadata_column_spec ( ) {
515
+ // We don't support reading any metadata columns yet
516
+ // TODO: Implement row index support for the Parquet reader
517
+ return Err ( Error :: Generic ( format ! (
518
+ "Metadata column {metadata_spec:?} is not supported by the default parquet reader"
519
+ ) ) ) ;
520
+ }
514
521
if field. nullable {
515
522
debug ! ( "Inserting missing and nullable field: {}" , field. name( ) ) ;
516
523
reorder_indices. push ( ReorderIndex :: missing (
@@ -582,11 +589,12 @@ fn match_parquet_fields<'k, 'p>(
582
589
// Map the parquet ArrowField to the matching kernel KernelFieldInfo if present.
583
590
let kernel_field_info =
584
591
kernel_schema
585
- . fields
586
- . get_full ( field_name)
587
- . map ( |( idx, _name, field) | KernelFieldInfo {
588
- parquet_index : idx,
589
- field,
592
+ . field_with_index ( field_name)
593
+ . and_then ( |( idx, field) | {
594
+ ( !field. is_metadata_column ( ) ) . then_some ( KernelFieldInfo {
595
+ parquet_index : idx,
596
+ field,
597
+ } )
590
598
} ) ;
591
599
592
600
MatchedParquetField {
@@ -1533,6 +1541,41 @@ mod tests {
1533
1541
] ) )
1534
1542
}
1535
1543
1544
/// Checks that `match_parquet_fields` yields an entry for every parquet field, but leaves
/// `kernel_field_info` empty when the same-named kernel field is a metadata column.
#[test]
fn test_match_parquet_fields_filters_metadata_columns() {
    use crate::schema::MetadataColumnSpec;

    // Kernel-side schema: a metadata column sandwiched between two ordinary columns.
    let kernel_schema = StructType::new_unchecked([
        StructField::not_null("regular_field", DataType::INTEGER),
        StructField::create_metadata_column("row_index", MetadataColumnSpec::RowIndex),
        StructField::nullable("another_field", DataType::STRING),
    ]);

    // Parquet-side fields: one per kernel field name, including one named like the
    // metadata column.
    let parquet_fields: ArrowFields = vec![
        ArrowField::new("regular_field", ArrowDataType::Int32, false),
        ArrowField::new("row_index", ArrowDataType::Int64, false),
        ArrowField::new("another_field", ArrowDataType::Utf8, true),
    ]
    .into();

    let matched_fields: Vec<_> = match_parquet_fields(&kernel_schema, &parquet_fields).collect();

    // Guard the length first so the zip below cannot silently skip an expectation.
    assert_eq!(matched_fields.len(), 3);

    // (parquet field name, whether kernel_field_info is expected to be present), in
    // parquet order. Only the metadata column is expected to come back without info.
    let expectations = [
        ("regular_field", true),
        ("row_index", false),
        ("another_field", true),
    ];
    for (matched, (expected_name, expects_kernel_info)) in
        matched_fields.iter().zip(expectations)
    {
        assert_eq!(matched.kernel_field_info.is_some(), expects_kernel_info);
        assert_eq!(matched.parquet_field.name(), expected_name);
    }
}
1578
+
1536
1579
#[ test]
1537
1580
fn nested_indices ( ) {
1538
1581
column_mapping_cases ( ) . into_iter ( ) . for_each ( |mode| {
0 commit comments