@@ -5,7 +5,8 @@ use std::ops::Range;
 use std::sync::Arc;
 
 use crate::arrow::array::builder::{MapBuilder, MapFieldNames, StringBuilder};
-use crate::arrow::array::{BooleanArray, Int64Array, RecordBatch, StringArray};
+use crate::arrow::array::{BooleanArray, Int64Array, RecordBatch, StringArray, StructArray};
+use crate::arrow::datatypes::{DataType, Field};
 use crate::parquet::arrow::arrow_reader::{
     ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder,
 };
@@ -24,6 +25,7 @@ use crate::engine::arrow_utils::{fixup_parquet_read, generate_mask, get_requeste
 use crate::engine::default::executor::TaskExecutor;
 use crate::engine::parquet_row_group_skipping::ParquetRowGroupSkipping;
 use crate::schema::SchemaRef;
+use crate::transaction::add_files_schema;
 use crate::{
     DeltaResult, EngineData, Error, FileDataReadResultIterator, FileMeta, ParquetHandler,
     PredicateRef,
@@ -55,7 +57,10 @@ impl DataFileMetadata {
         }
     }
 
-    // convert DataFileMetadata into a record batch which matches the 'add_files_schema' schema
+    /// Convert DataFileMetadata into a record batch which matches the schema returned by
+    /// [`add_files_schema`].
+    ///
+    /// [`add_files_schema`]: crate::transaction::add_files_schema
     fn as_record_batch(
         &self,
         partition_values: &HashMap<String, String>,
@@ -70,8 +75,6 @@ impl DataFileMetadata {
             },
             num_records,
         } = self;
-        let add_files_schema = crate::transaction::add_files_schema();
-
         // create the record batch of the write metadata
         let path = Arc::new(StringArray::from(vec![location.to_string()]));
         let key_builder = StringBuilder::new();
@@ -95,17 +98,22 @@ impl DataFileMetadata {
         let size = Arc::new(Int64Array::from(vec![size]));
         let data_change = Arc::new(BooleanArray::from(vec![data_change]));
         let modification_time = Arc::new(Int64Array::from(vec![*last_modified]));
-        let num_records = Arc::new(Int64Array::from(vec![*num_records as i64]));
+        let stats = Arc::new(StructArray::try_new_with_length(
+            vec![Field::new("numRecords", DataType::Int64, true)].into(),
+            vec![Arc::new(Int64Array::from(vec![*num_records as i64]))],
+            None,
+            1,
+        )?);
 
         Ok(Box::new(ArrowEngineData::new(RecordBatch::try_new(
-            Arc::new(add_files_schema.as_ref().try_into_arrow()?),
+            Arc::new(add_files_schema().as_ref().try_into_arrow()?),
             vec![
                 path,
                 partitions,
                 size,
                 modification_time,
                 data_change,
-                num_records,
+                stats,
             ],
         )?)))
     }
@@ -502,6 +510,14 @@ mod tests {
         partition_values_builder.values().append_value("a");
         partition_values_builder.append(true).unwrap();
         let partition_values = partition_values_builder.finish();
+        let stats_struct = StructArray::try_new_with_length(
+            vec![Field::new("numRecords", DataType::Int64, true)].into(),
+            vec![Arc::new(Int64Array::from(vec![num_records as i64]))],
+            None,
+            1,
+        )
+        .unwrap();
+
         let expected = RecordBatch::try_new(
             schema,
             vec![
@@ -510,7 +526,7 @@ mod tests {
                 Arc::new(Int64Array::from(vec![size as i64])),
                 Arc::new(Int64Array::from(vec![last_modified])),
                 Arc::new(BooleanArray::from(vec![data_change])),
-                Arc::new(Int64Array::from(vec![num_records as i64])),
+                Arc::new(stats_struct),
             ],
         )
         .unwrap();
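
For context on what this diff changes: the flat `numRecords: Int64` column is replaced by a one-row `stats` struct column whose single nullable `numRecords` child matches the `stats` field of `add_files_schema`. Below is a minimal standalone sketch (not part of the diff) of that construction, using the `arrow` crate directly rather than the crate-internal re-exports; the `main` scaffolding, the value `42`, and the read-back assertion are illustrative assumptions only.

```rust
use std::sync::Arc;

use arrow::array::{Array, Int64Array, StructArray};
use arrow::datatypes::{DataType, Field};
use arrow::error::ArrowError;

fn main() -> Result<(), ArrowError> {
    let num_records: usize = 42; // hypothetical row count for one data file

    // One-row struct column with a single nullable `numRecords` child.
    // `try_new_with_length` takes the child fields, the child arrays, an
    // optional struct-level null buffer, and an explicit row count.
    let stats = StructArray::try_new_with_length(
        vec![Field::new("numRecords", DataType::Int64, true)].into(),
        vec![Arc::new(Int64Array::from(vec![num_records as i64]))],
        None, // no struct-level nulls: the stats struct itself is present
        1,    // exactly one row, mirroring the single-file record batch
    )?;

    // Reading the value back out of the struct column.
    let child = stats
        .column_by_name("numRecords")
        .expect("child field exists")
        .as_any()
        .downcast_ref::<Int64Array>()
        .expect("child is Int64");
    assert_eq!(child.value(0), 42);
    Ok(())
}
```

Passing the length explicitly (rather than `StructArray::try_new`, which infers it from the children) keeps the construction well-defined even if the struct ever carried zero child fields.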