@@ -21,10 +21,6 @@ use crate::{
21
21
RowVisitor , Version ,
22
22
} ;
23
23
24
- /// Type alias for an iterator of [`EngineData`] results.
25
- type EngineDataResultIterator < ' a > =
26
- Box < dyn Iterator < Item = DeltaResult < Box < dyn EngineData > > > + Send + ' a > ;
27
-
28
24
/// The minimal (i.e., mandatory) fields in an add action.
29
25
pub ( crate ) static MANDATORY_ADD_FILE_SCHEMA : LazyLock < SchemaRef > = LazyLock :: new ( || {
30
26
Arc :: new ( StructType :: new ( vec ! [
@@ -99,6 +95,18 @@ fn with_row_tracking_cols(schema: &SchemaRef) -> SchemaRef {
99
95
Arc :: new ( StructType :: new ( fields) )
100
96
}
101
97
98
+ /// Type alias for the complex return type of prepare_row_tracking_metadata:
99
+ /// - Row tracking domain metadata actions (system metadata with "delta.rowTracking" domain)
100
+ /// - Input schema for add actions (with row tracking columns)
101
+ /// - Output schema for add actions (with row tracking columns)
102
+ /// - Extended add files metadata (with row tracking columns appended)
103
+ type RowTrackingMetadata < ' a > = (
104
+ Vec < DeltaResult < Box < dyn EngineData > > > ,
105
+ SchemaRef ,
106
+ SchemaRef ,
107
+ Box < dyn Iterator < Item = DeltaResult < Box < dyn EngineData > > > + Send + ' a > ,
108
+ ) ;
109
+
102
110
/// A transaction represents an in-progress write to a table. After creating a transaction, changes
103
111
/// to the table may be staged via the transaction methods before calling `commit` to commit the
104
112
/// changes to the table.
@@ -204,22 +212,36 @@ impl Transaction {
204
212
205
213
// Step 3: Generate add actions with or without row tracking metadata
206
214
let commit_version = self . read_snapshot . version ( ) + 1 ;
207
- let add_actions = if self
215
+
216
+ let ( row_tracking_domain_metadata_actions, add_actions) : (
217
+ Vec < _ > ,
218
+ Box < dyn Iterator < Item = _ > + Send > ,
219
+ ) = if self
208
220
. read_snapshot
209
221
. table_configuration ( )
210
222
. should_write_row_tracking ( )
211
223
{
212
- self . generate_adds_with_row_tracking ( engine, commit_version) ?
224
+ let ( domain_metadata, input_schema, output_schema, extended_add_files_metadata) =
225
+ self . prepare_row_tracking_metadata ( engine, commit_version) ?;
226
+
227
+ let add_actions = self . generate_adds (
228
+ engine,
229
+ extended_add_files_metadata,
230
+ input_schema,
231
+ output_schema,
232
+ ) ;
233
+ ( domain_metadata, Box :: new ( add_actions) )
213
234
} else {
214
- self . generate_adds (
235
+ let add_actions = self . generate_adds (
215
236
engine,
216
- self . add_files_metadata . iter ( ) . map ( |a| Ok ( a. deref ( ) ) ) ,
237
+ self . add_files_metadata . iter ( ) . map ( |a| Ok ( a. as_ref ( ) ) ) ,
217
238
add_files_schema ( ) . clone ( ) ,
218
239
as_log_add_schema ( with_stats_col ( mandatory_add_file_schema ( ) ) ) ,
219
- )
240
+ ) ;
241
+ ( vec ! [ ] , Box :: new ( add_actions) )
220
242
} ;
221
243
222
- let domain_metadata_actions = Self :: generate_domain_metadata_actions (
244
+ let user_domain_metadata_actions = Self :: generate_domain_metadata_actions (
223
245
engine,
224
246
& self . domain_metadatas ,
225
247
& self . read_snapshot ,
@@ -228,6 +250,9 @@ impl Transaction {
228
250
// Step 4: Commit the actions as a JSON file to the Delta log
229
251
let commit_path =
230
252
ParsedLogPath :: new_commit ( self . read_snapshot . table_root ( ) , commit_version) ?;
253
+ let domain_metadata_actions =
254
+ user_domain_metadata_actions. chain ( row_tracking_domain_metadata_actions) ;
255
+
231
256
let actions = iter:: once ( commit_info_action)
232
257
. chain ( set_transaction_actions)
233
258
. chain ( add_actions)
@@ -375,14 +400,14 @@ impl Transaction {
375
400
add_files_metadata : I ,
376
401
input_schema : SchemaRef ,
377
402
output_schema : SchemaRef ,
378
- ) -> EngineDataResultIterator < ' a >
403
+ ) -> impl Iterator < Item = DeltaResult < Box < dyn EngineData > > > + Send + ' a
379
404
where
380
405
I : Iterator < Item = DeltaResult < T > > + Send + ' a ,
381
406
T : Deref < Target = dyn EngineData > + Send + ' a ,
382
407
{
383
408
let evaluation_handler = engine. evaluation_handler ( ) ;
384
409
385
- Box :: new ( add_files_metadata. map ( move |add_files_batch| {
410
+ add_files_metadata. map ( move |add_files_batch| {
386
411
// Convert stats to a JSON string and nest the add action in a top-level struct
387
412
let adds_expr = Expression :: struct_from ( [ Expression :: transform (
388
413
Transform :: new_top_level ( ) . with_replaced_field (
@@ -396,19 +421,31 @@ impl Transaction {
396
421
output_schema. clone ( ) . into ( ) ,
397
422
) ;
398
423
adds_evaluator. evaluate ( add_files_batch?. deref ( ) )
399
- } ) )
424
+ } )
400
425
}
401
426
402
- /// Extend file metadata provided by the engine with row tracking information and convert them into
403
- /// protocol-compliant add actions.
404
- fn generate_adds_with_row_tracking < ' a > (
427
+ /// When row tracking is enabled, prepare row tracking domain metadata and add files metadata with row tracking columns.
428
+ ///
429
+ /// Returns:
430
+ /// - Row tracking domain metadata actions (system metadata with "delta.rowTracking" domain)
431
+ /// - Input schema for add actions (with row tracking columns)
432
+ /// - Output schema for add actions (with row tracking columns)
433
+ /// - Extended add files metadata (with row tracking columns appended)
434
+ fn prepare_row_tracking_metadata < ' a > (
405
435
& ' a self ,
406
436
engine : & dyn Engine ,
407
437
commit_version : u64 ,
408
- ) -> DeltaResult < EngineDataResultIterator < ' a > > {
409
- // Return early if we have nothing to add
438
+ ) -> DeltaResult < RowTrackingMetadata < ' a > > {
439
+ // There is nothing to add
410
440
if self . add_files_metadata . is_empty ( ) {
411
- return Ok ( Box :: new ( iter:: empty ( ) ) ) ;
441
+ return Ok ( (
442
+ vec ! [ ] ,
443
+ with_row_tracking_cols ( add_files_schema ( ) ) ,
444
+ as_log_add_schema ( with_row_tracking_cols ( & with_stats_col (
445
+ mandatory_add_file_schema ( ) ,
446
+ ) ) ) ,
447
+ Box :: new ( iter:: empty ( ) ) ,
448
+ ) ) ;
412
449
}
413
450
414
451
// Read the current rowIdHighWaterMark from the snapshot's row tracking domain metadata
@@ -426,14 +463,12 @@ impl Transaction {
426
463
base_row_id_batches. push ( row_tracking_visitor. base_row_ids . clone ( ) ) ;
427
464
}
428
465
429
- // Generate a domain metadata action based on the final high water mark
430
466
let domain_metadata = DomainMetadata :: try_from ( RowTrackingDomainMetadata :: new (
431
467
row_tracking_visitor. row_id_high_water_mark ,
432
468
) ) ?;
433
469
let domain_metadata_action =
434
470
domain_metadata. into_engine_data ( get_log_domain_metadata_schema ( ) . clone ( ) , engine) ;
435
471
436
- // Create an iterator that pairs each add action with its row tracking metadata
437
472
let extended_add_files_metadata =
438
473
self . add_files_metadata . iter ( ) . zip ( base_row_id_batches) . map (
439
474
move |( add_files_batch, base_row_ids) | {
@@ -450,19 +485,13 @@ impl Transaction {
450
485
} ,
451
486
) ;
452
487
453
- // Generate add actions including row tracking metadata
454
- let add_actions = self . generate_adds (
455
- engine,
456
- extended_add_files_metadata,
488
+ Ok ( (
489
+ vec ! [ domain_metadata_action] ,
457
490
with_row_tracking_cols ( add_files_schema ( ) ) ,
458
491
as_log_add_schema ( with_row_tracking_cols ( & with_stats_col (
459
492
mandatory_add_file_schema ( ) ,
460
493
) ) ) ,
461
- ) ;
462
-
463
- // Return a chained iterator with add and domain metadata actions
464
- Ok ( Box :: new (
465
- add_actions. chain ( iter:: once ( domain_metadata_action) ) ,
494
+ Box :: new ( extended_add_files_metadata) ,
466
495
) )
467
496
}
468
497
}
0 commit comments