
Commit dab5841

feat: update to DataFusion 50, pyo3 24, pyo3-arrow 0.11
Signed-off-by: Andrew Lamb <[email protected]>
1 parent 538a90c commit dab5841

8 files changed: +62 additions, -24 deletions


Cargo.toml

Lines changed: 41 additions & 17 deletions
@@ -26,32 +26,32 @@ debug = true
 debug = "line-tables-only"

 [workspace.dependencies]
-delta_kernel = { version = "0.15.1", features = [
-    "arrow-55",
+delta_kernel = { version = "0.15.2", features = [
+    "arrow-56",
     "default-engine-rustls",
     "internal-api",
 ] }


 # arrow
-arrow = { version = "55.2.0" }
-arrow-arith = { version = "55.2.0" }
-arrow-array = { version = "55.2.0", features = ["chrono-tz"] }
-arrow-buffer = { version = "55.2.0" }
-arrow-cast = { version = "55.2.0" }
-arrow-ipc = { version = "55.2.0" }
-arrow-json = { version = "55.2.0" }
-arrow-ord = { version = "55.2.0" }
-arrow-row = { version = "55.2.0" }
-arrow-schema = { version = "55.2.0" }
-arrow-select = { version = "55.2.0" }
+arrow = { version = "56.0.0" }
+arrow-arith = { version = "56.0.0" }
+arrow-array = { version = "56.0.0", features = ["chrono-tz"] }
+arrow-buffer = { version = "56.0.0" }
+arrow-cast = { version = "56.0.0" }
+arrow-ipc = { version = "56.0.0" }
+arrow-json = { version = "56.0.0" }
+arrow-ord = { version = "56.0.0" }
+arrow-row = { version = "56.0.0" }
+arrow-schema = { version = "56.0.0" }
+arrow-select = { version = "56.0.0" }
 object_store = { version = "0.12.1" }
-parquet = { version = "55.2.0" }
+parquet = { version = "56.0.0" }

 # datafusion
-datafusion = "49.0.0"
-datafusion-ffi = "49.0.0"
-datafusion-proto = "49.0.0"
+datafusion = "50.0.0"
+datafusion-ffi = "50.0.0"
+datafusion-proto = "50.0.0"

 # serde
 serde = { version = "1.0.194", features = ["derive"] }
@@ -94,3 +94,27 @@ Arro3 = "Arro3"
 AKS = "AKS"
 # to avoid using 'type' as a field name.
 tpe = "tpe"
+
+[patch.crates-io]
+datafusion = { git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
+datafusion-expr = { git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
+datafusion-common = { git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
+datafusion-ffi = { git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
+datafusion-functions = { git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
+datafusion-functions-aggregate = { git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
+datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
+datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
+datafusion-proto = { git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
+datafusion-sql = { git = "https://github.com/apache/datafusion.git", branch = "branch-50" }
+
+# datafusion
+# datafusion = { path = "/Users/andrewlamb/Software/datafusion2/datafusion/core" }
+# datafusion-expr = { path = "/Users/andrewlamb/Software/datafusion2/datafusion/expr" }
+# datafusion-common = { path = "/Users/andrewlamb/Software/datafusion2/datafusion/common" }
+# datafusion-ffi = { path = "/Users/andrewlamb/Software/datafusion2/datafusion/ffi" }
+# datafusion-functions = { path = "/Users/andrewlamb/Software/datafusion2/datafusion/functions" }
+# datafusion-functions-aggregate = { path = "/Users/andrewlamb/Software/datafusion2/datafusion/functions-aggregate" }
+# datafusion-physical-expr = { path = "/Users/andrewlamb/Software/datafusion2/datafusion/physical-expr" }
+# datafusion-physical-plan = { path = "/Users/andrewlamb/Software/datafusion2/datafusion/physical-plan" }
+# datafusion-proto = { path = "/Users/andrewlamb/Software/datafusion2/datafusion/proto" }
+# datafusion-sql = { path = "/Users/andrewlamb/Software/datafusion2/datafusion/sql" }
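Note on the new [patch.crates-io] block: it redirects every DataFusion crate in the dependency graph to the upstream branch-50 git branch, presumably because the 50.0.0 release had not yet been published to crates.io when this commit was made. Once 50.0.0 lands on crates.io, the patch block (and the commented-out local-path overrides below it) can be dropped; running `cargo tree -i datafusion` is a quick way to confirm which source the crate is actually resolved from.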

crates/core/src/delta_datafusion/expr.rs

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@ use crate::{DeltaResult, DeltaTableError};

 /// This struct is like Datafusion's MakeArray but ensures that `element` is used rather than `item
 /// as the field name within the list.
-#[derive(Debug)]
+#[derive(Debug, Hash, PartialEq, Eq)]
 struct MakeParquetArray {
     /// The actual upstream UDF, which we're just totally cheating and using
     actual: MakeArray,
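The Hash, PartialEq, Eq derives here (and on ZOrderUDF in optimize.rs below) suggest that DataFusion 50 expects UDF implementations to be comparable and hashable. A minimal, self-contained sketch of what the derives provide, using a hypothetical MyUdf stand-in rather than the real delta-rs types:

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

// Hypothetical stand-in for a UDF wrapper such as MakeParquetArray; the real
// struct wraps DataFusion's MakeArray, omitted here to keep the sketch
// self-contained.
#[derive(Debug, Hash, PartialEq, Eq)]
struct MyUdf {
    name: &'static str,
}

fn main() {
    let a = MyUdf { name: "make_array" };
    let b = MyUdf { name: "make_array" };
    // With the derives in place, two instances with the same fields compare
    // equal and hash to the same value, which is what a planner needs in
    // order to compare or deduplicate function instances.
    assert_eq!(a, b);

    let mut ha = DefaultHasher::new();
    a.hash(&mut ha);
    let mut hb = DefaultHasher::new();
    b.hash(&mut hb);
    assert_eq!(ha.finish(), hb.finish());
}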

crates/core/src/delta_datafusion/mod.rs

Lines changed: 5 additions & 3 deletions
@@ -389,6 +389,8 @@ pub(crate) fn get_null_of_arrow_type(t: &ArrowDataType) -> DeltaResult<ScalarVal
        )),
        //Unsupported types...
        ArrowDataType::Float16
+       | ArrowDataType::Decimal32(_, _)
+       | ArrowDataType::Decimal64(_, _)
        | ArrowDataType::Decimal256(_, _)
        | ArrowDataType::Union(_, _)
        | ArrowDataType::LargeList(_)
@@ -2185,10 +2187,10 @@ mod tests {
        assert_eq!("a", small.iter().next().unwrap().unwrap());

        let expected = vec![
-           ObjectStoreOperation::GetRange(LocationType::Data, 4952..4960),
-           ObjectStoreOperation::GetRange(LocationType::Data, 2399..4952),
+           ObjectStoreOperation::GetRange(LocationType::Data, 957..965),
+           ObjectStoreOperation::GetRange(LocationType::Data, 326..957),
            #[expect(clippy::single_range_in_vec_init)]
-           ObjectStoreOperation::GetRanges(LocationType::Data, vec![4..58]),
+           ObjectStoreOperation::GetRanges(LocationType::Data, vec![4..46]),
        ];
        let mut actual = Vec::new();
        operations.recv_many(&mut actual, 3).await;
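Two separate adjustments in this file. The new Decimal32/Decimal64 match arms exist because arrow 56 adds those DataType variants, so an exhaustive match over ArrowDataType has to name them; the changed byte ranges in the test presumably reflect parquet 56 writing a slightly different (smaller) file for the same data, which shifts the footer and metadata offsets. A tiny sketch of the match-arm implication, assuming only the arrow_schema::DataType::Decimal32/Decimal64 variants visible in this diff and a hypothetical is_unsupported helper:

use arrow_schema::DataType;

// Returns true for types this hypothetical writer refuses to handle. A fully
// exhaustive `match` over DataType (like the one in get_null_of_arrow_type)
// will not compile against arrow 56 until the new variants are named.
fn is_unsupported(t: &DataType) -> bool {
    matches!(
        t,
        DataType::Float16
            | DataType::Decimal32(_, _)
            | DataType::Decimal64(_, _)
            | DataType::Decimal256(_, _)
    )
}

fn main() {
    assert!(is_unsupported(&DataType::Decimal32(9, 2)));
    assert!(!is_unsupported(&DataType::Int32));
}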

crates/core/src/kernel/scalars.rs

Lines changed: 2 additions & 0 deletions
@@ -250,6 +250,8 @@ impl ScalarExt for Scalar {
                ))
            }
            Float16
+           | Decimal32(_, _)
+           | Decimal64(_, _)
            | Decimal256(_, _)
            | List(_)
            | LargeList(_)

crates/core/src/operations/optimize.rs

Lines changed: 1 addition & 1 deletion
@@ -1111,7 +1111,7 @@ pub(super) mod zorder {
    }

    // DataFusion UDF impl for zorder_key
-   #[derive(Debug)]
+   #[derive(Debug, Hash, PartialEq, Eq)]
    pub struct ZOrderUDF;

    impl ScalarUDFImpl for ZOrderUDF {

python/Cargo.toml

Lines changed: 2 additions & 2 deletions
@@ -20,7 +20,7 @@ doc = false
 [dependencies]
 delta_kernel.workspace = true

-pyo3-arrow = { version = "0.9.0", default-features = false }
+pyo3-arrow = { version = "0.11.0", default-features = false }

 # arrow
 arrow-schema = { workspace = true, features = ["serde"] }
@@ -63,7 +63,7 @@ jemallocator = { version = "0.5", features = [
 jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }

 [dependencies.pyo3]
-version = "0.24.0"
+version = "0.25.1"
 features = ["extension-module", "abi3", "abi3-py39"]

 [dependencies.deltalake]
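One small mismatch worth noting: the manifest moves pyo3 to 0.25.1 (and pyo3-arrow to 0.11.0), while the commit title mentions "pyo3 24"; the manifests, not the title, are what the build resolves. The existing abi3-py39 feature keeps the extension module on the stable CPython ABI with Python 3.9 as the minimum version across these upgrades.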

python/src/datafusion.rs

Lines changed: 4 additions & 0 deletions
@@ -175,6 +175,10 @@ mod tests {
    }

    impl LazyBatchGenerator for TestBatchGenerator {
+       fn as_any(&self) -> &dyn Any {
+           self
+       }
+
        fn generate_next_batch(&mut self) -> DataFusionResult<Option<RecordBatch>> {
            if self.current_index < self.data.len() {
                let batch = self.data[self.current_index].clone();

python/src/writer.rs

Lines changed: 6 additions & 0 deletions
@@ -1,4 +1,6 @@
 //! This module contains helper functions to create a LazyTableProvider from an ArrowArrayStreamReader
+
+use std::any::Any;
 use std::fmt::{self};
 use std::sync::{Arc, Mutex};

@@ -66,6 +68,10 @@ impl ArrowStreamBatchGenerator {
 }

 impl LazyBatchGenerator for ArrowStreamBatchGenerator {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
     fn generate_next_batch(
         &mut self,
     ) -> deltalake::datafusion::error::Result<Option<deltalake::arrow::array::RecordBatch>> {
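Both LazyBatchGenerator implementations (here and in python/src/datafusion.rs above) gain an as_any method, which suggests DataFusion 50 added it to the trait so callers can downcast a dyn LazyBatchGenerator back to its concrete type. A minimal sketch of that pattern, using a local stand-in trait and a hypothetical generator rather than the real DataFusion trait:

use std::any::Any;

// Stand-in trait mirroring only the shape relevant here; the real
// LazyBatchGenerator trait lives in DataFusion and has more methods.
trait Generator {
    fn as_any(&self) -> &dyn Any;
}

// Hypothetical generator type used purely for illustration.
struct CountingGenerator {
    remaining: usize,
}

impl Generator for CountingGenerator {
    // Returning `self` as `&dyn Any` is all an implementation needs to do;
    // the interesting part is what callers can then do with it.
    fn as_any(&self) -> &dyn Any {
        self
    }
}

fn main() {
    let g: Box<dyn Generator> = Box::new(CountingGenerator { remaining: 3 });
    // A caller holding only the trait object can recover the concrete type.
    if let Some(concrete) = g.as_any().downcast_ref::<CountingGenerator>() {
        assert_eq!(concrete.remaining, 3);
    }
}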

0 commit comments
