From 9d3335bebd68699078a76a1f1d8549d3427f9141 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Fri, 5 Sep 2025 13:05:23 +0200 Subject: [PATCH 01/39] init tokio runtime --- Cargo.lock | 724 ++++++++++++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 4 +- src/lib.rs | 148 ++++++++--- 3 files changed, 832 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6cdaad3..2d99f53 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -98,6 +98,9 @@ name = "anyhow" version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +dependencies = [ + "backtrace", +] [[package]] name = "apache-avro" @@ -277,13 +280,37 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0f477b951e452a0b6b4a10b53ccd569042d1d01729b519e02074a9c0958a063" +[[package]] +name = "async-channel" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" +dependencies = [ + "concurrent-queue", + "event-listener 2.5.3", + "futures-core", +] + +[[package]] +name = "async-compression" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "977eb15ea9efd848bb8a4a1a2500347ed7f0bf794edf0dc3ddcf439f43d36b23" +dependencies = [ + "compression-codecs", + "compression-core", + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "async-lock" version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff6e472cdea888a4bd64f342f09b3f50e1886d32afe8df3d663c01140b811b18" dependencies = [ - "event-listener", + "event-listener 5.4.0", "event-listener-strategy", "pin-project-lite", ] @@ -308,6 +335,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "atty" version = "0.2.14" @@ -325,6 +358,17 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "backoff" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" +dependencies = [ + "getrandom 0.2.16", + "instant", + "rand 0.8.5", +] + [[package]] name = "backon" version = "1.5.2" @@ -581,6 +625,34 @@ dependencies = [ "os_str_bytes", ] +[[package]] +name = "cmake" +version = "0.1.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" +dependencies = [ + "cc", +] + +[[package]] +name = "compression-codecs" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "485abf41ac0c8047c07c87c72c8fb3eb5197f6e9d7ded615dfd1a00ae00a0f64" +dependencies = [ + "compression-core", + "flate2", + "memchr", + "zstd", + "zstd-safe", +] + +[[package]] +name = "compression-core" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e47641d3deaf41fb1538ac1f54735925e275eaf3bf4d55c81b137fba797e5cbb" + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -616,6 +688,16 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -676,6 +758,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -739,6 +830,12 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" +[[package]] +name = "data-encoding" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" + [[package]] name = "deranged" version = "0.4.0" @@ -830,6 +927,24 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "endian-type" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" + +[[package]] +name = "enum-as-inner" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.104", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -846,6 +961,12 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + [[package]] name = "event-listener" version = "5.4.0" @@ -863,7 +984,7 @@ version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" dependencies = [ - "event-listener", + "event-listener 5.4.0", "pin-project-lite", ] @@ -900,16 +1021,44 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" dependencies = [ "crc32fast", + "libz-ng-sys", "libz-rs-sys", "miniz_oxide", ] +[[package]] +name = "flume" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" +dependencies = [ + "futures-core", + "futures-sink", + "nanorand", + "spin", +] + [[package]] name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1083,6 +1232,25 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "h2" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap 2.10.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "half" version = "2.6.0" @@ -1152,6 +1320,51 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hickory-proto" +version = "0.24.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92652067c9ce6f66ce53cc38d1169daa36e6e7eb7dd3b63b5103bd9d97117248" +dependencies = [ + "async-trait", + "cfg-if", + "data-encoding", + "enum-as-inner", + "futures-channel", + "futures-io", + "futures-util", + "idna", + "ipnet", + "once_cell", + "rand 0.8.5", + "thiserror 1.0.69", + "tinyvec", + "tokio", + "tracing", + "url", +] + +[[package]] +name = "hickory-resolver" +version = "0.24.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbb117a1ca520e111743ab2f6688eddee69db4e0ea242545a604dce8a66fd22e" +dependencies = [ + "cfg-if", + "futures-util", + "hickory-proto", + "ipconfig", + "lru-cache", + "once_cell", + "parking_lot", + "rand 0.8.5", + "resolv-conf", + "smallvec", + "thiserror 1.0.69", + "tokio", + "tracing", +] + [[package]] name = "hmac" version = "0.12.1" @@ -1210,6 +1423,12 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "humantime" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" + [[package]] name = "hyper" version = "1.6.0" @@ -1219,6 +1438,7 @@ dependencies = [ "bytes", "futures-channel", "futures-util", + "h2", "http", "http-body", "httparse", @@ -1239,6 +1459,7 @@ dependencies = [ "hyper", "hyper-util", "rustls", + "rustls-native-certs", "rustls-pki-types", "tokio", "tokio-rustls", @@ -1357,11 +1578,10 @@ dependencies = [ "arrow-array", "arrow-ipc", "cbindgen", - "futures", "iceberg", "libc", + "object_store_ffi", "tempfile", - "tokio", ] [[package]] @@ -1499,6 +1719,15 @@ dependencies = [ "serde", ] +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", +] + [[package]] name = "integer-encoding" version = "3.0.4" @@ -1516,6 +1745,18 @@ dependencies = [ "libc", ] +[[package]] +name = "ipconfig" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b58db92f96b720de98181bbbe63c831e87005ab460c1bf306eb2622b4707997f" +dependencies = [ + "socket2 0.5.10", + "widestring", + "windows-sys 0.48.0", + "winreg", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -1673,6 +1914,16 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" +[[package]] +name = "libz-ng-sys" +version = "1.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7118c2c2a3c7b6edc279a8b19507672b9c4d716f95e671172dfa4e23f9fd824" +dependencies = [ + "cmake", + "libc", +] + [[package]] name = "libz-rs-sys" version = "0.5.1" @@ -1682,6 +1933,12 @@ dependencies = [ "zlib-rs", ] +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linux-raw-sys" version = "0.9.4" @@ -1723,6 +1980,15 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "lru-cache" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e24f1ad8321ca0e8a1e0ac13f23cb668e6f5466c2c57319f6a5cf1cc8e3b1c" +dependencies = [ + "linked-hash-map", +] + [[package]] name = "lru-slab" version = "0.1.2" @@ -1763,6 +2029,35 @@ version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +[[package]] +name = "metrics" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3045b4193fbdc5b5681f32f11070da9be3609f189a79f3390706d42587f46bb5" +dependencies = [ + "ahash 0.8.12", + "portable-atomic", +] + +[[package]] +name = "metrics-util" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4259040465c955f9f2f1a4a8a16dc46726169bca0f88e8fb2dbeced487c3e828" +dependencies = [ + "aho-corasick", + "crossbeam-epoch", + "crossbeam-utils", + "hashbrown 0.14.5", + "indexmap 2.10.0", + "metrics", + "num_cpus", + "ordered-float 4.6.0", + "quanta", + "radix_trie", + "sketches-ddsketch", +] + [[package]] name = "miniz_oxide" version = "0.8.9" @@ -1793,7 +2088,7 @@ dependencies = [ "crossbeam-channel", "crossbeam-epoch", "crossbeam-utils", - "event-listener", + "event-listener 5.4.0", "futures-util", "loom", "parking_lot", @@ -1811,6 +2106,24 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b" +[[package]] +name = "nanorand" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" +dependencies = [ + "getrandom 0.2.16", +] + +[[package]] +name = "nibble_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" +dependencies = [ + "smallvec", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -1921,6 +2234,79 @@ dependencies = [ "memchr", ] +[[package]] +name = "object_store" +version = "0.11.2" +source = "git+https://github.com/RelationalAI/arrow-rs.git?tag=v0.11.3-beta1#fa77acbd1e5e3acbf0824443b2c1d1df8609b457" +dependencies = [ + "async-trait", + "base64", + "bytes", + "chrono", + "futures", + "httparse", + "humantime", + "hyper", + "itertools", + "md-5", + "parking_lot", + "percent-encoding", + "quick-xml", + "rand 0.8.5", + "reqwest", + "ring", + "serde", + "serde_json", + "snafu", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "object_store_ffi" +version = "0.12.3" +dependencies = [ + "anyhow", + "async-channel", + "async-compression", + "async-trait", + "backoff", + "base64", + "bytes", + "chrono", + "crossbeam-queue", + "flate2", + "flume", + "futures-util", + "hickory-resolver", + "hyper", + "metrics", + "metrics-util", + "moka", + "object_store", + "once_cell", + "openssl", + "pin-project", + "quanta", + "rand 0.8.5", + "regex", + "reqwest", + "serde", + "serde_json", + "serde_path_to_error", + "thiserror 1.0.69", + "tokio", + "tokio-util", + "tracing", + "tracing-subscriber", + "url", + "uuid", + "walkdir", + "zeroize", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -1955,6 +2341,60 @@ dependencies = [ "uuid", ] +[[package]] +name = "openssl" +version = "0.10.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8" +dependencies = [ + "bitflags 2.9.1", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.104", +] + +[[package]] +name = "openssl-probe" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" + +[[package]] +name = "openssl-src" +version = "300.5.2+3.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d270b79e2926f5150189d475bc7e9d2c69f9c4697b185fa917d5a32b792d21b4" +dependencies = [ + "cc", +] + +[[package]] +name = "openssl-sys" +version = "0.9.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571" +dependencies = [ + "cc", + "libc", + "openssl-src", + "pkg-config", + "vcpkg", +] + [[package]] name = "ordered-float" version = "2.10.1" @@ -2071,6 +2511,26 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "pin-project" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.104", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -2163,6 +2623,21 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" +[[package]] +name = "quanta" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3ab5a9d756f0d97bdc89019bd2e4ea098cf9cde50ee7564dde6b81ccc8f06c7" +dependencies = [ + "crossbeam-utils", + "libc", + "once_cell", + "raw-cpuid", + "wasi 0.11.1+wasi-snapshot-preview1", + "web-sys", + "winapi", +] + [[package]] name = "quick-xml" version = "0.37.5" @@ -2249,6 +2724,16 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +[[package]] +name = "radix_trie" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +dependencies = [ + "endian-type", + "nibble_vec", +] + [[package]] name = "rand" version = "0.8.5" @@ -2308,6 +2793,15 @@ dependencies = [ "getrandom 0.3.3", ] +[[package]] +name = "raw-cpuid" +version = "11.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186" +dependencies = [ + "bitflags 2.9.1", +] + [[package]] name = "redox_syscall" version = "0.5.17" @@ -2435,6 +2929,8 @@ dependencies = [ "bytes", "futures-core", "futures-util", + "h2", + "hickory-resolver", "http", "http-body", "http-body-util", @@ -2443,10 +2939,12 @@ dependencies = [ "hyper-util", "js-sys", "log", + "once_cell", "percent-encoding", "pin-project-lite", "quinn", "rustls", + "rustls-native-certs", "rustls-pki-types", "serde", "serde_json", @@ -2466,6 +2964,12 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "resolv-conf" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95325155c684b1c89f7765e30bc1c42e4a6da51ca513615660cb8a62ef9a88e3" + [[package]] name = "ring" version = "0.17.14" @@ -2599,6 +3103,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pki-types" version = "1.12.0" @@ -2632,6 +3148,24 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "schannel" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" +dependencies = [ + "windows-sys 0.59.0", +] + [[package]] name = "schemars" version = "0.9.0" @@ -2674,6 +3208,29 @@ version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" +[[package]] +name = "security-framework" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80fb1d92c5028aa318b4b8bd7302a5bfcf48be96a37fc6fc790f806b0004ee0c" +dependencies = [ + "bitflags 2.9.1", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "semver" version = "1.0.26" @@ -2727,6 +3284,16 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59fab13f937fa393d08645bf3a84bdfe86e296747b506ada67bb15f10f218b2a" +dependencies = [ + "itoa", + "serde", +] + [[package]] name = "serde_repr" version = "0.1.20" @@ -2834,6 +3401,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" +[[package]] +name = "sketches-ddsketch" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85636c14b73d81f541e525f585c0a2109e6744e1565b5c1668e31c70c10ed65c" + [[package]] name = "slab" version = "0.4.10" @@ -2846,6 +3419,27 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "snafu" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e84b3f4eacbf3a1ce05eac6763b4d629d60cbc94d632e4092c54ade71f1e1a2" +dependencies = [ + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.104", +] + [[package]] name = "snap" version = "1.1.1" @@ -2872,6 +3466,15 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -3171,7 +3774,6 @@ dependencies = [ "io-uring", "libc", "mio", - "parking_lot", "pin-project-lite", "signal-hook-registry", "slab", @@ -3292,9 +3894,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.104", +] + [[package]] name = "tracing-core" version = "0.1.34" @@ -3439,12 +4053,28 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -3582,6 +4212,12 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "widestring" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd7cf3379ca1aac9eea11fba24fd7e315d621f8dfe35c8d7d2be8b793726e07d" + [[package]] name = "winapi" version = "0.3.9" @@ -3715,6 +4351,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + [[package]] name = "windows-sys" version = "0.52.0" @@ -3742,6 +4387,21 @@ dependencies = [ "windows-targets 0.53.3", ] +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -3784,6 +4444,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -3796,6 +4462,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -3808,6 +4480,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -3832,6 +4510,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -3844,6 +4528,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -3856,6 +4546,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -3868,6 +4564,12 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -3889,6 +4591,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "wit-bindgen-rt" version = "0.39.0" diff --git a/Cargo.toml b/Cargo.toml index 0db631d..0272076 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,12 +12,14 @@ cbindgen = "0.26" [dependencies] iceberg = "0.6.0" +object_store_ffi = { path = "../object_store_ffi" } tokio = { version = "1.0", features = ["full"] } -libc = "0.2" futures = "0.3" +libc = "0.2" anyhow = "1.0" arrow-array = "55.2.0" arrow-ipc = "55.2.0" +tracing-subscriber = "0.3" [dev-dependencies] tempfile = "3.0" diff --git a/src/lib.rs b/src/lib.rs index ce10979..7095bbf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,4 @@ -use std::ffi::{CStr, CString}; -use std::os::raw::c_char; +use std::ffi::{CStr, CString, c_char, c_void}; use std::ptr; use std::sync::Mutex; use std::sync::OnceLock; @@ -11,13 +10,44 @@ use futures::stream::StreamExt; use iceberg::io::FileIOBuilder; use iceberg::table::StaticTable; use iceberg::TableIdent; -use std::env; use tokio::runtime::Runtime; // cbindgen annotations #[allow(non_camel_case_types)] #[allow(non_snake_case)] +// Global runtime using OnceLock for thread safety +static RUNTIME: OnceLock = OnceLock::new(); + +// Configuration for iceberg runtime - much simpler than object_store_ffi +#[derive(Copy, Clone)] +#[repr(C)] +pub struct IcebergConfig { + n_threads: usize, +} + +impl Default for IcebergConfig { + fn default() -> Self { + IcebergConfig { + n_threads: 0, // 0 means use tokio's default + } + } +} + +// Result type +#[repr(C)] +pub enum IcebergResult { + IcebergOk = 0, + IcebergError = -1, + IcebergNullPointer = -2, + IcebergIoError = -3, + IcebergInvalidTable = -4, + IcebergEndOfStream = -5, +} + +// Callback types for Julia integration +type PanicCallback = unsafe extern "C" fn() -> i32; + // Internal structures for Rust implementation struct IcebergTableInternal { table: iceberg::table::Table, @@ -29,15 +59,6 @@ struct IcebergScanInternal { stream: Option>>>, } -// Global Tokio runtime using OnceLock for thread safety -// TODO: Might want to share tokio runtime between here and object_store_ffi.jl, e.g., -// by passing object store in and using its runtime. -static RUNTIME: OnceLock = OnceLock::new(); - -fn get_runtime() -> &'static Runtime { - RUNTIME.get_or_init(|| Runtime::new().expect("Failed to create Tokio runtime")) -} - // Thread-local error storage thread_local! { static LAST_ERROR: std::cell::RefCell> = std::cell::RefCell::new(None); @@ -55,6 +76,10 @@ fn clear_error() { }); } +fn get_runtime() -> &'static Runtime { + RUNTIME.get().expect("iceberg runtime not initialized - call iceberg_init_runtime first") +} + // C API structures #[repr(C)] pub struct IcebergTable { @@ -70,21 +95,10 @@ pub struct IcebergScan { pub struct ArrowBatch { pub data: *const u8, pub length: usize, - pub rust_ptr: *mut std::ffi::c_void, -} - -#[repr(C)] -pub enum IcebergResult { - IcebergOk = 0, - IcebergError = -1, - IcebergNullPointer = -2, - IcebergIoError = -3, - IcebergInvalidTable = -4, - IcebergEndOfStream = -5, + pub rust_ptr: *mut c_void, } // Helper function to create ArrowBatch from RecordBatch -// TODO: This should be zero-copy... fn serialize_record_batch(batch: RecordBatch) -> Result { let buffer = Vec::new(); let mut stream_writer = StreamWriter::try_new(buffer, &batch.schema())?; @@ -95,7 +109,7 @@ fn serialize_record_batch(batch: RecordBatch) -> Result { let boxed_data = Box::new(serialized_data); let data_ptr = boxed_data.as_ptr(); let length = boxed_data.len(); - let rust_ptr = Box::into_raw(boxed_data) as *mut std::ffi::c_void; + let rust_ptr = Box::into_raw(boxed_data) as *mut c_void; Ok(ArrowBatch { data: data_ptr, @@ -104,6 +118,63 @@ fn serialize_record_batch(batch: RecordBatch) -> Result { }) } +// Initialize iceberg runtime - modeled after object_store_ffi but simpler +#[no_mangle] +pub extern "C" fn iceberg_init_runtime( + config: IcebergConfig, + panic_callback: PanicCallback, +) -> IcebergResult { + // Set up panic hook + let prev = std::panic::take_hook(); + std::panic::set_hook(Box::new(move |info| { + prev(info); + unsafe { panic_callback() }; + })); + + // Set up logging if not already configured + if std::env::var("RUST_LOG").is_err() { + unsafe { std::env::set_var("RUST_LOG", "iceberg_rust_ffi=warn,iceberg=warn") } + } + + // Initialize tracing subscriber + let _ = tracing_subscriber::fmt::try_init(); + + // Build tokio runtime + let mut rt_builder = tokio::runtime::Builder::new_multi_thread(); + rt_builder.enable_all(); + + // Configure Julia thread adoption if needed + rt_builder.on_thread_start(|| { + // Note: We might need Julia thread adoption here in the future + // For now, we'll keep it simple + }); + + // Set number of worker threads + if config.n_threads > 0 { + rt_builder.worker_threads(config.n_threads); + } + + // Create and store the runtime + let runtime = rt_builder.build() + .map_err(|e| { + set_error(format!("Failed to create tokio runtime: {}", e)); + e + }).ok(); + + match runtime { + Some(rt) => { + match RUNTIME.set(rt) { + Ok(_) => IcebergResult::IcebergOk, + Err(_) => { + set_error("Runtime was already initialized".to_string()); + IcebergResult::IcebergError + } + } + }, + None => IcebergResult::IcebergError + } +} + // C API functions #[no_mangle] pub extern "C" fn iceberg_table_open( @@ -138,37 +209,28 @@ pub extern "C" fn iceberg_table_open( } }; - // TODO: Perhaps we should have full asynchronicity that includes the caller code (e.g. Julia) instead of blocking here. + // Use the iceberg runtime for async operations let result: Result = get_runtime().block_on(async { - // println!("DEBUG: Table path: {}", path_str); - // println!("DEBUG: Metadata path: {}", metadata_path_str); - - // Construct the full S3 path by combining table_path and metadata_path + // Construct the full metadata path let full_metadata_path = if metadata_path_str.starts_with('/') { - // If metadata_path starts with /, it's absolute, so use it as is metadata_path_str.to_string() } else { - // Otherwise, combine table_path with metadata_path let table_path_trimmed = path_str.trim_end_matches('/'); let metadata_path_trimmed = metadata_path_str.trim_start_matches('/'); format!("{}/{}", table_path_trimmed, metadata_path_trimmed) }; - // println!("DEBUG: Full metadata file path: {}", full_metadata_path); - - let _ = env::var("AWS_ACCESS_KEY_ID").expect("AWS_ACCESS_KEY_ID must be set"); - // Create file IO for S3 let file_io = FileIOBuilder::new("s3").build()?; // Create table identifier let table_ident = TableIdent::from_strs(["default", "table"])?; + // Load the static table let static_table = StaticTable::from_metadata_file(&full_metadata_path, table_ident, file_io).await?; let iceberg_table = static_table.into_table(); - Ok(iceberg_table) }); @@ -294,6 +356,7 @@ pub extern "C" fn iceberg_scan_next_batch( if scan_ref.stream.is_none() { if let Some(table) = &scan_ref.table { let columns = scan_ref.columns.clone(); + let stream_result = get_runtime().block_on(async { let mut scan_builder = table.scan(); @@ -394,3 +457,14 @@ pub extern "C" fn iceberg_error_message() -> *const c_char { } }) } + +// Utility function for cleanup +#[no_mangle] +pub extern "C" fn iceberg_destroy_cstring(string: *mut c_char) -> IcebergResult { + if !string.is_null() { + unsafe { + let _ = CString::from_raw(string); + } + } + IcebergResult::IcebergOk +} \ No newline at end of file From 922008ed72bafa1b16a72194f1c229ee7c0f0a18 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Fri, 5 Sep 2025 13:37:36 +0200 Subject: [PATCH 02/39] use macro for async ops, and reuse RT and RESULT_CB --- src/lib.rs | 535 +++++++++++++++++++++++++---------------------------- 1 file changed, 253 insertions(+), 282 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7095bbf..e1db583 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,6 @@ use std::ffi::{CStr, CString, c_char, c_void}; use std::ptr; use std::sync::Mutex; -use std::sync::OnceLock; use anyhow::Result; use arrow_array::RecordBatch; @@ -10,16 +9,22 @@ use futures::stream::StreamExt; use iceberg::io::FileIOBuilder; use iceberg::table::StaticTable; use iceberg::TableIdent; -use tokio::runtime::Runtime; + +// Import from object_store_ffi +use object_store_ffi::{ + RT, RESULT_CB, ResultCallback, + CResult, Context, RawResponse, ResponseGuard, + with_cancellation, export_runtime_op, destroy_cstring, current_metrics +}; // cbindgen annotations #[allow(non_camel_case_types)] #[allow(non_snake_case)] -// Global runtime using OnceLock for thread safety -static RUNTIME: OnceLock = OnceLock::new(); +// Callback types for Julia integration +type PanicCallback = unsafe extern "C" fn() -> i32; -// Configuration for iceberg runtime - much simpler than object_store_ffi +// Simple config for iceberg - only what we need #[derive(Copy, Clone)] #[repr(C)] pub struct IcebergConfig { @@ -34,68 +39,119 @@ impl Default for IcebergConfig { } } -// Result type +// Direct structures - no opaque wrappers #[repr(C)] -pub enum IcebergResult { - IcebergOk = 0, - IcebergError = -1, - IcebergNullPointer = -2, - IcebergIoError = -3, - IcebergInvalidTable = -4, - IcebergEndOfStream = -5, +pub struct IcebergTable { + pub table: iceberg::table::Table, } -// Callback types for Julia integration -type PanicCallback = unsafe extern "C" fn() -> i32; - -// Internal structures for Rust implementation -struct IcebergTableInternal { - table: iceberg::table::Table, +#[repr(C)] +pub struct IcebergScan { + pub table: Option, + pub columns: Option>, + pub stream: Option>>>, } -struct IcebergScanInternal { - table: Option, - columns: Option>, - stream: Option>>>, +#[repr(C)] +pub struct ArrowBatch { + pub data: *const u8, + pub length: usize, + pub rust_ptr: *mut c_void, } -// Thread-local error storage -thread_local! { - static LAST_ERROR: std::cell::RefCell> = std::cell::RefCell::new(None); +// Response types for async operations +#[repr(C)] +pub struct IcebergTableResponse { + result: CResult, + table: *mut IcebergTable, + error_message: *mut c_char, + context: *const Context, } -fn set_error(error: String) { - LAST_ERROR.with(|e| { - *e.borrow_mut() = Some(error); - }); -} +unsafe impl Send for IcebergTableResponse {} -fn clear_error() { - LAST_ERROR.with(|e| { - *e.borrow_mut() = None; - }); +impl RawResponse for IcebergTableResponse { + type Payload = *mut IcebergTable; + fn result_mut(&mut self) -> &mut CResult { + &mut self.result + } + fn context_mut(&mut self) -> &mut *const Context { + &mut self.context + } + fn error_message_mut(&mut self) -> &mut *mut c_char { + &mut self.error_message + } + fn set_payload(&mut self, payload: Option) { + match payload { + Some(table_ptr) => self.table = table_ptr, + None => self.table = ptr::null_mut(), + } + } } -fn get_runtime() -> &'static Runtime { - RUNTIME.get().expect("iceberg runtime not initialized - call iceberg_init_runtime first") +#[repr(C)] +pub struct IcebergScanResponse { + result: CResult, + scan: *mut IcebergScan, + error_message: *mut c_char, + context: *const Context, } -// C API structures -#[repr(C)] -pub struct IcebergTable { - _private: [u8; 0], // Opaque type for C +unsafe impl Send for IcebergScanResponse {} + +impl RawResponse for IcebergScanResponse { + type Payload = *mut IcebergScan; + fn result_mut(&mut self) -> &mut CResult { + &mut self.result + } + fn context_mut(&mut self) -> &mut *const Context { + &mut self.context + } + fn error_message_mut(&mut self) -> &mut *mut c_char { + &mut self.error_message + } + fn set_payload(&mut self, payload: Option) { + match payload { + Some(scan_ptr) => self.scan = scan_ptr, + None => self.scan = ptr::null_mut(), + } + } } #[repr(C)] -pub struct IcebergScan { - _private: [u8; 0], // Opaque type for C +pub struct IcebergBatchResponse { + result: CResult, + batch: *mut ArrowBatch, + end_of_stream: bool, + error_message: *mut c_char, + context: *const Context, } -#[repr(C)] -pub struct ArrowBatch { - pub data: *const u8, - pub length: usize, - pub rust_ptr: *mut c_void, +unsafe impl Send for IcebergBatchResponse {} + +impl RawResponse for IcebergBatchResponse { + type Payload = (*mut ArrowBatch, bool); + fn result_mut(&mut self) -> &mut CResult { + &mut self.result + } + fn context_mut(&mut self) -> &mut *const Context { + &mut self.context + } + fn error_message_mut(&mut self) -> &mut *mut c_char { + &mut self.error_message + } + fn set_payload(&mut self, payload: Option) { + match payload { + Some((batch_ptr, is_end)) => { + self.batch = batch_ptr; + self.end_of_stream = is_end; + } + None => { + self.batch = ptr::null_mut(); + self.end_of_stream = false; + } + } + } } // Helper function to create ArrowBatch from RecordBatch @@ -118,12 +174,18 @@ fn serialize_record_batch(batch: RecordBatch) -> Result { }) } -// Initialize iceberg runtime - modeled after object_store_ffi but simpler +// Initialize runtime - configure RT and RESULT_CB directly #[no_mangle] pub extern "C" fn iceberg_init_runtime( config: IcebergConfig, panic_callback: PanicCallback, -) -> IcebergResult { + result_callback: ResultCallback, +) -> CResult { + // Set the result callback + if let Err(_) = RESULT_CB.set(result_callback) { + return CResult::Error; // Already initialized + } + // Set up panic hook let prev = std::panic::take_hook(); std::panic::set_hook(Box::new(move |info| { @@ -143,79 +205,48 @@ pub extern "C" fn iceberg_init_runtime( let mut rt_builder = tokio::runtime::Builder::new_multi_thread(); rt_builder.enable_all(); - // Configure Julia thread adoption if needed + // Configure Julia thread adoption if needed in the future rt_builder.on_thread_start(|| { - // Note: We might need Julia thread adoption here in the future - // For now, we'll keep it simple + // For future Julia integration }); - // Set number of worker threads if config.n_threads > 0 { rt_builder.worker_threads(config.n_threads); } - // Create and store the runtime let runtime = rt_builder.build() - .map_err(|e| { - set_error(format!("Failed to create tokio runtime: {}", e)); - e - }).ok(); - - match runtime { - Some(rt) => { - match RUNTIME.set(rt) { - Ok(_) => IcebergResult::IcebergOk, - Err(_) => { - set_error("Runtime was already initialized".to_string()); - IcebergResult::IcebergError - } - } - }, - None => IcebergResult::IcebergError - } -} + .map_err(|_| CResult::Error)?; -// C API functions -#[no_mangle] -pub extern "C" fn iceberg_table_open( - table_path: *const c_char, - metadata_path: *const c_char, - table: *mut *mut IcebergTable, -) -> IcebergResult { - if table_path.is_null() || metadata_path.is_null() || table.is_null() { - set_error("Null pointer provided".to_string()); - return IcebergResult::IcebergNullPointer; - } - - clear_error(); + RT.set(runtime) + .map_err(|_| CResult::Error)?; - let path_str = unsafe { - match CStr::from_ptr(table_path).to_str() { - Ok(s) => s, - Err(e) => { - set_error(format!("Invalid UTF-8 in table path: {}", e)); - return IcebergResult::IcebergError; - } - } - }; - - let metadata_path_str = unsafe { - match CStr::from_ptr(metadata_path).to_str() { - Ok(s) => s, - Err(e) => { - set_error(format!("Invalid UTF-8 in metadata path: {}", e)); - return IcebergResult::IcebergError; - } - } - }; + CResult::Ok +} - // Use the iceberg runtime for async operations - let result: Result = get_runtime().block_on(async { +// Use export_runtime_op! macro for table opening +export_runtime_op!( + iceberg_table_open, + IcebergTableResponse, + || { + let table_path_str = unsafe { + CStr::from_ptr(table_path).to_str() + .map_err(|e| anyhow::anyhow!("Invalid UTF-8 in table path: {}", e))? + }; + let metadata_path_str = unsafe { + CStr::from_ptr(metadata_path).to_str() + .map_err(|e| anyhow::anyhow!("Invalid UTF-8 in metadata path: {}", e))? + }; + Ok((table_path_str.to_string(), metadata_path_str.to_string())) + }, + paths, + async { + let (table_path_str, metadata_path_str) = paths; + // Construct the full metadata path let full_metadata_path = if metadata_path_str.starts_with('/') { - metadata_path_str.to_string() + metadata_path_str } else { - let table_path_trimmed = path_str.trim_end_matches('/'); + let table_path_trimmed = table_path_str.trim_end_matches('/'); let metadata_path_trimmed = metadata_path_str.trim_start_matches('/'); format!("{}/{}", table_path_trimmed, metadata_path_trimmed) }; @@ -231,93 +262,142 @@ pub extern "C" fn iceberg_table_open( StaticTable::from_metadata_file(&full_metadata_path, table_ident, file_io).await?; let iceberg_table = static_table.into_table(); - Ok(iceberg_table) - }); - - match result { - Ok(iceberg_table) => { - let table_ptr = Box::into_raw(Box::new(IcebergTableInternal { - table: iceberg_table, - })); - unsafe { - *table = table_ptr as *mut IcebergTable; + + let table_ptr = Box::into_raw(Box::new(IcebergTable { + table: iceberg_table, + })); + + Ok(table_ptr) + }, + table_path: *const c_char, + metadata_path: *const c_char +); + +// Use export_runtime_op! macro for scan creation +export_runtime_op!( + iceberg_table_scan, + IcebergScanResponse, + || { + if table.is_null() { + return Err(anyhow::anyhow!("Null table pointer provided")); + } + let table_ref = unsafe { &*table }; + Ok(table_ref.table.clone()) + }, + iceberg_table, + async { + let scan_ptr = Box::into_raw(Box::new(IcebergScan { + table: Some(iceberg_table), + columns: None, + stream: None, + })); + Ok(scan_ptr) + }, + table: *mut IcebergTable +); + +// Use export_runtime_op! macro for next batch +export_runtime_op!( + iceberg_scan_next_batch, + IcebergBatchResponse, + || { + if scan.is_null() { + return Err(anyhow::anyhow!("Null scan pointer provided")); + } + let scan_ref = unsafe { &mut *scan }; + + // Get or create the stream + if scan_ref.stream.is_none() { + if let Some(table) = &scan_ref.table { + let columns = scan_ref.columns.clone(); + let table_clone = table.clone(); + Ok((table_clone, columns, scan_ref as *mut IcebergScan)) + } else { + Err(anyhow::anyhow!("Table not available")) } - IcebergResult::IcebergOk + } else { + // Stream already exists, just get the scan pointer + let table = scan_ref.table.as_ref().unwrap().clone(); + Ok((table, None, scan_ref as *mut IcebergScan)) } - Err(e) => { - set_error(format!("Failed to open table: {}", e)); - IcebergResult::IcebergError + }, + scan_data, + async { + let (table, columns, scan_ptr) = scan_data; + let scan_ref = unsafe { &mut *scan_ptr }; + + // Initialize stream if not already done + if scan_ref.stream.is_none() { + let mut scan_builder = table.scan(); + + if let Some(cols) = columns { + scan_builder = scan_builder.select(cols); + } + + let table_scan = scan_builder.build()?; + let stream = table_scan.to_arrow().await?; + scan_ref.stream = Some(Mutex::new(stream)); } - } -} + // Get next batch from stream + if let Some(stream_mutex) = &scan_ref.stream { + let result = { + let mut stream = stream_mutex.lock().unwrap(); + stream.next().await + }; + + match result { + Some(Ok(record_batch)) => { + let arrow_batch = serialize_record_batch(record_batch)?; + let batch_ptr = Box::into_raw(Box::new(arrow_batch)); + Ok((batch_ptr, false)) // not end of stream + } + Some(Err(e)) => Err(anyhow::anyhow!("Error reading batch: {}", e)), + None => { + // End of stream + Ok((ptr::null_mut(), true)) // end of stream + } + } + } else { + Err(anyhow::anyhow!("Stream not initialized")) + } + }, + scan: *mut IcebergScan +); + +// Synchronous operations #[no_mangle] pub extern "C" fn iceberg_table_free(table: *mut IcebergTable) { if !table.is_null() { unsafe { - let _ = Box::from_raw(table as *mut IcebergTableInternal); + let _ = Box::from_raw(table); } } } -#[no_mangle] -pub extern "C" fn iceberg_table_scan( - table: *mut IcebergTable, - scan: *mut *mut IcebergScan, -) -> IcebergResult { - if table.is_null() || scan.is_null() { - set_error("Null pointer provided".to_string()); - return IcebergResult::IcebergNullPointer; - } - - clear_error(); - - let table_ref = unsafe { &*(table as *const IcebergTableInternal) }; - - let scan_ptr = Box::into_raw(Box::new(IcebergScanInternal { - table: Some(table_ref.table.clone()), - columns: None, - stream: None, - })); - - unsafe { - *scan = scan_ptr as *mut IcebergScan; - } - - IcebergResult::IcebergOk -} - #[no_mangle] pub extern "C" fn iceberg_scan_select_columns( scan: *mut IcebergScan, column_names: *const *const c_char, num_columns: usize, -) -> IcebergResult { +) -> CResult { if scan.is_null() || column_names.is_null() { - set_error("Null pointer provided".to_string()); - return IcebergResult::IcebergNullPointer; + return CResult::Error; } - clear_error(); - - let scan_ref = unsafe { &mut *(scan as *mut IcebergScanInternal) }; - + let scan_ref = unsafe { &mut *scan }; let mut columns = Vec::new(); for i in 0..num_columns { let col_ptr = unsafe { *column_names.add(i) }; if col_ptr.is_null() { - set_error("Null column name pointer".to_string()); - return IcebergResult::IcebergNullPointer; + return CResult::Error; } let col_str = unsafe { match CStr::from_ptr(col_ptr).to_str() { Ok(s) => s, - Err(e) => { - set_error(format!("Invalid UTF-8 in column name: {}", e)); - return IcebergResult::IcebergError; - } + Err(_) => return CResult::Error, } }; @@ -325,113 +405,18 @@ pub extern "C" fn iceberg_scan_select_columns( } scan_ref.columns = Some(columns); - - IcebergResult::IcebergOk + CResult::Ok } #[no_mangle] pub extern "C" fn iceberg_scan_free(scan: *mut IcebergScan) { if !scan.is_null() { unsafe { - let _ = Box::from_raw(scan as *mut IcebergScanInternal); + let _ = Box::from_raw(scan); } } } -#[no_mangle] -pub extern "C" fn iceberg_scan_next_batch( - scan: *mut IcebergScan, - batch: *mut *mut ArrowBatch, -) -> IcebergResult { - if scan.is_null() || batch.is_null() { - set_error("Null pointer provided".to_string()); - return IcebergResult::IcebergNullPointer; - } - - clear_error(); - - let scan_ref = unsafe { &mut *(scan as *mut IcebergScanInternal) }; - - // Initialize stream if not already done - if scan_ref.stream.is_none() { - if let Some(table) = &scan_ref.table { - let columns = scan_ref.columns.clone(); - - let stream_result = get_runtime().block_on(async { - let mut scan_builder = table.scan(); - - if let Some(cols) = columns { - scan_builder = scan_builder.select(cols); - } - - match scan_builder.build() { - Ok(table_scan) => match table_scan.to_arrow().await { - Ok(stream) => Ok(stream), - Err(e) => { - set_error(format!("Failed to create arrow stream: {}", e)); - Err(e) - } - }, - Err(e) => { - set_error(format!("Failed to build scan: {}", e)); - Err(e) - } - } - }); - - match stream_result { - Ok(stream) => { - scan_ref.stream = Some(Mutex::new(stream)); - } - Err(_) => { - return IcebergResult::IcebergError; - } - } - } else { - set_error("Table not available".to_string()); - return IcebergResult::IcebergError; - } - } - - // Get next batch from stream - if let Some(stream_mutex) = &scan_ref.stream { - let result = get_runtime().block_on(async { - let mut stream = stream_mutex.lock().unwrap(); - stream.next().await - }); - - match result { - Some(Ok(record_batch)) => match serialize_record_batch(record_batch) { - Ok(arrow_batch) => { - let batch_ptr = Box::into_raw(Box::new(arrow_batch)); - unsafe { - *batch = batch_ptr; - } - IcebergResult::IcebergOk - } - Err(e) => { - set_error(format!("Failed to serialize batch: {}", e)); - IcebergResult::IcebergError - } - }, - Some(Err(e)) => { - set_error(format!("Error reading batch: {}", e)); - IcebergResult::IcebergError - } - None => { - // End of stream - unsafe { - *batch = ptr::null_mut(); - } - IcebergResult::IcebergEndOfStream - } - } - } else { - set_error("Stream not initialized".to_string()); - IcebergResult::IcebergError - } -} - #[no_mangle] pub extern "C" fn iceberg_arrow_batch_free(batch: *mut ArrowBatch) { if !batch.is_null() { @@ -444,27 +429,13 @@ pub extern "C" fn iceberg_arrow_batch_free(batch: *mut ArrowBatch) { } } +// Re-export object_store_ffi utilities #[no_mangle] -pub extern "C" fn iceberg_error_message() -> *const c_char { - LAST_ERROR.with(|e| { - if let Some(ref error) = *e.borrow() { - match CString::new(error.clone()) { - Ok(cstring) => cstring.into_raw(), - Err(_) => ptr::null(), - } - } else { - ptr::null() - } - }) +pub extern "C" fn iceberg_destroy_cstring(string: *mut c_char) -> CResult { + destroy_cstring(string) } -// Utility function for cleanup #[no_mangle] -pub extern "C" fn iceberg_destroy_cstring(string: *mut c_char) -> IcebergResult { - if !string.is_null() { - unsafe { - let _ = CString::from_raw(string); - } - } - IcebergResult::IcebergOk +pub extern "C" fn iceberg_current_metrics() -> *const c_char { + current_metrics() } \ No newline at end of file From 8c5e345b150d16376cd70d3bdf57a362b313d5c9 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Fri, 5 Sep 2025 17:18:49 +0200 Subject: [PATCH 03/39] async wait for batch and sync fetch of the buffer --- Cargo.lock | 6 + Cargo.toml | 4 +- include/iceberg_rust_ffi.h | 93 ++++++++----- run_integration_test.sh | 15 ++- src/lib.rs | 270 ++++++++++++++++++++++++++++++------- tests/integration_test.c | 269 +++++++++++++++++++++++------------- 6 files changed, 476 insertions(+), 181 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2d99f53..26f431f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1578,10 +1578,15 @@ dependencies = [ "arrow-array", "arrow-ipc", "cbindgen", + "futures", "iceberg", "libc", "object_store_ffi", + "once_cell", "tempfile", + "tokio", + "tracing", + "tracing-subscriber", ] [[package]] @@ -3774,6 +3779,7 @@ dependencies = [ "io-uring", "libc", "mio", + "parking_lot", "pin-project-lite", "signal-hook-registry", "slab", diff --git a/Cargo.toml b/Cargo.toml index 0272076..66e7f96 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ cbindgen = "0.26" [dependencies] iceberg = "0.6.0" -object_store_ffi = { path = "../object_store_ffi" } +object_store_ffi = { path = "../object_store_ffi", default-features = false } tokio = { version = "1.0", features = ["full"] } futures = "0.3" libc = "0.2" @@ -20,6 +20,8 @@ anyhow = "1.0" arrow-array = "55.2.0" arrow-ipc = "55.2.0" tracing-subscriber = "0.3" +tracing = "0.1" +once_cell = "1.19" [dev-dependencies] tempfile = "3.0" diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index 7fb7a10..9389a1d 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -12,51 +12,80 @@ extern "C" { // Forward declarations typedef struct IcebergTable IcebergTable; typedef struct IcebergScan IcebergScan; +typedef struct Context Context; + +// Configuration for iceberg runtime +typedef struct { + size_t n_threads; +} IcebergConfig; + +// Result types from object_store_ffi +typedef enum { + CRESULT_OK = 0, + CRESULT_ERROR = -1, + CRESULT_BACKOFF = -2, + CRESULT_UNINITIALIZED = -3 +} CResult; // Arrow batch as serialized bytes -typedef struct ArrowBatch { - const uint8_t* data; // Pointer to serialized Arrow IPC data - size_t length; // Length of the data in bytes - void* rust_ptr; // Internal Rust pointer for memory management +typedef struct { + const uint8_t* data; + size_t length; + void* rust_ptr; } ArrowBatch; -typedef enum { - ICEBERG_OK = 0, - ICEBERG_ERROR = -1, - ICEBERG_NULL_POINTER = -2, - ICEBERG_IO_ERROR = -3, - ICEBERG_INVALID_TABLE = -4, - ICEBERG_END_OF_STREAM = -5 -} IcebergResult; - -// Table operations -IcebergResult iceberg_table_open(const char* table_path, const char* metadata_path, IcebergTable** table); +// Response structures for async operations +typedef struct { + CResult result; + IcebergTable* table; + char* error_message; + const Context* context; +} IcebergTableResponse; + +typedef struct { + CResult result; + IcebergScan* scan; + char* error_message; + const Context* context; +} IcebergScanResponse; + +typedef struct { + CResult result; + ArrowBatch* batch; + bool end_of_stream; + char* error_message; + const Context* context; +} IcebergBatchResponse; + +// Callback types +typedef int (*PanicCallback)(); +typedef int (*ResultCallback)(const void* task); + +// Runtime initialization +CResult iceberg_init_runtime(IcebergConfig config, PanicCallback panic_callback, ResultCallback result_callback); + +// Async table operations +CResult iceberg_table_open(const char* table_path, const char* metadata_path, IcebergTableResponse* response, const void* handle); void iceberg_table_free(IcebergTable* table); -// Scan operations -IcebergResult iceberg_table_scan(IcebergTable* table, IcebergScan** scan); -IcebergResult iceberg_scan_select_columns(IcebergScan* scan, const char** column_names, size_t num_columns); +// Async scan operations +CResult iceberg_table_scan(IcebergTable* table, IcebergScanResponse* response, const void* handle); +CResult iceberg_scan_select_columns(IcebergScan* scan, const char** column_names, size_t num_columns); void iceberg_scan_free(IcebergScan* scan); -// Arrow batch operations -IcebergResult iceberg_scan_next_batch(IcebergScan* scan, ArrowBatch** batch); +// Async batch operations +CResult iceberg_scan_next_batch(IcebergScan* scan, IcebergBatchResponse* response, const void* handle); void iceberg_arrow_batch_free(ArrowBatch* batch); -// Error handling -const char* iceberg_error_message(); +// Utility functions +CResult iceberg_destroy_cstring(char* string); +const char* iceberg_current_metrics(); -// Function pointer typedefs for dynamic loading -typedef IcebergResult (*iceberg_table_open_func_t)(const char* table_path, const char* metadata_path, IcebergTable** table); -typedef void (*iceberg_table_free_func_t)(IcebergTable* table); -typedef IcebergResult (*iceberg_table_scan_func_t)(IcebergTable* table, IcebergScan** scan); -typedef IcebergResult (*iceberg_scan_select_columns_func_t)(IcebergScan* scan, const char** column_names, size_t num_columns); -typedef void (*iceberg_scan_free_func_t)(IcebergScan* scan); -typedef IcebergResult (*iceberg_scan_next_batch_func_t)(IcebergScan* scan, ArrowBatch** batch); -typedef void (*iceberg_arrow_batch_free_func_t)(ArrowBatch* batch); -typedef const char* (*iceberg_error_message_func_t)(); +// Backward compatibility +const char* iceberg_error_message(); #ifdef __cplusplus } #endif -#endif // ICEBERG_RUST_FFI_H +#endif // ICEBERG_RUST_FFI_H \ No newline at end of file diff --git a/run_integration_test.sh b/run_integration_test.sh index 1ffca99..78cc309 100755 --- a/run_integration_test.sh +++ b/run_integration_test.sh @@ -84,7 +84,20 @@ fi # Step 3: Run the integration test print_status "Running integration test..." echo "==========================================" -if ./integration_test; then + +# Determine the exact library filename +LIBRARY="" +if [ -f "$LIB_PATH/libiceberg_rust_ffi.dylib" ]; then + LIBRARY="$LIB_PATH/libiceberg_rust_ffi.dylib" +elif [ -f "$LIB_PATH/libiceberg_rust_ffi.so" ]; then + LIBRARY="$LIB_PATH/libiceberg_rust_ffi.so" +else + print_error "Could not find dynamic library" + exit 1 +fi + +print_status "Using library: $LIBRARY" +if ./integration_test "$LIBRARY"; then echo "==========================================" print_success "Integration test completed successfully!" else diff --git a/src/lib.rs b/src/lib.rs index e1db583..b98524b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,6 @@ -use std::ffi::{CStr, CString, c_char, c_void}; +use std::ffi::{CStr, c_char, c_void}; use std::ptr; -use std::sync::Mutex; +use tokio::sync::Mutex as AsyncMutex; use anyhow::Result; use arrow_array::RecordBatch; @@ -13,10 +13,16 @@ use iceberg::TableIdent; // Import from object_store_ffi use object_store_ffi::{ RT, RESULT_CB, ResultCallback, - CResult, Context, RawResponse, ResponseGuard, + CResult, Context, RawResponse, ResponseGuard, NotifyGuard, with_cancellation, export_runtime_op, destroy_cstring, current_metrics }; +// Stream wrapper for FFI - using async mutex to avoid blocking calls +#[repr(C)] +pub struct IcebergStream { + pub stream: AsyncMutex>>, +} + // cbindgen annotations #[allow(non_camel_case_types)] #[allow(non_snake_case)] @@ -49,7 +55,9 @@ pub struct IcebergTable { pub struct IcebergScan { pub table: Option, pub columns: Option>, - pub stream: Option>>>, + pub stream: Option<*mut IcebergStream>, + pub current_batch: Option<*mut ArrowBatch>, + pub end_of_stream: bool, } #[repr(C)] @@ -123,6 +131,7 @@ pub struct IcebergBatchResponse { result: CResult, batch: *mut ArrowBatch, end_of_stream: bool, + new_stream_ptr: *mut IcebergStream, error_message: *mut c_char, context: *const Context, } @@ -130,7 +139,7 @@ pub struct IcebergBatchResponse { unsafe impl Send for IcebergBatchResponse {} impl RawResponse for IcebergBatchResponse { - type Payload = (*mut ArrowBatch, bool); + type Payload = (*mut ArrowBatch, bool, Option<*mut IcebergStream>); fn result_mut(&mut self) -> &mut CResult { &mut self.result } @@ -142,13 +151,15 @@ impl RawResponse for IcebergBatchResponse { } fn set_payload(&mut self, payload: Option) { match payload { - Some((batch_ptr, is_end)) => { + Some((batch_ptr, is_end, stream_ptr)) => { self.batch = batch_ptr; self.end_of_stream = is_end; + self.new_stream_ptr = stream_ptr.unwrap_or(ptr::null_mut()); } None => { self.batch = ptr::null_mut(); self.end_of_stream = false; + self.new_stream_ptr = ptr::null_mut(); } } } @@ -214,11 +225,14 @@ pub extern "C" fn iceberg_init_runtime( rt_builder.worker_threads(config.n_threads); } - let runtime = rt_builder.build() - .map_err(|_| CResult::Error)?; + let runtime = match rt_builder.build() { + Ok(rt) => rt, + Err(_) => return CResult::Error, + }; - RT.set(runtime) - .map_err(|_| CResult::Error)?; + if RT.set(runtime).is_err() { + return CResult::Error; + } CResult::Ok } @@ -258,16 +272,18 @@ export_runtime_op!( let table_ident = TableIdent::from_strs(["default", "table"])?; // Load the static table + tracing::info!("Loading static table from metadata path: {}", full_metadata_path); let static_table = StaticTable::from_metadata_file(&full_metadata_path, table_ident, file_io).await?; + tracing::info!("Successfully loaded static table, converting to table"); let iceberg_table = static_table.into_table(); let table_ptr = Box::into_raw(Box::new(IcebergTable { table: iceberg_table, })); - Ok(table_ptr) + Ok::<*mut IcebergTable, anyhow::Error>(table_ptr) }, table_path: *const c_char, metadata_path: *const c_char @@ -290,81 +306,217 @@ export_runtime_op!( table: Some(iceberg_table), columns: None, stream: None, + current_batch: None, + end_of_stream: false, })); - Ok(scan_ptr) + Ok::<*mut IcebergScan, anyhow::Error>(scan_ptr) }, table: *mut IcebergTable ); -// Use export_runtime_op! macro for next batch +// Async function to wait for next batch with proper stream persistence export_runtime_op!( - iceberg_scan_next_batch, + iceberg_scan_wait_batch, IcebergBatchResponse, || { if scan.is_null() { return Err(anyhow::anyhow!("Null scan pointer provided")); } - let scan_ref = unsafe { &mut *scan }; + let scan_ref = unsafe { &*scan }; - // Get or create the stream - if scan_ref.stream.is_none() { - if let Some(table) = &scan_ref.table { - let columns = scan_ref.columns.clone(); - let table_clone = table.clone(); - Ok((table_clone, columns, scan_ref as *mut IcebergScan)) - } else { - Err(anyhow::anyhow!("Table not available")) - } + // Check if we already have a stream or need to create one + let need_new_stream = scan_ref.stream.is_none(); + + if let Some(table) = &scan_ref.table { + let columns = scan_ref.columns.clone(); + let table_clone = table.clone(); + Ok((table_clone, columns, need_new_stream)) } else { - // Stream already exists, just get the scan pointer - let table = scan_ref.table.as_ref().unwrap().clone(); - Ok((table, None, scan_ref as *mut IcebergScan)) + Err(anyhow::anyhow!("Table not available")) } }, scan_data, async { - let (table, columns, scan_ptr) = scan_data; - let scan_ref = unsafe { &mut *scan_ptr }; + let (table, columns, need_new_stream) = scan_data; - // Initialize stream if not already done - if scan_ref.stream.is_none() { + if need_new_stream { + // Create new stream and get first batch let mut scan_builder = table.scan(); - if let Some(cols) = columns { scan_builder = scan_builder.select(cols); } - + let table_scan = scan_builder.build()?; - let stream = table_scan.to_arrow().await?; - scan_ref.stream = Some(Mutex::new(stream)); - } - - // Get next batch from stream - if let Some(stream_mutex) = &scan_ref.stream { - let result = { - let mut stream = stream_mutex.lock().unwrap(); - stream.next().await - }; - - match result { + let mut stream = table_scan.to_arrow().await?; + + // Get first batch from stream + match stream.next().await { Some(Ok(record_batch)) => { + tracing::info!("Successfully got first batch with {} rows, {} columns", + record_batch.num_rows(), record_batch.num_columns()); let arrow_batch = serialize_record_batch(record_batch)?; let batch_ptr = Box::into_raw(Box::new(arrow_batch)); - Ok((batch_ptr, false)) // not end of stream + + // Create stream wrapper and store it + let iceberg_stream = Box::new(IcebergStream { + stream: AsyncMutex::new(stream), + }); + let stream_ptr = Box::into_raw(iceberg_stream); + + tracing::info!("Created batch and stream pointers successfully"); + Ok((batch_ptr, false, Some(stream_ptr))) + } + Some(Err(e)) => { + tracing::error!("Error reading first batch: {}", e); + Err(anyhow::anyhow!("Error reading batch: {}", e)) } - Some(Err(e)) => Err(anyhow::anyhow!("Error reading batch: {}", e)), None => { - // End of stream - Ok((ptr::null_mut(), true)) // end of stream + // End of stream immediately + tracing::warn!("Stream ended immediately - no data found"); + Ok((ptr::null_mut(), true, None)) } } } else { - Err(anyhow::anyhow!("Stream not initialized")) + // This case means we need to use an existing stream + // We'll handle this case differently - return a marker that indicates existing stream usage + Err(anyhow::anyhow!("USE_EXISTING_STREAM")) } }, scan: *mut IcebergScan ); +// Async function to get next batch from existing stream +export_runtime_op!( + iceberg_scan_wait_batch_existing, + IcebergBatchResponse, + || { + if scan.is_null() { + return Err(anyhow::anyhow!("Null scan pointer provided")); + } + let scan_ref = unsafe { &*scan }; + + if let Some(stream_ptr) = scan_ref.stream { + // Return the stream pointer - we'll dereference it in the async block + Ok(stream_ptr as usize) // Convert to usize to make it Send + } else { + Err(anyhow::anyhow!("No stream available")) + } + }, + stream_ptr_addr, + async { + let stream_ptr = stream_ptr_addr as *mut IcebergStream; + let stream_ref = unsafe { &*stream_ptr }; + + let mut stream_guard = stream_ref.stream.lock().await; + + match stream_guard.next().await { + Some(Ok(record_batch)) => { + let arrow_batch = serialize_record_batch(record_batch)?; + let batch_ptr = Box::into_raw(Box::new(arrow_batch)); + Ok((batch_ptr, false, None)) + } + Some(Err(e)) => Err(anyhow::anyhow!("Error reading batch: {}", e)), + None => { + // End of stream + Ok((ptr::null_mut(), true, None)) + } + } + }, + scan: *mut IcebergScan +); + +// Simplified storage function - just call the right async function +#[no_mangle] +pub extern "C" fn iceberg_scan_wait_batch_with_storage( + scan: *mut IcebergScan, + response: *mut IcebergBatchResponse, + handle: *const c_void, +) -> CResult { + let scan_ref = unsafe { &*scan }; + + // Check if we need to use existing stream or create new one + if scan_ref.stream.is_none() { + // Call the async function for new stream + tracing::info!("Calling async function for new stream"); + iceberg_scan_wait_batch(scan, response, handle) + } else { + // Call the async function for existing stream + tracing::info!("Calling async function for existing stream"); + iceberg_scan_wait_batch_existing(scan, response, handle) + } +} + +// Helper function to store the batch result in the scan after async completion +#[no_mangle] +pub extern "C" fn iceberg_scan_store_batch_result( + scan: *mut IcebergScan, + response: *const IcebergBatchResponse, +) -> CResult { + if scan.is_null() || response.is_null() { + return CResult::Error; + } + + let scan_ref = unsafe { &mut *scan }; + let response_ref = unsafe { &*response }; + + // Store batch in scan + if response_ref.batch.is_null() { + tracing::warn!("Storing NULL batch pointer - end of stream or error"); + scan_ref.current_batch = None; + } else { + tracing::info!("Storing batch pointer {:?} in scan", response_ref.batch); + scan_ref.current_batch = Some(response_ref.batch); + } + scan_ref.end_of_stream = response_ref.end_of_stream; + + // If a new stream was created, store its pointer in scan + if !response_ref.new_stream_ptr.is_null() && scan_ref.stream.is_none() { + tracing::info!("Storing new stream pointer {:?} in scan", response_ref.new_stream_ptr); + scan_ref.stream = Some(response_ref.new_stream_ptr); + } + + CResult::Ok +} + +// Synchronous function to get the current batch from the scan +#[no_mangle] +pub extern "C" fn iceberg_scan_next_batch( + scan: *mut IcebergScan, + response: *mut IcebergBatchResponse, + _handle: *const c_void, +) -> CResult { + if scan.is_null() || response.is_null() { + return CResult::Error; + } + + let scan_ref = unsafe { &mut *scan }; + let response_ref = unsafe { &mut *response }; + + // Initialize response + *response_ref = IcebergBatchResponse { + result: CResult::Ok, + batch: ptr::null_mut(), + end_of_stream: false, + new_stream_ptr: ptr::null_mut(), + error_message: ptr::null_mut(), + context: ptr::null(), + }; + + // Return the current batch from the scan + if let Some(batch_ptr) = scan_ref.current_batch.take() { + tracing::info!("Returning stored batch pointer: {:?}", batch_ptr); + response_ref.batch = batch_ptr; + response_ref.end_of_stream = false; + } else { + tracing::warn!("No current batch stored, end_of_stream: {}", scan_ref.end_of_stream); + // No current batch - either end of stream or need to wait for one + response_ref.batch = ptr::null_mut(); + response_ref.end_of_stream = scan_ref.end_of_stream; + } + + CResult::Ok +} + // Synchronous operations #[no_mangle] pub extern "C" fn iceberg_table_free(table: *mut IcebergTable) { @@ -412,7 +564,15 @@ pub extern "C" fn iceberg_scan_select_columns( pub extern "C" fn iceberg_scan_free(scan: *mut IcebergScan) { if !scan.is_null() { unsafe { - let _ = Box::from_raw(scan); + let scan_ref = Box::from_raw(scan); + // Clean up any current batch + if let Some(batch_ptr) = scan_ref.current_batch { + let _ = Box::from_raw(batch_ptr); + } + // Clean up any stream + if let Some(stream_ptr) = scan_ref.stream { + let _ = Box::from_raw(stream_ptr); + } } } } @@ -429,6 +589,14 @@ pub extern "C" fn iceberg_arrow_batch_free(batch: *mut ArrowBatch) { } } +// Backward compatibility function for error messages +#[no_mangle] +pub extern "C" fn iceberg_error_message() -> *const c_char { + // For backward compatibility, return a generic message + // In the new async API, errors are returned through response structures + b"Error: Use new async API with response structures for detailed error information\0".as_ptr() as *const c_char +} + // Re-export object_store_ffi utilities #[no_mangle] pub extern "C" fn iceberg_destroy_cstring(string: *mut c_char) -> CResult { diff --git a/tests/integration_test.c b/tests/integration_test.c index 57c6f07..d599c83 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -2,20 +2,33 @@ #include #include #include - -// Global function pointers -static iceberg_table_open_func_t iceberg_table_open_func = NULL; -static iceberg_table_free_func_t iceberg_table_free_func = NULL; -static iceberg_table_scan_func_t iceberg_table_scan_func = NULL; -static iceberg_scan_select_columns_func_t iceberg_scan_select_columns_func = NULL; -static iceberg_scan_free_func_t iceberg_scan_free_func = NULL; -static iceberg_scan_next_batch_func_t iceberg_scan_next_batch_func = NULL; -static iceberg_arrow_batch_free_func_t iceberg_arrow_batch_free_func = NULL; -static iceberg_error_message_func_t iceberg_error_message_func = NULL; +#include +#include + +// Global function pointers for new async API +static int (*iceberg_init_runtime_func)(IcebergConfig config, int (*panic_callback)(), int (*result_callback)(const void*)) = NULL; +static int (*iceberg_table_open_func)(const char*, const char*, IcebergTableResponse*, const void*) = NULL; +static int (*iceberg_table_scan_func)(IcebergTable*, IcebergScanResponse*, const void*) = NULL; +static int (*iceberg_scan_next_batch_func)(IcebergScan*, IcebergBatchResponse*, const void*) = NULL; +static void (*iceberg_table_free_func)(IcebergTable*) = NULL; +static void (*iceberg_scan_free_func)(IcebergScan*) = NULL; +static void (*iceberg_arrow_batch_free_func)(ArrowBatch*) = NULL; +static int (*iceberg_destroy_cstring_func)(char*) = NULL; // Library handle static void* lib_handle = NULL; +// Callback implementations +int panic_callback() { + printf("🚨 Rust panic occurred!\n"); + return 1; +} + +int result_callback(const void* task) { + // Simple result callback - in a real implementation this would notify Julia + return 0; +} + // Function to load the library and resolve symbols int load_iceberg_library(const char* library_path) { printf("Loading Iceberg C API library from %s...\n", library_path); @@ -32,52 +45,52 @@ int load_iceberg_library(const char* library_path) { // Clear any existing error dlerror(); - // Resolve function symbols - iceberg_table_open_func = (iceberg_table_open_func_t)dlsym(lib_handle, "iceberg_table_open"); - if (!iceberg_table_open_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_table_open: %s\n", dlerror()); + // Resolve function symbols for new async API + iceberg_init_runtime_func = (int (*)(IcebergConfig, int (*)(), int (*)(const void*)))dlsym(lib_handle, "iceberg_init_runtime"); + if (!iceberg_init_runtime_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_init_runtime: %s\n", dlerror()); return 0; } - iceberg_table_free_func = (iceberg_table_free_func_t)dlsym(lib_handle, "iceberg_table_free"); - if (!iceberg_table_free_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_table_free: %s\n", dlerror()); + iceberg_table_open_func = (int (*)(const char*, const char*, IcebergTableResponse*, const void*))dlsym(lib_handle, "iceberg_table_open"); + if (!iceberg_table_open_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_table_open: %s\n", dlerror()); return 0; } - iceberg_table_scan_func = (iceberg_table_scan_func_t)dlsym(lib_handle, "iceberg_table_scan"); + iceberg_table_scan_func = (int (*)(IcebergTable*, IcebergScanResponse*, const void*))dlsym(lib_handle, "iceberg_table_scan"); if (!iceberg_table_scan_func) { fprintf(stderr, "❌ Failed to resolve iceberg_table_scan: %s\n", dlerror()); return 0; } - iceberg_scan_select_columns_func = (iceberg_scan_select_columns_func_t)dlsym(lib_handle, "iceberg_scan_select_columns"); - if (!iceberg_scan_select_columns_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_scan_select_columns: %s\n", dlerror()); + iceberg_scan_next_batch_func = (int (*)(IcebergScan*, IcebergBatchResponse*, const void*))dlsym(lib_handle, "iceberg_scan_next_batch"); + if (!iceberg_scan_next_batch_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_scan_next_batch: %s\n", dlerror()); return 0; } - iceberg_scan_free_func = (iceberg_scan_free_func_t)dlsym(lib_handle, "iceberg_scan_free"); - if (!iceberg_scan_free_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_scan_free: %s\n", dlerror()); + iceberg_table_free_func = (void (*)(IcebergTable*))dlsym(lib_handle, "iceberg_table_free"); + if (!iceberg_table_free_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_table_free: %s\n", dlerror()); return 0; } - iceberg_scan_next_batch_func = (iceberg_scan_next_batch_func_t)dlsym(lib_handle, "iceberg_scan_next_batch"); - if (!iceberg_scan_next_batch_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_scan_next_batch: %s\n", dlerror()); + iceberg_scan_free_func = (void (*)(IcebergScan*))dlsym(lib_handle, "iceberg_scan_free"); + if (!iceberg_scan_free_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_scan_free: %s\n", dlerror()); return 0; } - iceberg_arrow_batch_free_func = (iceberg_arrow_batch_free_func_t)dlsym(lib_handle, "iceberg_arrow_batch_free"); + iceberg_arrow_batch_free_func = (void (*)(ArrowBatch*))dlsym(lib_handle, "iceberg_arrow_batch_free"); if (!iceberg_arrow_batch_free_func) { fprintf(stderr, "❌ Failed to resolve iceberg_arrow_batch_free: %s\n", dlerror()); return 0; } - iceberg_error_message_func = (iceberg_error_message_func_t)dlsym(lib_handle, "iceberg_error_message"); - if (!iceberg_error_message_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_error_message: %s\n", dlerror()); + iceberg_destroy_cstring_func = (int (*)(char*))dlsym(lib_handle, "iceberg_destroy_cstring"); + if (!iceberg_destroy_cstring_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_destroy_cstring: %s\n", dlerror()); return 0; } @@ -94,8 +107,14 @@ void unload_iceberg_library() { } } +// Helper function to wait for async operation to complete +void wait_for_async_completion() { + // Simple busy wait - in a real implementation you'd use proper synchronization + usleep(100000); // 100ms +} + int main(int argc, char* argv[]) { - printf("Starting Iceberg C API integration test with dynamic loading...\n"); + printf("Starting Iceberg C API integration test with new async API...\n"); // Check for one command line argument (the path to the library) if (argc < 2) { @@ -109,96 +128,154 @@ int main(int argc, char* argv[]) { return 1; } - IcebergTable* table = NULL; - IcebergScan* scan = NULL; + // 1. Initialize the runtime + printf("Initializing Iceberg runtime...\n"); + IcebergConfig config = {0}; // Default config - 0 threads means use default + int result = iceberg_init_runtime_func(config, panic_callback, result_callback); + if (result != CRESULT_OK) { + printf("❌ Failed to initialize runtime\n"); + unload_iceberg_library(); + return 1; + } + printf("✅ Runtime initialized successfully\n"); - // 1. Open table from folder path - const char* table_path = "s3://warehouse/tpch.sf01/nation"; - const char* metadata_path = "metadata/00001-4f9722c5-8764-4988-8063-874c3d453268.metadata.json"; + // 2. Open table using async API + const char* table_path = "s3://vustef-dev/tpch-sf0.1-no-part/nation"; + const char* metadata_path = "metadata/00001-1744d9f4-1472-4f8c-ac86-b0b7c291248e.metadata.json"; printf("Opening table at: %s\n", table_path); printf("Using metadata file: %s\n", metadata_path); - IcebergResult result = iceberg_table_open_func(table_path, metadata_path, &table); - if (result != ICEBERG_OK) { - printf("❌ Failed to open table: %s\n", iceberg_error_message_func()); + IcebergTableResponse table_response = {0}; + result = iceberg_table_open_func(table_path, metadata_path, &table_response, NULL); + + if (result != CRESULT_OK) { + printf("❌ Failed to initiate table open operation\n"); + unload_iceberg_library(); + return 1; + } + + // Wait for async operation to complete + printf("⏳ Waiting for table open to complete...\n"); + wait_for_async_completion(); + + // Check if the operation was successful + if (table_response.result != CRESULT_OK) { + printf("❌ Failed to open table"); + if (table_response.error_message) { + printf(": %s", table_response.error_message); + iceberg_destroy_cstring_func(table_response.error_message); + } + printf("\n"); unload_iceberg_library(); return 1; } + + if (!table_response.table) { + printf("❌ No table returned from open operation\n"); + unload_iceberg_library(); + return 1; + } + printf("✅ Table opened successfully\n"); - // 2. Create a scan - result = iceberg_table_scan_func(table, &scan); - if (result != ICEBERG_OK) { - printf("❌ Failed to create scan: %s\n", iceberg_error_message_func()); - iceberg_table_free_func(table); + // 3. Create a scan using async API + IcebergScanResponse scan_response = {0}; + result = iceberg_table_scan_func(table_response.table, &scan_response, NULL); + + if (result != CRESULT_OK) { + printf("❌ Failed to initiate scan creation\n"); + iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; } - printf("✅ Scan created successfully\n"); - - // 3. Optionally select specific columns (commented out since we don't know schema yet) - // const char* columns[] = {"id", "value"}; - // iceberg_scan_select_columns_func(scan, columns, 2); - - // 4. Iterate through Arrow batches as serialized bytes - int batch_count = 0; - size_t total_bytes = 0; - - while (true) { - ArrowBatch* batch = NULL; - - result = iceberg_scan_next_batch_func(scan, &batch); - - if (result == ICEBERG_END_OF_STREAM) { - printf("✅ Reached end of stream\n"); - break; - } - - if (result != ICEBERG_OK) { - printf("❌ Failed to get next batch: %s\n", iceberg_error_message_func()); - break; + + // Wait for async operation to complete + printf("⏳ Waiting for scan creation to complete...\n"); + wait_for_async_completion(); + + // Check if the operation was successful + if (scan_response.result != CRESULT_OK) { + printf("❌ Failed to create scan"); + if (scan_response.error_message) { + printf(": %s", scan_response.error_message); + iceberg_destroy_cstring_func(scan_response.error_message); } + printf("\n"); + iceberg_table_free_func(table_response.table); + unload_iceberg_library(); + return 1; + } + + if (!scan_response.scan) { + printf("❌ No scan returned from scan creation\n"); + iceberg_table_free_func(table_response.table); + unload_iceberg_library(); + return 1; + } + + printf("✅ Scan created successfully\n"); - if (batch == NULL) { - printf("❌ Received NULL batch\n"); - break; + // 4. Try to get a batch using async API + printf("Attempting to get first batch...\n"); + IcebergBatchResponse batch_response = {0}; + result = iceberg_scan_next_batch_func(scan_response.scan, &batch_response, NULL); + + if (result != CRESULT_OK) { + printf("❌ Failed to initiate batch retrieval\n"); + iceberg_scan_free_func(scan_response.scan); + iceberg_table_free_func(table_response.table); + unload_iceberg_library(); + return 1; + } + + // Wait for async operation to complete + printf("⏳ Waiting for batch retrieval to complete...\n"); + wait_for_async_completion(); + + // Check if the operation was successful + if (batch_response.result != CRESULT_OK) { + printf("❌ Failed to get batch"); + if (batch_response.error_message) { + printf(": %s", batch_response.error_message); + iceberg_destroy_cstring_func(batch_response.error_message); } - - batch_count++; - total_bytes += batch->length; - - printf("📦 Batch %d:\n", batch_count); - printf(" - Serialized size: %zu bytes\n", batch->length); - printf(" - Data pointer: %p\n", (void*)batch->data); + printf("\n"); + iceberg_scan_free_func(scan_response.scan); + iceberg_table_free_func(table_response.table); + unload_iceberg_library(); + return 1; + } + + if (batch_response.end_of_stream) { + printf("✅ Reached end of stream (table might be empty)\n"); + } else if (batch_response.batch) { + printf("✅ Successfully retrieved batch!\n"); + printf("📦 Batch details:\n"); + printf(" - Serialized size: %zu bytes\n", batch_response.batch->length); + printf(" - Data pointer: %p\n", (void*)batch_response.batch->data); printf(" - First few bytes: "); - + // Print first 8 bytes as hex for verification - size_t print_len = (batch->length < 8) ? batch->length : 8; + size_t print_len = (batch_response.batch->length < 8) ? batch_response.batch->length : 8; for (size_t i = 0; i < print_len; i++) { - printf("%02x ", batch->data[i]); + printf("%02x ", batch_response.batch->data[i]); } printf("\n"); - - // This is where you would pass the serialized Arrow data to Julia - // In Julia, you would: - // 1. Create an IOBuffer from the bytes: IOBuffer(unsafe_wrap(Array, batch->data, batch->length)) - // 2. Use Arrow.jl to read: Arrow.Stream(io_buffer) printf(" → Arrow IPC bytes ready for Julia Arrow.Stream()\n"); - - // Free the batch (this calls back to Rust to free memory) - iceberg_arrow_batch_free_func(batch); + + // Free the batch + iceberg_arrow_batch_free_func(batch_response.batch); + } else { + printf("⚠️ No batch data returned\n"); } - printf("📊 Summary:\n"); - printf(" - Total batches: %d\n", batch_count); - printf(" - Total bytes processed: %zu\n", total_bytes); - // 5. Cleanup - iceberg_scan_free_func(scan); - iceberg_table_free_func(table); + printf("Cleaning up resources...\n"); + iceberg_scan_free_func(scan_response.scan); + iceberg_table_free_func(table_response.table); unload_iceberg_library(); printf("✅ Integration test completed successfully!\n"); - printf("🚀 Ready for Julia bindings integration\n"); + printf("🚀 New async API is working correctly\n"); return 0; } \ No newline at end of file From fbee1ea2993a593ba5f1b2a5666c2e7f7ad2bc11 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Fri, 5 Sep 2025 17:31:26 +0200 Subject: [PATCH 04/39] Fix integration test too --- include/iceberg_rust_ffi.h | 3 + run_integration_test.sh | 4 ++ tests/integration_test.c | 135 ++++++++++++++++++++++++++++--------- 3 files changed, 110 insertions(+), 32 deletions(-) diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index 9389a1d..422e6cb 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -53,6 +53,7 @@ typedef struct { CResult result; ArrowBatch* batch; bool end_of_stream; + void* new_stream_ptr; char* error_message; const Context* context; } IcebergBatchResponse; @@ -74,7 +75,9 @@ CResult iceberg_scan_select_columns(IcebergScan* scan, const char** column_names void iceberg_scan_free(IcebergScan* scan); // Async batch operations +CResult iceberg_scan_wait_batch_with_storage(IcebergScan* scan, IcebergBatchResponse* response, const void* handle); CResult iceberg_scan_next_batch(IcebergScan* scan, IcebergBatchResponse* response, const void* handle); +CResult iceberg_scan_store_batch_result(IcebergScan* scan, const IcebergBatchResponse* response); void iceberg_arrow_batch_free(ArrowBatch* batch); // Utility functions diff --git a/run_integration_test.sh b/run_integration_test.sh index 78cc309..40ff9dc 100755 --- a/run_integration_test.sh +++ b/run_integration_test.sh @@ -97,6 +97,10 @@ else fi print_status "Using library: $LIBRARY" +# Pass through RUST_LOG environment variable if set +if [ -n "$RUST_LOG" ]; then + export RUST_LOG="$RUST_LOG" +fi if ./integration_test "$LIBRARY"; then echo "==========================================" print_success "Integration test completed successfully!" diff --git a/tests/integration_test.c b/tests/integration_test.c index d599c83..167f332 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -4,12 +4,15 @@ #include #include #include +#include // Global function pointers for new async API static int (*iceberg_init_runtime_func)(IcebergConfig config, int (*panic_callback)(), int (*result_callback)(const void*)) = NULL; static int (*iceberg_table_open_func)(const char*, const char*, IcebergTableResponse*, const void*) = NULL; static int (*iceberg_table_scan_func)(IcebergTable*, IcebergScanResponse*, const void*) = NULL; +static int (*iceberg_scan_wait_batch_func)(IcebergScan*, IcebergBatchResponse*, const void*) = NULL; static int (*iceberg_scan_next_batch_func)(IcebergScan*, IcebergBatchResponse*, const void*) = NULL; +static int (*iceberg_scan_store_batch_func)(IcebergScan*, const IcebergBatchResponse*) = NULL; static void (*iceberg_table_free_func)(IcebergTable*) = NULL; static void (*iceberg_scan_free_func)(IcebergScan*) = NULL; static void (*iceberg_arrow_batch_free_func)(ArrowBatch*) = NULL; @@ -24,8 +27,11 @@ int panic_callback() { return 1; } +volatile int async_completed = 0; + int result_callback(const void* task) { - // Simple result callback - in a real implementation this would notify Julia + // Signal that async operation completed + async_completed = 1; return 0; } @@ -64,12 +70,24 @@ int load_iceberg_library(const char* library_path) { return 0; } + iceberg_scan_wait_batch_func = (int (*)(IcebergScan*, IcebergBatchResponse*, const void*))dlsym(lib_handle, "iceberg_scan_wait_batch_with_storage"); + if (!iceberg_scan_wait_batch_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_scan_wait_batch_with_storage: %s\n", dlerror()); + return 0; + } + iceberg_scan_next_batch_func = (int (*)(IcebergScan*, IcebergBatchResponse*, const void*))dlsym(lib_handle, "iceberg_scan_next_batch"); if (!iceberg_scan_next_batch_func) { fprintf(stderr, "❌ Failed to resolve iceberg_scan_next_batch: %s\n", dlerror()); return 0; } + iceberg_scan_store_batch_func = (int (*)(IcebergScan*, const IcebergBatchResponse*))dlsym(lib_handle, "iceberg_scan_store_batch_result"); + if (!iceberg_scan_store_batch_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_scan_store_batch_result: %s\n", dlerror()); + return 0; + } + iceberg_table_free_func = (void (*)(IcebergTable*))dlsym(lib_handle, "iceberg_table_free"); if (!iceberg_table_free_func) { fprintf(stderr, "❌ Failed to resolve iceberg_table_free: %s\n", dlerror()); @@ -107,11 +125,6 @@ void unload_iceberg_library() { } } -// Helper function to wait for async operation to complete -void wait_for_async_completion() { - // Simple busy wait - in a real implementation you'd use proper synchronization - usleep(100000); // 100ms -} int main(int argc, char* argv[]) { printf("Starting Iceberg C API integration test with new async API...\n"); @@ -122,6 +135,14 @@ int main(int argc, char* argv[]) { return 1; } + // Check if environment variables are set + printf("Environment variables:\n"); + printf(" AWS_ACCESS_KEY_ID: %s\n", getenv("AWS_ACCESS_KEY_ID") ? "SET" : "NOT SET"); + printf(" AWS_SECRET_ACCESS_KEY: %s\n", getenv("AWS_SECRET_ACCESS_KEY") ? "SET" : "NOT SET"); + printf(" AWS_DEFAULT_REGION: %s\n", getenv("AWS_DEFAULT_REGION") ? getenv("AWS_DEFAULT_REGION") : "NOT SET"); + printf(" AWS_ENDPOINT_URL: %s\n", getenv("AWS_ENDPOINT_URL") ? getenv("AWS_ENDPOINT_URL") : "NOT SET"); + + // Load the library if (!load_iceberg_library(argv[1])) { fprintf(stderr, "Failed to load Iceberg library\n"); @@ -146,7 +167,8 @@ int main(int argc, char* argv[]) { printf("Using metadata file: %s\n", metadata_path); IcebergTableResponse table_response = {0}; - result = iceberg_table_open_func(table_path, metadata_path, &table_response, NULL); + async_completed = 0; // Reset flag + result = iceberg_table_open_func(table_path, metadata_path, &table_response, &async_completed); if (result != CRESULT_OK) { printf("❌ Failed to initiate table open operation\n"); @@ -156,11 +178,21 @@ int main(int argc, char* argv[]) { // Wait for async operation to complete printf("⏳ Waiting for table open to complete...\n"); - wait_for_async_completion(); + int timeout = 100; // 10 second timeout + while (!async_completed && timeout > 0) { + usleep(100000); // 100ms + timeout--; + } + + if (!async_completed) { + printf("❌ Async operation timed out\n"); + unload_iceberg_library(); + return 1; + } // Check if the operation was successful if (table_response.result != CRESULT_OK) { - printf("❌ Failed to open table"); + printf("❌ Failed to open table (result=%d)", table_response.result); if (table_response.error_message) { printf(": %s", table_response.error_message); iceberg_destroy_cstring_func(table_response.error_message); @@ -180,7 +212,8 @@ int main(int argc, char* argv[]) { // 3. Create a scan using async API IcebergScanResponse scan_response = {0}; - result = iceberg_table_scan_func(table_response.table, &scan_response, NULL); + async_completed = 0; // Reset flag + result = iceberg_table_scan_func(table_response.table, &scan_response, &async_completed); if (result != CRESULT_OK) { printf("❌ Failed to initiate scan creation\n"); @@ -191,7 +224,18 @@ int main(int argc, char* argv[]) { // Wait for async operation to complete printf("⏳ Waiting for scan creation to complete...\n"); - wait_for_async_completion(); + timeout = 100; // 10 second timeout + while (!async_completed && timeout > 0) { + usleep(100000); // 100ms + timeout--; + } + + if (!async_completed) { + printf("❌ Scan creation async operation timed out\n"); + iceberg_table_free_func(table_response.table); + unload_iceberg_library(); + return 1; + } // Check if the operation was successful if (scan_response.result != CRESULT_OK) { @@ -215,56 +259,83 @@ int main(int argc, char* argv[]) { printf("✅ Scan created successfully\n"); - // 4. Try to get a batch using async API - printf("Attempting to get first batch...\n"); + // 4. Try to get a batch using new two-step async API + printf("Step 1: Waiting for batch asynchronously...\n"); IcebergBatchResponse batch_response = {0}; - result = iceberg_scan_next_batch_func(scan_response.scan, &batch_response, NULL); + async_completed = 0; // Reset flag + result = iceberg_scan_wait_batch_func(scan_response.scan, &batch_response, &async_completed); + + if (result == CRESULT_OK) { + // Wait for async operation to complete + timeout = 100; // 10 second timeout + while (!async_completed && timeout > 0) { + usleep(100000); // 100ms + timeout--; + } + + if (!async_completed) { + printf("❌ Batch wait async operation timed out\n"); + iceberg_scan_free_func(scan_response.scan); + iceberg_table_free_func(table_response.table); + unload_iceberg_library(); + return 1; + } + } if (result != CRESULT_OK) { - printf("❌ Failed to initiate batch retrieval\n"); + printf("❌ Failed to wait for batch\n"); + if (batch_response.error_message) { + printf(" Error: %s\n", batch_response.error_message); + iceberg_destroy_cstring_func(batch_response.error_message); + } iceberg_scan_free_func(scan_response.scan); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; } - // Wait for async operation to complete - printf("⏳ Waiting for batch retrieval to complete...\n"); - wait_for_async_completion(); + // Store the batch result in the scan + result = iceberg_scan_store_batch_func(scan_response.scan, &batch_response); + if (result != CRESULT_OK) { + printf("❌ Failed to store batch result\n"); + iceberg_scan_free_func(scan_response.scan); + iceberg_table_free_func(table_response.table); + unload_iceberg_library(); + return 1; + } + + printf("Step 2: Retrieving stored batch synchronously...\n"); + IcebergBatchResponse sync_batch_response = {0}; + result = iceberg_scan_next_batch_func(scan_response.scan, &sync_batch_response, NULL); // Check if the operation was successful - if (batch_response.result != CRESULT_OK) { - printf("❌ Failed to get batch"); - if (batch_response.error_message) { - printf(": %s", batch_response.error_message); - iceberg_destroy_cstring_func(batch_response.error_message); - } - printf("\n"); + if (result != CRESULT_OK) { + printf("❌ Failed to get stored batch\n"); iceberg_scan_free_func(scan_response.scan); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; } - if (batch_response.end_of_stream) { + if (sync_batch_response.end_of_stream) { printf("✅ Reached end of stream (table might be empty)\n"); - } else if (batch_response.batch) { + } else if (sync_batch_response.batch) { printf("✅ Successfully retrieved batch!\n"); printf("📦 Batch details:\n"); - printf(" - Serialized size: %zu bytes\n", batch_response.batch->length); - printf(" - Data pointer: %p\n", (void*)batch_response.batch->data); + printf(" - Serialized size: %zu bytes\n", sync_batch_response.batch->length); + printf(" - Data pointer: %p\n", (void*)sync_batch_response.batch->data); printf(" - First few bytes: "); // Print first 8 bytes as hex for verification - size_t print_len = (batch_response.batch->length < 8) ? batch_response.batch->length : 8; + size_t print_len = (sync_batch_response.batch->length < 8) ? sync_batch_response.batch->length : 8; for (size_t i = 0; i < print_len; i++) { - printf("%02x ", batch_response.batch->data[i]); + printf("%02x ", sync_batch_response.batch->data[i]); } printf("\n"); printf(" → Arrow IPC bytes ready for Julia Arrow.Stream()\n"); // Free the batch - iceberg_arrow_batch_free_func(batch_response.batch); + iceberg_arrow_batch_free_func(sync_batch_response.batch); } else { printf("⚠️ No batch data returned\n"); } From 4718e3c95f1a732394ba343ab1fb028c6ae0c11f Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Sat, 6 Sep 2025 22:16:04 +0200 Subject: [PATCH 05/39] Switch to minimal async api. next step is removing the sync method --- build.rs | 21 ---- include/iceberg_rust_ffi.h | 16 ++- src/lib.rs | 221 +++++++++++++++++-------------------- tests/integration_test.c | 72 ++++++++---- 4 files changed, 162 insertions(+), 168 deletions(-) delete mode 100644 build.rs diff --git a/build.rs b/build.rs deleted file mode 100644 index d2de405..0000000 --- a/build.rs +++ /dev/null @@ -1,21 +0,0 @@ -use std::env; -use std::path::PathBuf; - -fn main() { - let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); - let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); - - let config = cbindgen::Config::default(); - - cbindgen::Builder::new() - .with_crate(&crate_dir) - .with_config(config) - .generate() - .expect("Unable to generate bindings") - .write_to_file(out_dir.join("iceberg_rust_ffi.h")); - - // Note: We're using a manually created header file instead of the cbindgen-generated one - // The cbindgen output is available in the build output directory if needed for reference - println!("cargo:rerun-if-changed=src/lib.rs"); - println!("cargo:rerun-if-changed=include/iceberg_rust_ffi.h"); -} diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index 422e6cb..a582a04 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -19,7 +19,7 @@ typedef struct { size_t n_threads; } IcebergConfig; -// Result types from object_store_ffi +// Result types typedef enum { CRESULT_OK = 0, CRESULT_ERROR = -1, @@ -53,11 +53,17 @@ typedef struct { CResult result; ArrowBatch* batch; bool end_of_stream; - void* new_stream_ptr; char* error_message; const Context* context; } IcebergBatchResponse; +typedef struct { + CResult result; + bool success; + char* error_message; + const Context* context; +} IcebergBoolResponse; + // Callback types typedef int (*PanicCallback)(); typedef int (*ResultCallback)(const void* task); @@ -74,10 +80,10 @@ CResult iceberg_table_scan(IcebergTable* table, IcebergScanResponse* response, c CResult iceberg_scan_select_columns(IcebergScan* scan, const char** column_names, size_t num_columns); void iceberg_scan_free(IcebergScan* scan); -// Async batch operations -CResult iceberg_scan_wait_batch_with_storage(IcebergScan* scan, IcebergBatchResponse* response, const void* handle); +// New simplified async API +CResult iceberg_scan_init_stream(IcebergScan* scan, IcebergBoolResponse* response, const void* handle); +CResult iceberg_scan_next_batch_from_stream(IcebergScan* scan, IcebergBoolResponse* response, const void* handle); CResult iceberg_scan_next_batch(IcebergScan* scan, IcebergBatchResponse* response, const void* handle); -CResult iceberg_scan_store_batch_result(IcebergScan* scan, const IcebergBatchResponse* response); void iceberg_arrow_batch_free(ArrowBatch* batch); // Utility functions diff --git a/src/lib.rs b/src/lib.rs index b98524b..82a4650 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,6 +22,38 @@ use object_store_ffi::{ pub struct IcebergStream { pub stream: AsyncMutex>>, } +unsafe impl Send for IcebergStream {} + +// Unified response type for operations that return a boolean status +#[repr(C)] +pub struct IcebergBoolResponse { + result: CResult, + success: bool, + error_message: *mut c_char, + context: *const Context, +} + +unsafe impl Send for IcebergBoolResponse {} + +impl RawResponse for IcebergBoolResponse { + type Payload = bool; + + fn result_mut(&mut self) -> &mut CResult { + &mut self.result + } + + fn context_mut(&mut self) -> &mut *const Context { + &mut self.context + } + + fn error_message_mut(&mut self) -> &mut *mut c_char { + &mut self.error_message + } + + fn set_payload(&mut self, payload: Option) { + self.success = payload.unwrap_or(false); + } +} // cbindgen annotations #[allow(non_camel_case_types)] @@ -60,6 +92,12 @@ pub struct IcebergScan { pub end_of_stream: bool, } +// SAFETY: IcebergScan can be safely sent between threads because: +// - table: iceberg::table::Table is Send +// - columns: Vec is Send +// - stream, current_batch: raw pointers are Send by our design (we control access) +unsafe impl Send for IcebergScan {} + #[repr(C)] pub struct ArrowBatch { pub data: *const u8, @@ -131,7 +169,6 @@ pub struct IcebergBatchResponse { result: CResult, batch: *mut ArrowBatch, end_of_stream: bool, - new_stream_ptr: *mut IcebergStream, error_message: *mut c_char, context: *const Context, } @@ -139,7 +176,7 @@ pub struct IcebergBatchResponse { unsafe impl Send for IcebergBatchResponse {} impl RawResponse for IcebergBatchResponse { - type Payload = (*mut ArrowBatch, bool, Option<*mut IcebergStream>); + type Payload = (*mut ArrowBatch, bool); fn result_mut(&mut self) -> &mut CResult { &mut self.result } @@ -151,15 +188,13 @@ impl RawResponse for IcebergBatchResponse { } fn set_payload(&mut self, payload: Option) { match payload { - Some((batch_ptr, is_end, stream_ptr)) => { + Some((batch_ptr, is_end)) => { self.batch = batch_ptr; self.end_of_stream = is_end; - self.new_stream_ptr = stream_ptr.unwrap_or(ptr::null_mut()); } None => { self.batch = ptr::null_mut(); - self.end_of_stream = false; - self.new_stream_ptr = ptr::null_mut(); + self.end_of_stream = true; } } } @@ -314,170 +349,119 @@ export_runtime_op!( table: *mut IcebergTable ); -// Async function to wait for next batch with proper stream persistence +// Async function to initialize stream without getting first batch export_runtime_op!( - iceberg_scan_wait_batch, - IcebergBatchResponse, + iceberg_scan_init_stream, + IcebergBoolResponse, || { if scan.is_null() { return Err(anyhow::anyhow!("Null scan pointer provided")); } let scan_ref = unsafe { &*scan }; - // Check if we already have a stream or need to create one - let need_new_stream = scan_ref.stream.is_none(); + // Only initialize if we don't already have a stream + if scan_ref.stream.is_some() { + return Err(anyhow::anyhow!("Stream already exists")); + } if let Some(table) = &scan_ref.table { let columns = scan_ref.columns.clone(); let table_clone = table.clone(); - Ok((table_clone, columns, need_new_stream)) + let scan_ref = unsafe { &mut *(scan as *mut IcebergScan) }; + Ok((table_clone, columns, scan_ref)) } else { Err(anyhow::anyhow!("Table not available")) } }, scan_data, async { - let (table, columns, need_new_stream) = scan_data; + let (table, columns, scan_ref) = scan_data; - if need_new_stream { - // Create new stream and get first batch - let mut scan_builder = table.scan(); - if let Some(cols) = columns { - scan_builder = scan_builder.select(cols); - } - - let table_scan = scan_builder.build()?; - let mut stream = table_scan.to_arrow().await?; - - // Get first batch from stream - match stream.next().await { - Some(Ok(record_batch)) => { - tracing::info!("Successfully got first batch with {} rows, {} columns", - record_batch.num_rows(), record_batch.num_columns()); - let arrow_batch = serialize_record_batch(record_batch)?; - let batch_ptr = Box::into_raw(Box::new(arrow_batch)); - - // Create stream wrapper and store it - let iceberg_stream = Box::new(IcebergStream { - stream: AsyncMutex::new(stream), - }); - let stream_ptr = Box::into_raw(iceberg_stream); - - tracing::info!("Created batch and stream pointers successfully"); - Ok((batch_ptr, false, Some(stream_ptr))) - } - Some(Err(e)) => { - tracing::error!("Error reading first batch: {}", e); - Err(anyhow::anyhow!("Error reading batch: {}", e)) - } - None => { - // End of stream immediately - tracing::warn!("Stream ended immediately - no data found"); - Ok((ptr::null_mut(), true, None)) - } - } - } else { - // This case means we need to use an existing stream - // We'll handle this case differently - return a marker that indicates existing stream usage - Err(anyhow::anyhow!("USE_EXISTING_STREAM")) + // Create new stream but don't get first batch + let mut scan_builder = table.scan(); + if let Some(cols) = columns { + scan_builder = scan_builder.select(cols); } + + let table_scan = scan_builder.build()?; + let stream = table_scan.to_arrow().await?; + + // Create stream wrapper + let iceberg_stream = Box::new(IcebergStream { + stream: AsyncMutex::new(stream), + }); + let stream_ptr = Box::into_raw(iceberg_stream); + + tracing::info!("Created stream pointer successfully: {:?}", stream_ptr); + + // Store stream in scan + scan_ref.stream = Some(stream_ptr); + + // Return success flag + Ok::(true) }, scan: *mut IcebergScan ); // Async function to get next batch from existing stream export_runtime_op!( - iceberg_scan_wait_batch_existing, - IcebergBatchResponse, + iceberg_scan_next_batch_from_stream, + IcebergBoolResponse, || { if scan.is_null() { return Err(anyhow::anyhow!("Null scan pointer provided")); } let scan_ref = unsafe { &*scan }; + tracing::debug!("Checking for stream in scan, current stream pointer: {:?}", scan_ref.stream); + if let Some(stream_ptr) = scan_ref.stream { - // Return the stream pointer - we'll dereference it in the async block - Ok(stream_ptr as usize) // Convert to usize to make it Send + tracing::debug!("Found stream pointer: {:?}", stream_ptr); + let scan_ref = unsafe { &mut *(scan as *mut IcebergScan) }; + let stream_ref = unsafe { &*stream_ptr }; + Ok((stream_ref, scan_ref)) } else { + tracing::error!("No stream available in scan"); Err(anyhow::anyhow!("No stream available")) } }, - stream_ptr_addr, + stream_data, async { - let stream_ptr = stream_ptr_addr as *mut IcebergStream; - let stream_ref = unsafe { &*stream_ptr }; + let (stream_ref, scan_ref) = stream_data; let mut stream_guard = stream_ref.stream.lock().await; - match stream_guard.next().await { + let result = match stream_guard.next().await { Some(Ok(record_batch)) => { let arrow_batch = serialize_record_batch(record_batch)?; let batch_ptr = Box::into_raw(Box::new(arrow_batch)); - Ok((batch_ptr, false, None)) + (batch_ptr, false) } - Some(Err(e)) => Err(anyhow::anyhow!("Error reading batch: {}", e)), + Some(Err(e)) => return Err(anyhow::anyhow!("Error reading batch: {}", e)), None => { // End of stream - Ok((ptr::null_mut(), true, None)) + (ptr::null_mut(), true) } + }; + + // Auto-store the result in scan + let (batch_ptr, end_of_stream) = result; + + if batch_ptr.is_null() { + tracing::warn!("Auto-storing NULL batch pointer - end of stream"); + scan_ref.current_batch = None; + } else { + tracing::info!("Auto-storing batch pointer {:?} in scan", batch_ptr); + scan_ref.current_batch = Some(batch_ptr); } + scan_ref.end_of_stream = end_of_stream; + + // Return only the end_of_stream status + Ok(end_of_stream) }, scan: *mut IcebergScan ); -// Simplified storage function - just call the right async function -#[no_mangle] -pub extern "C" fn iceberg_scan_wait_batch_with_storage( - scan: *mut IcebergScan, - response: *mut IcebergBatchResponse, - handle: *const c_void, -) -> CResult { - let scan_ref = unsafe { &*scan }; - - // Check if we need to use existing stream or create new one - if scan_ref.stream.is_none() { - // Call the async function for new stream - tracing::info!("Calling async function for new stream"); - iceberg_scan_wait_batch(scan, response, handle) - } else { - // Call the async function for existing stream - tracing::info!("Calling async function for existing stream"); - iceberg_scan_wait_batch_existing(scan, response, handle) - } -} - -// Helper function to store the batch result in the scan after async completion -#[no_mangle] -pub extern "C" fn iceberg_scan_store_batch_result( - scan: *mut IcebergScan, - response: *const IcebergBatchResponse, -) -> CResult { - if scan.is_null() || response.is_null() { - return CResult::Error; - } - - let scan_ref = unsafe { &mut *scan }; - let response_ref = unsafe { &*response }; - - // Store batch in scan - if response_ref.batch.is_null() { - tracing::warn!("Storing NULL batch pointer - end of stream or error"); - scan_ref.current_batch = None; - } else { - tracing::info!("Storing batch pointer {:?} in scan", response_ref.batch); - scan_ref.current_batch = Some(response_ref.batch); - } - scan_ref.end_of_stream = response_ref.end_of_stream; - - // If a new stream was created, store its pointer in scan - if !response_ref.new_stream_ptr.is_null() && scan_ref.stream.is_none() { - tracing::info!("Storing new stream pointer {:?} in scan", response_ref.new_stream_ptr); - scan_ref.stream = Some(response_ref.new_stream_ptr); - } - - CResult::Ok -} - // Synchronous function to get the current batch from the scan #[no_mangle] pub extern "C" fn iceberg_scan_next_batch( @@ -497,7 +481,6 @@ pub extern "C" fn iceberg_scan_next_batch( result: CResult::Ok, batch: ptr::null_mut(), end_of_stream: false, - new_stream_ptr: ptr::null_mut(), error_message: ptr::null_mut(), context: ptr::null(), }; diff --git a/tests/integration_test.c b/tests/integration_test.c index 167f332..e3986fa 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -10,9 +10,9 @@ static int (*iceberg_init_runtime_func)(IcebergConfig config, int (*panic_callback)(), int (*result_callback)(const void*)) = NULL; static int (*iceberg_table_open_func)(const char*, const char*, IcebergTableResponse*, const void*) = NULL; static int (*iceberg_table_scan_func)(IcebergTable*, IcebergScanResponse*, const void*) = NULL; -static int (*iceberg_scan_wait_batch_func)(IcebergScan*, IcebergBatchResponse*, const void*) = NULL; +static int (*iceberg_scan_init_stream_func)(IcebergScan*, IcebergBoolResponse*, const void*) = NULL; +static int (*iceberg_scan_next_batch_from_stream_func)(IcebergScan*, IcebergBoolResponse*, const void*) = NULL; static int (*iceberg_scan_next_batch_func)(IcebergScan*, IcebergBatchResponse*, const void*) = NULL; -static int (*iceberg_scan_store_batch_func)(IcebergScan*, const IcebergBatchResponse*) = NULL; static void (*iceberg_table_free_func)(IcebergTable*) = NULL; static void (*iceberg_scan_free_func)(IcebergScan*) = NULL; static void (*iceberg_arrow_batch_free_func)(ArrowBatch*) = NULL; @@ -70,9 +70,14 @@ int load_iceberg_library(const char* library_path) { return 0; } - iceberg_scan_wait_batch_func = (int (*)(IcebergScan*, IcebergBatchResponse*, const void*))dlsym(lib_handle, "iceberg_scan_wait_batch_with_storage"); - if (!iceberg_scan_wait_batch_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_scan_wait_batch_with_storage: %s\n", dlerror()); + iceberg_scan_init_stream_func = (int (*)(IcebergScan*, IcebergBoolResponse*, const void*))dlsym(lib_handle, "iceberg_scan_init_stream"); + if (!iceberg_scan_init_stream_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_scan_init_stream: %s\n", dlerror()); + return 0; + } + iceberg_scan_next_batch_from_stream_func = (int (*)(IcebergScan*, IcebergBoolResponse*, const void*))dlsym(lib_handle, "iceberg_scan_next_batch_from_stream"); + if (!iceberg_scan_next_batch_from_stream_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_scan_next_batch_from_stream: %s\n", dlerror()); return 0; } @@ -82,11 +87,6 @@ int load_iceberg_library(const char* library_path) { return 0; } - iceberg_scan_store_batch_func = (int (*)(IcebergScan*, const IcebergBatchResponse*))dlsym(lib_handle, "iceberg_scan_store_batch_result"); - if (!iceberg_scan_store_batch_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_scan_store_batch_result: %s\n", dlerror()); - return 0; - } iceberg_table_free_func = (void (*)(IcebergTable*))dlsym(lib_handle, "iceberg_table_free"); if (!iceberg_table_free_func) { @@ -168,7 +168,7 @@ int main(int argc, char* argv[]) { IcebergTableResponse table_response = {0}; async_completed = 0; // Reset flag - result = iceberg_table_open_func(table_path, metadata_path, &table_response, &async_completed); + result = iceberg_table_open_func(table_path, metadata_path, &table_response, (const void*)&async_completed); if (result != CRESULT_OK) { printf("❌ Failed to initiate table open operation\n"); @@ -213,7 +213,7 @@ int main(int argc, char* argv[]) { // 3. Create a scan using async API IcebergScanResponse scan_response = {0}; async_completed = 0; // Reset flag - result = iceberg_table_scan_func(table_response.table, &scan_response, &async_completed); + result = iceberg_table_scan_func(table_response.table, &scan_response, (const void*)&async_completed); if (result != CRESULT_OK) { printf("❌ Failed to initiate scan creation\n"); @@ -260,10 +260,10 @@ int main(int argc, char* argv[]) { printf("✅ Scan created successfully\n"); // 4. Try to get a batch using new two-step async API - printf("Step 1: Waiting for batch asynchronously...\n"); - IcebergBatchResponse batch_response = {0}; + printf("Step 1: Initializing stream asynchronously...\n"); + IcebergBoolResponse init_response = {0}; async_completed = 0; // Reset flag - result = iceberg_scan_wait_batch_func(scan_response.scan, &batch_response, &async_completed); + result = iceberg_scan_init_stream_func(scan_response.scan, &init_response, (const void*)&async_completed); if (result == CRESULT_OK) { // Wait for async operation to complete @@ -283,10 +283,10 @@ int main(int argc, char* argv[]) { } if (result != CRESULT_OK) { - printf("❌ Failed to wait for batch\n"); - if (batch_response.error_message) { - printf(" Error: %s\n", batch_response.error_message); - iceberg_destroy_cstring_func(batch_response.error_message); + printf("❌ Failed to initialize stream\n"); + if (init_response.error_message) { + printf(" Error: %s\n", init_response.error_message); + iceberg_destroy_cstring_func(init_response.error_message); } iceberg_scan_free_func(scan_response.scan); iceberg_table_free_func(table_response.table); @@ -294,17 +294,43 @@ int main(int argc, char* argv[]) { return 1; } - // Store the batch result in the scan - result = iceberg_scan_store_batch_func(scan_response.scan, &batch_response); + printf("✅ Stream initialized successfully\n"); + + printf("Step 2: Getting first batch from stream asynchronously...\n"); + IcebergBoolResponse batch_response = {0}; + async_completed = 0; // Reset flag + result = iceberg_scan_next_batch_from_stream_func(scan_response.scan, &batch_response, (const void*)&async_completed); + + if (result == CRESULT_OK) { + // Wait for batch retrieval to complete + timeout = 100; // 10 second timeout + while (!async_completed && timeout > 0) { + usleep(100000); // 100ms + timeout--; + } + + if (!async_completed) { + printf("❌ Batch retrieval async operation timed out\n"); + iceberg_scan_free_func(scan_response.scan); + iceberg_table_free_func(table_response.table); + unload_iceberg_library(); + return 1; + } + } + if (result != CRESULT_OK) { - printf("❌ Failed to store batch result\n"); + printf("❌ Failed to get first batch from stream\n"); + if (batch_response.error_message) { + printf(" Error: %s\n", batch_response.error_message); + iceberg_destroy_cstring_func(batch_response.error_message); + } iceberg_scan_free_func(scan_response.scan); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; } - printf("Step 2: Retrieving stored batch synchronously...\n"); + printf("Step 3: Retrieving stored batch synchronously...\n"); IcebergBatchResponse sync_batch_response = {0}; result = iceberg_scan_next_batch_func(scan_response.scan, &sync_batch_response, NULL); From c0ae68a091dacbcf3b14fd684288d5d80d20efc9 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Sun, 7 Sep 2025 16:16:05 +0200 Subject: [PATCH 06/39] use simple accessor for the current batch --- include/iceberg_rust_ffi.h | 21 ++++----- src/lib.rs | 89 +++++++++----------------------------- tests/integration_test.c | 48 ++++++++------------ 3 files changed, 50 insertions(+), 108 deletions(-) diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index a582a04..a4717f9 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -11,7 +11,6 @@ extern "C" { // Forward declarations typedef struct IcebergTable IcebergTable; -typedef struct IcebergScan IcebergScan; typedef struct Context Context; // Configuration for iceberg runtime @@ -34,6 +33,15 @@ typedef struct { void* rust_ptr; } ArrowBatch; +// IcebergScan structure - now exposed for direct field access +typedef struct { + void* table; // Option - opaque + void* columns; // Option> - opaque + void* stream; // Option<*mut IcebergStream> - opaque + ArrowBatch* current_batch; // Option<*mut ArrowBatch> + bool end_of_stream; // bool +} IcebergScan; + // Response structures for async operations typedef struct { CResult result; @@ -49,13 +57,6 @@ typedef struct { const Context* context; } IcebergScanResponse; -typedef struct { - CResult result; - ArrowBatch* batch; - bool end_of_stream; - char* error_message; - const Context* context; -} IcebergBatchResponse; typedef struct { CResult result; @@ -83,8 +84,8 @@ void iceberg_scan_free(IcebergScan* scan); // New simplified async API CResult iceberg_scan_init_stream(IcebergScan* scan, IcebergBoolResponse* response, const void* handle); CResult iceberg_scan_next_batch_from_stream(IcebergScan* scan, IcebergBoolResponse* response, const void* handle); -CResult iceberg_scan_next_batch(IcebergScan* scan, IcebergBatchResponse* response, const void* handle); -void iceberg_arrow_batch_free(ArrowBatch* batch); +ArrowBatch* iceberg_scan_get_current_batch(IcebergScan* scan); +void iceberg_arrow_batch_free(IcebergScan* scan); // Utility functions CResult iceberg_destroy_cstring(char* string); diff --git a/src/lib.rs b/src/lib.rs index 82a4650..69457b3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -164,41 +164,6 @@ impl RawResponse for IcebergScanResponse { } } -#[repr(C)] -pub struct IcebergBatchResponse { - result: CResult, - batch: *mut ArrowBatch, - end_of_stream: bool, - error_message: *mut c_char, - context: *const Context, -} - -unsafe impl Send for IcebergBatchResponse {} - -impl RawResponse for IcebergBatchResponse { - type Payload = (*mut ArrowBatch, bool); - fn result_mut(&mut self) -> &mut CResult { - &mut self.result - } - fn context_mut(&mut self) -> &mut *const Context { - &mut self.context - } - fn error_message_mut(&mut self) -> &mut *mut c_char { - &mut self.error_message - } - fn set_payload(&mut self, payload: Option) { - match payload { - Some((batch_ptr, is_end)) => { - self.batch = batch_ptr; - self.end_of_stream = is_end; - } - None => { - self.batch = ptr::null_mut(); - self.end_of_stream = true; - } - } - } -} // Helper function to create ArrowBatch from RecordBatch fn serialize_record_batch(batch: RecordBatch) -> Result { @@ -462,42 +427,22 @@ export_runtime_op!( scan: *mut IcebergScan ); -// Synchronous function to get the current batch from the scan + +// Get current batch from scan (returns null if end of stream or no batch) #[no_mangle] -pub extern "C" fn iceberg_scan_next_batch( - scan: *mut IcebergScan, - response: *mut IcebergBatchResponse, - _handle: *const c_void, -) -> CResult { - if scan.is_null() || response.is_null() { - return CResult::Error; +pub extern "C" fn iceberg_scan_get_current_batch(scan: *mut IcebergScan) -> *mut ArrowBatch { + if scan.is_null() { + return ptr::null_mut(); } - - let scan_ref = unsafe { &mut *scan }; - let response_ref = unsafe { &mut *response }; - // Initialize response - *response_ref = IcebergBatchResponse { - result: CResult::Ok, - batch: ptr::null_mut(), - end_of_stream: false, - error_message: ptr::null_mut(), - context: ptr::null(), - }; - - // Return the current batch from the scan - if let Some(batch_ptr) = scan_ref.current_batch.take() { - tracing::info!("Returning stored batch pointer: {:?}", batch_ptr); - response_ref.batch = batch_ptr; - response_ref.end_of_stream = false; - } else { - tracing::warn!("No current batch stored, end_of_stream: {}", scan_ref.end_of_stream); - // No current batch - either end of stream or need to wait for one - response_ref.batch = ptr::null_mut(); - response_ref.end_of_stream = scan_ref.end_of_stream; + let scan_ref = unsafe { &*scan }; + + // If end of stream, return null (no more batches) + if scan_ref.end_of_stream { + return ptr::null_mut(); } - - CResult::Ok + + scan_ref.current_batch.unwrap_or(ptr::null_mut()) } // Synchronous operations @@ -561,8 +506,14 @@ pub extern "C" fn iceberg_scan_free(scan: *mut IcebergScan) { } #[no_mangle] -pub extern "C" fn iceberg_arrow_batch_free(batch: *mut ArrowBatch) { - if !batch.is_null() { +pub extern "C" fn iceberg_arrow_batch_free(scan: *mut IcebergScan) { + if scan.is_null() { + return; + } + + let scan_ref = unsafe { &mut *scan }; + + if let Some(batch) = scan_ref.current_batch.take() { unsafe { let batch_ref = Box::from_raw(batch); if !batch_ref.rust_ptr.is_null() { diff --git a/tests/integration_test.c b/tests/integration_test.c index e3986fa..214c7ca 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -12,10 +12,10 @@ static int (*iceberg_table_open_func)(const char*, const char*, IcebergTableResp static int (*iceberg_table_scan_func)(IcebergTable*, IcebergScanResponse*, const void*) = NULL; static int (*iceberg_scan_init_stream_func)(IcebergScan*, IcebergBoolResponse*, const void*) = NULL; static int (*iceberg_scan_next_batch_from_stream_func)(IcebergScan*, IcebergBoolResponse*, const void*) = NULL; -static int (*iceberg_scan_next_batch_func)(IcebergScan*, IcebergBatchResponse*, const void*) = NULL; static void (*iceberg_table_free_func)(IcebergTable*) = NULL; static void (*iceberg_scan_free_func)(IcebergScan*) = NULL; -static void (*iceberg_arrow_batch_free_func)(ArrowBatch*) = NULL; +static void (*iceberg_arrow_batch_free_func)(IcebergScan*) = NULL; +static ArrowBatch* (*iceberg_scan_get_current_batch_func)(IcebergScan*) = NULL; static int (*iceberg_destroy_cstring_func)(char*) = NULL; // Library handle @@ -81,11 +81,6 @@ int load_iceberg_library(const char* library_path) { return 0; } - iceberg_scan_next_batch_func = (int (*)(IcebergScan*, IcebergBatchResponse*, const void*))dlsym(lib_handle, "iceberg_scan_next_batch"); - if (!iceberg_scan_next_batch_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_scan_next_batch: %s\n", dlerror()); - return 0; - } iceberg_table_free_func = (void (*)(IcebergTable*))dlsym(lib_handle, "iceberg_table_free"); @@ -100,12 +95,18 @@ int load_iceberg_library(const char* library_path) { return 0; } - iceberg_arrow_batch_free_func = (void (*)(ArrowBatch*))dlsym(lib_handle, "iceberg_arrow_batch_free"); + iceberg_arrow_batch_free_func = (void (*)(IcebergScan*))dlsym(lib_handle, "iceberg_arrow_batch_free"); if (!iceberg_arrow_batch_free_func) { fprintf(stderr, "❌ Failed to resolve iceberg_arrow_batch_free: %s\n", dlerror()); return 0; } + iceberg_scan_get_current_batch_func = (ArrowBatch* (*)(IcebergScan*))dlsym(lib_handle, "iceberg_scan_get_current_batch"); + if (!iceberg_scan_get_current_batch_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_scan_get_current_batch: %s\n", dlerror()); + return 0; + } + iceberg_destroy_cstring_func = (int (*)(char*))dlsym(lib_handle, "iceberg_destroy_cstring"); if (!iceberg_destroy_cstring_func) { fprintf(stderr, "❌ Failed to resolve iceberg_destroy_cstring: %s\n", dlerror()); @@ -330,40 +331,29 @@ int main(int argc, char* argv[]) { return 1; } - printf("Step 3: Retrieving stored batch synchronously...\n"); - IcebergBatchResponse sync_batch_response = {0}; - result = iceberg_scan_next_batch_func(scan_response.scan, &sync_batch_response, NULL); + printf("Step 3: Retrieving stored batch from scan...\n"); - // Check if the operation was successful - if (result != CRESULT_OK) { - printf("❌ Failed to get stored batch\n"); - iceberg_scan_free_func(scan_response.scan); - iceberg_table_free_func(table_response.table); - unload_iceberg_library(); - return 1; - } + ArrowBatch* batch = iceberg_scan_get_current_batch_func(scan_response.scan); - if (sync_batch_response.end_of_stream) { - printf("✅ Reached end of stream (table might be empty)\n"); - } else if (sync_batch_response.batch) { + if (batch) { printf("✅ Successfully retrieved batch!\n"); printf("📦 Batch details:\n"); - printf(" - Serialized size: %zu bytes\n", sync_batch_response.batch->length); - printf(" - Data pointer: %p\n", (void*)sync_batch_response.batch->data); + printf(" - Serialized size: %zu bytes\n", batch->length); + printf(" - Data pointer: %p\n", (void*)batch->data); printf(" - First few bytes: "); // Print first 8 bytes as hex for verification - size_t print_len = (sync_batch_response.batch->length < 8) ? sync_batch_response.batch->length : 8; + size_t print_len = (batch->length < 8) ? batch->length : 8; for (size_t i = 0; i < print_len; i++) { - printf("%02x ", sync_batch_response.batch->data[i]); + printf("%02x ", batch->data[i]); } printf("\n"); printf(" → Arrow IPC bytes ready for Julia Arrow.Stream()\n"); - // Free the batch - iceberg_arrow_batch_free_func(sync_batch_response.batch); + // Free the batch from the scan (clears the pointer and deallocates) + iceberg_arrow_batch_free_func(scan_response.scan); } else { - printf("⚠️ No batch data returned\n"); + printf("✅ Reached end of stream (no more batches)\n"); } // 5. Cleanup From 7e3be0ba8f0a8c0014e70a8f9544d10eeb630841 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Sun, 7 Sep 2025 16:28:28 +0200 Subject: [PATCH 07/39] fix C compilation warnings --- tests/integration_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration_test.c b/tests/integration_test.c index 214c7ca..01bd24b 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -30,6 +30,7 @@ int panic_callback() { volatile int async_completed = 0; int result_callback(const void* task) { + (void)task; // Suppress unused parameter warning // Signal that async operation completed async_completed = 1; return 0; From 23777ce220357436c35e1f2ec0134320e995cf43 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Sun, 7 Sep 2025 16:32:48 +0200 Subject: [PATCH 08/39] debug log instead of warn --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 69457b3..7468b39 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -413,7 +413,7 @@ export_runtime_op!( let (batch_ptr, end_of_stream) = result; if batch_ptr.is_null() { - tracing::warn!("Auto-storing NULL batch pointer - end of stream"); + tracing::debug!("Auto-storing NULL batch pointer - end of stream"); scan_ref.current_batch = None; } else { tracing::info!("Auto-storing batch pointer {:?} in scan", batch_ptr); From f1b577eca154b401f69d39719c49653114c56830 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Sun, 7 Sep 2025 16:44:07 +0200 Subject: [PATCH 09/39] cancellation --- include/iceberg_rust_ffi.h | 4 +++ src/lib.rs | 14 ++++++++- tests/integration_test.c | 58 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 1 deletion(-) diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index a4717f9..77e66aa 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -91,6 +91,10 @@ void iceberg_arrow_batch_free(IcebergScan* scan); CResult iceberg_destroy_cstring(char* string); const char* iceberg_current_metrics(); +// Context management functions for cancellation support +CResult iceberg_cancel_context(const Context* ctx); +CResult iceberg_destroy_context(const Context* ctx); + // Backward compatibility const char* iceberg_error_message(); diff --git a/src/lib.rs b/src/lib.rs index 7468b39..be82caa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,8 @@ use iceberg::TableIdent; use object_store_ffi::{ RT, RESULT_CB, ResultCallback, CResult, Context, RawResponse, ResponseGuard, NotifyGuard, - with_cancellation, export_runtime_op, destroy_cstring, current_metrics + with_cancellation, export_runtime_op, destroy_cstring, current_metrics, + cancel_context, destroy_context }; // Stream wrapper for FFI - using async mutex to avoid blocking calls @@ -540,4 +541,15 @@ pub extern "C" fn iceberg_destroy_cstring(string: *mut c_char) -> CResult { #[no_mangle] pub extern "C" fn iceberg_current_metrics() -> *const c_char { current_metrics() +} + +// Re-export context management functions for cancellation support +#[no_mangle] +pub extern "C" fn iceberg_cancel_context(ctx_ptr: *const Context) -> CResult { + cancel_context(ctx_ptr) +} + +#[no_mangle] +pub extern "C" fn iceberg_destroy_context(ctx_ptr: *const Context) -> CResult { + destroy_context(ctx_ptr) } \ No newline at end of file diff --git a/tests/integration_test.c b/tests/integration_test.c index 01bd24b..116740b 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -17,6 +17,8 @@ static void (*iceberg_scan_free_func)(IcebergScan*) = NULL; static void (*iceberg_arrow_batch_free_func)(IcebergScan*) = NULL; static ArrowBatch* (*iceberg_scan_get_current_batch_func)(IcebergScan*) = NULL; static int (*iceberg_destroy_cstring_func)(char*) = NULL; +static int (*iceberg_cancel_context_func)(const void*) = NULL; +static int (*iceberg_destroy_context_func)(const void*) = NULL; // Library handle static void* lib_handle = NULL; @@ -114,6 +116,18 @@ int load_iceberg_library(const char* library_path) { return 0; } + iceberg_cancel_context_func = (int (*)(const void*))dlsym(lib_handle, "iceberg_cancel_context"); + if (!iceberg_cancel_context_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_cancel_context: %s\n", dlerror()); + return 0; + } + + iceberg_destroy_context_func = (int (*)(const void*))dlsym(lib_handle, "iceberg_destroy_context"); + if (!iceberg_destroy_context_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_destroy_context: %s\n", dlerror()); + return 0; + } + printf("✅ All function symbols resolved successfully\n"); return 1; } @@ -357,6 +371,50 @@ int main(int argc, char* argv[]) { printf("✅ Reached end of stream (no more batches)\n"); } + // 4. Test context cancellation functions + printf("Testing context cancellation functions...\n"); + + // Test that cancellation functions can be called with valid context pointers + if (table_response.context != NULL) { + printf(" - Testing cancel_context with table context...\n"); + int cancel_result = iceberg_cancel_context_func(table_response.context); + if (cancel_result == 0) { + printf(" ✅ cancel_context succeeded\n"); + } else { + printf(" ⚠️ cancel_context returned: %d\n", cancel_result); + } + + printf(" - Testing destroy_context with table context...\n"); + int destroy_result = iceberg_destroy_context_func(table_response.context); + if (destroy_result == 0) { + printf(" ✅ destroy_context succeeded\n"); + } else { + printf(" ⚠️ destroy_context returned: %d\n", destroy_result); + } + table_response.context = NULL; // Mark as cleaned up + } + + if (scan_response.context != NULL) { + printf(" - Testing cancel_context with scan context...\n"); + int cancel_result = iceberg_cancel_context_func(scan_response.context); + if (cancel_result == 0) { + printf(" ✅ cancel_context succeeded\n"); + } else { + printf(" ⚠️ cancel_context returned: %d\n", cancel_result); + } + + printf(" - Testing destroy_context with scan context...\n"); + int destroy_result = iceberg_destroy_context_func(scan_response.context); + if (destroy_result == 0) { + printf(" ✅ destroy_context succeeded\n"); + } else { + printf(" ⚠️ destroy_context returned: %d\n", destroy_result); + } + scan_response.context = NULL; // Mark as cleaned up + } + + printf("✅ Context cancellation functions tested successfully\n"); + // 5. Cleanup printf("Cleaning up resources...\n"); iceberg_scan_free_func(scan_response.scan); From dc1e9b58f15e55577f11b06c07d5a0140bbb32a0 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Sun, 7 Sep 2025 16:51:48 +0200 Subject: [PATCH 10/39] cleanup error codes --- include/iceberg_rust_ffi.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index 77e66aa..0281f1b 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -21,9 +21,7 @@ typedef struct { // Result types typedef enum { CRESULT_OK = 0, - CRESULT_ERROR = -1, - CRESULT_BACKOFF = -2, - CRESULT_UNINITIALIZED = -3 + CRESULT_ERROR = 1 } CResult; // Arrow batch as serialized bytes From a510d67507fb4ad3ade55062487ef254218c2e96 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Sun, 7 Sep 2025 17:01:16 +0200 Subject: [PATCH 11/39] todo comment --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index be82caa..45d0d75 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -167,6 +167,7 @@ impl RawResponse for IcebergScanResponse { // Helper function to create ArrowBatch from RecordBatch +// TODO: Switch to zero-copy once Arrow.jl supports C API. fn serialize_record_batch(batch: RecordBatch) -> Result { let buffer = Vec::new(); let mut stream_writer = StreamWriter::try_new(buffer, &batch.schema())?; From b6f080f090c156b29daedbe746b9765663ad90f6 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Sun, 7 Sep 2025 17:13:52 +0200 Subject: [PATCH 12/39] update build to refer to remote repo --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 66e7f96..45e45fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ cbindgen = "0.26" [dependencies] iceberg = "0.6.0" -object_store_ffi = { path = "../object_store_ffi", default-features = false } +object_store_ffi = { git = "https://github.com/RelationalAI/object_store_ffi", rev = "db0f6a3fe282e267a53c119c0aca4b5af341df3f", default-features = false } tokio = { version = "1.0", features = ["full"] } futures = "0.3" libc = "0.2" From dbfd232c577d0c4be1da1764eb7526646e48b2b5 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Sun, 7 Sep 2025 17:15:51 +0200 Subject: [PATCH 13/39] update version --- Cargo.lock | 3 ++- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 26f431f..29efd68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1572,7 +1572,7 @@ dependencies = [ [[package]] name = "iceberg_rust_ffi" -version = "0.1.0" +version = "0.2.0" dependencies = [ "anyhow", "arrow-array", @@ -2272,6 +2272,7 @@ dependencies = [ [[package]] name = "object_store_ffi" version = "0.12.3" +source = "git+https://github.com/RelationalAI/object_store_ffi?rev=db0f6a3fe282e267a53c119c0aca4b5af341df3f#db0f6a3fe282e267a53c119c0aca4b5af341df3f" dependencies = [ "anyhow", "async-channel", diff --git a/Cargo.toml b/Cargo.toml index 45e45fc..7097c2a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "iceberg_rust_ffi" -version = "0.1.0" +version = "0.2.0" edition = "2021" [lib] From 665085321124dea0096ba4d70bc039f054ac095d Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Sun, 7 Sep 2025 17:17:18 +0200 Subject: [PATCH 14/39] trim whitespace --- src/lib.rs | 58 +++++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 45d0d75..20a2f99 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,19 +38,19 @@ unsafe impl Send for IcebergBoolResponse {} impl RawResponse for IcebergBoolResponse { type Payload = bool; - + fn result_mut(&mut self) -> &mut CResult { &mut self.result } - + fn context_mut(&mut self) -> &mut *const Context { &mut self.context } - + fn error_message_mut(&mut self) -> &mut *mut c_char { &mut self.error_message } - + fn set_payload(&mut self, payload: Option) { self.success = payload.unwrap_or(false); } @@ -95,7 +95,7 @@ pub struct IcebergScan { // SAFETY: IcebergScan can be safely sent between threads because: // - table: iceberg::table::Table is Send -// - columns: Vec is Send +// - columns: Vec is Send // - stream, current_batch: raw pointers are Send by our design (we control access) unsafe impl Send for IcebergScan {} @@ -257,7 +257,7 @@ export_runtime_op!( paths, async { let (table_path_str, metadata_path_str) = paths; - + // Construct the full metadata path let full_metadata_path = if metadata_path_str.starts_with('/') { metadata_path_str @@ -280,11 +280,11 @@ export_runtime_op!( tracing::info!("Successfully loaded static table, converting to table"); let iceberg_table = static_table.into_table(); - + let table_ptr = Box::into_raw(Box::new(IcebergTable { table: iceberg_table, })); - + Ok::<*mut IcebergTable, anyhow::Error>(table_ptr) }, table_path: *const c_char, @@ -325,12 +325,12 @@ export_runtime_op!( return Err(anyhow::anyhow!("Null scan pointer provided")); } let scan_ref = unsafe { &*scan }; - + // Only initialize if we don't already have a stream if scan_ref.stream.is_some() { return Err(anyhow::anyhow!("Stream already exists")); } - + if let Some(table) = &scan_ref.table { let columns = scan_ref.columns.clone(); let table_clone = table.clone(); @@ -343,27 +343,27 @@ export_runtime_op!( scan_data, async { let (table, columns, scan_ref) = scan_data; - + // Create new stream but don't get first batch let mut scan_builder = table.scan(); if let Some(cols) = columns { scan_builder = scan_builder.select(cols); } - + let table_scan = scan_builder.build()?; let stream = table_scan.to_arrow().await?; - + // Create stream wrapper let iceberg_stream = Box::new(IcebergStream { stream: AsyncMutex::new(stream), }); let stream_ptr = Box::into_raw(iceberg_stream); - + tracing::info!("Created stream pointer successfully: {:?}", stream_ptr); - + // Store stream in scan scan_ref.stream = Some(stream_ptr); - + // Return success flag Ok::(true) }, @@ -379,9 +379,9 @@ export_runtime_op!( return Err(anyhow::anyhow!("Null scan pointer provided")); } let scan_ref = unsafe { &*scan }; - + tracing::debug!("Checking for stream in scan, current stream pointer: {:?}", scan_ref.stream); - + if let Some(stream_ptr) = scan_ref.stream { tracing::debug!("Found stream pointer: {:?}", stream_ptr); let scan_ref = unsafe { &mut *(scan as *mut IcebergScan) }; @@ -395,9 +395,9 @@ export_runtime_op!( stream_data, async { let (stream_ref, scan_ref) = stream_data; - + let mut stream_guard = stream_ref.stream.lock().await; - + let result = match stream_guard.next().await { Some(Ok(record_batch)) => { let arrow_batch = serialize_record_batch(record_batch)?; @@ -410,10 +410,10 @@ export_runtime_op!( (ptr::null_mut(), true) } }; - + // Auto-store the result in scan let (batch_ptr, end_of_stream) = result; - + if batch_ptr.is_null() { tracing::debug!("Auto-storing NULL batch pointer - end of stream"); scan_ref.current_batch = None; @@ -422,7 +422,7 @@ export_runtime_op!( scan_ref.current_batch = Some(batch_ptr); } scan_ref.end_of_stream = end_of_stream; - + // Return only the end_of_stream status Ok(end_of_stream) }, @@ -436,14 +436,14 @@ pub extern "C" fn iceberg_scan_get_current_batch(scan: *mut IcebergScan) -> *mut if scan.is_null() { return ptr::null_mut(); } - + let scan_ref = unsafe { &*scan }; - + // If end of stream, return null (no more batches) if scan_ref.end_of_stream { return ptr::null_mut(); } - + scan_ref.current_batch.unwrap_or(ptr::null_mut()) } @@ -512,9 +512,9 @@ pub extern "C" fn iceberg_arrow_batch_free(scan: *mut IcebergScan) { if scan.is_null() { return; } - + let scan_ref = unsafe { &mut *scan }; - + if let Some(batch) = scan_ref.current_batch.take() { unsafe { let batch_ref = Box::from_raw(batch); @@ -553,4 +553,4 @@ pub extern "C" fn iceberg_cancel_context(ctx_ptr: *const Context) -> CResult { #[no_mangle] pub extern "C" fn iceberg_destroy_context(ctx_ptr: *const Context) -> CResult { destroy_context(ctx_ptr) -} \ No newline at end of file +} From a769ccef5e16161e0c83051d38f9502774e02491 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Sun, 7 Sep 2025 17:18:02 +0200 Subject: [PATCH 15/39] . --- tests/integration_test.c | 64 ++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/tests/integration_test.c b/tests/integration_test.c index 116740b..20299ef 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -185,13 +185,13 @@ int main(int argc, char* argv[]) { IcebergTableResponse table_response = {0}; async_completed = 0; // Reset flag result = iceberg_table_open_func(table_path, metadata_path, &table_response, (const void*)&async_completed); - + if (result != CRESULT_OK) { printf("❌ Failed to initiate table open operation\n"); unload_iceberg_library(); return 1; } - + // Wait for async operation to complete printf("⏳ Waiting for table open to complete...\n"); int timeout = 100; // 10 second timeout @@ -199,13 +199,13 @@ int main(int argc, char* argv[]) { usleep(100000); // 100ms timeout--; } - + if (!async_completed) { printf("❌ Async operation timed out\n"); unload_iceberg_library(); return 1; } - + // Check if the operation was successful if (table_response.result != CRESULT_OK) { printf("❌ Failed to open table (result=%d)", table_response.result); @@ -217,27 +217,27 @@ int main(int argc, char* argv[]) { unload_iceberg_library(); return 1; } - + if (!table_response.table) { printf("❌ No table returned from open operation\n"); unload_iceberg_library(); return 1; } - + printf("✅ Table opened successfully\n"); // 3. Create a scan using async API IcebergScanResponse scan_response = {0}; async_completed = 0; // Reset flag result = iceberg_table_scan_func(table_response.table, &scan_response, (const void*)&async_completed); - + if (result != CRESULT_OK) { printf("❌ Failed to initiate scan creation\n"); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; } - + // Wait for async operation to complete printf("⏳ Waiting for scan creation to complete...\n"); timeout = 100; // 10 second timeout @@ -245,14 +245,14 @@ int main(int argc, char* argv[]) { usleep(100000); // 100ms timeout--; } - + if (!async_completed) { printf("❌ Scan creation async operation timed out\n"); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; } - + // Check if the operation was successful if (scan_response.result != CRESULT_OK) { printf("❌ Failed to create scan"); @@ -265,22 +265,22 @@ int main(int argc, char* argv[]) { unload_iceberg_library(); return 1; } - + if (!scan_response.scan) { printf("❌ No scan returned from scan creation\n"); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; } - + printf("✅ Scan created successfully\n"); - // 4. Try to get a batch using new two-step async API + // 4. Try to get a batch using new two-step async API printf("Step 1: Initializing stream asynchronously...\n"); IcebergBoolResponse init_response = {0}; async_completed = 0; // Reset flag result = iceberg_scan_init_stream_func(scan_response.scan, &init_response, (const void*)&async_completed); - + if (result == CRESULT_OK) { // Wait for async operation to complete timeout = 100; // 10 second timeout @@ -288,7 +288,7 @@ int main(int argc, char* argv[]) { usleep(100000); // 100ms timeout--; } - + if (!async_completed) { printf("❌ Batch wait async operation timed out\n"); iceberg_scan_free_func(scan_response.scan); @@ -297,7 +297,7 @@ int main(int argc, char* argv[]) { return 1; } } - + if (result != CRESULT_OK) { printf("❌ Failed to initialize stream\n"); if (init_response.error_message) { @@ -309,14 +309,14 @@ int main(int argc, char* argv[]) { unload_iceberg_library(); return 1; } - + printf("✅ Stream initialized successfully\n"); - + printf("Step 2: Getting first batch from stream asynchronously...\n"); IcebergBoolResponse batch_response = {0}; async_completed = 0; // Reset flag result = iceberg_scan_next_batch_from_stream_func(scan_response.scan, &batch_response, (const void*)&async_completed); - + if (result == CRESULT_OK) { // Wait for batch retrieval to complete timeout = 100; // 10 second timeout @@ -324,7 +324,7 @@ int main(int argc, char* argv[]) { usleep(100000); // 100ms timeout--; } - + if (!async_completed) { printf("❌ Batch retrieval async operation timed out\n"); iceberg_scan_free_func(scan_response.scan); @@ -333,7 +333,7 @@ int main(int argc, char* argv[]) { return 1; } } - + if (result != CRESULT_OK) { printf("❌ Failed to get first batch from stream\n"); if (batch_response.error_message) { @@ -345,18 +345,18 @@ int main(int argc, char* argv[]) { unload_iceberg_library(); return 1; } - + printf("Step 3: Retrieving stored batch from scan...\n"); - + ArrowBatch* batch = iceberg_scan_get_current_batch_func(scan_response.scan); - + if (batch) { printf("✅ Successfully retrieved batch!\n"); printf("📦 Batch details:\n"); printf(" - Serialized size: %zu bytes\n", batch->length); printf(" - Data pointer: %p\n", (void*)batch->data); printf(" - First few bytes: "); - + // Print first 8 bytes as hex for verification size_t print_len = (batch->length < 8) ? batch->length : 8; for (size_t i = 0; i < print_len; i++) { @@ -364,7 +364,7 @@ int main(int argc, char* argv[]) { } printf("\n"); printf(" → Arrow IPC bytes ready for Julia Arrow.Stream()\n"); - + // Free the batch from the scan (clears the pointer and deallocates) iceberg_arrow_batch_free_func(scan_response.scan); } else { @@ -373,7 +373,7 @@ int main(int argc, char* argv[]) { // 4. Test context cancellation functions printf("Testing context cancellation functions...\n"); - + // Test that cancellation functions can be called with valid context pointers if (table_response.context != NULL) { printf(" - Testing cancel_context with table context...\n"); @@ -383,7 +383,7 @@ int main(int argc, char* argv[]) { } else { printf(" ⚠️ cancel_context returned: %d\n", cancel_result); } - + printf(" - Testing destroy_context with table context...\n"); int destroy_result = iceberg_destroy_context_func(table_response.context); if (destroy_result == 0) { @@ -393,7 +393,7 @@ int main(int argc, char* argv[]) { } table_response.context = NULL; // Mark as cleaned up } - + if (scan_response.context != NULL) { printf(" - Testing cancel_context with scan context...\n"); int cancel_result = iceberg_cancel_context_func(scan_response.context); @@ -402,7 +402,7 @@ int main(int argc, char* argv[]) { } else { printf(" ⚠️ cancel_context returned: %d\n", cancel_result); } - + printf(" - Testing destroy_context with scan context...\n"); int destroy_result = iceberg_destroy_context_func(scan_response.context); if (destroy_result == 0) { @@ -412,7 +412,7 @@ int main(int argc, char* argv[]) { } scan_response.context = NULL; // Mark as cleaned up } - + printf("✅ Context cancellation functions tested successfully\n"); // 5. Cleanup @@ -424,4 +424,4 @@ int main(int argc, char* argv[]) { printf("✅ Integration test completed successfully!\n"); printf("🚀 New async API is working correctly\n"); return 0; -} \ No newline at end of file +} From 4092b954ac561886fd74fd94264bc51e7a68bd00 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Sun, 7 Sep 2025 17:23:10 +0200 Subject: [PATCH 16/39] cargo fmt --- src/lib.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 20a2f99..82b7648 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,4 @@ -use std::ffi::{CStr, c_char, c_void}; +use std::ffi::{c_char, c_void, CStr}; use std::ptr; use tokio::sync::Mutex as AsyncMutex; @@ -12,16 +12,16 @@ use iceberg::TableIdent; // Import from object_store_ffi use object_store_ffi::{ - RT, RESULT_CB, ResultCallback, - CResult, Context, RawResponse, ResponseGuard, NotifyGuard, - with_cancellation, export_runtime_op, destroy_cstring, current_metrics, - cancel_context, destroy_context + cancel_context, current_metrics, destroy_context, destroy_cstring, export_runtime_op, + with_cancellation, CResult, Context, NotifyGuard, RawResponse, ResponseGuard, ResultCallback, + RESULT_CB, RT, }; // Stream wrapper for FFI - using async mutex to avoid blocking calls #[repr(C)] pub struct IcebergStream { - pub stream: AsyncMutex>>, + pub stream: + AsyncMutex>>, } unsafe impl Send for IcebergStream {} @@ -165,7 +165,6 @@ impl RawResponse for IcebergScanResponse { } } - // Helper function to create ArrowBatch from RecordBatch // TODO: Switch to zero-copy once Arrow.jl supports C API. fn serialize_record_batch(batch: RecordBatch) -> Result { @@ -429,7 +428,6 @@ export_runtime_op!( scan: *mut IcebergScan ); - // Get current batch from scan (returns null if end of stream or no batch) #[no_mangle] pub extern "C" fn iceberg_scan_get_current_batch(scan: *mut IcebergScan) -> *mut ArrowBatch { @@ -530,7 +528,8 @@ pub extern "C" fn iceberg_arrow_batch_free(scan: *mut IcebergScan) { pub extern "C" fn iceberg_error_message() -> *const c_char { // For backward compatibility, return a generic message // In the new async API, errors are returned through response structures - b"Error: Use new async API with response structures for detailed error information\0".as_ptr() as *const c_char + b"Error: Use new async API with response structures for detailed error information\0".as_ptr() + as *const c_char } // Re-export object_store_ffi utilities From 677892da16327a2d1f6860899c437820a4c621c7 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Sun, 7 Sep 2025 17:53:16 +0200 Subject: [PATCH 17/39] Fix integration test --- tests/integration_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration_test.c b/tests/integration_test.c index 20299ef..497bb48 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -177,8 +177,8 @@ int main(int argc, char* argv[]) { printf("✅ Runtime initialized successfully\n"); // 2. Open table using async API - const char* table_path = "s3://vustef-dev/tpch-sf0.1-no-part/nation"; - const char* metadata_path = "metadata/00001-1744d9f4-1472-4f8c-ac86-b0b7c291248e.metadata.json"; + const char* table_path = "s3://warehouse/tpch.sf01/nation"; + const char* metadata_path = "metadata/00001-4f9722c5-8764-4988-8063-874c3d453268.metadata.json"; printf("Opening table at: %s\n", table_path); printf("Using metadata file: %s\n", metadata_path); From b0c5f1a8d9dd6cae7fe887deb29ec768915b23f0 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Sun, 7 Sep 2025 17:58:02 +0200 Subject: [PATCH 18/39] julia features --- Cargo.toml | 4 ++++ run_integration_test.sh | 4 ++-- src/lib.rs | 20 ++++++++++++++++++-- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7097c2a..097ea5a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,10 @@ crate-type = ["cdylib"] [build-dependencies] cbindgen = "0.26" +[features] +default = ["julia"] +julia = [] + [dependencies] iceberg = "0.6.0" object_store_ffi = { git = "https://github.com/RelationalAI/object_store_ffi", rev = "db0f6a3fe282e267a53c119c0aca4b5af341df3f", default-features = false } diff --git a/run_integration_test.sh b/run_integration_test.sh index 40ff9dc..5b02b1e 100755 --- a/run_integration_test.sh +++ b/run_integration_test.sh @@ -46,9 +46,9 @@ if [ -f ".env" ]; then set +a fi -# Step 1: Build the Rust library +# Step 1: Build the Rust library (without julia feature for standalone C integration) print_status "Building Rust library..." -if cargo build; then +if cargo build --no-default-features; then print_success "Rust library built successfully" else print_error "Failed to build Rust library" diff --git a/src/lib.rs b/src/lib.rs index 82b7648..f56382a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,6 +17,17 @@ use object_store_ffi::{ RESULT_CB, RT, }; +// We use `jl_adopt_thread` to ensure Rust can call into Julia when notifying +// the Base.Event that is waiting for the Rust result. +// Note that this will be linked in from the Julia process, we do not try +// to link it while building this Rust lib. +#[cfg(feature = "julia")] +extern "C" { + fn jl_adopt_thread() -> i32; + fn jl_gc_safe_enter() -> i32; + fn jl_gc_disable_finalizers_internal() -> c_void; +} + // Stream wrapper for FFI - using async mutex to avoid blocking calls #[repr(C)] pub struct IcebergStream { @@ -217,9 +228,14 @@ pub extern "C" fn iceberg_init_runtime( let mut rt_builder = tokio::runtime::Builder::new_multi_thread(); rt_builder.enable_all(); - // Configure Julia thread adoption if needed in the future + // Configure Julia thread adoption for Julia integration rt_builder.on_thread_start(|| { - // For future Julia integration + #[cfg(feature = "julia")] + { + unsafe { jl_adopt_thread() }; + unsafe { jl_gc_safe_enter() }; + unsafe { jl_gc_disable_finalizers_internal() }; + } }); if config.n_threads > 0 { From d88bb243077d76ded2ce9bf477897bf2823bf91d Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Sun, 7 Sep 2025 20:01:05 +0200 Subject: [PATCH 19/39] fix linker error --- .cargo/config.toml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .cargo/config.toml diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..a31cba7 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,5 @@ +[target.aarch64-apple-darwin] +rustflags = ["-Clink-arg=-undefined","-Clink-arg=dynamic_lookup"] + +[target.x86_64-apple-darwin] +rustflags = ["-Clink-arg=-undefined","-Clink-arg=dynamic_lookup"] \ No newline at end of file From da943766ccb32731c9de79c700e712438f9c84c8 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Sun, 7 Sep 2025 20:44:16 +0200 Subject: [PATCH 20/39] fix integration test warnings --- include/iceberg_rust_ffi.h | 8 ++++---- run_integration_test.sh | 2 +- tests/integration_test.c | 23 ++++++++++++----------- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index 0281f1b..218243b 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -64,7 +64,7 @@ typedef struct { } IcebergBoolResponse; // Callback types -typedef int (*PanicCallback)(); +typedef int (*PanicCallback)(void); typedef int (*ResultCallback)(const void* task); // Runtime initialization @@ -87,17 +87,17 @@ void iceberg_arrow_batch_free(IcebergScan* scan); // Utility functions CResult iceberg_destroy_cstring(char* string); -const char* iceberg_current_metrics(); +const char* iceberg_current_metrics(void); // Context management functions for cancellation support CResult iceberg_cancel_context(const Context* ctx); CResult iceberg_destroy_context(const Context* ctx); // Backward compatibility -const char* iceberg_error_message(); +const char* iceberg_error_message(void); #ifdef __cplusplus } #endif -#endif // ICEBERG_RUST_FFI_H \ No newline at end of file +#endif // ICEBERG_RUST_FFI_H diff --git a/run_integration_test.sh b/run_integration_test.sh index 5b02b1e..c881531 100755 --- a/run_integration_test.sh +++ b/run_integration_test.sh @@ -74,7 +74,7 @@ print_status "Using library from: $LIB_PATH" # Step 2: Build the integration test print_status "Building integration test..." -if gcc -o integration_test tests/integration_test.c -Iinclude -L"$LIB_PATH" -liceberg_rust_ffi -lpthread -ldl -lm; then +if gcc -Wall -Wextra -o integration_test tests/integration_test.c -Iinclude -L"$LIB_PATH" -liceberg_rust_ffi -lpthread -ldl -lm; then print_success "Integration test built successfully" else print_error "Failed to build integration test" diff --git a/tests/integration_test.c b/tests/integration_test.c index 497bb48..2cc5e2a 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -5,9 +5,10 @@ #include #include #include +#include // Global function pointers for new async API -static int (*iceberg_init_runtime_func)(IcebergConfig config, int (*panic_callback)(), int (*result_callback)(const void*)) = NULL; +static int (*iceberg_init_runtime_func)(IcebergConfig config, int (*panic_callback)(void), int (*result_callback)(const void*)) = NULL; static int (*iceberg_table_open_func)(const char*, const char*, IcebergTableResponse*, const void*) = NULL; static int (*iceberg_table_scan_func)(IcebergTable*, IcebergScanResponse*, const void*) = NULL; static int (*iceberg_scan_init_stream_func)(IcebergScan*, IcebergBoolResponse*, const void*) = NULL; @@ -24,14 +25,14 @@ static int (*iceberg_destroy_context_func)(const void*) = NULL; static void* lib_handle = NULL; // Callback implementations -int panic_callback() { +static int panic_callback(void) { printf("🚨 Rust panic occurred!\n"); return 1; } volatile int async_completed = 0; -int result_callback(const void* task) { +static int result_callback(const void* task) { (void)task; // Suppress unused parameter warning // Signal that async operation completed async_completed = 1; @@ -39,7 +40,7 @@ int result_callback(const void* task) { } // Function to load the library and resolve symbols -int load_iceberg_library(const char* library_path) { +static int load_iceberg_library(const char* library_path) { printf("Loading Iceberg C API library from %s...\n", library_path); // Try to open the dynamic library @@ -55,7 +56,7 @@ int load_iceberg_library(const char* library_path) { dlerror(); // Resolve function symbols for new async API - iceberg_init_runtime_func = (int (*)(IcebergConfig, int (*)(), int (*)(const void*)))dlsym(lib_handle, "iceberg_init_runtime"); + iceberg_init_runtime_func = (int (*)(IcebergConfig, int (*)(void), int (*)(const void*)))dlsym(lib_handle, "iceberg_init_runtime"); if (!iceberg_init_runtime_func) { fprintf(stderr, "❌ Failed to resolve iceberg_init_runtime: %s\n", dlerror()); return 0; @@ -133,7 +134,7 @@ int load_iceberg_library(const char* library_path) { } // Function to unload the library -void unload_iceberg_library() { +static void unload_iceberg_library(void) { if (lib_handle) { dlclose(lib_handle); lib_handle = NULL; @@ -184,7 +185,7 @@ int main(int argc, char* argv[]) { IcebergTableResponse table_response = {0}; async_completed = 0; // Reset flag - result = iceberg_table_open_func(table_path, metadata_path, &table_response, (const void*)&async_completed); + result = iceberg_table_open_func(table_path, metadata_path, &table_response, (const void*)(uintptr_t)&async_completed); if (result != CRESULT_OK) { printf("❌ Failed to initiate table open operation\n"); @@ -229,7 +230,7 @@ int main(int argc, char* argv[]) { // 3. Create a scan using async API IcebergScanResponse scan_response = {0}; async_completed = 0; // Reset flag - result = iceberg_table_scan_func(table_response.table, &scan_response, (const void*)&async_completed); + result = iceberg_table_scan_func(table_response.table, &scan_response, (const void*)(uintptr_t)&async_completed); if (result != CRESULT_OK) { printf("❌ Failed to initiate scan creation\n"); @@ -279,7 +280,7 @@ int main(int argc, char* argv[]) { printf("Step 1: Initializing stream asynchronously...\n"); IcebergBoolResponse init_response = {0}; async_completed = 0; // Reset flag - result = iceberg_scan_init_stream_func(scan_response.scan, &init_response, (const void*)&async_completed); + result = iceberg_scan_init_stream_func(scan_response.scan, &init_response, (const void*)(uintptr_t)&async_completed); if (result == CRESULT_OK) { // Wait for async operation to complete @@ -315,7 +316,7 @@ int main(int argc, char* argv[]) { printf("Step 2: Getting first batch from stream asynchronously...\n"); IcebergBoolResponse batch_response = {0}; async_completed = 0; // Reset flag - result = iceberg_scan_next_batch_from_stream_func(scan_response.scan, &batch_response, (const void*)&async_completed); + result = iceberg_scan_next_batch_from_stream_func(scan_response.scan, &batch_response, (const void*)(uintptr_t)&async_completed); if (result == CRESULT_OK) { // Wait for batch retrieval to complete @@ -354,7 +355,7 @@ int main(int argc, char* argv[]) { printf("✅ Successfully retrieved batch!\n"); printf("📦 Batch details:\n"); printf(" - Serialized size: %zu bytes\n", batch->length); - printf(" - Data pointer: %p\n", (void*)batch->data); + printf(" - Data pointer: %p\n", (const void*)batch->data); printf(" - First few bytes: "); // Print first 8 bytes as hex for verification From 4a66e3022a0f8d850c35ae9c59602ccf1ebd42c5 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Sun, 7 Sep 2025 20:45:44 +0200 Subject: [PATCH 21/39] don't use default features for integration test, there's no Julia env there --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index aa7bc28..4f1ce11 100644 --- a/Makefile +++ b/Makefile @@ -20,9 +20,9 @@ all: build test # Generate C header generate-header: @if [ "$(TARGET)" = "local" ]; then \ - cargo build --release; \ + cargo build --release --no-default-features; \ else \ - cargo build --release --target $(TARGET); \ + cargo build --release --no-default-features --target $(TARGET); \ fi # Build the Rust library and generate header From c2d2dca252f9a0d2ecc42ccef6529bf4adf8e8ef Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Mon, 8 Sep 2025 11:08:14 +0200 Subject: [PATCH 22/39] Switch to returning batch from an async call --- include/iceberg_rust_ffi.h | 14 +++-- src/lib.rs | 103 ++++++++++++++++--------------------- tests/integration_test.c | 31 +++++------ 3 files changed, 65 insertions(+), 83 deletions(-) diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index 218243b..ed2e236 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -36,8 +36,6 @@ typedef struct { void* table; // Option - opaque void* columns; // Option> - opaque void* stream; // Option<*mut IcebergStream> - opaque - ArrowBatch* current_batch; // Option<*mut ArrowBatch> - bool end_of_stream; // bool } IcebergScan; // Response structures for async operations @@ -63,6 +61,13 @@ typedef struct { const Context* context; } IcebergBoolResponse; +typedef struct { + CResult result; + ArrowBatch* batch; + char* error_message; + const Context* context; +} IcebergBatchResponse; + // Callback types typedef int (*PanicCallback)(void); typedef int (*ResultCallback)(const void* task); @@ -81,9 +86,8 @@ void iceberg_scan_free(IcebergScan* scan); // New simplified async API CResult iceberg_scan_init_stream(IcebergScan* scan, IcebergBoolResponse* response, const void* handle); -CResult iceberg_scan_next_batch_from_stream(IcebergScan* scan, IcebergBoolResponse* response, const void* handle); -ArrowBatch* iceberg_scan_get_current_batch(IcebergScan* scan); -void iceberg_arrow_batch_free(IcebergScan* scan); +CResult iceberg_scan_next_batch(IcebergScan* scan, IcebergBatchResponse* response, const void* handle); +void iceberg_arrow_batch_free(ArrowBatch* batch); // Utility functions CResult iceberg_destroy_cstring(char* string); diff --git a/src/lib.rs b/src/lib.rs index f56382a..6aa11cb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -100,8 +100,6 @@ pub struct IcebergScan { pub table: Option, pub columns: Option>, pub stream: Option<*mut IcebergStream>, - pub current_batch: Option<*mut ArrowBatch>, - pub end_of_stream: bool, } // SAFETY: IcebergScan can be safely sent between threads because: @@ -176,6 +174,35 @@ impl RawResponse for IcebergScanResponse { } } +#[repr(C)] +pub struct IcebergBatchResponse { + result: CResult, + batch: *mut ArrowBatch, + error_message: *mut c_char, + context: *const Context, +} + +unsafe impl Send for IcebergBatchResponse {} + +impl RawResponse for IcebergBatchResponse { + type Payload = *mut ArrowBatch; + fn result_mut(&mut self) -> &mut CResult { + &mut self.result + } + fn context_mut(&mut self) -> &mut *const Context { + &mut self.context + } + fn error_message_mut(&mut self) -> &mut *mut c_char { + &mut self.error_message + } + fn set_payload(&mut self, payload: Option) { + match payload { + Some(batch_ptr) => self.batch = batch_ptr, + None => self.batch = ptr::null_mut(), + } + } +} + // Helper function to create ArrowBatch from RecordBatch // TODO: Switch to zero-copy once Arrow.jl supports C API. fn serialize_record_batch(batch: RecordBatch) -> Result { @@ -323,8 +350,6 @@ export_runtime_op!( table: Some(iceberg_table), columns: None, stream: None, - current_batch: None, - end_of_stream: false, })); Ok::<*mut IcebergScan, anyhow::Error>(scan_ptr) }, @@ -387,8 +412,8 @@ export_runtime_op!( // Async function to get next batch from existing stream export_runtime_op!( - iceberg_scan_next_batch_from_stream, - IcebergBoolResponse, + iceberg_scan_next_batch, + IcebergBatchResponse, || { if scan.is_null() { return Err(anyhow::anyhow!("Null scan pointer provided")); @@ -399,67 +424,35 @@ export_runtime_op!( if let Some(stream_ptr) = scan_ref.stream { tracing::debug!("Found stream pointer: {:?}", stream_ptr); - let scan_ref = unsafe { &mut *(scan as *mut IcebergScan) }; let stream_ref = unsafe { &*stream_ptr }; - Ok((stream_ref, scan_ref)) + Ok(stream_ref) } else { tracing::error!("No stream available in scan"); Err(anyhow::anyhow!("No stream available")) } }, - stream_data, + stream_ref, async { - let (stream_ref, scan_ref) = stream_data; let mut stream_guard = stream_ref.stream.lock().await; - let result = match stream_guard.next().await { + match stream_guard.next().await { Some(Ok(record_batch)) => { let arrow_batch = serialize_record_batch(record_batch)?; let batch_ptr = Box::into_raw(Box::new(arrow_batch)); - (batch_ptr, false) + Ok(batch_ptr) } Some(Err(e)) => return Err(anyhow::anyhow!("Error reading batch: {}", e)), None => { - // End of stream - (ptr::null_mut(), true) + // End of stream - return null pointer + tracing::debug!("End of stream reached, returning null pointer"); + Ok(ptr::null_mut()) } - }; - - // Auto-store the result in scan - let (batch_ptr, end_of_stream) = result; - - if batch_ptr.is_null() { - tracing::debug!("Auto-storing NULL batch pointer - end of stream"); - scan_ref.current_batch = None; - } else { - tracing::info!("Auto-storing batch pointer {:?} in scan", batch_ptr); - scan_ref.current_batch = Some(batch_ptr); } - scan_ref.end_of_stream = end_of_stream; - - // Return only the end_of_stream status - Ok(end_of_stream) }, scan: *mut IcebergScan ); -// Get current batch from scan (returns null if end of stream or no batch) -#[no_mangle] -pub extern "C" fn iceberg_scan_get_current_batch(scan: *mut IcebergScan) -> *mut ArrowBatch { - if scan.is_null() { - return ptr::null_mut(); - } - - let scan_ref = unsafe { &*scan }; - - // If end of stream, return null (no more batches) - if scan_ref.end_of_stream { - return ptr::null_mut(); - } - - scan_ref.current_batch.unwrap_or(ptr::null_mut()) -} // Synchronous operations #[no_mangle] @@ -509,10 +502,6 @@ pub extern "C" fn iceberg_scan_free(scan: *mut IcebergScan) { if !scan.is_null() { unsafe { let scan_ref = Box::from_raw(scan); - // Clean up any current batch - if let Some(batch_ptr) = scan_ref.current_batch { - let _ = Box::from_raw(batch_ptr); - } // Clean up any stream if let Some(stream_ptr) = scan_ref.stream { let _ = Box::from_raw(stream_ptr); @@ -522,19 +511,15 @@ pub extern "C" fn iceberg_scan_free(scan: *mut IcebergScan) { } #[no_mangle] -pub extern "C" fn iceberg_arrow_batch_free(scan: *mut IcebergScan) { - if scan.is_null() { +pub extern "C" fn iceberg_arrow_batch_free(batch: *mut ArrowBatch) { + if batch.is_null() { return; } - let scan_ref = unsafe { &mut *scan }; - - if let Some(batch) = scan_ref.current_batch.take() { - unsafe { - let batch_ref = Box::from_raw(batch); - if !batch_ref.rust_ptr.is_null() { - let _ = Box::from_raw(batch_ref.rust_ptr as *mut Vec); - } + unsafe { + let batch_ref = Box::from_raw(batch); + if !batch_ref.rust_ptr.is_null() { + let _ = Box::from_raw(batch_ref.rust_ptr as *mut Vec); } } } diff --git a/tests/integration_test.c b/tests/integration_test.c index 2cc5e2a..a7e4fbd 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -12,11 +12,10 @@ static int (*iceberg_init_runtime_func)(IcebergConfig config, int (*panic_callba static int (*iceberg_table_open_func)(const char*, const char*, IcebergTableResponse*, const void*) = NULL; static int (*iceberg_table_scan_func)(IcebergTable*, IcebergScanResponse*, const void*) = NULL; static int (*iceberg_scan_init_stream_func)(IcebergScan*, IcebergBoolResponse*, const void*) = NULL; -static int (*iceberg_scan_next_batch_from_stream_func)(IcebergScan*, IcebergBoolResponse*, const void*) = NULL; +static int (*iceberg_scan_next_batch_func)(IcebergScan*, IcebergBatchResponse*, const void*) = NULL; static void (*iceberg_table_free_func)(IcebergTable*) = NULL; static void (*iceberg_scan_free_func)(IcebergScan*) = NULL; -static void (*iceberg_arrow_batch_free_func)(IcebergScan*) = NULL; -static ArrowBatch* (*iceberg_scan_get_current_batch_func)(IcebergScan*) = NULL; +static void (*iceberg_arrow_batch_free_func)(ArrowBatch*) = NULL; static int (*iceberg_destroy_cstring_func)(char*) = NULL; static int (*iceberg_cancel_context_func)(const void*) = NULL; static int (*iceberg_destroy_context_func)(const void*) = NULL; @@ -79,9 +78,9 @@ static int load_iceberg_library(const char* library_path) { fprintf(stderr, "❌ Failed to resolve iceberg_scan_init_stream: %s\n", dlerror()); return 0; } - iceberg_scan_next_batch_from_stream_func = (int (*)(IcebergScan*, IcebergBoolResponse*, const void*))dlsym(lib_handle, "iceberg_scan_next_batch_from_stream"); - if (!iceberg_scan_next_batch_from_stream_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_scan_next_batch_from_stream: %s\n", dlerror()); + iceberg_scan_next_batch_func = (int (*)(IcebergScan*, IcebergBatchResponse*, const void*))dlsym(lib_handle, "iceberg_scan_next_batch"); + if (!iceberg_scan_next_batch_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_scan_next_batch: %s\n", dlerror()); return 0; } @@ -99,18 +98,12 @@ static int load_iceberg_library(const char* library_path) { return 0; } - iceberg_arrow_batch_free_func = (void (*)(IcebergScan*))dlsym(lib_handle, "iceberg_arrow_batch_free"); + iceberg_arrow_batch_free_func = (void (*)(ArrowBatch*))dlsym(lib_handle, "iceberg_arrow_batch_free"); if (!iceberg_arrow_batch_free_func) { fprintf(stderr, "❌ Failed to resolve iceberg_arrow_batch_free: %s\n", dlerror()); return 0; } - iceberg_scan_get_current_batch_func = (ArrowBatch* (*)(IcebergScan*))dlsym(lib_handle, "iceberg_scan_get_current_batch"); - if (!iceberg_scan_get_current_batch_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_scan_get_current_batch: %s\n", dlerror()); - return 0; - } - iceberg_destroy_cstring_func = (int (*)(char*))dlsym(lib_handle, "iceberg_destroy_cstring"); if (!iceberg_destroy_cstring_func) { fprintf(stderr, "❌ Failed to resolve iceberg_destroy_cstring: %s\n", dlerror()); @@ -314,9 +307,9 @@ int main(int argc, char* argv[]) { printf("✅ Stream initialized successfully\n"); printf("Step 2: Getting first batch from stream asynchronously...\n"); - IcebergBoolResponse batch_response = {0}; + IcebergBatchResponse batch_response = {0}; async_completed = 0; // Reset flag - result = iceberg_scan_next_batch_from_stream_func(scan_response.scan, &batch_response, (const void*)(uintptr_t)&async_completed); + result = iceberg_scan_next_batch_func(scan_response.scan, &batch_response, (const void*)(uintptr_t)&async_completed); if (result == CRESULT_OK) { // Wait for batch retrieval to complete @@ -347,9 +340,9 @@ int main(int argc, char* argv[]) { return 1; } - printf("Step 3: Retrieving stored batch from scan...\n"); + printf("Step 3: Checking batch result...\n"); - ArrowBatch* batch = iceberg_scan_get_current_batch_func(scan_response.scan); + ArrowBatch* batch = batch_response.batch; if (batch) { printf("✅ Successfully retrieved batch!\n"); @@ -366,8 +359,8 @@ int main(int argc, char* argv[]) { printf("\n"); printf(" → Arrow IPC bytes ready for Julia Arrow.Stream()\n"); - // Free the batch from the scan (clears the pointer and deallocates) - iceberg_arrow_batch_free_func(scan_response.scan); + // Free the batch directly + iceberg_arrow_batch_free_func(batch); } else { printf("✅ Reached end of stream (no more batches)\n"); } From a03ff377ce7cc250e6e64429e5cdeefcec19612c Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Mon, 8 Sep 2025 11:22:02 +0200 Subject: [PATCH 23/39] remove unused bool field --- include/iceberg_rust_ffi.h | 5 ++--- src/lib.rs | 21 ++++++++++----------- tests/integration_test.c | 6 +++--- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index ed2e236..606f1e9 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -56,10 +56,9 @@ typedef struct { typedef struct { CResult result; - bool success; char* error_message; const Context* context; -} IcebergBoolResponse; +} IcebergResponse; typedef struct { CResult result; @@ -85,7 +84,7 @@ CResult iceberg_scan_select_columns(IcebergScan* scan, const char** column_names void iceberg_scan_free(IcebergScan* scan); // New simplified async API -CResult iceberg_scan_init_stream(IcebergScan* scan, IcebergBoolResponse* response, const void* handle); +CResult iceberg_scan_init_stream(IcebergScan* scan, IcebergResponse* response, const void* handle); CResult iceberg_scan_next_batch(IcebergScan* scan, IcebergBatchResponse* response, const void* handle); void iceberg_arrow_batch_free(ArrowBatch* batch); diff --git a/src/lib.rs b/src/lib.rs index 6aa11cb..0fa3b04 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -36,19 +36,18 @@ pub struct IcebergStream { } unsafe impl Send for IcebergStream {} -// Unified response type for operations that return a boolean status +// Simple response type for operations that only need success/failure status #[repr(C)] -pub struct IcebergBoolResponse { +pub struct IcebergResponse { result: CResult, - success: bool, error_message: *mut c_char, context: *const Context, } -unsafe impl Send for IcebergBoolResponse {} +unsafe impl Send for IcebergResponse {} -impl RawResponse for IcebergBoolResponse { - type Payload = bool; +impl RawResponse for IcebergResponse { + type Payload = (); fn result_mut(&mut self) -> &mut CResult { &mut self.result @@ -62,8 +61,8 @@ impl RawResponse for IcebergBoolResponse { &mut self.error_message } - fn set_payload(&mut self, payload: Option) { - self.success = payload.unwrap_or(false); + fn set_payload(&mut self, _payload: Option) { + // No payload for simple response } } @@ -359,7 +358,7 @@ export_runtime_op!( // Async function to initialize stream without getting first batch export_runtime_op!( iceberg_scan_init_stream, - IcebergBoolResponse, + IcebergResponse, || { if scan.is_null() { return Err(anyhow::anyhow!("Null scan pointer provided")); @@ -404,8 +403,8 @@ export_runtime_op!( // Store stream in scan scan_ref.stream = Some(stream_ptr); - // Return success flag - Ok::(true) + // Return success (no payload needed) + Ok::<(), anyhow::Error>(()) }, scan: *mut IcebergScan ); diff --git a/tests/integration_test.c b/tests/integration_test.c index a7e4fbd..afc3f37 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -11,7 +11,7 @@ static int (*iceberg_init_runtime_func)(IcebergConfig config, int (*panic_callback)(void), int (*result_callback)(const void*)) = NULL; static int (*iceberg_table_open_func)(const char*, const char*, IcebergTableResponse*, const void*) = NULL; static int (*iceberg_table_scan_func)(IcebergTable*, IcebergScanResponse*, const void*) = NULL; -static int (*iceberg_scan_init_stream_func)(IcebergScan*, IcebergBoolResponse*, const void*) = NULL; +static int (*iceberg_scan_init_stream_func)(IcebergScan*, IcebergResponse*, const void*) = NULL; static int (*iceberg_scan_next_batch_func)(IcebergScan*, IcebergBatchResponse*, const void*) = NULL; static void (*iceberg_table_free_func)(IcebergTable*) = NULL; static void (*iceberg_scan_free_func)(IcebergScan*) = NULL; @@ -73,7 +73,7 @@ static int load_iceberg_library(const char* library_path) { return 0; } - iceberg_scan_init_stream_func = (int (*)(IcebergScan*, IcebergBoolResponse*, const void*))dlsym(lib_handle, "iceberg_scan_init_stream"); + iceberg_scan_init_stream_func = (int (*)(IcebergScan*, IcebergResponse*, const void*))dlsym(lib_handle, "iceberg_scan_init_stream"); if (!iceberg_scan_init_stream_func) { fprintf(stderr, "❌ Failed to resolve iceberg_scan_init_stream: %s\n", dlerror()); return 0; @@ -271,7 +271,7 @@ int main(int argc, char* argv[]) { // 4. Try to get a batch using new two-step async API printf("Step 1: Initializing stream asynchronously...\n"); - IcebergBoolResponse init_response = {0}; + IcebergResponse init_response = {0}; async_completed = 0; // Reset flag result = iceberg_scan_init_stream_func(scan_response.scan, &init_response, (const void*)(uintptr_t)&async_completed); From 92871b7753612d188482339fd19c36b099c4f9b5 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Mon, 8 Sep 2025 12:57:07 +0200 Subject: [PATCH 24/39] fix fmt --- src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 0fa3b04..c7b89ee 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -452,7 +452,6 @@ export_runtime_op!( scan: *mut IcebergScan ); - // Synchronous operations #[no_mangle] pub extern "C" fn iceberg_table_free(table: *mut IcebergTable) { From f10d52643dc25aee9459246f6a5f3a4326bd4e2b Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Tue, 9 Sep 2025 15:25:31 +0200 Subject: [PATCH 25/39] remove unused cbindgen --- Makefile | 10 +++------- cbindgen.toml | 33 --------------------------------- src/lib.rs | 4 ---- 3 files changed, 3 insertions(+), 44 deletions(-) delete mode 100644 cbindgen.toml diff --git a/Makefile b/Makefile index 4f1ce11..8e51903 100644 --- a/Makefile +++ b/Makefile @@ -17,17 +17,14 @@ TARGET = local # Default target all: build test -# Generate C header -generate-header: +# Build the Rust library +build-lib: @if [ "$(TARGET)" = "local" ]; then \ cargo build --release --no-default-features; \ else \ cargo build --release --no-default-features --target $(TARGET); \ fi -# Build the Rust library and generate header -build-lib: generate-header - # Build the integration test build-test: build-lib $(CC) $(CFLAGS) -o $(TEST_NAME) $(TEST_SOURCE) $(LDFLAGS) @@ -67,7 +64,6 @@ clean-all: clean help: @echo "Available targets:" @echo " all - Build and run integration test" - @echo " generate-header - Generate C header file using cbindgen" @echo " build-lib - Build only the Rust library" @echo " build-test - Build the integration test (requires library)" @echo " build - Build everything" @@ -78,4 +74,4 @@ help: @echo " clean-all - Clean everything including target directory" @echo " help - Show this help message" -.PHONY: all generate-header build-lib build-test build test clean clean-all help stop-containers run-containers \ No newline at end of file +.PHONY: all build-lib build-test build test clean clean-all help stop-containers run-containers \ No newline at end of file diff --git a/cbindgen.toml b/cbindgen.toml deleted file mode 100644 index a1d446e..0000000 --- a/cbindgen.toml +++ /dev/null @@ -1,33 +0,0 @@ -language = "C" -include_guard = "ICEBERG_RUST_FFI_H" -autogen_warning = "// This file is auto-generated by cbindgen. Do not edit manually." -tab_width = 4 -documentation = true -documentation_style = "doxy" -line_length = 100 -cpp_compat = false -no_includes = true - -[export] -prefix = "iceberg_" -include = ["IcebergTable", "IcebergScan", "ArrowBatch", "IcebergResult"] -rename = { - "IcebergTable" = "IcebergTable", - "IcebergScan" = "IcebergScan", - "ArrowBatch" = "ArrowBatch", - "IcebergResult" = "IcebergResult" -} - -[defines] -"target_os = \"macos\"" = "TARGET_OS_MACOS" -"target_os = \"linux\"" = "TARGET_OS_LINUX" -"target_os = \"windows\"" = "TARGET_OS_WINDOWS" - -[parse] -parse_deps = true -include = ["iceberg_rust_ffi"] -extra_bindings = [] - -[header] -prefix = "#ifndef ICEBERG_RUST_FFI_H\n#define ICEBERG_RUST_FFI_H\n\n#include \n#include \n#include \n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n" -suffix = "\n#ifdef __cplusplus\n}\n#endif\n\n#endif // ICEBERG_RUST_FFI_H\n" \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index c7b89ee..3e4e684 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -66,10 +66,6 @@ impl RawResponse for IcebergResponse { } } -// cbindgen annotations -#[allow(non_camel_case_types)] -#[allow(non_snake_case)] - // Callback types for Julia integration type PanicCallback = unsafe extern "C" fn() -> i32; From 1dbb8aaa33b3ba0740c6011cde02eea2876231d0 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Tue, 9 Sep 2025 17:02:25 +0200 Subject: [PATCH 26/39] . --- Cargo.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 097ea5a..962c6e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,9 +7,6 @@ edition = "2021" name = "iceberg_rust_ffi" crate-type = ["cdylib"] -[build-dependencies] -cbindgen = "0.26" - [features] default = ["julia"] julia = [] From 904459610831b81ad7c6230c48a39509399f7c47 Mon Sep 17 00:00:00 2001 From: Vukasin Stefanovic Date: Tue, 9 Sep 2025 17:02:35 +0200 Subject: [PATCH 27/39] minor --- src/lib.rs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3e4e684..00fafc7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,14 +28,6 @@ extern "C" { fn jl_gc_disable_finalizers_internal() -> c_void; } -// Stream wrapper for FFI - using async mutex to avoid blocking calls -#[repr(C)] -pub struct IcebergStream { - pub stream: - AsyncMutex>>, -} -unsafe impl Send for IcebergStream {} - // Simple response type for operations that only need success/failure status #[repr(C)] pub struct IcebergResponse { @@ -90,6 +82,15 @@ pub struct IcebergTable { pub table: iceberg::table::Table, } +// Stream wrapper for FFI - using async mutex to avoid blocking calls +#[repr(C)] +pub struct IcebergStream { + pub stream: + AsyncMutex>>, +} + +unsafe impl Send for IcebergStream {} + #[repr(C)] pub struct IcebergScan { pub table: Option, @@ -428,7 +429,6 @@ export_runtime_op!( }, stream_ref, async { - let mut stream_guard = stream_ref.stream.lock().await; match stream_guard.next().await { @@ -498,7 +498,8 @@ pub extern "C" fn iceberg_scan_free(scan: *mut IcebergScan) { let scan_ref = Box::from_raw(scan); // Clean up any stream if let Some(stream_ptr) = scan_ref.stream { - let _ = Box::from_raw(stream_ptr); + let stream = Box::from_raw(stream_ptr); + drop(stream); } } } From ee328f486d9140c833fe5197ea10235cf2b7ba6d Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 11 Sep 2025 11:08:49 +0200 Subject: [PATCH 28/39] Refactorings --- Cargo.lock | 142 ++---------------- include/iceberg_rust_ffi.h | 37 +++-- src/lib.rs | 287 +++++++++++++++++-------------------- tests/integration_test.c | 175 +++++++++++----------- 4 files changed, 258 insertions(+), 383 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 29efd68..81b7a24 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -341,17 +341,6 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", -] - [[package]] name = "autocfg" version = "1.5.0" @@ -421,12 +410,6 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - [[package]] name = "bitflags" version = "2.9.1" @@ -544,25 +527,6 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" -[[package]] -name = "cbindgen" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da6bc11b07529f16944307272d5bd9b22530bc7d05751717c9d416586cedab49" -dependencies = [ - "clap", - "heck 0.4.1", - "indexmap 1.9.3", - "log", - "proc-macro2", - "quote", - "serde", - "serde_json", - "syn 1.0.109", - "tempfile", - "toml", -] - [[package]] name = "cc" version = "1.2.30" @@ -601,30 +565,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "clap" -version = "3.2.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" -dependencies = [ - "atty", - "bitflags 1.3.2", - "clap_lex", - "indexmap 1.9.3", - "strsim 0.10.0", - "termcolor", - "textwrap", -] - -[[package]] -name = "clap_lex" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" -dependencies = [ - "os_str_bytes", -] - [[package]] name = "cmake" version = "0.1.54" @@ -809,7 +749,7 @@ dependencies = [ "ident_case", "proc-macro2", "quote", - "strsim 0.11.1", + "strsim", "syn 2.0.104", ] @@ -939,7 +879,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "syn 2.0.104", @@ -1010,7 +950,7 @@ version = "25.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" dependencies = [ - "bitflags 2.9.1", + "bitflags", "rustc_version", ] @@ -1287,27 +1227,12 @@ version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - [[package]] name = "hermit-abi" version = "0.5.2" @@ -1577,7 +1502,6 @@ dependencies = [ "anyhow", "arrow-array", "arrow-ipc", - "cbindgen", "futures", "iceberg", "libc", @@ -1745,7 +1669,7 @@ version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" dependencies = [ - "bitflags 2.9.1", + "bitflags", "cfg-if", "libc", ] @@ -2226,7 +2150,7 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" dependencies = [ - "hermit-abi 0.5.2", + "hermit-abi", "libc", ] @@ -2353,7 +2277,7 @@ version = "0.10.73" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8" dependencies = [ - "bitflags 2.9.1", + "bitflags", "cfg-if", "foreign-types", "libc", @@ -2429,12 +2353,6 @@ dependencies = [ "hashbrown 0.14.5", ] -[[package]] -name = "os_str_bytes" -version = "6.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" - [[package]] name = "overload" version = "0.1.1" @@ -2805,7 +2723,7 @@ version = "11.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186" dependencies = [ - "bitflags 2.9.1", + "bitflags", ] [[package]] @@ -2814,7 +2732,7 @@ version = "0.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" dependencies = [ - "bitflags 2.9.1", + "bitflags", ] [[package]] @@ -3088,7 +3006,7 @@ version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" dependencies = [ - "bitflags 2.9.1", + "bitflags", "errno", "libc", "linux-raw-sys", @@ -3220,7 +3138,7 @@ version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80fb1d92c5028aa318b4b8bd7302a5bfcf48be96a37fc6fc790f806b0004ee0c" dependencies = [ - "bitflags 2.9.1", + "bitflags", "core-foundation", "core-foundation-sys", "libc", @@ -3440,7 +3358,7 @@ version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "syn 2.0.104", @@ -3493,12 +3411,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" -[[package]] -name = "strsim" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" - [[package]] name = "strsim" version = "0.11.1" @@ -3526,7 +3438,7 @@ version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "rustversion", @@ -3539,7 +3451,7 @@ version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "syn 2.0.104", @@ -3618,21 +3530,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "termcolor" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "textwrap" -version = "0.16.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" - [[package]] name = "thiserror" version = "1.0.69" @@ -3823,15 +3720,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "toml" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" -dependencies = [ - "serde", -] - [[package]] name = "toml_datetime" version = "0.6.11" @@ -3870,7 +3758,7 @@ version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" dependencies = [ - "bitflags 2.9.1", + "bitflags", "bytes", "futures-util", "http", @@ -4614,7 +4502,7 @@ version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags 2.9.1", + "bitflags", ] [[package]] diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index 606f1e9..ab48b64 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -31,13 +31,6 @@ typedef struct { void* rust_ptr; } ArrowBatch; -// IcebergScan structure - now exposed for direct field access -typedef struct { - void* table; // Option - opaque - void* columns; // Option> - opaque - void* stream; // Option<*mut IcebergStream> - opaque -} IcebergScan; - // Response structures for async operations typedef struct { CResult result; @@ -46,6 +39,9 @@ typedef struct { const Context* context; } IcebergTableResponse; +typedef struct IcebergScanBuilder IcebergScanBuilder; +typedef struct IcebergScan IcebergScan; + typedef struct { CResult result; IcebergScan* scan; @@ -53,6 +49,16 @@ typedef struct { const Context* context; } IcebergScanResponse; +typedef struct { + void *stream; +} IcebergStream; + +typedef struct { + CResult result; + IcebergStream* stream; + char* error_message; + const Context* context; +} IcebergStreamResponse; typedef struct { CResult result; @@ -76,16 +82,21 @@ CResult iceberg_init_runtime(IcebergConfig config, PanicCallback panic_callback, // Async table operations CResult iceberg_table_open(const char* table_path, const char* metadata_path, IcebergTableResponse* response, const void* handle); -void iceberg_table_free(IcebergTable* table); +void iceberg_free(IcebergTable* table); -// Async scan operations -CResult iceberg_table_scan(IcebergTable* table, IcebergScanResponse* response, const void* handle); -CResult iceberg_scan_select_columns(IcebergScan* scan, const char** column_names, size_t num_columns); +// Scan creation is synchronous +IcebergScanBuilder* iceberg_scan_builder(IcebergTable* table); +IcebergScanBuilder* iceberg_select_columns(IcebergScanBuilder* scan, const char** column_names, size_t num_columns); +IcebergScan* iceberg_scan(IcebergScanBuilder* builder); void iceberg_scan_free(IcebergScan* scan); +void iceberg_scan_builder_free(IcebergScanBuilder* builder); + +// Async scan operations // New simplified async API -CResult iceberg_scan_init_stream(IcebergScan* scan, IcebergResponse* response, const void* handle); -CResult iceberg_scan_next_batch(IcebergScan* scan, IcebergBatchResponse* response, const void* handle); +CResult iceberg_stream(IcebergScan* scan, IcebergStreamResponse* response, const void* handle); +CResult iceberg_next_batch(IcebergStream* stream, IcebergBatchResponse* response, const void* handle); +void iceberg_stream_free(IcebergStream* stream); void iceberg_arrow_batch_free(ArrowBatch* batch); // Utility functions diff --git a/src/lib.rs b/src/lib.rs index 00fafc7..401be52 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +use futures::TryStreamExt; use std::ffi::{c_char, c_void, CStr}; use std::ptr; use tokio::sync::Mutex as AsyncMutex; @@ -5,8 +6,8 @@ use tokio::sync::Mutex as AsyncMutex; use anyhow::Result; use arrow_array::RecordBatch; use arrow_ipc::writer::StreamWriter; -use futures::stream::StreamExt; use iceberg::io::FileIOBuilder; +use iceberg::scan::{TableScan, TableScanBuilder}; use iceberg::table::StaticTable; use iceberg::TableIdent; @@ -82,28 +83,27 @@ pub struct IcebergTable { pub table: iceberg::table::Table, } -// Stream wrapper for FFI - using async mutex to avoid blocking calls #[repr(C)] -pub struct IcebergStream { - pub stream: - AsyncMutex>>, +pub struct IcebergScanBuilder { + pub builder: TableScanBuilder<'static>, } -unsafe impl Send for IcebergStream {} - #[repr(C)] pub struct IcebergScan { - pub table: Option, - pub columns: Option>, - pub stream: Option<*mut IcebergStream>, + pub scan: TableScan, } -// SAFETY: IcebergScan can be safely sent between threads because: -// - table: iceberg::table::Table is Send -// - columns: Vec is Send -// - stream, current_batch: raw pointers are Send by our design (we control access) unsafe impl Send for IcebergScan {} +// Stream wrapper for FFI - using async mutex to avoid blocking calls +#[repr(C)] +pub struct IcebergStream { + pub stream: + AsyncMutex>>, +} + +unsafe impl Send for IcebergStream {} + #[repr(C)] pub struct ArrowBatch { pub data: *const u8, @@ -123,7 +123,7 @@ pub struct IcebergTableResponse { unsafe impl Send for IcebergTableResponse {} impl RawResponse for IcebergTableResponse { - type Payload = *mut IcebergTable; + type Payload = IcebergTable; fn result_mut(&mut self) -> &mut CResult { &mut self.result } @@ -135,37 +135,46 @@ impl RawResponse for IcebergTableResponse { } fn set_payload(&mut self, payload: Option) { match payload { - Some(table_ptr) => self.table = table_ptr, + Some(table) => { + let table_ptr = Box::into_raw(Box::new(table)); + self.table = table_ptr; + } None => self.table = ptr::null_mut(), } } } #[repr(C)] -pub struct IcebergScanResponse { +pub struct IcebergStreamResponse { result: CResult, - scan: *mut IcebergScan, + stream: *mut IcebergStream, error_message: *mut c_char, context: *const Context, } -unsafe impl Send for IcebergScanResponse {} +unsafe impl Send for IcebergStreamResponse {} + +impl RawResponse for IcebergStreamResponse { + type Payload = IcebergStream; -impl RawResponse for IcebergScanResponse { - type Payload = *mut IcebergScan; fn result_mut(&mut self) -> &mut CResult { &mut self.result } + fn context_mut(&mut self) -> &mut *const Context { &mut self.context } + fn error_message_mut(&mut self) -> &mut *mut c_char { &mut self.error_message } + fn set_payload(&mut self, payload: Option) { match payload { - Some(scan_ptr) => self.scan = scan_ptr, - None => self.scan = ptr::null_mut(), + Some(stream) => { + self.stream = Box::into_raw(Box::new(stream)); + } + None => self.stream = ptr::null_mut(), } } } @@ -181,7 +190,7 @@ pub struct IcebergBatchResponse { unsafe impl Send for IcebergBatchResponse {} impl RawResponse for IcebergBatchResponse { - type Payload = *mut ArrowBatch; + type Payload = Option; fn result_mut(&mut self) -> &mut CResult { &mut self.result } @@ -192,8 +201,8 @@ impl RawResponse for IcebergBatchResponse { &mut self.error_message } fn set_payload(&mut self, payload: Option) { - match payload { - Some(batch_ptr) => self.batch = batch_ptr, + match payload.flatten() { + Some(batch) => self.batch = Box::into_raw(Box::new(batch)), None => self.batch = ptr::null_mut(), } } @@ -317,135 +326,123 @@ export_runtime_op!( StaticTable::from_metadata_file(&full_metadata_path, table_ident, file_io).await?; tracing::info!("Successfully loaded static table, converting to table"); - let iceberg_table = static_table.into_table(); - - let table_ptr = Box::into_raw(Box::new(IcebergTable { - table: iceberg_table, - })); - - Ok::<*mut IcebergTable, anyhow::Error>(table_ptr) + Ok::(IcebergTable { table: static_table.into_table() }) }, table_path: *const c_char, metadata_path: *const c_char ); -// Use export_runtime_op! macro for scan creation -export_runtime_op!( - iceberg_table_scan, - IcebergScanResponse, - || { - if table.is_null() { - return Err(anyhow::anyhow!("Null table pointer provided")); +#[no_mangle] +pub extern "C" fn iceberg_scan_builder(table: *mut IcebergTable) -> *mut IcebergScanBuilder { + if table.is_null() { + return ptr::null_mut(); + } + let table_ref = unsafe { &*table }; + let scan_builder = table_ref.table.scan(); + return Box::into_raw(Box::new(IcebergScanBuilder { + builder: scan_builder, + })); +} + +#[no_mangle] +pub extern "C" fn iceberg_select_columns( + builder: *mut IcebergScanBuilder, + column_names: *const *const c_char, + num_columns: usize, +) -> *mut IcebergScanBuilder { + if builder.is_null() || column_names.is_null() { + return ptr::null_mut(); + } + + let mut columns = Vec::new(); + + for i in 0..num_columns { + let col_ptr = unsafe { *column_names.add(i) }; + if col_ptr.is_null() { + return ptr::null_mut(); } - let table_ref = unsafe { &*table }; - Ok(table_ref.table.clone()) - }, - iceberg_table, - async { - let scan_ptr = Box::into_raw(Box::new(IcebergScan { - table: Some(iceberg_table), - columns: None, - stream: None, - })); - Ok::<*mut IcebergScan, anyhow::Error>(scan_ptr) - }, - table: *mut IcebergTable -); -// Async function to initialize stream without getting first batch + let col_str = unsafe { + match CStr::from_ptr(col_ptr).to_str() { + Ok(s) => s, + Err(_) => return ptr::null_mut(), + } + }; + columns.push(col_str.to_string()); + } + + let builder = unsafe { Box::from_raw(builder).builder }; + + Box::into_raw(Box::new(IcebergScanBuilder { + builder: builder.select(columns), + })) +} + +#[no_mangle] +pub extern "C" fn iceberg_scan(builder: *mut IcebergScanBuilder) -> *mut IcebergScan { + if builder.is_null() { + return ptr::null_mut(); + } + let builder = unsafe { Box::from_raw(builder).builder }; + match builder.build() { + Ok(table_scan) => { + let scan_ptr = Box::into_raw(Box::new(IcebergScan { scan: table_scan })); + scan_ptr + } + Err(_) => ptr::null_mut(), + } +} + +// Async function to initialize stream from a table scan without getting first batch export_runtime_op!( - iceberg_scan_init_stream, - IcebergResponse, + iceberg_stream, + IcebergStreamResponse, || { if scan.is_null() { return Err(anyhow::anyhow!("Null scan pointer provided")); } - let scan_ref = unsafe { &*scan }; - - // Only initialize if we don't already have a stream - if scan_ref.stream.is_some() { - return Err(anyhow::anyhow!("Stream already exists")); - } - - if let Some(table) = &scan_ref.table { - let columns = scan_ref.columns.clone(); - let table_clone = table.clone(); - let scan_ref = unsafe { &mut *(scan as *mut IcebergScan) }; - Ok((table_clone, columns, scan_ref)) - } else { - Err(anyhow::anyhow!("Table not available")) - } + let scan_ref = unsafe { &((*scan).scan) }; + return Ok(scan_ref) }, - scan_data, + scan_ref, async { - let (table, columns, scan_ref) = scan_data; - - // Create new stream but don't get first batch - let mut scan_builder = table.scan(); - if let Some(cols) = columns { - scan_builder = scan_builder.select(cols); - } - - let table_scan = scan_builder.build()?; - let stream = table_scan.to_arrow().await?; - - // Create stream wrapper - let iceberg_stream = Box::new(IcebergStream { + let stream = scan_ref.to_arrow().await?; + Ok::(IcebergStream { stream: AsyncMutex::new(stream), - }); - let stream_ptr = Box::into_raw(iceberg_stream); - - tracing::info!("Created stream pointer successfully: {:?}", stream_ptr); - - // Store stream in scan - scan_ref.stream = Some(stream_ptr); - - // Return success (no payload needed) - Ok::<(), anyhow::Error>(()) + }) }, scan: *mut IcebergScan ); // Async function to get next batch from existing stream export_runtime_op!( - iceberg_scan_next_batch, + iceberg_next_batch, IcebergBatchResponse, || { - if scan.is_null() { - return Err(anyhow::anyhow!("Null scan pointer provided")); - } - let scan_ref = unsafe { &*scan }; - - tracing::debug!("Checking for stream in scan, current stream pointer: {:?}", scan_ref.stream); - - if let Some(stream_ptr) = scan_ref.stream { - tracing::debug!("Found stream pointer: {:?}", stream_ptr); - let stream_ref = unsafe { &*stream_ptr }; - Ok(stream_ref) - } else { - tracing::error!("No stream available in scan"); - Err(anyhow::anyhow!("No stream available")) + if stream.is_null() { + return Err(anyhow::anyhow!("Null stream pointer provided")); } + let stream_ref = unsafe { &*stream }; + Ok(stream_ref) }, stream_ref, async { let mut stream_guard = stream_ref.stream.lock().await; - match stream_guard.next().await { - Some(Ok(record_batch)) => { + match stream_guard.try_next().await { + Ok(Some(record_batch)) => { let arrow_batch = serialize_record_batch(record_batch)?; - let batch_ptr = Box::into_raw(Box::new(arrow_batch)); - Ok(batch_ptr) + Ok(Some(arrow_batch)) } - Some(Err(e)) => return Err(anyhow::anyhow!("Error reading batch: {}", e)), - None => { - // End of stream - return null pointer - tracing::debug!("End of stream reached, returning null pointer"); - Ok(ptr::null_mut()) + Ok(None) => { + // End of stream + tracing::debug!("End of stream reached"); + Ok(None) } + Err(e) => Err(anyhow::anyhow!("Error reading batch: {}", e)), } }, - scan: *mut IcebergScan + stream: *mut IcebergStream ); // Synchronous operations @@ -459,48 +456,28 @@ pub extern "C" fn iceberg_table_free(table: *mut IcebergTable) { } #[no_mangle] -pub extern "C" fn iceberg_scan_select_columns( - scan: *mut IcebergScan, - column_names: *const *const c_char, - num_columns: usize, -) -> CResult { - if scan.is_null() || column_names.is_null() { - return CResult::Error; +pub extern "C" fn iceberg_scan_free(scan: *mut IcebergScan) { + if !scan.is_null() { + unsafe { + let _ = Box::from_raw(scan); + } } +} - let scan_ref = unsafe { &mut *scan }; - let mut columns = Vec::new(); - - for i in 0..num_columns { - let col_ptr = unsafe { *column_names.add(i) }; - if col_ptr.is_null() { - return CResult::Error; +#[no_mangle] +pub extern "C" fn iceberg_scan_builder_free(builder: *mut IcebergScanBuilder) { + if !builder.is_null() { + unsafe { + let _ = Box::from_raw(builder); } - - let col_str = unsafe { - match CStr::from_ptr(col_ptr).to_str() { - Ok(s) => s, - Err(_) => return CResult::Error, - } - }; - - columns.push(col_str.to_string()); } - - scan_ref.columns = Some(columns); - CResult::Ok } #[no_mangle] -pub extern "C" fn iceberg_scan_free(scan: *mut IcebergScan) { - if !scan.is_null() { +pub extern "C" fn iceberg_stream_free(stream: *mut IcebergStream) { + if !stream.is_null() { unsafe { - let scan_ref = Box::from_raw(scan); - // Clean up any stream - if let Some(stream_ptr) = scan_ref.stream { - let stream = Box::from_raw(stream_ptr); - drop(stream); - } + let _ = Box::from_raw(stream); } } } diff --git a/tests/integration_test.c b/tests/integration_test.c index 79f1ffb..86dc6df 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -10,11 +10,14 @@ // Global function pointers for new async API static int (*iceberg_init_runtime_func)(IcebergConfig config, int (*panic_callback)(void), int (*result_callback)(const void*)) = NULL; static int (*iceberg_table_open_func)(const char*, const char*, IcebergTableResponse*, const void*) = NULL; -static int (*iceberg_table_scan_func)(IcebergTable*, IcebergScanResponse*, const void*) = NULL; -static int (*iceberg_scan_init_stream_func)(IcebergScan*, IcebergResponse*, const void*) = NULL; -static int (*iceberg_scan_next_batch_func)(IcebergScan*, IcebergBatchResponse*, const void*) = NULL; +static IcebergScan* (*iceberg_scan_func)(IcebergScanBuilder*) = NULL; +static IcebergScanBuilder* (*iceberg_scan_builder_func)(IcebergTable*) = NULL; +static int (*iceberg_stream_func)(IcebergScan*, IcebergStreamResponse*, const void*) = NULL; +static int (*iceberg_next_batch_func)(IcebergStream*, IcebergBatchResponse*, const void*) = NULL; static void (*iceberg_table_free_func)(IcebergTable*) = NULL; static void (*iceberg_scan_free_func)(IcebergScan*) = NULL; +static void (*iceberg_scan_builder_free_func)(IcebergScanBuilder*) = NULL; +static void (*iceberg_stream_free_func)(IcebergStream*) = NULL; static void (*iceberg_arrow_batch_free_func)(ArrowBatch*) = NULL; static int (*iceberg_destroy_cstring_func)(char*) = NULL; static int (*iceberg_cancel_context_func)(const void*) = NULL; @@ -67,24 +70,34 @@ static int load_iceberg_library(const char* library_path) { return 0; } - iceberg_table_scan_func = (int (*)(IcebergTable*, IcebergScanResponse*, const void*))dlsym(lib_handle, "iceberg_table_scan"); - if (!iceberg_table_scan_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_table_scan: %s\n", dlerror()); + iceberg_scan_func = (IcebergScan* (*)(IcebergScanBuilder*))dlsym(lib_handle, "iceberg_scan"); + if (!iceberg_scan_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_scan: %s\n", dlerror()); return 0; } - iceberg_scan_init_stream_func = (int (*)(IcebergScan*, IcebergResponse*, const void*))dlsym(lib_handle, "iceberg_scan_init_stream"); - if (!iceberg_scan_init_stream_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_scan_init_stream: %s\n", dlerror()); + iceberg_scan_builder_func = (IcebergScanBuilder* (*)(IcebergTable*))dlsym(lib_handle, "iceberg_scan_builder"); + if (!iceberg_scan_builder_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_scan_builder: %s\n", dlerror()); return 0; } - iceberg_scan_next_batch_func = (int (*)(IcebergScan*, IcebergBatchResponse*, const void*))dlsym(lib_handle, "iceberg_scan_next_batch"); - if (!iceberg_scan_next_batch_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_scan_next_batch: %s\n", dlerror()); + + iceberg_scan_builder_free_func = (void (*)(IcebergScanBuilder*))dlsym(lib_handle, "iceberg_scan_builder_free"); + if (!iceberg_scan_builder_free_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_scan_builder_free: %s\n", dlerror()); return 0; } - + iceberg_stream_func = (int (*)(IcebergScan*, IcebergStreamResponse*, const void*))dlsym(lib_handle, "iceberg_stream"); + if (!iceberg_stream_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_stream: %s\n", dlerror()); + return 0; + } + iceberg_next_batch_func = (int (*)(IcebergStream*, IcebergBatchResponse*, const void*))dlsym(lib_handle, "iceberg_next_batch"); + if (!iceberg_next_batch_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_next_batch: %s\n", dlerror()); + return 0; + } iceberg_table_free_func = (void (*)(IcebergTable*))dlsym(lib_handle, "iceberg_table_free"); if (!iceberg_table_free_func) { @@ -98,6 +111,12 @@ static int load_iceberg_library(const char* library_path) { return 0; } + iceberg_stream_free_func = (void (*)(IcebergStream*))dlsym(lib_handle, "iceberg_stream_free"); + if (!iceberg_stream_free_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_stream_free: %s\n", dlerror()); + return 0; + } + iceberg_arrow_batch_free_func = (void (*)(ArrowBatch*))dlsym(lib_handle, "iceberg_arrow_batch_free"); if (!iceberg_arrow_batch_free_func) { fprintf(stderr, "❌ Failed to resolve iceberg_arrow_batch_free: %s\n", dlerror()); @@ -221,19 +240,42 @@ int main(int argc, char* argv[]) { printf("✅ Table opened successfully\n"); // 3. Create a scan using async API - IcebergScanResponse scan_response = {0}; - async_completed = 0; // Reset flag - result = iceberg_table_scan_func(table_response.table, &scan_response, (const void*)(uintptr_t)&async_completed); + IcebergScanBuilder *builder = iceberg_scan_builder_func(table_response.table); - if (result != CRESULT_OK) { + if (builder == NULL) { + printf("❌ Failed to create scan builder\n"); + iceberg_table_free_func(table_response.table); + unload_iceberg_library(); + return 1; + } + + IcebergScan *scan = iceberg_scan_func(builder); + + if (scan == NULL) { printf("❌ Failed to initiate scan creation\n"); + iceberg_scan_builder_free_func(builder); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; } - // Wait for async operation to complete - printf("⏳ Waiting for scan creation to complete...\n"); + printf("✅ Scan created successfully\n"); + + printf("Step 1: Initializing stream asynchronously...\n"); + IcebergStreamResponse stream_response = {0}; + async_completed = 0; + result = iceberg_stream_func(scan, &stream_response, (const void*)(uintptr_t)&async_completed); + if (result != CRESULT_OK) { + printf("❌ Failed to create stream\n"); + iceberg_scan_free_func(scan); + iceberg_scan_builder_free_func(builder); + iceberg_table_free_func(table_response.table); + unload_iceberg_library(); + return 1; + } + + // Wait for completion + printf("⏳ Waiting for stream creation to complete...\n"); timeout = 100; // 10 second timeout while (!async_completed && timeout > 0) { usleep(100000); // 100ms @@ -241,75 +283,45 @@ int main(int argc, char* argv[]) { } if (!async_completed) { - printf("❌ Scan creation async operation timed out\n"); + printf("❌ Stream creation async operation timed out\n"); + iceberg_scan_free_func(scan); + iceberg_scan_builder_free_func(builder); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; } - // Check if the operation was successful - if (scan_response.result != CRESULT_OK) { - printf("❌ Failed to create scan"); - if (scan_response.error_message) { - printf(": %s", scan_response.error_message); - iceberg_destroy_cstring_func(scan_response.error_message); + // Check if operation was successful + if (stream_response.result != CRESULT_OK) { + printf("❌ Failed to create stream"); + if (stream_response.error_message) { + printf(": %s", stream_response.error_message); + iceberg_destroy_cstring_func(stream_response.error_message); } printf("\n"); + iceberg_scan_free_func(scan); + iceberg_scan_builder_free_func(builder); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; } - if (!scan_response.scan) { - printf("❌ No scan returned from scan creation\n"); + if (!stream_response.stream) { + printf("❌ No stream returned from stream creation\n"); + iceberg_scan_free_func(scan); + iceberg_scan_builder_free_func(builder); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; } - printf("✅ Scan created successfully\n"); + printf("✅ Stream created successfully\n"); // 4. Try to get a batch using new two-step async API - printf("Step 1: Initializing stream asynchronously...\n"); - IcebergResponse init_response = {0}; - async_completed = 0; // Reset flag - result = iceberg_scan_init_stream_func(scan_response.scan, &init_response, (const void*)(uintptr_t)&async_completed); - - if (result == CRESULT_OK) { - // Wait for async operation to complete - timeout = 100; // 10 second timeout - while (!async_completed && timeout > 0) { - usleep(100000); // 100ms - timeout--; - } - - if (!async_completed) { - printf("❌ Batch wait async operation timed out\n"); - iceberg_scan_free_func(scan_response.scan); - iceberg_table_free_func(table_response.table); - unload_iceberg_library(); - return 1; - } - } - - if (result != CRESULT_OK) { - printf("❌ Failed to initialize stream\n"); - if (init_response.error_message) { - printf(" Error: %s\n", init_response.error_message); - iceberg_destroy_cstring_func(init_response.error_message); - } - iceberg_scan_free_func(scan_response.scan); - iceberg_table_free_func(table_response.table); - unload_iceberg_library(); - return 1; - } - - printf("✅ Stream initialized successfully\n"); - printf("Step 2: Getting first batch from stream asynchronously...\n"); IcebergBatchResponse batch_response = {0}; async_completed = 0; // Reset flag - result = iceberg_scan_next_batch_func(scan_response.scan, &batch_response, (const void*)(uintptr_t)&async_completed); + result = iceberg_next_batch_func(stream_response.stream, &batch_response, (const void*)(uintptr_t)&async_completed); if (result == CRESULT_OK) { // Wait for batch retrieval to complete @@ -321,7 +333,9 @@ int main(int argc, char* argv[]) { if (!async_completed) { printf("❌ Batch retrieval async operation timed out\n"); - iceberg_scan_free_func(scan_response.scan); + iceberg_stream_free_func(stream_response.stream); + iceberg_scan_free_func(scan); + iceberg_scan_builder_free_func(builder); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; @@ -334,7 +348,9 @@ int main(int argc, char* argv[]) { printf(" Error: %s\n", batch_response.error_message); iceberg_destroy_cstring_func(batch_response.error_message); } - iceberg_scan_free_func(scan_response.scan); + iceberg_stream_free_func(stream_response.stream); + iceberg_scan_free_func(scan); + iceberg_scan_builder_free_func(builder); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; @@ -388,30 +404,13 @@ int main(int argc, char* argv[]) { table_response.context = NULL; // Mark as cleaned up } - if (scan_response.context != NULL) { - printf(" - Testing cancel_context with scan context...\n"); - int cancel_result = iceberg_cancel_context_func(scan_response.context); - if (cancel_result == 0) { - printf(" ✅ cancel_context succeeded\n"); - } else { - printf(" ⚠️ cancel_context returned: %d\n", cancel_result); - } - - printf(" - Testing destroy_context with scan context...\n"); - int destroy_result = iceberg_destroy_context_func(scan_response.context); - if (destroy_result == 0) { - printf(" ✅ destroy_context succeeded\n"); - } else { - printf(" ⚠️ destroy_context returned: %d\n", destroy_result); - } - scan_response.context = NULL; // Mark as cleaned up - } - printf("✅ Context cancellation functions tested successfully\n"); // 5. Cleanup printf("Cleaning up resources...\n"); - iceberg_scan_free_func(scan_response.scan); + iceberg_stream_free_func(stream_response.stream); + iceberg_scan_free_func(scan); + iceberg_scan_builder_free_func(builder); iceberg_table_free_func(table_response.table); unload_iceberg_library(); From 07ee491d22b67b243c7eb82e544a1929758a6c06 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 11 Sep 2025 11:17:02 +0200 Subject: [PATCH 29/39] More PR comments --- include/iceberg_rust_ffi.h | 10 ++++------ src/lib.rs | 25 +++++++++++++++++-------- tests/integration_test.c | 6 +++--- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index ab48b64..0828f24 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -16,7 +16,7 @@ typedef struct Context Context; // Configuration for iceberg runtime typedef struct { size_t n_threads; -} IcebergConfig; +} IcebergStaticConfig; // Result types typedef enum { @@ -78,22 +78,20 @@ typedef int (*PanicCallback)(void); typedef int (*ResultCallback)(const void* task); // Runtime initialization -CResult iceberg_init_runtime(IcebergConfig config, PanicCallback panic_callback, ResultCallback result_callback); +CResult iceberg_init_runtime(IcebergStaticConfig config, PanicCallback panic_callback, ResultCallback result_callback); // Async table operations CResult iceberg_table_open(const char* table_path, const char* metadata_path, IcebergTableResponse* response, const void* handle); void iceberg_free(IcebergTable* table); -// Scan creation is synchronous +// Synchronous scan creation IcebergScanBuilder* iceberg_scan_builder(IcebergTable* table); IcebergScanBuilder* iceberg_select_columns(IcebergScanBuilder* scan, const char** column_names, size_t num_columns); IcebergScan* iceberg_scan(IcebergScanBuilder* builder); void iceberg_scan_free(IcebergScan* scan); void iceberg_scan_builder_free(IcebergScanBuilder* builder); -// Async scan operations - -// New simplified async API +// Async streaming API CResult iceberg_stream(IcebergScan* scan, IcebergStreamResponse* response, const void* handle); CResult iceberg_next_batch(IcebergStream* stream, IcebergBatchResponse* response, const void* handle); void iceberg_stream_free(IcebergStream* stream); diff --git a/src/lib.rs b/src/lib.rs index 401be52..50b54d8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -65,13 +65,13 @@ type PanicCallback = unsafe extern "C" fn() -> i32; // Simple config for iceberg - only what we need #[derive(Copy, Clone)] #[repr(C)] -pub struct IcebergConfig { +pub struct IcebergStaticConfig { n_threads: usize, } -impl Default for IcebergConfig { +impl Default for IcebergStaticConfig { fn default() -> Self { - IcebergConfig { + IcebergStaticConfig { n_threads: 0, // 0 means use tokio's default } } @@ -190,7 +190,7 @@ pub struct IcebergBatchResponse { unsafe impl Send for IcebergBatchResponse {} impl RawResponse for IcebergBatchResponse { - type Payload = Option; + type Payload = Option; fn result_mut(&mut self) -> &mut CResult { &mut self.result } @@ -202,7 +202,17 @@ impl RawResponse for IcebergBatchResponse { } fn set_payload(&mut self, payload: Option) { match payload.flatten() { - Some(batch) => self.batch = Box::into_raw(Box::new(batch)), + Some(batch) => { + let arrow_batch = serialize_record_batch(batch); + match arrow_batch { + Ok(arrow_batch) => { + self.batch = Box::into_raw(Box::new(arrow_batch)); + } + Err(_) => { + self.batch = ptr::null_mut(); + } + } + } None => self.batch = ptr::null_mut(), } } @@ -232,7 +242,7 @@ fn serialize_record_batch(batch: RecordBatch) -> Result { // Initialize runtime - configure RT and RESULT_CB directly #[no_mangle] pub extern "C" fn iceberg_init_runtime( - config: IcebergConfig, + config: IcebergStaticConfig, panic_callback: PanicCallback, result_callback: ResultCallback, ) -> CResult { @@ -431,8 +441,7 @@ export_runtime_op!( match stream_guard.try_next().await { Ok(Some(record_batch)) => { - let arrow_batch = serialize_record_batch(record_batch)?; - Ok(Some(arrow_batch)) + Ok(Some(record_batch)) } Ok(None) => { // End of stream diff --git a/tests/integration_test.c b/tests/integration_test.c index 86dc6df..0a90afb 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -8,7 +8,7 @@ #include // Global function pointers for new async API -static int (*iceberg_init_runtime_func)(IcebergConfig config, int (*panic_callback)(void), int (*result_callback)(const void*)) = NULL; +static int (*iceberg_init_runtime_func)(IcebergStaticConfig config, int (*panic_callback)(void), int (*result_callback)(const void*)) = NULL; static int (*iceberg_table_open_func)(const char*, const char*, IcebergTableResponse*, const void*) = NULL; static IcebergScan* (*iceberg_scan_func)(IcebergScanBuilder*) = NULL; static IcebergScanBuilder* (*iceberg_scan_builder_func)(IcebergTable*) = NULL; @@ -58,7 +58,7 @@ static int load_iceberg_library(const char* library_path) { dlerror(); // Resolve function symbols for new async API - iceberg_init_runtime_func = (int (*)(IcebergConfig, int (*)(void), int (*)(const void*)))dlsym(lib_handle, "iceberg_init_runtime"); + iceberg_init_runtime_func = (int (*)(IcebergStaticConfig, int (*)(void), int (*)(const void*)))dlsym(lib_handle, "iceberg_init_runtime"); if (!iceberg_init_runtime_func) { fprintf(stderr, "❌ Failed to resolve iceberg_init_runtime: %s\n", dlerror()); return 0; @@ -180,7 +180,7 @@ int main(int argc, char* argv[]) { // 1. Initialize the runtime printf("Initializing Iceberg runtime...\n"); - IcebergConfig config = {0}; // Default config - 0 threads means use default + IcebergStaticConfig config = {0}; // Default config - 0 threads means use default int result = iceberg_init_runtime_func(config, panic_callback, result_callback); if (result != CRESULT_OK) { printf("❌ Failed to initialize runtime\n"); From 206d4bb64dd6200eafca825f7d1bef32b24f892d Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 11 Sep 2025 11:21:31 +0200 Subject: [PATCH 30/39] Fix free --- include/iceberg_rust_ffi.h | 12 ++++++------ src/lib.rs | 24 ++++++++++++------------ tests/integration_test.c | 31 +++++++++++++++---------------- 3 files changed, 33 insertions(+), 34 deletions(-) diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index 0828f24..1d06d57 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -51,14 +51,14 @@ typedef struct { typedef struct { void *stream; -} IcebergStream; +} IcebergArrowStream; typedef struct { CResult result; - IcebergStream* stream; + IcebergArrowStream* stream; char* error_message; const Context* context; -} IcebergStreamResponse; +} IcebergArrowStreamResponse; typedef struct { CResult result; @@ -92,9 +92,9 @@ void iceberg_scan_free(IcebergScan* scan); void iceberg_scan_builder_free(IcebergScanBuilder* builder); // Async streaming API -CResult iceberg_stream(IcebergScan* scan, IcebergStreamResponse* response, const void* handle); -CResult iceberg_next_batch(IcebergStream* stream, IcebergBatchResponse* response, const void* handle); -void iceberg_stream_free(IcebergStream* stream); +CResult iceberg_arrow_stream(IcebergScan* scan, IcebergArrowStreamResponse* response, const void* handle); +CResult iceberg_next_batch(IcebergArrowStream* stream, IcebergBatchResponse* response, const void* handle); +void iceberg_arrow_stream_free(IcebergArrowStream* stream); void iceberg_arrow_batch_free(ArrowBatch* batch); // Utility functions diff --git a/src/lib.rs b/src/lib.rs index 50b54d8..6af3d4e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -97,12 +97,12 @@ unsafe impl Send for IcebergScan {} // Stream wrapper for FFI - using async mutex to avoid blocking calls #[repr(C)] -pub struct IcebergStream { +pub struct IcebergArrowStream { pub stream: AsyncMutex>>, } -unsafe impl Send for IcebergStream {} +unsafe impl Send for IcebergArrowStream {} #[repr(C)] pub struct ArrowBatch { @@ -145,17 +145,17 @@ impl RawResponse for IcebergTableResponse { } #[repr(C)] -pub struct IcebergStreamResponse { +pub struct IcebergArrowStreamResponse { result: CResult, - stream: *mut IcebergStream, + stream: *mut IcebergArrowStream, error_message: *mut c_char, context: *const Context, } -unsafe impl Send for IcebergStreamResponse {} +unsafe impl Send for IcebergArrowStreamResponse {} -impl RawResponse for IcebergStreamResponse { - type Payload = IcebergStream; +impl RawResponse for IcebergArrowStreamResponse { + type Payload = IcebergArrowStream; fn result_mut(&mut self) -> &mut CResult { &mut self.result @@ -405,8 +405,8 @@ pub extern "C" fn iceberg_scan(builder: *mut IcebergScanBuilder) -> *mut Iceberg // Async function to initialize stream from a table scan without getting first batch export_runtime_op!( - iceberg_stream, - IcebergStreamResponse, + iceberg_arrow_stream, + IcebergArrowStreamResponse, || { if scan.is_null() { return Err(anyhow::anyhow!("Null scan pointer provided")); @@ -417,7 +417,7 @@ export_runtime_op!( scan_ref, async { let stream = scan_ref.to_arrow().await?; - Ok::(IcebergStream { + Ok::(IcebergArrowStream { stream: AsyncMutex::new(stream), }) }, @@ -451,7 +451,7 @@ export_runtime_op!( Err(e) => Err(anyhow::anyhow!("Error reading batch: {}", e)), } }, - stream: *mut IcebergStream + stream: *mut IcebergArrowStream ); // Synchronous operations @@ -483,7 +483,7 @@ pub extern "C" fn iceberg_scan_builder_free(builder: *mut IcebergScanBuilder) { } #[no_mangle] -pub extern "C" fn iceberg_stream_free(stream: *mut IcebergStream) { +pub extern "C" fn iceberg_arrow_stream_free(stream: *mut IcebergArrowStream) { if !stream.is_null() { unsafe { let _ = Box::from_raw(stream); diff --git a/tests/integration_test.c b/tests/integration_test.c index 0a90afb..47419b5 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -12,12 +12,12 @@ static int (*iceberg_init_runtime_func)(IcebergStaticConfig config, int (*panic_ static int (*iceberg_table_open_func)(const char*, const char*, IcebergTableResponse*, const void*) = NULL; static IcebergScan* (*iceberg_scan_func)(IcebergScanBuilder*) = NULL; static IcebergScanBuilder* (*iceberg_scan_builder_func)(IcebergTable*) = NULL; -static int (*iceberg_stream_func)(IcebergScan*, IcebergStreamResponse*, const void*) = NULL; -static int (*iceberg_next_batch_func)(IcebergStream*, IcebergBatchResponse*, const void*) = NULL; +static int (*iceberg_arrow_stream_func)(IcebergScan*, IcebergArrowStreamResponse*, const void*) = NULL; +static int (*iceberg_next_batch_func)(IcebergArrowStream*, IcebergBatchResponse*, const void*) = NULL; static void (*iceberg_table_free_func)(IcebergTable*) = NULL; static void (*iceberg_scan_free_func)(IcebergScan*) = NULL; static void (*iceberg_scan_builder_free_func)(IcebergScanBuilder*) = NULL; -static void (*iceberg_stream_free_func)(IcebergStream*) = NULL; +static void (*iceberg_arrow_stream_free_func)(IcebergArrowStream*) = NULL; static void (*iceberg_arrow_batch_free_func)(ArrowBatch*) = NULL; static int (*iceberg_destroy_cstring_func)(char*) = NULL; static int (*iceberg_cancel_context_func)(const void*) = NULL; @@ -88,12 +88,12 @@ static int load_iceberg_library(const char* library_path) { return 0; } - iceberg_stream_func = (int (*)(IcebergScan*, IcebergStreamResponse*, const void*))dlsym(lib_handle, "iceberg_stream"); - if (!iceberg_stream_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_stream: %s\n", dlerror()); + iceberg_arrow_stream_func = (int (*)(IcebergScan*, IcebergArrowStreamResponse*, const void*))dlsym(lib_handle, "iceberg_arrow_stream"); + if (!iceberg_arrow_stream_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_arrow_stream: %s\n", dlerror()); return 0; } - iceberg_next_batch_func = (int (*)(IcebergStream*, IcebergBatchResponse*, const void*))dlsym(lib_handle, "iceberg_next_batch"); + iceberg_next_batch_func = (int (*)(IcebergArrowStream*, IcebergBatchResponse*, const void*))dlsym(lib_handle, "iceberg_next_batch"); if (!iceberg_next_batch_func) { fprintf(stderr, "❌ Failed to resolve iceberg_next_batch: %s\n", dlerror()); return 0; @@ -111,9 +111,9 @@ static int load_iceberg_library(const char* library_path) { return 0; } - iceberg_stream_free_func = (void (*)(IcebergStream*))dlsym(lib_handle, "iceberg_stream_free"); - if (!iceberg_stream_free_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_stream_free: %s\n", dlerror()); + iceberg_arrow_stream_free_func = (void (*)(IcebergArrowStream*))dlsym(lib_handle, "iceberg_arrow_stream_free"); + if (!iceberg_arrow_stream_free_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_arrow_stream_free: %s\n", dlerror()); return 0; } @@ -262,9 +262,9 @@ int main(int argc, char* argv[]) { printf("✅ Scan created successfully\n"); printf("Step 1: Initializing stream asynchronously...\n"); - IcebergStreamResponse stream_response = {0}; + IcebergArrowStreamResponse stream_response = {0}; async_completed = 0; - result = iceberg_stream_func(scan, &stream_response, (const void*)(uintptr_t)&async_completed); + result = iceberg_arrow_stream_func(scan, &stream_response, (const void*)(uintptr_t)&async_completed); if (result != CRESULT_OK) { printf("❌ Failed to create stream\n"); iceberg_scan_free_func(scan); @@ -333,7 +333,7 @@ int main(int argc, char* argv[]) { if (!async_completed) { printf("❌ Batch retrieval async operation timed out\n"); - iceberg_stream_free_func(stream_response.stream); + iceberg_arrow_stream_free_func(stream_response.stream); iceberg_scan_free_func(scan); iceberg_scan_builder_free_func(builder); iceberg_table_free_func(table_response.table); @@ -348,7 +348,7 @@ int main(int argc, char* argv[]) { printf(" Error: %s\n", batch_response.error_message); iceberg_destroy_cstring_func(batch_response.error_message); } - iceberg_stream_free_func(stream_response.stream); + iceberg_arrow_stream_free_func(stream_response.stream); iceberg_scan_free_func(scan); iceberg_scan_builder_free_func(builder); iceberg_table_free_func(table_response.table); @@ -408,9 +408,8 @@ int main(int argc, char* argv[]) { // 5. Cleanup printf("Cleaning up resources...\n"); - iceberg_stream_free_func(stream_response.stream); + iceberg_arrow_stream_free_func(stream_response.stream); iceberg_scan_free_func(scan); - iceberg_scan_builder_free_func(builder); iceberg_table_free_func(table_response.table); unload_iceberg_library(); From 19c51b888c474d10e05549fde882fbaa949524ed Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 11 Sep 2025 11:27:30 +0200 Subject: [PATCH 31/39] . --- include/iceberg_rust_ffi.h | 1 - tests/integration_test.c | 15 --------------- 2 files changed, 16 deletions(-) diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index 1d06d57..f79d960 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -89,7 +89,6 @@ IcebergScanBuilder* iceberg_scan_builder(IcebergTable* table); IcebergScanBuilder* iceberg_select_columns(IcebergScanBuilder* scan, const char** column_names, size_t num_columns); IcebergScan* iceberg_scan(IcebergScanBuilder* builder); void iceberg_scan_free(IcebergScan* scan); -void iceberg_scan_builder_free(IcebergScanBuilder* builder); // Async streaming API CResult iceberg_arrow_stream(IcebergScan* scan, IcebergArrowStreamResponse* response, const void* handle); diff --git a/tests/integration_test.c b/tests/integration_test.c index 47419b5..dcf89ff 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -16,7 +16,6 @@ static int (*iceberg_arrow_stream_func)(IcebergScan*, IcebergArrowStreamResponse static int (*iceberg_next_batch_func)(IcebergArrowStream*, IcebergBatchResponse*, const void*) = NULL; static void (*iceberg_table_free_func)(IcebergTable*) = NULL; static void (*iceberg_scan_free_func)(IcebergScan*) = NULL; -static void (*iceberg_scan_builder_free_func)(IcebergScanBuilder*) = NULL; static void (*iceberg_arrow_stream_free_func)(IcebergArrowStream*) = NULL; static void (*iceberg_arrow_batch_free_func)(ArrowBatch*) = NULL; static int (*iceberg_destroy_cstring_func)(char*) = NULL; @@ -81,13 +80,6 @@ static int load_iceberg_library(const char* library_path) { fprintf(stderr, "❌ Failed to resolve iceberg_scan_builder: %s\n", dlerror()); return 0; } - - iceberg_scan_builder_free_func = (void (*)(IcebergScanBuilder*))dlsym(lib_handle, "iceberg_scan_builder_free"); - if (!iceberg_scan_builder_free_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_scan_builder_free: %s\n", dlerror()); - return 0; - } - iceberg_arrow_stream_func = (int (*)(IcebergScan*, IcebergArrowStreamResponse*, const void*))dlsym(lib_handle, "iceberg_arrow_stream"); if (!iceberg_arrow_stream_func) { fprintf(stderr, "❌ Failed to resolve iceberg_arrow_stream: %s\n", dlerror()); @@ -253,7 +245,6 @@ int main(int argc, char* argv[]) { if (scan == NULL) { printf("❌ Failed to initiate scan creation\n"); - iceberg_scan_builder_free_func(builder); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; @@ -268,7 +259,6 @@ int main(int argc, char* argv[]) { if (result != CRESULT_OK) { printf("❌ Failed to create stream\n"); iceberg_scan_free_func(scan); - iceberg_scan_builder_free_func(builder); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; @@ -285,7 +275,6 @@ int main(int argc, char* argv[]) { if (!async_completed) { printf("❌ Stream creation async operation timed out\n"); iceberg_scan_free_func(scan); - iceberg_scan_builder_free_func(builder); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; @@ -300,7 +289,6 @@ int main(int argc, char* argv[]) { } printf("\n"); iceberg_scan_free_func(scan); - iceberg_scan_builder_free_func(builder); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; @@ -309,7 +297,6 @@ int main(int argc, char* argv[]) { if (!stream_response.stream) { printf("❌ No stream returned from stream creation\n"); iceberg_scan_free_func(scan); - iceberg_scan_builder_free_func(builder); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; @@ -335,7 +322,6 @@ int main(int argc, char* argv[]) { printf("❌ Batch retrieval async operation timed out\n"); iceberg_arrow_stream_free_func(stream_response.stream); iceberg_scan_free_func(scan); - iceberg_scan_builder_free_func(builder); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; @@ -350,7 +336,6 @@ int main(int argc, char* argv[]) { } iceberg_arrow_stream_free_func(stream_response.stream); iceberg_scan_free_func(scan); - iceberg_scan_builder_free_func(builder); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; From 51a026d0abbea06673d5edab23db423f8d336539 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 11 Sep 2025 11:28:38 +0200 Subject: [PATCH 32/39] Remove function --- include/iceberg_rust_ffi.h | 3 --- src/lib.rs | 10 ---------- 2 files changed, 13 deletions(-) diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index f79d960..aca5feb 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -104,9 +104,6 @@ const char* iceberg_current_metrics(void); CResult iceberg_cancel_context(const Context* ctx); CResult iceberg_destroy_context(const Context* ctx); -// Backward compatibility -const char* iceberg_error_message(void); - #ifdef __cplusplus } #endif diff --git a/src/lib.rs b/src/lib.rs index 6af3d4e..acb20cd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -504,16 +504,6 @@ pub extern "C" fn iceberg_arrow_batch_free(batch: *mut ArrowBatch) { } } } - -// Backward compatibility function for error messages -#[no_mangle] -pub extern "C" fn iceberg_error_message() -> *const c_char { - // For backward compatibility, return a generic message - // In the new async API, errors are returned through response structures - b"Error: Use new async API with response structures for detailed error information\0".as_ptr() - as *const c_char -} - // Re-export object_store_ffi utilities #[no_mangle] pub extern "C" fn iceberg_destroy_cstring(string: *mut c_char) -> CResult { From 907675dc2c724d5ce77c32d8fc4a2f9c695cf59e Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 11 Sep 2025 11:50:11 +0200 Subject: [PATCH 33/39] Simplify api --- include/iceberg_rust_ffi.h | 10 +++--- src/lib.rs | 63 ++++++++++++++++++++------------------ tests/integration_test.c | 22 ++++++------- 3 files changed, 50 insertions(+), 45 deletions(-) diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index aca5feb..fda0f76 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -39,7 +39,6 @@ typedef struct { const Context* context; } IcebergTableResponse; -typedef struct IcebergScanBuilder IcebergScanBuilder; typedef struct IcebergScan IcebergScan; typedef struct { @@ -82,12 +81,12 @@ CResult iceberg_init_runtime(IcebergStaticConfig config, PanicCallback panic_cal // Async table operations CResult iceberg_table_open(const char* table_path, const char* metadata_path, IcebergTableResponse* response, const void* handle); -void iceberg_free(IcebergTable* table); +void iceberg_table_free(IcebergTable* table); // Synchronous scan creation -IcebergScanBuilder* iceberg_scan_builder(IcebergTable* table); -IcebergScanBuilder* iceberg_select_columns(IcebergScanBuilder* scan, const char** column_names, size_t num_columns); -IcebergScan* iceberg_scan(IcebergScanBuilder* builder); +IcebergScan* iceberg_new_scan(IcebergTable* table); +IcebergScan* iceberg_select_columns(IcebergScan* scan, const char** column_names, size_t num_columns); +IcebergScan* iceberg_scan(IcebergScan* scan); void iceberg_scan_free(IcebergScan* scan); // Async streaming API @@ -104,6 +103,7 @@ const char* iceberg_current_metrics(void); CResult iceberg_cancel_context(const Context* ctx); CResult iceberg_destroy_context(const Context* ctx); + #ifdef __cplusplus } #endif diff --git a/src/lib.rs b/src/lib.rs index acb20cd..8ee81e1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -83,14 +83,10 @@ pub struct IcebergTable { pub table: iceberg::table::Table, } -#[repr(C)] -pub struct IcebergScanBuilder { - pub builder: TableScanBuilder<'static>, -} - #[repr(C)] pub struct IcebergScan { - pub scan: TableScan, + pub builder: Option>, + pub scan: Option, } unsafe impl Send for IcebergScan {} @@ -343,24 +339,25 @@ export_runtime_op!( ); #[no_mangle] -pub extern "C" fn iceberg_scan_builder(table: *mut IcebergTable) -> *mut IcebergScanBuilder { +pub extern "C" fn iceberg_new_scan(table: *mut IcebergTable) -> *mut IcebergScan { if table.is_null() { return ptr::null_mut(); } let table_ref = unsafe { &*table }; let scan_builder = table_ref.table.scan(); - return Box::into_raw(Box::new(IcebergScanBuilder { - builder: scan_builder, + return Box::into_raw(Box::new(IcebergScan { + builder: Some(scan_builder), + scan: None, })); } #[no_mangle] pub extern "C" fn iceberg_select_columns( - builder: *mut IcebergScanBuilder, + scan: *mut IcebergScan, column_names: *const *const c_char, num_columns: usize, -) -> *mut IcebergScanBuilder { - if builder.is_null() || column_names.is_null() { +) -> *mut IcebergScan { + if scan.is_null() || column_names.is_null() { return ptr::null_mut(); } @@ -381,22 +378,35 @@ pub extern "C" fn iceberg_select_columns( columns.push(col_str.to_string()); } - let builder = unsafe { Box::from_raw(builder).builder }; + let scan = unsafe { Box::from_raw(scan) }; - Box::into_raw(Box::new(IcebergScanBuilder { - builder: builder.select(columns), + if scan.builder.is_none() { + return ptr::null_mut(); + } + + Box::into_raw(Box::new(IcebergScan { + builder: scan.builder.map(|b| b.select(columns)), + scan: scan.scan, })) } #[no_mangle] -pub extern "C" fn iceberg_scan(builder: *mut IcebergScanBuilder) -> *mut IcebergScan { - if builder.is_null() { +pub extern "C" fn iceberg_scan(scan: *mut IcebergScan) -> *mut IcebergScan { + if scan.is_null() { + return ptr::null_mut(); + } + let scan = unsafe { Box::from_raw(scan) }; + if scan.builder.is_none() { return ptr::null_mut(); } - let builder = unsafe { Box::from_raw(builder).builder }; + let builder = scan.builder.unwrap(); + match builder.build() { Ok(table_scan) => { - let scan_ptr = Box::into_raw(Box::new(IcebergScan { scan: table_scan })); + let scan_ptr = Box::into_raw(Box::new(IcebergScan { + scan: Some(table_scan), + builder: None, + })); scan_ptr } Err(_) => ptr::null_mut(), @@ -412,7 +422,11 @@ export_runtime_op!( return Err(anyhow::anyhow!("Null scan pointer provided")); } let scan_ref = unsafe { &((*scan).scan) }; - return Ok(scan_ref) + if scan_ref.is_none() { + return Err(anyhow::anyhow!("Scan not initialized")); + } + + return Ok(scan_ref.as_ref().unwrap()); }, scan_ref, async { @@ -473,15 +487,6 @@ pub extern "C" fn iceberg_scan_free(scan: *mut IcebergScan) { } } -#[no_mangle] -pub extern "C" fn iceberg_scan_builder_free(builder: *mut IcebergScanBuilder) { - if !builder.is_null() { - unsafe { - let _ = Box::from_raw(builder); - } - } -} - #[no_mangle] pub extern "C" fn iceberg_arrow_stream_free(stream: *mut IcebergArrowStream) { if !stream.is_null() { diff --git a/tests/integration_test.c b/tests/integration_test.c index dcf89ff..8ef9819 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -10,12 +10,12 @@ // Global function pointers for new async API static int (*iceberg_init_runtime_func)(IcebergStaticConfig config, int (*panic_callback)(void), int (*result_callback)(const void*)) = NULL; static int (*iceberg_table_open_func)(const char*, const char*, IcebergTableResponse*, const void*) = NULL; -static IcebergScan* (*iceberg_scan_func)(IcebergScanBuilder*) = NULL; -static IcebergScanBuilder* (*iceberg_scan_builder_func)(IcebergTable*) = NULL; +static IcebergScan* (*iceberg_scan_func)(IcebergScan*) = NULL; +static IcebergScan* (*iceberg_new_scan_func)(IcebergTable*) = NULL; +static void (*iceberg_scan_free_func)(IcebergScan*) = NULL; static int (*iceberg_arrow_stream_func)(IcebergScan*, IcebergArrowStreamResponse*, const void*) = NULL; static int (*iceberg_next_batch_func)(IcebergArrowStream*, IcebergBatchResponse*, const void*) = NULL; static void (*iceberg_table_free_func)(IcebergTable*) = NULL; -static void (*iceberg_scan_free_func)(IcebergScan*) = NULL; static void (*iceberg_arrow_stream_free_func)(IcebergArrowStream*) = NULL; static void (*iceberg_arrow_batch_free_func)(ArrowBatch*) = NULL; static int (*iceberg_destroy_cstring_func)(char*) = NULL; @@ -69,15 +69,15 @@ static int load_iceberg_library(const char* library_path) { return 0; } - iceberg_scan_func = (IcebergScan* (*)(IcebergScanBuilder*))dlsym(lib_handle, "iceberg_scan"); + iceberg_scan_func = (IcebergScan* (*)(IcebergScan*))dlsym(lib_handle, "iceberg_scan"); if (!iceberg_scan_func) { fprintf(stderr, "❌ Failed to resolve iceberg_scan: %s\n", dlerror()); return 0; } - iceberg_scan_builder_func = (IcebergScanBuilder* (*)(IcebergTable*))dlsym(lib_handle, "iceberg_scan_builder"); - if (!iceberg_scan_builder_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_scan_builder: %s\n", dlerror()); + iceberg_new_scan_func = (IcebergScan* (*)(IcebergTable*))dlsym(lib_handle, "iceberg_new_scan"); + if (!iceberg_new_scan_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_new_scan: %s\n", dlerror()); return 0; } iceberg_arrow_stream_func = (int (*)(IcebergScan*, IcebergArrowStreamResponse*, const void*))dlsym(lib_handle, "iceberg_arrow_stream"); @@ -232,16 +232,16 @@ int main(int argc, char* argv[]) { printf("✅ Table opened successfully\n"); // 3. Create a scan using async API - IcebergScanBuilder *builder = iceberg_scan_builder_func(table_response.table); + IcebergScan *scan = iceberg_new_scan_func(table_response.table); - if (builder == NULL) { - printf("❌ Failed to create scan builder\n"); + if (scan == NULL) { + printf("❌ Failed to create scan\n"); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; } - IcebergScan *scan = iceberg_scan_func(builder); + scan = iceberg_scan_func(scan); if (scan == NULL) { printf("❌ Failed to initiate scan creation\n"); From 3d240d4545e90ad7ec58d874f940e481afeec902 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 11 Sep 2025 15:32:37 +0200 Subject: [PATCH 34/39] Temp state --- include/iceberg_rust_ffi.h | 8 ++--- src/lib.rs | 68 ++++++++++++++++++++++---------------- tests/integration_test.c | 50 ++++++++++++++++------------ 3 files changed, 71 insertions(+), 55 deletions(-) diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index fda0f76..7c20ca3 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -85,12 +85,12 @@ void iceberg_table_free(IcebergTable* table); // Synchronous scan creation IcebergScan* iceberg_new_scan(IcebergTable* table); -IcebergScan* iceberg_select_columns(IcebergScan* scan, const char** column_names, size_t num_columns); -IcebergScan* iceberg_scan(IcebergScan* scan); -void iceberg_scan_free(IcebergScan* scan); +int iceberg_select_columns(IcebergScan* scan, const char** column_names, size_t num_columns); +int iceberg_scan(IcebergScan** scan); +void iceberg_scan_free(IcebergScan** scan); // Async streaming API -CResult iceberg_arrow_stream(IcebergScan* scan, IcebergArrowStreamResponse* response, const void* handle); +CResult iceberg_arrow_stream(IcebergScan** scan, IcebergArrowStreamResponse* response, const void* handle); CResult iceberg_next_batch(IcebergArrowStream* stream, IcebergBatchResponse* response, const void* handle); void iceberg_arrow_stream_free(IcebergArrowStream* stream); void iceberg_arrow_batch_free(ArrowBatch* batch); diff --git a/src/lib.rs b/src/lib.rs index 8ee81e1..5721cd9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,7 +8,7 @@ use arrow_array::RecordBatch; use arrow_ipc::writer::StreamWriter; use iceberg::io::FileIOBuilder; use iceberg::scan::{TableScan, TableScanBuilder}; -use iceberg::table::StaticTable; +use iceberg::table::{self, StaticTable, Table}; use iceberg::TableIdent; // Import from object_store_ffi @@ -80,7 +80,7 @@ impl Default for IcebergStaticConfig { // Direct structures - no opaque wrappers #[repr(C)] pub struct IcebergTable { - pub table: iceberg::table::Table, + pub table: Table, } #[repr(C)] @@ -356,9 +356,9 @@ pub extern "C" fn iceberg_select_columns( scan: *mut IcebergScan, column_names: *const *const c_char, num_columns: usize, -) -> *mut IcebergScan { +) -> CResult { if scan.is_null() || column_names.is_null() { - return ptr::null_mut(); + return CResult::Error; } let mut columns = Vec::new(); @@ -366,50 +366,55 @@ pub extern "C" fn iceberg_select_columns( for i in 0..num_columns { let col_ptr = unsafe { *column_names.add(i) }; if col_ptr.is_null() { - return ptr::null_mut(); + return CResult::Error; } let col_str = unsafe { match CStr::from_ptr(col_ptr).to_str() { Ok(s) => s, - Err(_) => return ptr::null_mut(), + Err(_) => return CResult::Error, } }; columns.push(col_str.to_string()); } - let scan = unsafe { Box::from_raw(scan) }; + let scan_ref = unsafe { Box::from_raw(scan) }; - if scan.builder.is_none() { - return ptr::null_mut(); + if scan_ref.builder.is_none() { + return CResult::Error; + } + unsafe { + *scan = IcebergScan { + builder: scan_ref.builder.map(|b| b.select(columns)), + scan: scan_ref.scan, + }; } - Box::into_raw(Box::new(IcebergScan { - builder: scan.builder.map(|b| b.select(columns)), - scan: scan.scan, - })) + return CResult::Ok; } #[no_mangle] -pub extern "C" fn iceberg_scan(scan: *mut IcebergScan) -> *mut IcebergScan { +pub extern "C" fn iceberg_scan(scan: *mut *mut IcebergScan) -> CResult { if scan.is_null() { - return ptr::null_mut(); + return CResult::Error; } - let scan = unsafe { Box::from_raw(scan) }; - if scan.builder.is_none() { - return ptr::null_mut(); + let mut scan_ref = unsafe { Box::from_raw(*scan) }; + if scan_ref.builder.is_none() { + return CResult::Error; } - let builder = scan.builder.unwrap(); + let builder = scan_ref.builder.unwrap(); match builder.build() { Ok(table_scan) => { - let scan_ptr = Box::into_raw(Box::new(IcebergScan { - scan: Some(table_scan), - builder: None, - })); - scan_ptr + unsafe { + *scan = Box::into_raw(Box::new(IcebergScan { + builder: None, + scan: Some(table_scan), + })); + } + CResult::Ok } - Err(_) => ptr::null_mut(), + Err(_) => CResult::Error, } } @@ -421,7 +426,7 @@ export_runtime_op!( if scan.is_null() { return Err(anyhow::anyhow!("Null scan pointer provided")); } - let scan_ref = unsafe { &((*scan).scan) }; + let scan_ref = unsafe { &(**scan).scan }; if scan_ref.is_none() { return Err(anyhow::anyhow!("Scan not initialized")); } @@ -430,12 +435,15 @@ export_runtime_op!( }, scan_ref, async { + println!("HERE 2"); + let stream = scan_ref.to_arrow().await?; + println!("HERE 3"); Ok::(IcebergArrowStream { stream: AsyncMutex::new(stream), }) }, - scan: *mut IcebergScan + scan: *mut *mut IcebergScan ); // Async function to get next batch from existing stream @@ -479,10 +487,12 @@ pub extern "C" fn iceberg_table_free(table: *mut IcebergTable) { } #[no_mangle] -pub extern "C" fn iceberg_scan_free(scan: *mut IcebergScan) { +pub extern "C" fn iceberg_scan_free(scan: *mut *mut IcebergScan) { if !scan.is_null() { unsafe { - let _ = Box::from_raw(scan); + //let ptr = Box::from_raw(scan); + let _ = Box::from_raw(*scan); + *scan = ptr::null_mut(); } } } diff --git a/tests/integration_test.c b/tests/integration_test.c index 8ef9819..7684139 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -10,10 +10,10 @@ // Global function pointers for new async API static int (*iceberg_init_runtime_func)(IcebergStaticConfig config, int (*panic_callback)(void), int (*result_callback)(const void*)) = NULL; static int (*iceberg_table_open_func)(const char*, const char*, IcebergTableResponse*, const void*) = NULL; -static IcebergScan* (*iceberg_scan_func)(IcebergScan*) = NULL; static IcebergScan* (*iceberg_new_scan_func)(IcebergTable*) = NULL; -static void (*iceberg_scan_free_func)(IcebergScan*) = NULL; -static int (*iceberg_arrow_stream_func)(IcebergScan*, IcebergArrowStreamResponse*, const void*) = NULL; +static int (*iceberg_scan_func)(IcebergScan**) = NULL; +static void (*iceberg_scan_free_func)(IcebergScan**) = NULL; +static int (*iceberg_arrow_stream_func)(IcebergScan**, IcebergArrowStreamResponse*, const void*) = NULL; static int (*iceberg_next_batch_func)(IcebergArrowStream*, IcebergBatchResponse*, const void*) = NULL; static void (*iceberg_table_free_func)(IcebergTable*) = NULL; static void (*iceberg_arrow_stream_free_func)(IcebergArrowStream*) = NULL; @@ -69,18 +69,19 @@ static int load_iceberg_library(const char* library_path) { return 0; } - iceberg_scan_func = (IcebergScan* (*)(IcebergScan*))dlsym(lib_handle, "iceberg_scan"); - if (!iceberg_scan_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_scan: %s\n", dlerror()); - return 0; - } - iceberg_new_scan_func = (IcebergScan* (*)(IcebergTable*))dlsym(lib_handle, "iceberg_new_scan"); if (!iceberg_new_scan_func) { fprintf(stderr, "❌ Failed to resolve iceberg_new_scan: %s\n", dlerror()); return 0; } - iceberg_arrow_stream_func = (int (*)(IcebergScan*, IcebergArrowStreamResponse*, const void*))dlsym(lib_handle, "iceberg_arrow_stream"); + + iceberg_scan_func = (int (*)(IcebergScan**))dlsym(lib_handle, "iceberg_scan"); + if (!iceberg_scan_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_scan: %s\n", dlerror()); + return 0; + } + + iceberg_arrow_stream_func = (int (*)(IcebergScan**, IcebergArrowStreamResponse*, const void*))dlsym(lib_handle, "iceberg_arrow_stream"); if (!iceberg_arrow_stream_func) { fprintf(stderr, "❌ Failed to resolve iceberg_arrow_stream: %s\n", dlerror()); return 0; @@ -97,7 +98,7 @@ static int load_iceberg_library(const char* library_path) { return 0; } - iceberg_scan_free_func = (void (*)(IcebergScan*))dlsym(lib_handle, "iceberg_scan_free"); + iceberg_scan_free_func = (void (*)(IcebergScan**))dlsym(lib_handle, "iceberg_scan_free"); if (!iceberg_scan_free_func) { fprintf(stderr, "❌ Failed to resolve iceberg_scan_free: %s\n", dlerror()); return 0; @@ -146,7 +147,6 @@ static void unload_iceberg_library(void) { } } - int main(int argc, char* argv[]) { printf("Starting Iceberg C API integration test with new async API...\n"); @@ -241,24 +241,30 @@ int main(int argc, char* argv[]) { return 1; } - scan = iceberg_scan_func(scan); + // Print the scan pointer + printf("Scan pointer: %p\n", (void*)scan); - if (scan == NULL) { + result = iceberg_scan_func(&scan); + + if (result != CRESULT_OK) { printf("❌ Failed to initiate scan creation\n"); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; } + // Print the scan pointer + printf("Scan pointer: %p\n", (void*)scan); + printf("✅ Scan created successfully\n"); printf("Step 1: Initializing stream asynchronously...\n"); IcebergArrowStreamResponse stream_response = {0}; async_completed = 0; - result = iceberg_arrow_stream_func(scan, &stream_response, (const void*)(uintptr_t)&async_completed); + result = iceberg_arrow_stream_func(&scan, &stream_response, (const void*)(uintptr_t)&async_completed); if (result != CRESULT_OK) { printf("❌ Failed to create stream\n"); - iceberg_scan_free_func(scan); + iceberg_scan_free_func(&scan); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; @@ -274,7 +280,7 @@ int main(int argc, char* argv[]) { if (!async_completed) { printf("❌ Stream creation async operation timed out\n"); - iceberg_scan_free_func(scan); + iceberg_scan_free_func(&scan); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; @@ -288,7 +294,7 @@ int main(int argc, char* argv[]) { iceberg_destroy_cstring_func(stream_response.error_message); } printf("\n"); - iceberg_scan_free_func(scan); + iceberg_scan_free_func(&scan); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; @@ -296,7 +302,7 @@ int main(int argc, char* argv[]) { if (!stream_response.stream) { printf("❌ No stream returned from stream creation\n"); - iceberg_scan_free_func(scan); + iceberg_scan_free_func(&scan); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; @@ -321,7 +327,7 @@ int main(int argc, char* argv[]) { if (!async_completed) { printf("❌ Batch retrieval async operation timed out\n"); iceberg_arrow_stream_free_func(stream_response.stream); - iceberg_scan_free_func(scan); + iceberg_scan_free_func(&scan); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; @@ -335,7 +341,7 @@ int main(int argc, char* argv[]) { iceberg_destroy_cstring_func(batch_response.error_message); } iceberg_arrow_stream_free_func(stream_response.stream); - iceberg_scan_free_func(scan); + iceberg_scan_free_func(&scan); iceberg_table_free_func(table_response.table); unload_iceberg_library(); return 1; @@ -394,7 +400,7 @@ int main(int argc, char* argv[]) { // 5. Cleanup printf("Cleaning up resources...\n"); iceberg_arrow_stream_free_func(stream_response.stream); - iceberg_scan_free_func(scan); + iceberg_scan_free_func(&scan); iceberg_table_free_func(table_response.table); unload_iceberg_library(); From 9106339f0c64370ec561ddd53cf7a231060542ec Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 11 Sep 2025 17:27:39 +0200 Subject: [PATCH 35/39] . --- include/iceberg_rust_ffi.h | 4 ++-- src/lib.rs | 19 ++++++++----------- tests/integration_test.c | 6 +++--- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index 7c20ca3..b670ef5 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -85,12 +85,12 @@ void iceberg_table_free(IcebergTable* table); // Synchronous scan creation IcebergScan* iceberg_new_scan(IcebergTable* table); -int iceberg_select_columns(IcebergScan* scan, const char** column_names, size_t num_columns); +int iceberg_select_columns(IcebergScan** scan, const char** column_names, size_t num_columns); int iceberg_scan(IcebergScan** scan); void iceberg_scan_free(IcebergScan** scan); // Async streaming API -CResult iceberg_arrow_stream(IcebergScan** scan, IcebergArrowStreamResponse* response, const void* handle); +CResult iceberg_arrow_stream(IcebergScan* scan, IcebergArrowStreamResponse* response, const void* handle); CResult iceberg_next_batch(IcebergArrowStream* stream, IcebergBatchResponse* response, const void* handle); void iceberg_arrow_stream_free(IcebergArrowStream* stream); void iceberg_arrow_batch_free(ArrowBatch* batch); diff --git a/src/lib.rs b/src/lib.rs index 5721cd9..faadd8c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,7 +8,7 @@ use arrow_array::RecordBatch; use arrow_ipc::writer::StreamWriter; use iceberg::io::FileIOBuilder; use iceberg::scan::{TableScan, TableScanBuilder}; -use iceberg::table::{self, StaticTable, Table}; +use iceberg::table::{StaticTable, Table}; use iceberg::TableIdent; // Import from object_store_ffi @@ -353,7 +353,7 @@ pub extern "C" fn iceberg_new_scan(table: *mut IcebergTable) -> *mut IcebergScan #[no_mangle] pub extern "C" fn iceberg_select_columns( - scan: *mut IcebergScan, + scan: *mut *mut IcebergScan, column_names: *const *const c_char, num_columns: usize, ) -> CResult { @@ -378,16 +378,16 @@ pub extern "C" fn iceberg_select_columns( columns.push(col_str.to_string()); } - let scan_ref = unsafe { Box::from_raw(scan) }; + let scan_ref = unsafe { Box::from_raw(*scan) }; if scan_ref.builder.is_none() { return CResult::Error; } unsafe { - *scan = IcebergScan { + *scan = Box::into_raw(Box::new(IcebergScan { builder: scan_ref.builder.map(|b| b.select(columns)), scan: scan_ref.scan, - }; + })); } return CResult::Ok; @@ -398,7 +398,7 @@ pub extern "C" fn iceberg_scan(scan: *mut *mut IcebergScan) -> CResult { if scan.is_null() { return CResult::Error; } - let mut scan_ref = unsafe { Box::from_raw(*scan) }; + let scan_ref = unsafe { Box::from_raw(*scan) }; if scan_ref.builder.is_none() { return CResult::Error; } @@ -426,7 +426,7 @@ export_runtime_op!( if scan.is_null() { return Err(anyhow::anyhow!("Null scan pointer provided")); } - let scan_ref = unsafe { &(**scan).scan }; + let scan_ref = unsafe { &(*scan).scan }; if scan_ref.is_none() { return Err(anyhow::anyhow!("Scan not initialized")); } @@ -435,15 +435,12 @@ export_runtime_op!( }, scan_ref, async { - println!("HERE 2"); - let stream = scan_ref.to_arrow().await?; - println!("HERE 3"); Ok::(IcebergArrowStream { stream: AsyncMutex::new(stream), }) }, - scan: *mut *mut IcebergScan + scan: *mut IcebergScan ); // Async function to get next batch from existing stream diff --git a/tests/integration_test.c b/tests/integration_test.c index 7684139..23f7869 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -13,7 +13,7 @@ static int (*iceberg_table_open_func)(const char*, const char*, IcebergTableResp static IcebergScan* (*iceberg_new_scan_func)(IcebergTable*) = NULL; static int (*iceberg_scan_func)(IcebergScan**) = NULL; static void (*iceberg_scan_free_func)(IcebergScan**) = NULL; -static int (*iceberg_arrow_stream_func)(IcebergScan**, IcebergArrowStreamResponse*, const void*) = NULL; +static int (*iceberg_arrow_stream_func)(IcebergScan*, IcebergArrowStreamResponse*, const void*) = NULL; static int (*iceberg_next_batch_func)(IcebergArrowStream*, IcebergBatchResponse*, const void*) = NULL; static void (*iceberg_table_free_func)(IcebergTable*) = NULL; static void (*iceberg_arrow_stream_free_func)(IcebergArrowStream*) = NULL; @@ -81,7 +81,7 @@ static int load_iceberg_library(const char* library_path) { return 0; } - iceberg_arrow_stream_func = (int (*)(IcebergScan**, IcebergArrowStreamResponse*, const void*))dlsym(lib_handle, "iceberg_arrow_stream"); + iceberg_arrow_stream_func = (int (*)(IcebergScan*, IcebergArrowStreamResponse*, const void*))dlsym(lib_handle, "iceberg_arrow_stream"); if (!iceberg_arrow_stream_func) { fprintf(stderr, "❌ Failed to resolve iceberg_arrow_stream: %s\n", dlerror()); return 0; @@ -261,7 +261,7 @@ int main(int argc, char* argv[]) { printf("Step 1: Initializing stream asynchronously...\n"); IcebergArrowStreamResponse stream_response = {0}; async_completed = 0; - result = iceberg_arrow_stream_func(&scan, &stream_response, (const void*)(uintptr_t)&async_completed); + result = iceberg_arrow_stream_func(scan, &stream_response, (const void*)(uintptr_t)&async_completed); if (result != CRESULT_OK) { printf("❌ Failed to create stream\n"); iceberg_scan_free_func(&scan); From bffe1feaa33d9bae236a8d774f59073c3029645f Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Fri, 12 Sep 2025 14:21:39 +0200 Subject: [PATCH 36/39] Add NULL check --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index faadd8c..4cfbfe3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -395,7 +395,7 @@ pub extern "C" fn iceberg_select_columns( #[no_mangle] pub extern "C" fn iceberg_scan(scan: *mut *mut IcebergScan) -> CResult { - if scan.is_null() { + if scan.is_null() || unsafe { (*scan).is_null() } { return CResult::Error; } let scan_ref = unsafe { Box::from_raw(*scan) }; From 9934fe4c1a49d56c69785338dcd030a3948ff5d9 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Fri, 19 Sep 2025 10:06:25 +0200 Subject: [PATCH 37/39] Add scan builder functions --- include/iceberg_rust_ffi.h | 5 ++- src/lib.rs | 75 +++++++++++++++++++++++++++++++++++++- tests/integration_test.c | 10 ++--- 3 files changed, 83 insertions(+), 7 deletions(-) diff --git a/include/iceberg_rust_ffi.h b/include/iceberg_rust_ffi.h index b670ef5..5cbed86 100644 --- a/include/iceberg_rust_ffi.h +++ b/include/iceberg_rust_ffi.h @@ -86,7 +86,10 @@ void iceberg_table_free(IcebergTable* table); // Synchronous scan creation IcebergScan* iceberg_new_scan(IcebergTable* table); int iceberg_select_columns(IcebergScan** scan, const char** column_names, size_t num_columns); -int iceberg_scan(IcebergScan** scan); +int iceberg_scan_build(IcebergScan** scan); +int iceberg_scan_with_data_file_concurrency_limit(IcebergScan** scan, size_t n); +int iceberg_scan_with_manifest_entry_concurrency_limit(IcebergScan** scan, size_t n); +int iceberg_scan_with_batch_size(IcebergScan** scan, size_t n); void iceberg_scan_free(IcebergScan** scan); // Async streaming API diff --git a/src/lib.rs b/src/lib.rs index 4cfbfe3..f808423 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -394,7 +394,80 @@ pub extern "C" fn iceberg_select_columns( } #[no_mangle] -pub extern "C" fn iceberg_scan(scan: *mut *mut IcebergScan) -> CResult { +pub extern "C" fn iceberg_scan_with_data_file_concurrency_limit( + scan: *mut *mut IcebergScan, + n: usize, +) -> CResult { + if scan.is_null() { + return CResult::Error; + } + let scan_ref = unsafe { Box::from_raw(*scan) }; + + if scan_ref.builder.is_none() { + return CResult::Error; + } + + unsafe { + *scan = Box::into_raw(Box::new(IcebergScan { + builder: scan_ref + .builder + .map(|b| b.with_data_file_concurrency_limit(n)), + scan: scan_ref.scan, + })); + } + + return CResult::Ok; +} + +#[no_mangle] +pub extern "C" fn iceberg_scan_with_manifest_entry_concurrency_limit( + scan: *mut *mut IcebergScan, + n: usize, +) -> CResult { + if scan.is_null() { + return CResult::Error; + } + let scan_ref = unsafe { Box::from_raw(*scan) }; + + if scan_ref.builder.is_none() { + return CResult::Error; + } + + unsafe { + *scan = Box::into_raw(Box::new(IcebergScan { + builder: scan_ref + .builder + .map(|b| b.with_manifest_entry_concurrency_limit(n)), + scan: scan_ref.scan, + })); + } + + return CResult::Ok; +} + +#[no_mangle] +pub extern "C" fn iceberg_scan_with_batch_size(scan: *mut *mut IcebergScan, n: usize) -> CResult { + if scan.is_null() { + return CResult::Error; + } + let scan_ref = unsafe { Box::from_raw(*scan) }; + + if scan_ref.builder.is_none() { + return CResult::Error; + } + + unsafe { + *scan = Box::into_raw(Box::new(IcebergScan { + builder: scan_ref.builder.map(|b| b.with_batch_size(Some(n))), + scan: scan_ref.scan, + })); + } + + return CResult::Ok; +} + +#[no_mangle] +pub extern "C" fn iceberg_scan_build(scan: *mut *mut IcebergScan) -> CResult { if scan.is_null() || unsafe { (*scan).is_null() } { return CResult::Error; } diff --git a/tests/integration_test.c b/tests/integration_test.c index 23f7869..86bfbd9 100644 --- a/tests/integration_test.c +++ b/tests/integration_test.c @@ -11,7 +11,7 @@ static int (*iceberg_init_runtime_func)(IcebergStaticConfig config, int (*panic_callback)(void), int (*result_callback)(const void*)) = NULL; static int (*iceberg_table_open_func)(const char*, const char*, IcebergTableResponse*, const void*) = NULL; static IcebergScan* (*iceberg_new_scan_func)(IcebergTable*) = NULL; -static int (*iceberg_scan_func)(IcebergScan**) = NULL; +static int (*iceberg_scan_build_func)(IcebergScan**) = NULL; static void (*iceberg_scan_free_func)(IcebergScan**) = NULL; static int (*iceberg_arrow_stream_func)(IcebergScan*, IcebergArrowStreamResponse*, const void*) = NULL; static int (*iceberg_next_batch_func)(IcebergArrowStream*, IcebergBatchResponse*, const void*) = NULL; @@ -75,9 +75,9 @@ static int load_iceberg_library(const char* library_path) { return 0; } - iceberg_scan_func = (int (*)(IcebergScan**))dlsym(lib_handle, "iceberg_scan"); - if (!iceberg_scan_func) { - fprintf(stderr, "❌ Failed to resolve iceberg_scan: %s\n", dlerror()); + iceberg_scan_build_func = (int (*)(IcebergScan**))dlsym(lib_handle, "iceberg_scan_build"); + if (!iceberg_scan_build_func) { + fprintf(stderr, "❌ Failed to resolve iceberg_scan_build: %s\n", dlerror()); return 0; } @@ -244,7 +244,7 @@ int main(int argc, char* argv[]) { // Print the scan pointer printf("Scan pointer: %p\n", (void*)scan); - result = iceberg_scan_func(&scan); + result = iceberg_scan_build_func(&scan); if (result != CRESULT_OK) { printf("❌ Failed to initiate scan creation\n"); From c7d3fe8a3eadb00382ddc2ce8fe8edf778399326 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Fri, 19 Sep 2025 10:37:47 +0200 Subject: [PATCH 38/39] Cleanup --- src/lib.rs | 78 ++++++++++++++++++++++++------------------------------ 1 file changed, 34 insertions(+), 44 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f808423..3c1218f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -353,11 +353,11 @@ pub extern "C" fn iceberg_new_scan(table: *mut IcebergTable) -> *mut IcebergScan #[no_mangle] pub extern "C" fn iceberg_select_columns( - scan: *mut *mut IcebergScan, + scan: &mut *mut IcebergScan, column_names: *const *const c_char, num_columns: usize, ) -> CResult { - if scan.is_null() || column_names.is_null() { + if scan.is_null() || (*scan).is_null() || column_names.is_null() { return CResult::Error; } @@ -383,22 +383,20 @@ pub extern "C" fn iceberg_select_columns( if scan_ref.builder.is_none() { return CResult::Error; } - unsafe { - *scan = Box::into_raw(Box::new(IcebergScan { - builder: scan_ref.builder.map(|b| b.select(columns)), - scan: scan_ref.scan, - })); - } + *scan = Box::into_raw(Box::new(IcebergScan { + builder: scan_ref.builder.map(|b| b.select(columns)), + scan: scan_ref.scan, + })); return CResult::Ok; } #[no_mangle] pub extern "C" fn iceberg_scan_with_data_file_concurrency_limit( - scan: *mut *mut IcebergScan, + scan: &mut *mut IcebergScan, n: usize, ) -> CResult { - if scan.is_null() { + if scan.is_null() || (*scan).is_null() { return CResult::Error; } let scan_ref = unsafe { Box::from_raw(*scan) }; @@ -407,24 +405,22 @@ pub extern "C" fn iceberg_scan_with_data_file_concurrency_limit( return CResult::Error; } - unsafe { - *scan = Box::into_raw(Box::new(IcebergScan { - builder: scan_ref - .builder - .map(|b| b.with_data_file_concurrency_limit(n)), - scan: scan_ref.scan, - })); - } + *scan = Box::into_raw(Box::new(IcebergScan { + builder: scan_ref + .builder + .map(|b| b.with_data_file_concurrency_limit(n)), + scan: scan_ref.scan, + })); return CResult::Ok; } #[no_mangle] pub extern "C" fn iceberg_scan_with_manifest_entry_concurrency_limit( - scan: *mut *mut IcebergScan, + scan: &mut *mut IcebergScan, n: usize, ) -> CResult { - if scan.is_null() { + if scan.is_null() || (*scan).is_null() { return CResult::Error; } let scan_ref = unsafe { Box::from_raw(*scan) }; @@ -433,21 +429,19 @@ pub extern "C" fn iceberg_scan_with_manifest_entry_concurrency_limit( return CResult::Error; } - unsafe { - *scan = Box::into_raw(Box::new(IcebergScan { - builder: scan_ref - .builder - .map(|b| b.with_manifest_entry_concurrency_limit(n)), - scan: scan_ref.scan, - })); - } + *scan = Box::into_raw(Box::new(IcebergScan { + builder: scan_ref + .builder + .map(|b| b.with_manifest_entry_concurrency_limit(n)), + scan: scan_ref.scan, + })); return CResult::Ok; } #[no_mangle] -pub extern "C" fn iceberg_scan_with_batch_size(scan: *mut *mut IcebergScan, n: usize) -> CResult { - if scan.is_null() { +pub extern "C" fn iceberg_scan_with_batch_size(scan: &mut *mut IcebergScan, n: usize) -> CResult { + if scan.is_null() || (*scan).is_null() { return CResult::Error; } let scan_ref = unsafe { Box::from_raw(*scan) }; @@ -456,19 +450,17 @@ pub extern "C" fn iceberg_scan_with_batch_size(scan: *mut *mut IcebergScan, n: u return CResult::Error; } - unsafe { - *scan = Box::into_raw(Box::new(IcebergScan { - builder: scan_ref.builder.map(|b| b.with_batch_size(Some(n))), - scan: scan_ref.scan, - })); - } + *scan = Box::into_raw(Box::new(IcebergScan { + builder: scan_ref.builder.map(|b| b.with_batch_size(Some(n))), + scan: scan_ref.scan, + })); return CResult::Ok; } #[no_mangle] -pub extern "C" fn iceberg_scan_build(scan: *mut *mut IcebergScan) -> CResult { - if scan.is_null() || unsafe { (*scan).is_null() } { +pub extern "C" fn iceberg_scan_build(scan: &mut *mut IcebergScan) -> CResult { + if scan.is_null() || (*scan).is_null() { return CResult::Error; } let scan_ref = unsafe { Box::from_raw(*scan) }; @@ -479,12 +471,10 @@ pub extern "C" fn iceberg_scan_build(scan: *mut *mut IcebergScan) -> CResult { match builder.build() { Ok(table_scan) => { - unsafe { - *scan = Box::into_raw(Box::new(IcebergScan { - builder: None, - scan: Some(table_scan), - })); - } + *scan = Box::into_raw(Box::new(IcebergScan { + builder: None, + scan: Some(table_scan), + })); CResult::Ok } Err(_) => CResult::Error, From d25ff6c458f608025089763ff927af31dec2a982 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Fri, 19 Sep 2025 11:46:09 +0200 Subject: [PATCH 39/39] . --- src/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3c1218f..85d0187 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -547,10 +547,9 @@ pub extern "C" fn iceberg_table_free(table: *mut IcebergTable) { } #[no_mangle] -pub extern "C" fn iceberg_scan_free(scan: *mut *mut IcebergScan) { +pub extern "C" fn iceberg_scan_free(scan: &mut *mut IcebergScan) { if !scan.is_null() { unsafe { - //let ptr = Box::from_raw(scan); let _ = Box::from_raw(*scan); *scan = ptr::null_mut(); }