From 7e43997898d17718019d1eab58b8ae58e46f4d87 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 6 Jun 2025 23:50:00 +0200 Subject: [PATCH 01/13] Fix duckdb v1.3.0 (local branch) (#26) * feat: Update duckdb and fix parquet scan * Update MainDistributionPipeline to v1.3.0 * v1.3 build --------- Co-authored-by: Votre Nom --- .../workflows/MainDistributionPipeline.yml | 16 +++++------ chsql/src/duck_flock.cpp | 2 +- chsql/src/parquet_ordered_scan.cpp | 27 +++++++++---------- duckdb | 2 +- 4 files changed, 23 insertions(+), 24 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 9c03355..452937d 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -28,23 +28,23 @@ jobs: # We have to build v1.2.0 based due to go-duckdb restrictions duckdb-1-2-0-build: name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.2.1 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.3.0 with: - duckdb_version: v1.2.0 - ci_tools_version: v1.2.0 + duckdb_version: v1.3.0 + ci_tools_version: v1.3.0 extension_name: chsql duckdb-stable-build: name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.2.1 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.3.0 with: - duckdb_version: v1.2.1 - ci_tools_version: v1.2.1 + duckdb_version: v1.3.0 + ci_tools_version: v1.3.0 extension_name: chsql release-all-artifacts: name: Process Extension Artifacts - needs: [duckdb-1-2-0-build, duckdb-stable-build] + needs: [duckdb-1-3-0-build, duckdb-stable-build] if: github.event_name == 'release' && github.event.action == 'published' runs-on: ubuntu-latest steps: @@ -72,4 +72,4 @@ jobs: - name: Upload Release Assets uses: softprops/action-gh-release@v1 with: - files: to-upload/* \ No newline at end of file + files: to-upload/* diff --git a/chsql/src/duck_flock.cpp b/chsql/src/duck_flock.cpp index 79a821d..b93eaaa 100644 --- a/chsql/src/duck_flock.cpp +++ b/chsql/src/duck_flock.cpp @@ -104,7 +104,7 @@ namespace duckdb { try { if (res->TryFetch(data_chunk, error_data)) { - if (data_chunk && !data_chunk->size() == 0) { + if (data_chunk && data_chunk->size() != 0) { output.Append(*data_chunk); return; } diff --git a/chsql/src/parquet_ordered_scan.cpp b/chsql/src/parquet_ordered_scan.cpp index e41117b..81dd069 100644 --- a/chsql/src/parquet_ordered_scan.cpp +++ b/chsql/src/parquet_ordered_scan.cpp @@ -2,7 +2,7 @@ #include "duckdb/common/exception.hpp" #include #include "chsql_extension.hpp" -#include +#include #include "chsql_parquet_types.h" namespace duckdb { @@ -35,11 +35,10 @@ namespace duckdb { haveAbsentColumns = true; continue; } - columnMap.push_back(schema_column - reader->metadata->metadata->schema.begin() - 1); - reader->reader_data.column_ids.push_back( - schema_column - reader->metadata->metadata->schema.begin() - 1); - reader->reader_data.column_mapping.push_back( - it - returnCols.begin()); + columnMap.push_back(static_cast(schema_column - reader->metadata->metadata->schema.begin() - 1)); + reader->column_ids.push_back( + MultiFileLocalColumnId(static_cast(schema_column - reader->metadata->metadata->schema.begin() - 1))); + reader->column_indexes.emplace_back(static_cast(it - returnCols.begin())); } auto order_by_column_it = find_if( reader->metadata->metadata->schema.begin(), @@ -55,7 +54,7 @@ namespace duckdb { } void Scan(ClientContext& ctx) { chunk->Reset(); - reader->Scan(*scanState, *chunk); + reader->Scan(ctx, *scanState, *chunk); if (!haveAbsentColumns || chunk->size() == 0) { return; } @@ -180,7 +179,7 @@ namespace duckdb { ParquetOptions po; po.binary_as_string = true; set->reader = make_uniq(context, file, po, nullptr); - res.push_back(move(set)); + res.push_back(std::move(set)); } } @@ -189,16 +188,16 @@ namespace duckdb { Connection conn(*context.db); auto res = make_uniq(); auto files = ListValue::GetChildren(input.inputs[0]); - vector fileNames; + vector fileInfoList; for (auto & file : files) { - fileNames.push_back(file.ToString()); + fileInfoList.emplace_back(file.ToString()); } - GlobMultiFileList fileList(context, fileNames, FileGlobOptions::ALLOW_EMPTY); - string filename; + GlobMultiFileList fileList(context, fileInfoList, FileGlobOptions::ALLOW_EMPTY); + OpenFileInfo file_info; MultiFileListScanData it; fileList.InitializeScan(it); - while (fileList.Scan(it, filename)) { - res->files.push_back(filename); + while (fileList.Scan(it, file_info)) { + res->files.push_back(file_info.path); } if (res->files.empty()) { throw InvalidInputException("No files matched the provided pattern."); diff --git a/duckdb b/duckdb index 7c0f857..71c5c07 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 7c0f8574bda9af7aa5b23166d7860d68ae3b9481 +Subproject commit 71c5c07cdd295e9409c0505885033ae9eb6b5ddd From 848b475dbe2efa583c522cb50d0020f582d3fe63 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 6 Jun 2025 23:53:13 +0200 Subject: [PATCH 02/13] Update MainDistributionPipeline.yml --- .../workflows/MainDistributionPipeline.yml | 56 ++----------------- 1 file changed, 5 insertions(+), 51 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 452937d..5fc829a 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -4,34 +4,20 @@ name: Main Extension Distribution Pipeline on: push: - paths-ignore: - - "*/**.md" - - "*/**.yml" pull_request: workflow_dispatch: - release: - types: [published] concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' && github.sha || '' }} cancel-in-progress: true jobs: -# Temporarily disabled because main is broken -# duckdb-next-build: -# name: Build extension binaries (next) -# uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main -# with: -# duckdb_version: 1.1.2 -# ci_tools_version: 1.1.2 -# extension_name: chsql - # We have to build v1.2.0 based due to go-duckdb restrictions - duckdb-1-2-0-build: + duckdb-next-build: name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.3.0 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main with: - duckdb_version: v1.3.0 - ci_tools_version: v1.3.0 + duckdb_version: main + ci_tools_version: main extension_name: chsql duckdb-stable-build: @@ -41,35 +27,3 @@ jobs: duckdb_version: v1.3.0 ci_tools_version: v1.3.0 extension_name: chsql - - release-all-artifacts: - name: Process Extension Artifacts - needs: [duckdb-1-3-0-build, duckdb-stable-build] - if: github.event_name == 'release' && github.event.action == 'published' - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Download all artifacts - uses: actions/download-artifact@v4 - with: - path: downloaded-artifacts - - - name: List downloaded artifacts - run: | - mkdir to-upload; \ - echo "Artifacts downloaded:" \ - ls -la downloaded-artifacts; \ - for l in `ls downloaded-artifacts`; do \ - VER=`echo $l | cut -d '-' -f 2`; \ - ARCH=`echo $l| cut -d '-' -f 4`; \ - EXT=`ls downloaded-artifacts/$l | cut -b 7-`; \ - mv downloaded-artifacts/$l/chsql.$EXT to-upload/chsql.$VER.$ARCH.$EXT; \ - done; \ - echo "Artifacts to be uploaded:" \ - ls -la to-upload - - - name: Upload Release Assets - uses: softprops/action-gh-release@v1 - with: - files: to-upload/* From 050b6622bbbe7e3852c9fe7f8ecb1c5814b3ad71 Mon Sep 17 00:00:00 2001 From: lmangani <> Date: Fri, 6 Jun 2025 22:13:32 +0000 Subject: [PATCH 03/13] resync --- extension-ci-tools | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extension-ci-tools b/extension-ci-tools index 58970c5..71d2002 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit 58970c538d35919db875096460c05806056f4de0 +Subproject commit 71d20029c5314dfc34f3bbdab808b9bce03b8003 From 34c6e7c8b5d40e0d6ec39a7957327eee566a85dc Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Sat, 7 Jun 2025 00:32:26 +0200 Subject: [PATCH 04/13] Update MainDistributionPipeline.yml --- .github/workflows/MainDistributionPipeline.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 5fc829a..981f386 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -12,13 +12,13 @@ concurrency: cancel-in-progress: true jobs: - duckdb-next-build: - name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main - with: - duckdb_version: main - ci_tools_version: main - extension_name: chsql +# duckdb-next-build: +# name: Build extension binaries +# uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main +# with: +# duckdb_version: main +# ci_tools_version: main +# extension_name: chsql duckdb-stable-build: name: Build extension binaries From 9711634a583f2f12805faf6d98ea4de82861dd73 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Sat, 7 Jun 2025 01:18:48 +0200 Subject: [PATCH 05/13] Update CMakeLists.txt --- chsql/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index a3375c5..3863bad 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -10,6 +10,11 @@ set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) set(CHSQL_DUCKDB_VERSION ${DUCKDB_MAJOR_VERSION}) project(${TARGET_NAME}) +set_source_files_properties( + ${CMAKE_SOURCE_DIR}/third_party/mbedtls/version + PROPERTIES HEADER_FILE_ONLY FALSE +) + include_directories( ./src/include ./src From a7b3ca36ef5e571dbec03b5e80788e0fad54f5ca Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Sat, 7 Jun 2025 01:27:51 +0200 Subject: [PATCH 06/13] Update CMakeLists.txt --- chsql/CMakeLists.txt | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index 3863bad..bb19615 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -10,11 +10,6 @@ set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) set(CHSQL_DUCKDB_VERSION ${DUCKDB_MAJOR_VERSION}) project(${TARGET_NAME}) -set_source_files_properties( - ${CMAKE_SOURCE_DIR}/third_party/mbedtls/version - PROPERTIES HEADER_FILE_ONLY FALSE -) - include_directories( ./src/include ./src @@ -24,7 +19,9 @@ include_directories( ../duckdb/third_party/thrift ../duckdb/third_party/snappy ../duckdb/third_party/zstd/include +if(!APPLE) ../duckdb/third_party/mbedtls +endif() ../duckdb/third_party/mbedtls/include ../duckdb/third_party/brotli/include) set(EXTENSION_SOURCES src/chsql_extension.cpp src/duck_flock.cpp src/chsql_system.cpp src/parquet_types.cpp) From 1b278eeb8a8f1ad9904f2c0ca4aaf74b2e49acd5 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Sat, 7 Jun 2025 01:34:10 +0200 Subject: [PATCH 07/13] Update CMakeLists.txt --- chsql/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index bb19615..b14d8c7 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -19,9 +19,6 @@ include_directories( ../duckdb/third_party/thrift ../duckdb/third_party/snappy ../duckdb/third_party/zstd/include -if(!APPLE) - ../duckdb/third_party/mbedtls -endif() ../duckdb/third_party/mbedtls/include ../duckdb/third_party/brotli/include) set(EXTENSION_SOURCES src/chsql_extension.cpp src/duck_flock.cpp src/chsql_system.cpp src/parquet_types.cpp) From 00fada3a339cbfba2bfee8122a87c62afb85fd01 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Sat, 7 Jun 2025 11:55:45 +0200 Subject: [PATCH 08/13] Update CMakeLists.txt --- chsql/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index b14d8c7..80d7062 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -22,6 +22,11 @@ include_directories( ../duckdb/third_party/mbedtls/include ../duckdb/third_party/brotli/include) set(EXTENSION_SOURCES src/chsql_extension.cpp src/duck_flock.cpp src/chsql_system.cpp src/parquet_types.cpp) +# Exclude mbedtls/version from the build +set_source_files_properties( + ${CMAKE_SOURCE_DIR}/third_party/mbedtls/version + PROPERTIES HEADER_FILE_ONLY TRUE +) build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) # Link OpenSSL in both the static library as the loadable extension From 0a91e78720900de5a7c06e41135eafb62979ca2d Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Sat, 7 Jun 2025 12:05:05 +0200 Subject: [PATCH 09/13] Update CMakeLists.txt --- chsql/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index 80d7062..db411c8 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -19,7 +19,7 @@ include_directories( ../duckdb/third_party/thrift ../duckdb/third_party/snappy ../duckdb/third_party/zstd/include - ../duckdb/third_party/mbedtls/include +# ../duckdb/third_party/mbedtls/include ../duckdb/third_party/brotli/include) set(EXTENSION_SOURCES src/chsql_extension.cpp src/duck_flock.cpp src/chsql_system.cpp src/parquet_types.cpp) # Exclude mbedtls/version from the build @@ -30,8 +30,8 @@ set_source_files_properties( build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) # Link OpenSSL in both the static library as the loadable extension -target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto) -target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto) +target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto duckdb_mbedtls) +target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto duckdb_mbedtls) install( TARGETS ${EXTENSION_NAME} EXPORT "${DUCKDB_EXPORT_SET}" From f77cd42c764d47e85127ea3b91561e7aefacd00d Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Sat, 7 Jun 2025 12:43:07 +0200 Subject: [PATCH 10/13] Update CMakeLists.txt --- chsql/CMakeLists.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index db411c8..b6193da 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -22,11 +22,6 @@ include_directories( # ../duckdb/third_party/mbedtls/include ../duckdb/third_party/brotli/include) set(EXTENSION_SOURCES src/chsql_extension.cpp src/duck_flock.cpp src/chsql_system.cpp src/parquet_types.cpp) -# Exclude mbedtls/version from the build -set_source_files_properties( - ${CMAKE_SOURCE_DIR}/third_party/mbedtls/version - PROPERTIES HEADER_FILE_ONLY TRUE -) build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) # Link OpenSSL in both the static library as the loadable extension From 1300eb9bc1e55a7a094c8a56e3f8336073b8b784 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Sat, 7 Jun 2025 12:51:34 +0200 Subject: [PATCH 11/13] use latest ci tools --- .github/workflows/MainDistributionPipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 981f386..c130c02 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -22,7 +22,7 @@ jobs: duckdb-stable-build: name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.3.0 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main with: duckdb_version: v1.3.0 ci_tools_version: v1.3.0 From 267d1913781a7f38a67b93cbb8faa8fdc5fe546f Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Sat, 7 Jun 2025 12:55:56 +0200 Subject: [PATCH 12/13] Update extension_config.cmake --- chsql/extension_config.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chsql/extension_config.cmake b/chsql/extension_config.cmake index 4ec1a10..7ae21d2 100644 --- a/chsql/extension_config.cmake +++ b/chsql/extension_config.cmake @@ -8,7 +8,7 @@ include_directories( ../duckdb/third_party/thrift ../duckdb/third_party/snappy ../duckdb/third_party/zstd/include - ../duckdb/third_party/mbedtls +# ../duckdb/third_party/mbedtls ../duckdb/third_party/mbedtls/include ../duckdb/third_party/brotli/include) From f1f64837c09a7cd53541c892cb537a49d0d81845 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Sat, 7 Jun 2025 13:04:31 +0200 Subject: [PATCH 13/13] Update CMakeLists.txt --- chsql/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index b6193da..919a25a 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -19,7 +19,7 @@ include_directories( ../duckdb/third_party/thrift ../duckdb/third_party/snappy ../duckdb/third_party/zstd/include -# ../duckdb/third_party/mbedtls/include + ../duckdb/third_party/mbedtls/include ../duckdb/third_party/brotli/include) set(EXTENSION_SOURCES src/chsql_extension.cpp src/duck_flock.cpp src/chsql_system.cpp src/parquet_types.cpp) build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})