diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 81bdfee9..9acbbbd6 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -8,7 +8,7 @@ - **Build system**: CMake (3.5+) with DuckDB's extension CI tools (`extension-ci-tools/`) - **Dependency manager**: vcpkg (managed via `vcpkg.json`) - **Key dependencies**: `nlohmann-json`, `curl`, `gtest` (see `vcpkg.json`) -- **DuckDB version targeted**: v1.4.4 (see `MainDistributionPipeline.yml`) +- **DuckDB version targeted**: v1.5.0 (see `MainDistributionPipeline.yml`) ## Repository Layout @@ -112,7 +112,7 @@ Always run `clang-format` on modified C++ files before committing. The CI pipeli Defined in `.github/workflows/MainDistributionPipeline.yml`: -- **duckdb-stable-build**: Builds extension binaries for all platforms using DuckDB v1.4.4 CI tools. +- **duckdb-stable-build**: Builds extension binaries for all platforms using DuckDB v1.5.0 CI tools. - **code-quality-check**: Runs `clang-format` and `clang-tidy` checks. Triggered on push to `main`/`dev` when `src/`, `test/`, `CMakeLists.txt`, or workflow files change, and on `workflow_dispatch`. diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 9f5749d6..f7d47247 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -24,17 +24,17 @@ concurrency: jobs: duckdb-stable-build: name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.4 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.5.0 with: - duckdb_version: v1.4.4 - ci_tools_version: v1.4.4 + duckdb_version: v1.5.0 + ci_tools_version: v1.5.0 extension_name: flock code-quality-check: name: Code Quality Check - uses: duckdb/extension-ci-tools/.github/workflows/_extension_code_quality.yml@v1.4.4 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_code_quality.yml@v1.5.0 with: - duckdb_version: v1.4.4 - ci_tools_version: v1.4.4 + duckdb_version: v1.5.0 + ci_tools_version: v1.5.0 extension_name: flock - format_checks: 'format;tidy' + format_checks: "format;tidy" \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 383772e3..633a0583 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,6 +21,13 @@ if(NOT EMSCRIPTEN) endif() find_package(nlohmann_json CONFIG REQUIRED) +# Coverage instrumentation (must be before targets are built) +if(CMAKE_BUILD_TYPE STREQUAL "Coverage") + message(STATUS "Enabling code coverage") + add_compile_options(-fprofile-instr-generate -fcoverage-mapping) + add_link_options(-fprofile-instr-generate -fcoverage-mapping) +endif() + # Build the DuckDB static and loadable extensions build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) @@ -31,19 +38,6 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION target_link_libraries(${EXTENSION_NAME} -lstdc++fs) endif() -# Check if we're in debug mode and enable AddressSanitizer -if(CMAKE_BUILD_TYPE STREQUAL "Debug") - message(STATUS "Enabling AddressSanitizer for Debug build") - # Enable AddressSanitizer - target_compile_options(${EXTENSION_NAME} PRIVATE -fsanitize=address - -fno-omit-frame-pointer) - target_link_options(${EXTENSION_NAME} PRIVATE -fsanitize=address) - - target_compile_options(${LOADABLE_EXTENSION_NAME} - PRIVATE -fsanitize=address -fno-omit-frame-pointer) - target_link_options(${LOADABLE_EXTENSION_NAME} PRIVATE -fsanitize=address) -endif() - # Link libraries for the static extension if(NOT EMSCRIPTEN) target_link_libraries(${EXTENSION_NAME} CURL::libcurl) @@ -56,8 +50,6 @@ if(NOT EMSCRIPTEN) endif() target_link_libraries(${LOADABLE_EXTENSION_NAME} nlohmann_json::nlohmann_json) -# WASM builds use EM_JS with synchronous XMLHttpRequest for HTTP - # Install the extension install( TARGETS ${EXTENSION_NAME} @@ -65,12 +57,6 @@ install( LIBRARY DESTINATION "${INSTALL_LIB_DIR}" ARCHIVE DESTINATION "${INSTALL_LIB_DIR}") -if(CMAKE_BUILD_TYPE STREQUAL "Coverage") - message(STATUS "Enabling code coverage for Debug build") - add_compile_options(-fprofile-instr-generate -fcoverage-mapping) - add_link_options(-fprofile-instr-generate -fcoverage-mapping) -endif() - if(NOT EMSCRIPTEN) # Add the test directory if not on WASM enable_testing() diff --git a/README.md b/README.md index 09563fcc..e0147437 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ To cite the project: - **WASM Support**: Compile Flock as a DuckDB-WASM loadable extension to run in the browser, enabling client-side analytics and demos without server infrastructure. - **LLM Metrics Tracking**: Track token usage, API latency, and execution time through dedicated functions like `flock_get_metrics()` for better cost and performance monitoring. - **Audio Transcription**: Send audio inputs to OpenAI or Azure and obtain text transcripts using the same `context_columns` abstraction (with `type: 'audio'`). -- **DuckDB v1.4.4**: Upgraded to DuckDB **1.4.4**, inheriting the latest performance and stability improvements. +- **DuckDB v1.5.0**: Upgraded to DuckDB **1.5.0**, inheriting the latest performance and stability improvements. - **Architecture Improvements**: Centralized bind data and RAII-based storage guards reduce duplication and improve robustness across scalar and aggregate functions. - **Developer Experience**: Interactive build scripts, improved extension CI tooling, and GitHub Copilot agent instructions streamline local development and contributions. @@ -90,7 +90,7 @@ To cite the project: ### 📝 Prerequisites -1. **DuckDB**: Version **1.4.4 or later**. Install it from the official [DuckDB installation guide](https://duckdb.org/docs/installation/). +1. **DuckDB**: Version **1.5.0 or later**. Install it from the official [DuckDB installation guide](https://duckdb.org/docs/installation/). 2. **Supported Providers**: Ensure you have credentials or API keys for at least one of the supported providers: - OpenAI - Azure diff --git a/duckdb b/duckdb index 6ddac802..3a3967aa 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 6ddac802ffa9bcfbcc3f5f0d71de5dff9b0bc250 +Subproject commit 3a3967aa8190d0a2d1931d4ca4f5d920760030b4 diff --git a/extension-ci-tools b/extension-ci-tools index 86fa59ca..02fb3fd3 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit 86fa59ca22c3f5dcbe7e1d17aea6b79c97cb3616 +Subproject commit 02fb3fd377ba6c46d61b1163413961558cecf5a3 diff --git a/extension_config.cmake b/extension_config.cmake index 3205e38b..46dcd65e 100644 --- a/extension_config.cmake +++ b/extension_config.cmake @@ -1,7 +1,9 @@ # This file is included by DuckDB's build system. It specifies which extension # to load +# Ensure dependencies are loaded before flock bootstraps config +duckdb_extension_load(core_functions) +duckdb_extension_load(json) + # Extension from this repo duckdb_extension_load(flock SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR} LOAD_TESTS) - -# Any extra extensions that should be built e.g.: duckdb_extension_load(json) diff --git a/src/core/config/model.cpp b/src/core/config/model.cpp index b85d558e..eda6e989 100644 --- a/src/core/config/model.cpp +++ b/src/core/config/model.cpp @@ -9,7 +9,6 @@ std::string Config::get_user_defined_models_table_name() { return "FLOCKMTL_MODE void Config::SetupDefaultModelsConfig(duckdb::Connection& con, std::string& schema_name) { const std::string table_name = Config::get_default_models_table_name(); - con.Query("INSTALL JSON; LOAD JSON;"); con.Query(duckdb_fmt::format(" CREATE TABLE IF NOT EXISTS {}.{} ( " " model_name VARCHAR NOT NULL PRIMARY KEY, " " model VARCHAR NOT NULL, " @@ -33,7 +32,6 @@ void Config::SetupDefaultModelsConfig(duckdb::Connection& con, std::string& sche void Config::SetupUserDefinedModelsConfig(duckdb::Connection& con, std::string& schema_name) { const std::string table_name = Config::get_user_defined_models_table_name(); - con.Query("INSTALL JSON; LOAD JSON;"); con.Query(duckdb_fmt::format(" CREATE TABLE IF NOT EXISTS {}.{} ( " " model_name VARCHAR NOT NULL PRIMARY KEY, " " model VARCHAR NOT NULL, " diff --git a/src/flock_extension.cpp b/src/flock_extension.cpp index faef6dc4..f2f21d0e 100644 --- a/src/flock_extension.cpp +++ b/src/flock_extension.cpp @@ -15,11 +15,11 @@ namespace duckdb { static void LoadInternal(ExtensionLoader& loader) { flock::Config::Configure(loader); - // Register the custom parser + // Register parser and binder hooks using extension registration APIs. auto& config = DBConfig::GetConfig(loader.GetDatabaseInstance()); DuckParserExtension duck_parser; - config.parser_extensions.push_back(duck_parser); - config.operator_extensions.push_back(make_uniq()); + ParserExtension::Register(config, duck_parser); + OperatorExtension::Register(config, make_shared_ptr()); } ParserExtensionParseResult duck_parse(ParserExtensionInfo*, const std::string& query) { diff --git a/src/functions/scalar/llm_filter/implementation.cpp b/src/functions/scalar/llm_filter/implementation.cpp index e0a4419f..363e7829 100644 --- a/src/functions/scalar/llm_filter/implementation.cpp +++ b/src/functions/scalar/llm_filter/implementation.cpp @@ -84,11 +84,16 @@ void LlmFilter::Execute(duckdb::DataChunk& args, duckdb::ExpressionState& state, auto& func_expr = state.expr.Cast(); auto* bind_data = &func_expr.bind_info->Cast(); - const auto results = LlmFilter::Operation(args, bind_data); - - auto index = 0; - for (const auto& res: results) { - result.SetValue(index++, duckdb::Value(res)); + if (const auto results = LlmFilter::Operation(args, bind_data); static_cast(results.size()) == 1) { + auto empty_vec = duckdb::Vector(std::string()); + duckdb::UnaryExecutor::Execute( + empty_vec, result, args.size(), + [&](duckdb::string_t name) { return duckdb::StringVector::AddString(result, results[0]); }); + } else { + auto index = 0; + for (const auto& res: results) { + result.SetValue(index++, duckdb::Value(res)); + } } auto exec_end = std::chrono::high_resolution_clock::now(); diff --git a/src/include/flock_extension.hpp b/src/include/flock_extension.hpp index 4c098257..f8ce44d1 100644 --- a/src/include/flock_extension.hpp +++ b/src/include/flock_extension.hpp @@ -1,6 +1,8 @@ #pragma once #include "flock/core/common.hpp" +#include "duckdb/parser/parser_extension.hpp" +#include "duckdb/planner/operator_extension.hpp" namespace duckdb { diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 0bfbc02f..53b53570 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -1,13 +1,25 @@ find_package(GTest CONFIG REQUIRED) -file(GLOB_RECURSE TEST_SOURCES *.cpp) -list(REMOVE_ITEM TEST_SOURCES "test_main.cpp") +file(GLOB_RECURSE TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) +list(FILTER TEST_SOURCES EXCLUDE REGEX ".*test_main\\.cpp$") file(COPY unit_test.db DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) add_executable(${PROJECT_NAME}_tests test_main.cpp ${TEST_SOURCES}) -target_link_libraries(${PROJECT_NAME}_tests PRIVATE ${PROJECT_NAME}_extension - GTest::gtest GTest::gmock) +# GNU ld (Linux + MinGW) needs --start-group/--end-group for circular deps +# jemalloc is only built on Linux +target_link_libraries(${PROJECT_NAME}_tests PRIVATE + GTest::gtest + GTest::gmock + $<$:-Wl,--start-group> + ${PROJECT_NAME}_extension + duckdb_generated_extension_loader + core_functions_extension + json_extension + parquet_extension + $<$:jemalloc_extension> + duckdb_static + $<$:-Wl,--end-group>) add_test(AllTestsInMain ${PROJECT_NAME}_tests)