Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/copilot-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
- **Build system**: CMake (3.5+) with DuckDB's extension CI tools (`extension-ci-tools/`)
- **Dependency manager**: vcpkg (managed via `vcpkg.json`)
- **Key dependencies**: `nlohmann-json`, `curl`, `gtest` (see `vcpkg.json`)
- **DuckDB version targeted**: v1.4.4 (see `MainDistributionPipeline.yml`)
- **DuckDB version targeted**: v1.5.0 (see `MainDistributionPipeline.yml`)

## Repository Layout

Expand Down Expand Up @@ -112,7 +112,7 @@ Always run `clang-format` on modified C++ files before committing. The CI pipeli

Defined in `.github/workflows/MainDistributionPipeline.yml`:

- **duckdb-stable-build**: Builds extension binaries for all platforms using DuckDB v1.4.4 CI tools.
- **duckdb-stable-build**: Builds extension binaries for all platforms using DuckDB v1.5.0 CI tools.
- **code-quality-check**: Runs `clang-format` and `clang-tidy` checks.

Triggered on push to `main`/`dev` when `src/`, `test/`, `CMakeLists.txt`, or workflow files change, and on `workflow_dispatch`.
Expand Down
14 changes: 7 additions & 7 deletions .github/workflows/MainDistributionPipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,17 @@ concurrency:
jobs:
duckdb-stable-build:
name: Build extension binaries
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.4
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.5.0
with:
duckdb_version: v1.4.4
ci_tools_version: v1.4.4
duckdb_version: v1.5.0
ci_tools_version: v1.5.0
extension_name: flock

code-quality-check:
name: Code Quality Check
uses: duckdb/extension-ci-tools/.github/workflows/_extension_code_quality.yml@v1.4.4
uses: duckdb/extension-ci-tools/.github/workflows/_extension_code_quality.yml@v1.5.0
with:
duckdb_version: v1.4.4
ci_tools_version: v1.4.4
duckdb_version: v1.5.0
ci_tools_version: v1.5.0
extension_name: flock
format_checks: 'format;tidy'
format_checks: "format;tidy"
28 changes: 7 additions & 21 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@ if(NOT EMSCRIPTEN)
endif()
find_package(nlohmann_json CONFIG REQUIRED)

# Coverage instrumentation (must be before targets are built, because
# add_compile_options()/add_link_options() are placed ahead of the
# build_*_extension() calls that create the targets).
#
# The instrumentation flags are compiler-specific:
#   - Clang / AppleClang: source-based coverage (llvm-profdata / llvm-cov)
#   - GNU:                gcov-style coverage via --coverage
# Any other compiler gets a warning instead of flags it would reject.
if(CMAKE_BUILD_TYPE STREQUAL "Coverage")
  if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
    message(STATUS "Enabling code coverage")
    add_compile_options(-fprofile-instr-generate -fcoverage-mapping)
    add_link_options(-fprofile-instr-generate -fcoverage-mapping)
  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    message(STATUS "Enabling code coverage (gcov)")
    add_compile_options(--coverage)
    add_link_options(--coverage)
  else()
    message(
      WARNING
        "Coverage build requested, but no coverage flags are known for "
        "compiler '${CMAKE_CXX_COMPILER_ID}'; building without instrumentation")
  endif()
endif()
Comment on lines +24 to +29
Copy link

Copilot AI Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Coverage build type unconditionally adds Clang coverage flags (-fprofile-instr-generate -fcoverage-mapping). These flags will fail on GCC toolchains. Consider guarding by CMAKE_CXX_COMPILER_ID MATCHES "Clang" (and using --coverage/-fprofile-arcs -ftest-coverage for GNU), or documenting that Coverage builds require Clang.

Copilot uses AI. Check for mistakes.

# Build the DuckDB static and loadable extensions
build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})
build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES})
Expand All @@ -31,19 +38,6 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION
target_link_libraries(${EXTENSION_NAME} -lstdc++fs)
endif()

# Check if we're in debug mode and enable AddressSanitizer
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
message(STATUS "Enabling AddressSanitizer for Debug build")
# Enable AddressSanitizer
target_compile_options(${EXTENSION_NAME} PRIVATE -fsanitize=address
-fno-omit-frame-pointer)
target_link_options(${EXTENSION_NAME} PRIVATE -fsanitize=address)

target_compile_options(${LOADABLE_EXTENSION_NAME}
PRIVATE -fsanitize=address -fno-omit-frame-pointer)
target_link_options(${LOADABLE_EXTENSION_NAME} PRIVATE -fsanitize=address)
endif()

# Link libraries for the static extension
if(NOT EMSCRIPTEN)
target_link_libraries(${EXTENSION_NAME} CURL::libcurl)
Expand All @@ -56,21 +50,13 @@ if(NOT EMSCRIPTEN)
endif()
target_link_libraries(${LOADABLE_EXTENSION_NAME} nlohmann_json::nlohmann_json)

# WASM builds use EM_JS with synchronous XMLHttpRequest for HTTP

# Install the extension
install(
TARGETS ${EXTENSION_NAME}
EXPORT "${DUCKDB_EXPORT_SET}"
LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
ARCHIVE DESTINATION "${INSTALL_LIB_DIR}")

if(CMAKE_BUILD_TYPE STREQUAL "Coverage")
message(STATUS "Enabling code coverage for Debug build")
add_compile_options(-fprofile-instr-generate -fcoverage-mapping)
add_link_options(-fprofile-instr-generate -fcoverage-mapping)
endif()

if(NOT EMSCRIPTEN)
# Add the test directory if not on WASM
enable_testing()
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ To cite the project:
- **WASM Support**: Compile Flock as a DuckDB-WASM loadable extension to run in the browser, enabling client-side analytics and demos without server infrastructure.
- **LLM Metrics Tracking**: Track token usage, API latency, and execution time through dedicated functions like `flock_get_metrics()` for better cost and performance monitoring.
- **Audio Transcription**: Send audio inputs to OpenAI or Azure and obtain text transcripts using the same `context_columns` abstraction (with `type: 'audio'`).
- **DuckDB v1.4.4**: Upgraded to DuckDB **1.4.4**, inheriting the latest performance and stability improvements.
- **DuckDB v1.5.0**: Upgraded to DuckDB **1.5.0**, inheriting the latest performance and stability improvements.
- **Architecture Improvements**: Centralized bind data and RAII-based storage guards reduce duplication and improve robustness across scalar and aggregate functions.
- **Developer Experience**: Interactive build scripts, improved extension CI tooling, and GitHub Copilot agent instructions streamline local development and contributions.

Expand All @@ -90,7 +90,7 @@ To cite the project:

### 📝 Prerequisites

1. **DuckDB**: Version **1.4.4 or later**. Install it from the official [DuckDB installation guide](https://duckdb.org/docs/installation/).
1. **DuckDB**: Version **1.5.0 or later**. Install it from the official [DuckDB installation guide](https://duckdb.org/docs/installation/).
2. **Supported Providers**: Ensure you have credentials or API keys for at least one of the supported providers:
- OpenAI
- Azure
Expand Down
2 changes: 1 addition & 1 deletion duckdb
Submodule duckdb updated 3485 files
6 changes: 4 additions & 2 deletions extension_config.cmake
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# This file is included by DuckDB's build system. It specifies which extensions
# to load

# Ensure dependencies are loaded before flock bootstraps config
duckdb_extension_load(core_functions)
duckdb_extension_load(json)

# Extension from this repo
duckdb_extension_load(flock SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR} LOAD_TESTS)

# Any extra extensions that should be built e.g.: duckdb_extension_load(json)
2 changes: 0 additions & 2 deletions src/core/config/model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ std::string Config::get_user_defined_models_table_name() { return "FLOCKMTL_MODE

void Config::SetupDefaultModelsConfig(duckdb::Connection& con, std::string& schema_name) {
const std::string table_name = Config::get_default_models_table_name();
con.Query("INSTALL JSON; LOAD JSON;");
con.Query(duckdb_fmt::format(" CREATE TABLE IF NOT EXISTS {}.{} ( "
" model_name VARCHAR NOT NULL PRIMARY KEY, "
" model VARCHAR NOT NULL, "
Expand All @@ -33,7 +32,6 @@ void Config::SetupDefaultModelsConfig(duckdb::Connection& con, std::string& sche

void Config::SetupUserDefinedModelsConfig(duckdb::Connection& con, std::string& schema_name) {
const std::string table_name = Config::get_user_defined_models_table_name();
con.Query("INSTALL JSON; LOAD JSON;");
con.Query(duckdb_fmt::format(" CREATE TABLE IF NOT EXISTS {}.{} ( "
" model_name VARCHAR NOT NULL PRIMARY KEY, "
" model VARCHAR NOT NULL, "
Expand Down
6 changes: 3 additions & 3 deletions src/flock_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ namespace duckdb {
static void LoadInternal(ExtensionLoader& loader) {
flock::Config::Configure(loader);

// Register the custom parser
// Register parser and binder hooks using extension registration APIs.
auto& config = DBConfig::GetConfig(loader.GetDatabaseInstance());
DuckParserExtension duck_parser;
config.parser_extensions.push_back(duck_parser);
config.operator_extensions.push_back(make_uniq<DuckOperatorExtension>());
ParserExtension::Register(config, duck_parser);
OperatorExtension::Register(config, make_shared_ptr<DuckOperatorExtension>());
}

ParserExtensionParseResult duck_parse(ParserExtensionInfo*, const std::string& query) {
Expand Down
15 changes: 10 additions & 5 deletions src/functions/scalar/llm_filter/implementation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,16 @@ void LlmFilter::Execute(duckdb::DataChunk& args, duckdb::ExpressionState& state,
auto& func_expr = state.expr.Cast<duckdb::BoundFunctionExpression>();
auto* bind_data = &func_expr.bind_info->Cast<LlmFunctionBindData>();

const auto results = LlmFilter::Operation(args, bind_data);

auto index = 0;
for (const auto& res: results) {
result.SetValue(index++, duckdb::Value(res));
if (const auto results = LlmFilter::Operation(args, bind_data); static_cast<int>(results.size()) == 1) {
auto empty_vec = duckdb::Vector(std::string());
duckdb::UnaryExecutor::Execute<duckdb::string_t, duckdb::string_t>(
empty_vec, result, args.size(),
[&](duckdb::string_t name) { return duckdb::StringVector::AddString(result, results[0]); });
Comment on lines +87 to +91
Copy link

Copilot AI Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The results.size() == 1 branch broadcasts by executing a per-row lambda that calls StringVector::AddString for every row, which duplicates the same string in the result heap. Prefer using DuckDB’s constant vector facilities to set a single constant value for the whole chunk to reduce allocations and memory use.

Copilot uses AI. Check for mistakes.
} else {
auto index = 0;
for (const auto& res: results) {
result.SetValue(index++, duckdb::Value(res));
}
}

auto exec_end = std::chrono::high_resolution_clock::now();
Expand Down
2 changes: 2 additions & 0 deletions src/include/flock_extension.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#pragma once

#include "flock/core/common.hpp"
#include "duckdb/parser/parser_extension.hpp"
#include "duckdb/planner/operator_extension.hpp"

namespace duckdb {

Expand Down
20 changes: 16 additions & 4 deletions test/unit/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,25 @@
find_package(GTest CONFIG REQUIRED)

file(GLOB_RECURSE TEST_SOURCES *.cpp)
list(REMOVE_ITEM TEST_SOURCES "test_main.cpp")
# Collect every unit-test source below this directory. file(GLOB_RECURSE)
# returns absolute paths, so the test entry point has to be removed by its
# absolute path as well; it is listed explicitly in add_executable() and must
# not appear twice. list(REMOVE_ITEM) matches the exact path only (no other
# file ending in "test_main.cpp" is dropped) and works with CMake 3.5, whereas
# list(FILTER) requires CMake 3.6+.
file(GLOB_RECURSE TEST_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
list(REMOVE_ITEM TEST_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/test_main.cpp")
Comment on lines +3 to +4
Copy link

Copilot AI Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

list(FILTER ...) requires CMake >= 3.6, but the repo’s cmake_minimum_required is 3.5. This will break configuration with the documented minimum CMake version. Either bump the minimum required CMake version, or replace this with a 3.5-compatible removal (e.g., list(REMOVE_ITEM TEST_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/test_main.cpp")).

Copilot uses AI. Check for mistakes.

file(COPY unit_test.db DESTINATION ${CMAKE_CURRENT_BINARY_DIR})

add_executable(${PROJECT_NAME}_tests test_main.cpp ${TEST_SOURCES})

target_link_libraries(${PROJECT_NAME}_tests PRIVATE ${PROJECT_NAME}_extension
GTest::gtest GTest::gmock)
# Link the unit-test executable against GoogleTest/GoogleMock, this project's
# extension target and the additional libraries listed below.
#
# GNU ld (Linux + MinGW) needs --start-group/--end-group for circular deps.
# The group flags are emitted through generator expressions and only when the
# C++ compiler ID is GNU; every library between the two flags is part of the
# group.
# NOTE(review): the condition tests the compiler ID, not the linker - confirm
# whether Clang builds that link with GNU ld need the same grouping.
#
# jemalloc is only built on Linux, so jemalloc_extension is linked only when
# the platform ID is Linux.
target_link_libraries(${PROJECT_NAME}_tests PRIVATE
GTest::gtest
GTest::gmock
$<$<CXX_COMPILER_ID:GNU>:-Wl,--start-group>
${PROJECT_NAME}_extension
duckdb_generated_extension_loader
core_functions_extension
json_extension
parquet_extension
$<$<PLATFORM_ID:Linux>:jemalloc_extension>
duckdb_static
$<$<CXX_COMPILER_ID:GNU>:-Wl,--end-group>)

add_test(AllTestsInMain ${PROJECT_NAME}_tests)
Loading