From 0f43f96c57eda3b21e2d9e9fb65f5e75f00bc3ca Mon Sep 17 00:00:00 2001 From: Dmitrij Kosmakov Date: Mon, 4 May 2026 13:18:47 +0300 Subject: [PATCH 1/2] Upgrade to duckdb 1.5.2 --- .github/workflows/MainDistributionPipeline.yml | 6 +++--- .gitignore | 1 + duckdb | 2 +- extension-ci-tools | 2 +- src/block_manager.cpp | 12 ++++++------ src/quackstore_extension.cpp | 4 ++-- 6 files changed, 14 insertions(+), 13 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 4361e31..a6dc22c 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -14,9 +14,9 @@ concurrency: jobs: duckdb-next-build: name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.4 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.5.2 with: - duckdb_version: v1.4.4 - ci_tools_version: v1.4.4 + duckdb_version: v1.5.2 + ci_tools_version: v1.5.2 extension_name: quackstore exclude_archs: "windows_amd64_rtools;windows_amd64;windows_amd64_mingw;wasm_mvp;wasm_eh;wasm_threads" diff --git a/.gitignore b/.gitignore index b396bda..882c3d1 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,6 @@ build .idea .venv .vscode/settings.json +.cache duckdb_unittest_tempdir/ .DS_Store diff --git a/duckdb b/duckdb index 6ddac80..8a58519 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 6ddac802ffa9bcfbcc3f5f0d71de5dff9b0bc250 +Subproject commit 8a5851971fae891f292c2714d86046ee018e9737 diff --git a/extension-ci-tools b/extension-ci-tools index af154d2..8e75e1a 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit af154d26927bc8bdc9be4b3c58cde1c00d196388 +Subproject commit 8e75e1ae0dd4bddbc4f701840f893f3bf68df3d4 diff --git a/src/block_manager.cpp b/src/block_manager.cpp index 7322018..8f69851 100644 --- a/src/block_manager.cpp +++ b/src/block_manager.cpp @@ -301,27 +301,27 @@ void BlockManager::LoadFreeList() { void BlockManager::ValidateBlockId(block_id_t block_id) const { if (block_id == INVALID_BLOCK_ID) { throw duckdb::InvalidInputException( - "Block ID cannot be INVALID_BLOCK_ID", { {"block_id", std::to_string(block_id)} - } + }, + "Block ID cannot be INVALID_BLOCK_ID" ); } if (block_id < 0) { throw duckdb::InvalidInputException( - "Block ID cannot be negative", { {"block_id", std::to_string(block_id)} - } + }, + "Block ID cannot be negative" ); } if (block_id >= max_block) { throw duckdb::InvalidInputException( - "Block ID cannot exceed max_block", { {"block_id", std::to_string(block_id)}, {"max_block", std::to_string(max_block)} - } + }, + "Block ID cannot exceed max_block" ); } } diff --git a/src/quackstore_extension.cpp b/src/quackstore_extension.cpp index aecd5c3..7c77853 100644 --- a/src/quackstore_extension.cpp +++ b/src/quackstore_extension.cpp @@ -43,11 +43,11 @@ static void LoadInternal(ExtensionLoader &loader) { loader.RegisterFunction(std::move(info)); } - auto extension_callback = make_uniq(std::move(cache)); + auto extension_callback = duckdb::make_shared_ptr(std::move(cache)); for (auto& connection : ConnectionManager::Get(instance).GetConnectionList()) { extension_callback->OnConnectionOpened(*connection); } - config.extension_callbacks.push_back(std::move(extension_callback)); + config.GetCallbackManager().Register(std::move(extension_callback)); } void QuackstoreExtension::Load(ExtensionLoader &loader) { From f1b582e67f96301177f4b493afcc229dfb5981ce Mon Sep 17 00:00:00 2001 From: Dmitrij Kosmakov Date: Tue, 5 May 2026 15:26:40 +0300 Subject: [PATCH 2/2] Bring changes from CacheFS extension --- CMakeLists.txt | 2 +- src/quackstore_filesystem.cpp | 37 ++++++++----- test/unittest/test_cache.cpp | 2 +- test/unittest/test_cachefs.cpp | 97 +++++++++++++++++++++++++++++++++- vcpkg.json | 1 + 5 files changed, 123 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 21a933b..91f0fe9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.5) +cmake_minimum_required(VERSION 3.10) # Set extension name here set(TARGET_NAME quackstore) diff --git a/src/quackstore_filesystem.cpp b/src/quackstore_filesystem.cpp index 8ec1c0b..5ebcddd 100644 --- a/src/quackstore_filesystem.cpp +++ b/src/quackstore_filesystem.cpp @@ -81,16 +81,19 @@ class CacheFileHandle : public duckdb::FileHandle { if (!cache.RetrieveFileMetadata(path, md)) { // First time caching this file - store metadata - cache.StoreFileSize(GetPath(), get_underlying_filesize()); - cache.StoreFileLastModified(GetPath(), get_underlying_last_modified()); + auto file_size = get_underlying_filesize(); + auto last_modified = get_underlying_last_modified(); + cache.StoreFileSize(GetPath(), file_size); + cache.StoreFileLastModified(GetPath(), last_modified); return; } + // Validate existing metadata + bool evict_file_entry = false; + // For mutable data, validate cache freshness if (params.data_mutable) { - bool evict_file_entry = false; - if (md.last_modified != get_underlying_last_modified()) { evict_file_entry = true; @@ -100,18 +103,26 @@ class CacheFileHandle : public duckdb::FileHandle { // Certain FS don't provide LastModified property. Let's check file size evict_file_entry = (md.file_size != get_underlying_filesize() || get_underlying_filesize() == 0); } + } - if (evict_file_entry) - { - // File changed - invalidate cache and update metadata - cache.Evict(path); + // Validate file size + if (md.file_size == 0) + { + // There are blocks cached, something is wrong - evict the entry + evict_file_entry = evict_file_entry || !md.blocks.empty(); + // Underlying file size is different - evict the entry + evict_file_entry = evict_file_entry || get_underlying_filesize() != 0; + } - duckdb::timestamp_t last_modified = get_underlying_last_modified(); - cache.StoreFileLastModified(GetPath(), last_modified); + if (evict_file_entry) + { + // File changed - invalidate cache and update metadata + cache.Evict(path); - int64_t filesize = get_underlying_filesize(); - cache.StoreFileSize(GetPath(), filesize); - } + auto last_modified = get_underlying_last_modified(); + auto file_size = get_underlying_filesize(); + cache.StoreFileLastModified(GetPath(), last_modified); + cache.StoreFileSize(GetPath(), file_size); } } catch (...) diff --git a/test/unittest/test_cache.cpp b/test/unittest/test_cache.cpp index 315ad6b..f2be73c 100644 --- a/test/unittest/test_cache.cpp +++ b/test/unittest/test_cache.cpp @@ -184,7 +184,7 @@ class CrashingBlockManager : public quackstore::BlockManager { duckdb::vector InitializeRandomData(size_t size) { std::random_device rd; // Initialize a random device std::mt19937 gen(rd()); // Seed the generator - std::uniform_int_distribution dis(0, 255); // Create a distribution in [0, 255] + std::uniform_int_distribution dis(0, 255); // Create a distribution in [0, 255] duckdb::vector data(size, 0); for (auto& byte : data) { diff --git a/test/unittest/test_cachefs.cpp b/test/unittest/test_cachefs.cpp index 77cdfe1..20ff3f3 100644 --- a/test/unittest/test_cachefs.cpp +++ b/test/unittest/test_cachefs.cpp @@ -853,7 +853,102 @@ TEST_CASE_METHOD(WithDuckDB, "CacheFileHandle constructor exception handling pre } } -TEST_CASE_METHOD(WithDuckDB, "Check QuackstoreFileSystem::FileExists", "[quackstore]") { +TEST_CASE_METHOD(WithDuckDB, "Evict entries having file size == 0, while having blocks stored or underlying file size > 0 ", "[cachefs][zero-size]") { + const duckdb::string CACHE_PATH = "/tmp/cache_zero_size_test.bin"; + const duckdb::string TEST_FS_PREFIX = "test://"; + const duckdb::string FILENAME = "/tmp/non_empty_test_file.txt"; + const duckdb::string FILE_URI = TEST_FS_PREFIX + FILENAME; + const duckdb::string CACHED_FILE_URI = QuackstoreFileSystem::SCHEMA_PREFIX + FILE_URI; + const duckdb::string FILE_CONTENT = "QUACK!"; + + // Setup test filesystem + auto test_fs = duckdb::make_uniq(TEST_FS_PREFIX); + auto& test_fs_ref = *test_fs; + test_fs_ref.ResetFileSize(); // Use real file size + test_fs_ref.SetLastModified(duckdb::timestamp_t{123}); + + // Setup cache + RemoveLocalFile(CACHE_PATH); + auto& config = duckdb::DBConfig::GetConfig(GetDBInstance()); + config.SetOptionByName(ExtensionParams::PARAM_NAME_QUACKSTORE_CACHE_PATH, duckdb::Value{CACHE_PATH}); + config.SetOptionByName(ExtensionParams::PARAM_NAME_QUACKSTORE_CACHE_ENABLED, duckdb::Value::BOOLEAN(true)); + + auto cache = duckdb::make_uniq(1024); + Cache& cache_ref = *cache; + auto cache_fs = duckdb::make_uniq(*cache); + + auto& main_fs_ref = GetDBInstance().GetFileSystem(); + main_fs_ref.UnregisterSubSystem(QuackstoreFileSystem::FILESYSTEM_NAME); + main_fs_ref.RegisterSubSystem(std::move(cache_fs)); + main_fs_ref.RegisterSubSystem(std::move(test_fs)); + + // Create non-empty file + auto local_fs = duckdb::FileSystem::CreateLocal(); + { + auto handle = local_fs->OpenFile(FILENAME, + duckdb::FileFlags::FILE_FLAGS_FILE_CREATE_NEW | + duckdb::FileFlags::FILE_FLAGS_WRITE); + REQUIRE(handle); + handle->Write((void*)FILE_CONTENT.c_str(), FILE_CONTENT.size()); + handle->Close(); + } + + auto initial_handle = main_fs_ref.OpenFile(CACHED_FILE_URI, duckdb::FileOpenFlags::FILE_FLAGS_READ); + duckdb::vector buffer(256); + main_fs_ref.Read(*initial_handle, buffer.data(), buffer.size()); // Populate cache blocks + + MetadataManager::FileMetadata md; + cache_ref.RetrieveFileMetadata(CACHED_FILE_URI, md); + REQUIRE(md.last_modified == duckdb::timestamp_t{123}); + REQUIRE(md.file_size == FILE_CONTENT.size()); + REQUIRE_FALSE(md.blocks.empty()); + initial_handle->Close(); + + cache_ref.StoreFileSize(CACHED_FILE_URI, 0); // Corrupt the metadata to have file_size = 0 + cache_ref.RetrieveFileMetadata(CACHED_FILE_URI, md); + REQUIRE(md.last_modified == duckdb::timestamp_t{123}); + REQUIRE(md.file_size == 0); + REQUIRE_FALSE(md.blocks.empty()); + + SECTION("We have blocks in cache, but cached file size == 0, should evict") { + for(bool val: {true, false}) { + INFO("PARAM_NAME_QUACKSTORE_DATA_MUTABLE: " << val); + config.SetOptionByName(ExtensionParams::PARAM_NAME_QUACKSTORE_DATA_MUTABLE, duckdb::Value::BOOLEAN(val)); + + auto handle = main_fs_ref.OpenFile(CACHED_FILE_URI, duckdb::FileOpenFlags::FILE_FLAGS_READ); + REQUIRE(handle != nullptr); + + cache_ref.RetrieveFileMetadata(CACHED_FILE_URI, md); + REQUIRE(md.last_modified == duckdb::timestamp_t{123}); + REQUIRE(md.file_size == FILE_CONTENT.size()); + REQUIRE(md.blocks.empty()); + } + } + + SECTION("Underlying file size > 0, but cached file size == 0, should evict") { + for(bool val: {true, false}) { + INFO("PARAM_NAME_QUACKSTORE_DATA_MUTABLE: " << val); + config.SetOptionByName(ExtensionParams::PARAM_NAME_QUACKSTORE_DATA_MUTABLE, duckdb::Value::BOOLEAN(val)); + + cache_ref.Evict(CACHED_FILE_URI); // First evict to remove blocks + cache_ref.StoreFileSize(CACHED_FILE_URI, 0); // Corrupt the metadata + cache_ref.StoreFileLastModified(CACHED_FILE_URI, duckdb::timestamp_t{123}); + + auto handle = main_fs_ref.OpenFile(CACHED_FILE_URI, duckdb::FileOpenFlags::FILE_FLAGS_READ); + REQUIRE(handle != nullptr); + + cache_ref.RetrieveFileMetadata(CACHED_FILE_URI, md); + REQUIRE(md.last_modified == duckdb::timestamp_t{123}); + REQUIRE(md.file_size == FILE_CONTENT.size()); + REQUIRE(md.blocks.empty()); + } + } + + // Cleanup + local_fs->RemoveFile(FILENAME); +} + +TEST_CASE_METHOD(WithDuckDB, "Check QuackstoreFileSystem::FileExists", "[cachefs]") { const auto CACHE_PATH = "/tmp/cache.bin"; RemoveLocalFile(CACHE_PATH); auto cache = Cache{16}; diff --git a/vcpkg.json b/vcpkg.json index 407341c..6aecf65 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -1,5 +1,6 @@ { "dependencies": [ + "vcpkg-cmake", "openssl", "curl" ],