diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 00000000..20a57f88
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,196 @@
+name: Release
+
+permissions:
+  contents: read
+
+on:
+  push:
+    tags:
+      - 'v*'  # Match v0.3.0, v1.0.0, etc.
+  workflow_dispatch:  # Allow manual trigger
+
+jobs:
+  # ============================================================================
+  # Linux x64 Build
+  # ============================================================================
+  linux-x64:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+          fetch-depth: 0
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y cmake ninja-build build-essential
+
+      - name: Build libzvec_c_api.so
+        run: |
+          cmake -S . -B build -G Ninja \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DBUILD_PYTHON_BINDINGS=OFF \
+            -DBUILD_TOOLS=OFF \
+            -DBUILD_EXAMPLES=OFF
+          cmake --build build --parallel --target zvec_c_api
+
+      - name: Verify library
+        run: |
+          echo "=== Library file ==="
+          ls -lh build/src/c_api/libzvec_c_api.so
+          echo "=== Check dependencies ==="
+          ldd build/src/c_api/libzvec_c_api.so || true
+
+      - name: Create tarball
+        run: |
+          cp src/include/zvec/c_api.h .
+          cp build/src/c_api/libzvec_c_api.so .
+          tar -czvf libzvec-capi-linux-x64.tar.gz \
+            c_api.h \
+            libzvec_c_api.so
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: zvec-capi-linux-x64
+          path: libzvec-capi-linux-x64.tar.gz
+
+  # ============================================================================
+  # Linux ARM64 Build
+  # ============================================================================
+  linux-arm64:
+    runs-on: ubuntu-24.04-arm
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+          fetch-depth: 0
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y cmake ninja-build build-essential
+
+      - name: Build libzvec_c_api.so (ARM64)
+        run: |
+          cmake -S . -B build -G Ninja \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DBUILD_PYTHON_BINDINGS=OFF \
+            -DBUILD_TOOLS=OFF \
+            -DBUILD_EXAMPLES=OFF
+          cmake --build build --parallel --target zvec_c_api
+
+      - name: Verify library
+        run: |
+          echo "=== Library file ==="
+          ls -lh build/src/c_api/libzvec_c_api.so
+          echo "=== Check dependencies ==="
+          ldd build/src/c_api/libzvec_c_api.so || true
+
+      - name: Create tarball
+        run: |
+          cp src/include/zvec/c_api.h .
+          cp build/src/c_api/libzvec_c_api.so .
+          tar -czvf libzvec-capi-linux-arm64.tar.gz \
+            c_api.h \
+            libzvec_c_api.so
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: zvec-capi-linux-arm64
+          path: libzvec-capi-linux-arm64.tar.gz
+
+  # ============================================================================
+  # macOS Universal Build (arm64 + x86_64)
+  # ============================================================================
+  macos-universal:
+    runs-on: macos-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+          fetch-depth: 0
+
+      - name: Install dependencies
+        run: |
+          brew install cmake ninja
+
+      - name: Build libzvec_c_api.dylib (Universal Binary)
+        env:
+          CMAKE_OSX_ARCHITECTURES: "arm64;x86_64"
+          MACOSX_DEPLOYMENT_TARGET: "11.0"
+        run: |
+          cmake -S . -B build -G Ninja \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
+            -DCMAKE_OSX_DEPLOYMENT_TARGET="11.0" \
+            -DBUILD_PYTHON_BINDINGS=OFF \
+            -DBUILD_TOOLS=OFF \
+            -DBUILD_EXAMPLES=OFF
+          cmake --build build --parallel --target zvec_c_api
+
+      - name: Verify library
+        run: |
+          echo "=== Library file ==="
+          ls -lh build/src/c_api/libzvec_c_api.dylib
+          echo "=== Check architectures ==="
+          lipo -archs build/src/c_api/libzvec_c_api.dylib
+
+      - name: Create tarball
+        run: |
+          cp src/include/zvec/c_api.h .
+          cp build/src/c_api/libzvec_c_api.dylib .
+          tar -czvf libzvec-capi-macos-universal.tar.gz \
+            c_api.h \
+            libzvec_c_api.dylib
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: zvec-capi-macos-universal
+          path: libzvec-capi-macos-universal.tar.gz
+
+  # ============================================================================
+  # Upload to GitHub Releases
+  # ============================================================================
+  upload-release:
+    needs: [linux-x64, linux-arm64, macos-universal]
+    # A manual (workflow_dispatch) run from a branch has no tag to attach a
+    # release to, so only publish when the workflow runs on a tag ref.
+    if: startsWith(github.ref, 'refs/tags/')
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # Download build artifacts for each platform
+      - uses: actions/download-artifact@v4
+        with:
+          name: zvec-capi-linux-x64
+          path: dist/
+
+      - uses: actions/download-artifact@v4
+        with:
+          name: zvec-capi-linux-arm64
+          path: dist/
+
+      - uses: actions/download-artifact@v4
+        with:
+          name: zvec-capi-macos-universal
+          path: dist/
+
+      - name: List artifacts
+        run: ls -la dist/
+
+      # Upload to GitHub Releases
+      - uses: softprops/action-gh-release@v2
+        with:
+          files: dist/*.tar.gz
+          generate_release_notes: true
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 52a59754..ad954faa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -34,9 +34,15 @@ if(DEFINED ENV{USE_OSS_MIRROR} AND NOT "$ENV{USE_OSS_MIRROR}" STREQUAL "")
 endif()
 message(STATUS "USE_OSS_MIRROR:${USE_OSS_MIRROR}")
 
+option(BUILD_EXAMPLES "Build examples" ON)
+message(STATUS "BUILD_EXAMPLES:${BUILD_EXAMPLES}")
+
 cc_directory(thirdparty)
 cc_directories(src)
 cc_directories(tests)
+if(BUILD_EXAMPLES)
+  cc_directories(examples)
+endif()
 
 if(BUILD_TOOLS)
   cc_directories(tools)
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 00000000..66e943ad
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_subdirectory(c_api)
diff --git a/examples/c_api/CMakeLists.txt b/examples/c_api/CMakeLists.txt
new file mode 100644
index 00000000..759f744f
--- /dev/null
+++ b/examples/c_api/CMakeLists.txt
@@ -0,0 +1,44 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Directory that receives every C API example binary. Fall back to
+# <build>/bin when CMAKE_RUNTIME_OUTPUT_DIRECTORY is not set, so the path
+# never degenerates to the absolute "/examples/c_api".
+if(CMAKE_RUNTIME_OUTPUT_DIRECTORY)
+  set(ZVEC_C_API_EXAMPLES_OUTPUT_DIR
+      ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/examples/c_api)
+else()
+  set(ZVEC_C_API_EXAMPLES_OUTPUT_DIR ${CMAKE_BINARY_DIR}/bin/examples/c_api)
+endif()
+
+# Adds the executable c_api_<name> built from <name>.c, linked against
+# zvec_c_api and with src/include on its include path for "zvec/c_api.h".
+function(zvec_add_c_api_example name)
+  set(target c_api_${name})
+  add_executable(${target} ${name}.c)
+  target_link_libraries(${target} PRIVATE zvec_c_api)
+  target_include_directories(${target} PRIVATE
+    ${PROJECT_SOURCE_DIR}/src/include
+  )
+  set_target_properties(${target} PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY ${ZVEC_C_API_EXAMPLES_OUTPUT_DIR}
+  )
+endfunction()
+
+zvec_add_c_api_example(basic_example)
+zvec_add_c_api_example(collection_schema_example)
+zvec_add_c_api_example(doc_example)
+zvec_add_c_api_example(index_example)
+zvec_add_c_api_example(field_schema_example)
+zvec_add_c_api_example(optimized_example)
diff
--git a/examples/c_api/basic_example.c b/examples/c_api/basic_example.c
new file mode 100644
index 00000000..2e912248
--- /dev/null
+++ b/examples/c_api/basic_example.c
@@ -0,0 +1,273 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "zvec/c_api.h"
+
+/**
+ * @brief Report a failed C API call on stderr
+ *
+ * When @p error is not ZVEC_OK, the message from zvec_get_last_error() is
+ * printed together with @p context and then released with free().
+ *
+ * @param error Status code returned by the call being checked
+ * @param context Short description of the operation, used in the message
+ * @return @p error unchanged, so the call can sit inside a condition
+ */
+static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) {
+  if (error != ZVEC_OK) {
+    char *error_msg = NULL;
+    zvec_get_last_error(&error_msg);
+    fprintf(stderr, "Error in %s: %d - %s\n", context, error,
+            error_msg ? error_msg : "Unknown error");
+    free(error_msg);
+  }
+  return error;
+}
+
+/**
+ * @brief Create a simple test collection using CollectionSchema
+ *
+ * The schema holds two string fields, "id" and "text" (inverted index), and
+ * a 3-dimensional FP32 "embedding" field (HNSW index). The collection is
+ * created and opened at ./test_collection.
+ *
+ * @param collection Output pointer passed to zvec_collection_create_and_open
+ * @return ZVEC_OK on success, otherwise the error code of the failing step
+ */
+static ZVecErrorCode create_simple_test_collection(
+    ZVecCollection **collection) {
+  // Create collection schema using C API
+  ZVecCollectionSchema *schema =
+      zvec_collection_schema_create("test_collection");
+  if (!schema) {
+    return ZVEC_ERROR_INTERNAL_ERROR;
+  }
+
+  ZVecErrorCode error = ZVEC_OK;
+
+  // Create index parameters using new macros
+  // clang-format off
+  ZVecIndexParams invert_params_val = ZVEC_INVERT_PARAMS(true, false);
+  ZVecIndexParams hnsw_params_val = ZVEC_HNSW_PARAMS(
+      ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED);
+  // clang-format on
+  ZVecIndexParams *invert_params = &invert_params_val;
+  ZVecIndexParams *hnsw_params = &hnsw_params_val;
+
+  // Create and add ID field (primary key)
+  ZVecFieldSchema *id_field =
+      zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0);
+  if (!id_field) {
+    zvec_collection_schema_destroy(schema);
+    return ZVEC_ERROR_INTERNAL_ERROR;
+  }
+  zvec_field_schema_set_invert_index(id_field, invert_params);
+  error = zvec_collection_schema_add_field(schema, id_field);
+  if (error != ZVEC_OK) {
+    zvec_collection_schema_destroy(schema);
+    return error;
+  }
+
+  // Create text field (inverted index)
+  ZVecFieldSchema *text_field =
+      zvec_field_schema_create("text", ZVEC_DATA_TYPE_STRING, true, 0);
+  if (!text_field) {
+    zvec_collection_schema_destroy(schema);
+    return ZVEC_ERROR_INTERNAL_ERROR;
+  }
+  zvec_field_schema_set_invert_index(text_field, invert_params);
+  error = zvec_collection_schema_add_field(schema, text_field);
+  if (error != ZVEC_OK) {
+    zvec_collection_schema_destroy(schema);
+    return error;
+  }
+
+  // Create embedding field (HNSW index)
+  ZVecFieldSchema *embedding_field = zvec_field_schema_create(
+      "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 3);
+  if (!embedding_field) {
+    zvec_collection_schema_destroy(schema);
+    return ZVEC_ERROR_INTERNAL_ERROR;
+  }
+  zvec_field_schema_set_hnsw_index(embedding_field, hnsw_params);
+  error = zvec_collection_schema_add_field(schema, embedding_field);
+  if (error != ZVEC_OK) {
+    zvec_collection_schema_destroy(schema);
+    return error;
+  }
+
+  // Use default options
+  ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS();
+
+  // Create collection using the new API
+  error = zvec_collection_create_and_open("./test_collection", schema, &options,
+                                          collection);
+
+  // Cleanup resources
+  zvec_collection_schema_destroy(schema);
+
+  return error;
+}
+
+/**
+ * @brief Basic C API usage example
+ *
+ * Creates a collection, inserts two documents, flushes, prints the collection
+ * statistics and runs one vector query.
+ *
+ * @return 0 on success, 1 when a required step or the query fails
+ */
+int main() {
+  printf("=== ZVec C API Basic Example ===\n\n");
+
+  ZVecErrorCode error;
+  int exit_code = 0;  // Becomes 1 when the query fails
+
+  // Create collection using simplified function
+  ZVecCollection *collection = NULL;
+  error = create_simple_test_collection(&collection);
+  if (handle_error(error, "creating collection") != ZVEC_OK) {
+    return 1;
+  }
+  printf("✓ Collection created successfully\n");
+
+  // Prepare test data
+  float vector1[] = {0.1f, 0.2f, 0.3f};
+  float vector2[] = {0.4f, 0.5f, 0.6f};
+
+  ZVecDoc *docs[2];
+  for (int i = 0; i < 2; ++i) {
+    docs[i] = zvec_doc_create();
+    if (!docs[i]) {
+      fprintf(stderr, "Failed to create document %d\n", i);
+      // Cleanup allocated resources
+      for (int j = 0; j < i; ++j) {
+        zvec_doc_destroy(docs[j]);
+      }
+      zvec_collection_destroy(collection);
+      return 1;
+    }
+  }
+
+  // Manually add fields to document 1
+  zvec_doc_set_pk(docs[0], "doc1");
+  zvec_doc_add_field_by_value(docs[0], "id", ZVEC_DATA_TYPE_STRING, "doc1",
+                              strlen("doc1"));
+  zvec_doc_add_field_by_value(docs[0], "text", ZVEC_DATA_TYPE_STRING,
+                              "First document", strlen("First document"));
+  zvec_doc_add_field_by_value(docs[0], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32,
+                              vector1, 3 * sizeof(float));
+
+  // Manually add fields to document 2
+  zvec_doc_set_pk(docs[1], "doc2");
+  zvec_doc_add_field_by_value(docs[1], "id", ZVEC_DATA_TYPE_STRING, "doc2",
+                              strlen("doc2"));
+  zvec_doc_add_field_by_value(docs[1], "text", ZVEC_DATA_TYPE_STRING,
+                              "Second document", strlen("Second document"));
+  zvec_doc_add_field_by_value(docs[1], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32,
+                              vector2, 3 * sizeof(float));
+
+  // Insert documents
+  size_t success_count = 0;
+  size_t error_count = 0;
+  error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2,
+                                 &success_count, &error_count);
+  if (handle_error(error, "inserting documents") != ZVEC_OK) {
+    // The documents are still held here, so release them on this path too
+    for (int i = 0; i < 2; ++i) {
+      zvec_doc_destroy(docs[i]);
+    }
+    zvec_collection_destroy(collection);
+    return 1;
+  }
+  printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count,
+         error_count);
+  for (int i = 0; i < 2; ++i) {
+    zvec_doc_destroy(docs[i]);
+  }
+
+  // Flush collection
+  error = zvec_collection_flush(collection);
+  if (handle_error(error, "flushing collection") != ZVEC_OK) {
+    printf("Collection flush failed\n");
+  } else {
+    printf("✓ Collection flushed successfully\n");
+  }
+
+  // Get collection statistics
+  ZVecCollectionStats *stats = NULL;
+  error = zvec_collection_get_stats(collection, &stats);
+  if (handle_error(error, "getting collection stats") == ZVEC_OK) {
+    printf("✓ Collection stats - Document count: %llu\n",
+           (unsigned long long)stats->doc_count);
+    // Free statistics memory
+    zvec_collection_stats_destroy(stats);
+  }
+
+  printf("Testing vector query...\n");
+  // Query documents
+  ZVecVectorQuery query = {0};
+  query.field_name =
+      (ZVecString){.data = "embedding", .length = strlen("embedding")};
+  query.query_vector =
+      (ZVecByteArray){.data = (uint8_t *)vector1, .length = 3 * sizeof(float)};
+  query.topk = 10;
+  query.filter = (ZVecString){.data = "", .length = 0};
+  query.include_vector = true;
+  query.include_doc_id = true;
+  query.output_fields.strings = NULL;
+  query.output_fields.count = 0;
+
+  ZVecDoc **results = NULL;
+  size_t result_count = 0;
+  error = zvec_collection_query(collection, &query, &results, &result_count);
+
+  if (error != ZVEC_OK) {
+    char *error_msg = NULL;
+    zvec_get_last_error(&error_msg);
+    printf("[ERROR] Query failed: %s\n",
+           error_msg ? error_msg : "Unknown error");
+    free(error_msg);
+    exit_code = 1;
+    goto cleanup;
+  }
+
+  printf("✓ Query successful - Returned %zu results\n", result_count);
+
+  // Process query results
+  for (size_t i = 0; i < result_count && i < 5; ++i) {
+    const ZVecDoc *doc = results[i];
+    const char *pk = zvec_doc_get_pk_copy(doc);
+
+    printf(" Result %zu: PK=%s, DocID=%llu, Score=%.4f\n", i + 1,
+           pk ? pk : "NULL", (unsigned long long)zvec_doc_get_doc_id(doc),
+           zvec_doc_get_score(doc));
+
+    if (pk) {
+      free((void *)pk);
+    }
+  }
+
+  // Free query results memory
+  zvec_docs_free(results, result_count);
+
+cleanup:
+  // Cleanup resources
+  zvec_collection_destroy(collection);
+  printf("✓ Example completed\n");
+  return exit_code;
+}
diff --git a/examples/c_api/collection_schema_example.c b/examples/c_api/collection_schema_example.c
new file mode 100644
index 00000000..183cc270
--- /dev/null
+++ b/examples/c_api/collection_schema_example.c
@@ -0,0 +1,237 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "zvec/c_api.h"
+
+/**
+ * @brief Print error message and return error code
+ */
+static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) {
+  if (error != ZVEC_OK) {
+    char *error_msg = NULL;
+    zvec_get_last_error(&error_msg);
+    fprintf(stderr, "Error in %s: %d - %s\n", context, error,
+            error_msg ?
error_msg : "Unknown error");
+    free(error_msg);
+  }
+  return error;
+}
+
+/**
+ * @brief Collection schema creation and management example
+ */
+int main() {
+  printf("=== ZVec Collection Schema Example ===\n\n");
+
+  ZVecErrorCode error;
+  // Process exit status; becomes 1 when the flush or the query fails.
+  int exit_code = 0;
+
+  // 1. Create collection schema
+  ZVecCollectionSchema *schema =
+      zvec_collection_schema_create("schema_example_collection");
+  if (!schema) {
+    fprintf(stderr, "Failed to create collection schema\n");
+    return 1;
+  }
+  printf("✓ Collection schema created successfully\n");
+
+  // 2. Set schema properties
+  schema->max_doc_count_per_segment = 1000000;
+  printf("✓ Set max documents per segment: %llu\n",
+         (unsigned long long)schema->max_doc_count_per_segment);
+
+  // 3. Create index parameters
+  // clang-format off
+  ZVecIndexParams invert_params_val = ZVEC_INVERT_PARAMS(true, false);
+  // clang-format on
+  ZVecIndexParams *invert_params = &invert_params_val;
+  // clang-format off
+  ZVecIndexParams hnsw_params_val = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED);
+  // clang-format on
+  ZVecIndexParams *hnsw_params = &hnsw_params_val;
+
+  // 4. Create and add ID field (primary key)
+  ZVecFieldSchema *id_field =
+      zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0);
+  if (!id_field) {
+    fprintf(stderr, "Failed to create ID field\n");
+    zvec_collection_schema_destroy(schema);
+    return 1;
+  }
+
+  error = zvec_collection_schema_add_field(schema, id_field);
+  if (handle_error(error, "adding ID field") != ZVEC_OK) {
+    zvec_collection_schema_destroy(schema);
+    return 1;
+  }
+  printf("✓ ID field added successfully\n");
+
+  // 5. Create and add text field with inverted index
+  ZVecFieldSchema *text_field =
+      zvec_field_schema_create("content", ZVEC_DATA_TYPE_STRING, true, 0);
+  if (!text_field) {
+    fprintf(stderr, "Failed to create text field\n");
+    zvec_collection_schema_destroy(schema);
+    return 1;
+  }
+
+  zvec_field_schema_set_invert_index(text_field, invert_params);
+  error = zvec_collection_schema_add_field(schema, text_field);
+  if (handle_error(error, "adding text field") != ZVEC_OK) {
+    zvec_collection_schema_destroy(schema);
+    return 1;
+  }
+  printf("✓ Text field with inverted index added successfully\n");
+
+  // 6. Create and add vector field with HNSW index
+  ZVecFieldSchema *vector_field = zvec_field_schema_create(
+      "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128);
+  if (!vector_field) {
+    fprintf(stderr, "Failed to create vector field\n");
+    zvec_collection_schema_destroy(schema);
+    return 1;
+  }
+
+  zvec_field_schema_set_hnsw_index(vector_field, hnsw_params);
+  error = zvec_collection_schema_add_field(schema, vector_field);
+  if (handle_error(error, "adding vector field") != ZVEC_OK) {
+    zvec_collection_schema_destroy(schema);
+    return 1;
+  }
+  printf("✓ Vector field with HNSW index added successfully\n");
+
+  // 7. Check field count
+  // Note: This function may not exist in current API, commenting out for now
+  // size_t field_count = zvec_collection_schema_get_field_count(schema);
+  // printf("✓ Total field count: %zu\n", field_count);
+
+  // 8. Create collection with schema
+  ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS();
+  ZVecCollection *collection = NULL;
+
+  error = zvec_collection_create_and_open("./schema_example_collection", schema,
+                                          &options, &collection);
+  if (handle_error(error, "creating collection with schema") != ZVEC_OK) {
+    zvec_collection_schema_destroy(schema);
+    return 1;
+  }
+  printf("✓ Collection created successfully with schema\n");
+
+  // 9. Prepare test data
+  float vector1[128];
+  float vector2[128];
+  for (int i = 0; i < 128; i++) {
+    vector1[i] = (float)(i + 1) / 128.0f;
+    vector2[i] = (float)(i + 2) / 128.0f;
+  }
+
+  // 10. Create documents
+  ZVecDoc *docs[2];
+  for (int i = 0; i < 2; i++) {
+    docs[i] = zvec_doc_create();
+    if (!docs[i]) {
+      fprintf(stderr, "Failed to create document %d\n", i);
+      // Cleanup
+      for (int j = 0; j < i; j++) {
+        zvec_doc_destroy(docs[j]);
+      }
+      zvec_collection_destroy(collection);
+      zvec_collection_schema_destroy(schema);
+      return 1;
+    }
+  }
+
+  // Add fields to document 1
+  zvec_doc_set_pk(docs[0], "doc1");
+  zvec_doc_add_field_by_value(docs[0], "id", ZVEC_DATA_TYPE_STRING, "doc1",
+                              strlen("doc1"));
+  zvec_doc_add_field_by_value(docs[0], "content", ZVEC_DATA_TYPE_STRING,
+                              "First test document",
+                              strlen("First test document"));
+  zvec_doc_add_field_by_value(docs[0], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32,
+                              vector1, 128 * sizeof(float));
+
+  // Add fields to document 2
+  zvec_doc_set_pk(docs[1], "doc2");
+  zvec_doc_add_field_by_value(docs[1], "id", ZVEC_DATA_TYPE_STRING, "doc2",
+                              strlen("doc2"));
+  zvec_doc_add_field_by_value(docs[1], "content", ZVEC_DATA_TYPE_STRING,
+                              "Second test document",
+                              strlen("Second test document"));
+  zvec_doc_add_field_by_value(docs[1], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32,
+                              vector2, 128 * sizeof(float));
+
+  // 11. Insert documents
+  size_t success_count = 0, error_count = 0;
+  error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2,
+                                 &success_count, &error_count);
+  if (handle_error(error, "inserting documents") != ZVEC_OK) {
+    // Cleanup
+    for (int i = 0; i < 2; i++) {
+      zvec_doc_destroy(docs[i]);
+    }
+    zvec_collection_destroy(collection);
+    zvec_collection_schema_destroy(schema);
+    return 1;
+  }
+  printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count,
+         error_count);
+
+  // Cleanup documents
+  for (int i = 0; i < 2; i++) {
+    zvec_doc_destroy(docs[i]);
+  }
+
+  // 12. Flush collection
+  error = zvec_collection_flush(collection);
+  if (handle_error(error, "flushing collection") == ZVEC_OK) {
+    printf("✓ Collection flushed successfully\n");
+  } else {
+    exit_code = 1;
+  }
+
+  // 13. Query test
+  ZVecVectorQuery query = {0};
+  query.field_name =
+      (ZVecString){.data = "embedding", .length = strlen("embedding")};
+  query.query_vector = (ZVecByteArray){.data = (uint8_t *)vector1,
+                                       .length = 128 * sizeof(float)};
+  query.topk = 5;
+  query.filter = (ZVecString){.data = "", .length = 0};
+  query.include_vector = true;
+  query.include_doc_id = true;
+  query.output_fields.strings = NULL;
+  query.output_fields.count = 0;
+
+  ZVecDoc **results = NULL;
+  size_t result_count = 0;
+  error = zvec_collection_query(collection, &query, &results, &result_count);
+  if (handle_error(error, "querying collection") == ZVEC_OK) {
+    printf("✓ Vector query successful - Returned %zu results\n", result_count);
+    zvec_docs_free(results, result_count);
+  } else {
+    exit_code = 1;
+  }
+
+  // 14. Cleanup resources
+  zvec_collection_destroy(collection);
+  zvec_collection_schema_destroy(schema);
+  printf("✓ Schema example completed\n");
+
+  return exit_code;
+}
diff --git a/examples/c_api/doc_example.c b/examples/c_api/doc_example.c
new file mode 100644
index 00000000..8d8574bb
--- /dev/null
+++ b/examples/c_api/doc_example.c
@@ -0,0 +1,520 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "zvec/c_api.h"
+
+/**
+ * @brief Print error message and return error code
+ */
+static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) {
+  if (error != ZVEC_OK) {
+    char *error_msg = NULL;
+    zvec_get_last_error(&error_msg);
+    fprintf(stderr, "Error in %s: %d - %s\n", context, error,
+            error_msg ? error_msg : "Unknown error");
+    free(error_msg);
+  }
+  return error;
+}
+
+/**
+ * @brief Create a test document with all data types
+ * @param doc_index Document index for generating unique data
+ * @return ZVecDoc* Created document pointer
+ */
+static ZVecDoc *create_full_type_test_doc(int doc_index) {
+  ZVecDoc *doc = zvec_doc_create();
+  if (!doc) {
+    fprintf(stderr, "Failed to create document\n");
+    return NULL;
+  }
+
+  // Set primary key
+  char pk_buffer[32];
+  snprintf(pk_buffer, sizeof(pk_buffer), "doc_%d", doc_index);
+  zvec_doc_set_pk(doc, pk_buffer);
+
+  // Add Id field with inverted index
+  char id_buffer[32];
+  snprintf(id_buffer, sizeof(id_buffer), "id_%d", doc_index);
+  zvec_doc_add_field_by_value(doc, "id", ZVEC_DATA_TYPE_STRING, id_buffer,
+                              strlen(id_buffer));
+
+  // Add scalar fields with different data types
+  // String field
+  char string_value[64];
+  snprintf(string_value, sizeof(string_value), "test_string_%d", doc_index);
+  zvec_doc_add_field_by_value(doc, "string_field", ZVEC_DATA_TYPE_STRING,
+                              string_value, strlen(string_value));
+
+  // Boolean field
+  bool bool_value = (doc_index % 2 == 0);
+  zvec_doc_add_field_by_value(doc, "bool_field", ZVEC_DATA_TYPE_BOOL,
+                              &bool_value, sizeof(bool_value));
+
+  // Integer fields
+  int32_t int32_value = doc_index * 1000;
+  zvec_doc_add_field_by_value(doc, "int32_field", ZVEC_DATA_TYPE_INT32,
+                              &int32_value, sizeof(int32_value));
+
+  int64_t int64_value = (int64_t)doc_index * 1000000LL;
+  zvec_doc_add_field_by_value(doc, "int64_field", ZVEC_DATA_TYPE_INT64,
+                              &int64_value, sizeof(int64_value));
+
+  // Floating point fields
+  float float_value = (float)doc_index * 1.5f;
+  zvec_doc_add_field_by_value(doc, "float_field", ZVEC_DATA_TYPE_FLOAT,
+                              &float_value, sizeof(float_value));
+
+  double double_value = (double)doc_index * 2.718281828;
+  zvec_doc_add_field_by_value(doc, "double_field", ZVEC_DATA_TYPE_DOUBLE,
+                              &double_value, sizeof(double_value));
+
+  // Vector fields with different dimensions
+  // FP32 vector (3D)
+  float fp32_vector[3] = {(float)doc_index, (float)doc_index * 2.0f,
+                          (float)doc_index * 3.0f};
+  zvec_doc_add_field_by_value(doc, "vector_fp32", ZVEC_DATA_TYPE_VECTOR_FP32,
+                              fp32_vector, 3 * sizeof(float));
+
+  // Larger FP32 vector (16D)
+  float large_vector[16];
+  for (int i = 0; i < 16; i++) {
+    large_vector[i] = (float)(doc_index * 16 + i) / 256.0f;
+  }
+  zvec_doc_add_field_by_value(doc, "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32,
+                              large_vector, 16 * sizeof(float));
+
+  return doc;
+}
+
+/**
+ * @brief Compare two documents for equality
+ */
+static bool compare_documents(const ZVecDoc *doc1, const ZVecDoc *doc2) {
+  if (!doc1 || !doc2) return false;
+
+  // Compare primary keys
+  const char *pk1 = zvec_doc_get_pk_pointer(doc1);
+  const char *pk2 = zvec_doc_get_pk_pointer(doc2);
+
+  if (!pk1 || !pk2 || strcmp(pk1, pk2) != 0) {
+    return false;
+  }
+
+  // TODO: Compare other fields and values
+
+  return true;
+}
+
+/**
+ * @brief Print document fields and their values
+ * @param doc The document to print
+ * @param doc_index Document index for identification
+ */
+static void print_doc(const ZVecDoc *doc, int doc_index) {
+  if (!doc) {
+    printf("Document %d: NULL document\n", doc_index);
+    return;
+  }
+
+  printf("\n=== Document %d ===\n", doc_index);
+
+  // Print primary key
+  const char *pk = zvec_doc_get_pk_pointer(doc);
+  printf("Primary Key: %s\n", pk ? pk : "NULL");
+
+  // Print document ID
+  uint64_t doc_id = zvec_doc_get_doc_id(doc);
+  printf("Document ID: %llu\n", (unsigned long long)doc_id);
+
+  // Print score
+  float score = zvec_doc_get_score(doc);
+  printf("Score: %.6f\n", score);
+
+  // Print scalar fields
+  printf("\nScalar Fields:\n");
+
+  // ID field (using pointer function for strings)
+  const void *id_value = NULL;
+  size_t id_size = 0;
+  ZVecErrorCode error = zvec_doc_get_field_value_pointer(
+      doc, "id", ZVEC_DATA_TYPE_STRING, &id_value, &id_size);
+  if (error == ZVEC_OK && id_value) {
+    printf(" id: %.*s\n", (int)id_size, (const char *)id_value);
+  }
+
+  // String field (using pointer function for strings)
+  const void *string_value = NULL;
+  size_t string_size = 0;
+  error = zvec_doc_get_field_value_pointer(
+      doc, "string_field", ZVEC_DATA_TYPE_STRING, &string_value, &string_size);
+  if (error == ZVEC_OK && string_value) {
+    printf(" string_field: %.*s\n", (int)string_size,
+           (const char *)string_value);
+  }
+
+  // Boolean field
+  bool bool_value;
+  error = zvec_doc_get_field_value_basic(doc, "bool_field", ZVEC_DATA_TYPE_BOOL,
+                                         &bool_value, sizeof(bool_value));
+  if (error == ZVEC_OK) {
+    printf(" bool_field: %s\n", bool_value ? "true" : "false");
+  }
+
+  // Int32 field
+  int32_t int32_value;
+  error =
+      zvec_doc_get_field_value_basic(doc, "int32_field", ZVEC_DATA_TYPE_INT32,
+                                     &int32_value, sizeof(int32_value));
+  if (error == ZVEC_OK) {
+    printf(" int32_field: %d\n", (int)int32_value);
+  }
+
+  // Int64 field
+  int64_t int64_value;
+  error =
+      zvec_doc_get_field_value_basic(doc, "int64_field", ZVEC_DATA_TYPE_INT64,
+                                     &int64_value, sizeof(int64_value));
+  if (error == ZVEC_OK) {
+    printf(" int64_field: %lld\n", (long long)int64_value);
+  }
+
+  // Float field
+  float float_value;
+  error =
+      zvec_doc_get_field_value_basic(doc, "float_field", ZVEC_DATA_TYPE_FLOAT,
+                                     &float_value, sizeof(float_value));
+  if (error == ZVEC_OK) {
+    printf(" float_field: %.6f\n", float_value);
+  }
+
+  // Double field
+  double double_value;
+  error =
+      zvec_doc_get_field_value_basic(doc, "double_field", ZVEC_DATA_TYPE_DOUBLE,
+                                     &double_value, sizeof(double_value));
+  if (error == ZVEC_OK) {
+    printf(" double_field: %.6f\n", double_value);
+  }
+
+  // Print vector fields (using copy function for complex types)
+  printf("\nVector Fields:\n");
+
+  // FP32 vector (3D)
+  void *fp32_vector = NULL;
+  size_t fp32_size = 0;
+  error = zvec_doc_get_field_value_copy(
+      doc, "vector_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, &fp32_vector, &fp32_size);
+  if (error == ZVEC_OK && fp32_vector) {
+    const float *vec = (const float *)fp32_vector;
+    size_t dim = fp32_size / sizeof(float);
+    printf(" vector_fp32 (%zuD): [", dim);
+    for (size_t i = 0; i < dim && i < 10; i++) {  // Limit to first 10 elements
+      printf("%.3f", vec[i]);
+      if (i < dim - 1 && i < 9) printf(", ");
+    }
+    if (dim > 10) printf(", ...");
+    printf("]\n");
+    free(fp32_vector);  // Free the allocated memory
+  }
+
+  // Large vector (16D)
+  void *large_vector = NULL;
+  size_t large_size = 0;
+  error = zvec_doc_get_field_value_copy(doc, "large_vector",
+                                        ZVEC_DATA_TYPE_VECTOR_FP32,
+                                        &large_vector, &large_size);
+  if (error == ZVEC_OK && large_vector) {
+    const float *vec = (const float *)large_vector;
+    size_t dim = large_size / sizeof(float);
+    printf(" large_vector (%zuD): [", dim);
+    for (size_t i = 0; i < dim && i < 10; i++) {  // Limit to first 10 elements
+      printf("%.3f", vec[i]);
+      if (i < dim - 1 && i < 9) printf(", ");
+    }
+    if (dim > 10) printf(", ...");
+    printf("]\n");
+    free(large_vector);  // Free the allocated memory
+  }
+
+  printf("==================\n\n");
+}
+
+/**
+ * @brief Document creation, manipulation, and query example
+ */
+int main() {
+  printf("=== ZVec Document Example ===\n\n");
+
+  ZVecErrorCode error;
+
+  // 1. Create collection schema for document testing
+  ZVecCollectionSchema *schema =
+      zvec_collection_schema_create("doc_example_collection");
+  if (!schema) {
+    fprintf(stderr, "Failed to create collection schema\n");
+    return -1;
+  }
+  printf("✓ Collection schema created\n");
+
+  // 2. Create index parameters
+  // clang-format off
+  ZVecIndexParams invert_params_val = ZVEC_INVERT_PARAMS(true, false);
+  // clang-format on
+  ZVecIndexParams *invert_params = &invert_params_val;
+  // clang-format off
+  ZVecIndexParams hnsw_params_val = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED);
+  // clang-format on
+  ZVecIndexParams *hnsw_params = &hnsw_params_val;
+
+  if (!invert_params || !hnsw_params) {
+    fprintf(stderr, "Failed to create index parameters\n");
+    zvec_collection_schema_destroy(schema);
+    return -1;
+  }
+
+  // 3.
Create fields for all data types + printf("Creating fields for all data types...\n"); + + // Id field with inverted index + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + if (id_field) { + zvec_field_schema_set_invert_index(id_field, invert_params); + error = zvec_collection_schema_add_field(schema, id_field); + if (handle_error(error, "adding ID field") == ZVEC_OK) { + printf("✓ ID field with inverted index added\n"); + } + } + + // Scalar fields + ZVecFieldSchema *string_field = + zvec_field_schema_create("string_field", ZVEC_DATA_TYPE_STRING, true, 0); + ZVecFieldSchema *bool_field = + zvec_field_schema_create("bool_field", ZVEC_DATA_TYPE_BOOL, true, 0); + ZVecFieldSchema *int32_field = + zvec_field_schema_create("int32_field", ZVEC_DATA_TYPE_INT32, true, 0); + ZVecFieldSchema *int64_field = + zvec_field_schema_create("int64_field", ZVEC_DATA_TYPE_INT64, true, 0); + ZVecFieldSchema *float_field = + zvec_field_schema_create("float_field", ZVEC_DATA_TYPE_FLOAT, true, 0); + ZVecFieldSchema *double_field = + zvec_field_schema_create("double_field", ZVEC_DATA_TYPE_DOUBLE, true, 0); + + if (string_field) zvec_collection_schema_add_field(schema, string_field); + if (bool_field) zvec_collection_schema_add_field(schema, bool_field); + if (int32_field) zvec_collection_schema_add_field(schema, int32_field); + if (int64_field) zvec_collection_schema_add_field(schema, int64_field); + if (float_field) zvec_collection_schema_add_field(schema, float_field); + if (double_field) zvec_collection_schema_add_field(schema, double_field); + + // Vector fields + ZVecFieldSchema *vector_fp32_field = zvec_field_schema_create( + "vector_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, false, 3); + ZVecFieldSchema *large_vector_field = zvec_field_schema_create( + "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 16); + + if (vector_fp32_field) { + zvec_field_schema_set_hnsw_index(vector_fp32_field, hnsw_params); + error = 
zvec_collection_schema_add_field(schema, vector_fp32_field); + if (handle_error(error, "adding vector FP32 field") == ZVEC_OK) { + printf("✓ Vector FP32 field with HNSW index added\n"); + } + } + + if (large_vector_field) { + zvec_field_schema_set_hnsw_index(large_vector_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, large_vector_field); + if (handle_error(error, "adding large vector field") == ZVEC_OK) { + printf("✓ Large vector field with HNSW index added\n"); + } + } + + // 4. Create collection + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + ZVecCollection *collection = NULL; + + error = zvec_collection_create_and_open("./doc_example_collection", schema, + &options, &collection); + if (handle_error(error, "creating collection") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + return -1; + } + printf("✓ Collection created successfully\n"); + + // 5. Create and insert multiple test documents + printf("Creating and inserting test documents...\n"); + + const int doc_count = 5; + ZVecDoc *test_docs[doc_count]; + + for (int i = 0; i < doc_count; i++) { + test_docs[i] = create_full_type_test_doc(i); + if (!test_docs[i]) { + fprintf(stderr, "Failed to create document %d\n", i); + // Cleanup + for (int j = 0; j < i; j++) { + zvec_doc_destroy(test_docs[j]); + } + goto cleanup; + } + printf("✓ Created document %d with PK: %s\n", i, + zvec_doc_get_pk_pointer(test_docs[i])); + } + + // Print all documents before insertion + printf("\nDocuments before insertion:\n"); + for (int i = 0; i < doc_count; i++) { + print_doc(test_docs[i], i); + } + + // Insert documents + size_t success_count = 0, error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)test_docs, + doc_count, &success_count, &error_count); + if (handle_error(error, "inserting documents") == ZVEC_OK) { + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + } + + // 6. 
Flush collection + error = zvec_collection_flush(collection); + if (handle_error(error, "flushing collection") != ZVEC_OK) { + printf("Warning: Collection flush failed\n"); + } else { + printf("✓ Collection flushed successfully\n"); + } + + // Use the first document's vector for querying + float query_vector[] = {0.0f, 0.0f, 0.0f}; + ZVecVectorQuery query = { + .field_name = + (ZVecString){.data = "vector_fp32", .length = strlen("vector_fp32")}, + .query_vector = (ZVecByteArray){.data = (uint8_t *)query_vector, + .length = 3 * sizeof(float)}, + .topk = 5, + .filter = (ZVecString){.data = "", .length = 0}, + .include_vector = true, + .include_doc_id = true, + .output_fields = {.strings = NULL, .count = 0}}; + + ZVecDoc **query_results = NULL; + size_t result_count = 0; + + error = + zvec_collection_query(collection, &query, &query_results, &result_count); + if (handle_error(error, "querying documents") != ZVEC_OK) { + query_results = NULL; + result_count = 0; + } + + printf("Query returned %zu results\n", result_count); + + // Print query results + printf("\nQuery Results:\n"); + for (size_t i = 0; i < result_count; i++) { + print_doc(query_results[i], i); + } + + // Compare query results + for (size_t i = 0; i < result_count && i < doc_count; i++) { + const char *result_pk = zvec_doc_get_pk_pointer(query_results[i]); + printf("Comparing query result[%zu]: %s\n", i, result_pk); + + // Find matching original document + bool found = false; + for (int j = 0; j < doc_count; j++) { + const char *original_pk = zvec_doc_get_pk_pointer(test_docs[j]); + if (strcmp(result_pk, original_pk) == 0) { + if (compare_documents(test_docs[j], query_results[i])) { + printf("✓ Query result %s matches original document\n", result_pk); + } else { + printf("✗ Query result %s does not match original document\n", + result_pk); + } + found = true; + break; + } + } + + if (!found) { + printf("⚠ Original document not found for: %s\n", result_pk); + } + } + + // 7. 
Filter query test + printf("\n=== Filter Query Test ===\n"); + + // Create filtered query + ZVecVectorQuery filtered_query = query; + filtered_query.filter = + (ZVecString){.data = "string_field = 'string_field_0'", + .length = strlen("string_field = 'string_field_0'")}; + + ZVecDoc **filtered_results = NULL; + size_t filtered_count = 0; + + error = zvec_collection_query(collection, &filtered_query, &filtered_results, + &filtered_count); + if (handle_error(error, "filtered querying") == ZVEC_OK) { + printf("Filtered query returned %zu results\n", filtered_count); + + // Verify filter results + bool filter_correct = true; + for (size_t i = 0; i < filtered_count; i++) { + // Note: Field value access may require different API + // For now, we'll just check that we got results + const char *pk = zvec_doc_get_pk_pointer(filtered_results[i]); + if (strstr(pk, "doc_") == NULL) { + filter_correct = false; + break; + } + } + + if (filter_correct) { + printf("✓ Filter query results are correct\n"); + } else { + printf("✗ Filter query results are incorrect\n"); + } + + if (filtered_results) { + zvec_docs_free(filtered_results, filtered_count); + } + } + + // 8. Cleanup query results + if (query_results) { + zvec_docs_free(query_results, result_count); + } + + // 9. Cleanup documents + for (int i = 0; i < doc_count; i++) { + zvec_doc_destroy(test_docs[i]); + } + + // 10. Final cleanup +cleanup: + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + + printf("✓ Document example completed\n"); + + return 0; +} \ No newline at end of file diff --git a/examples/c_api/field_schema_example.c b/examples/c_api/field_schema_example.c new file mode 100644 index 00000000..c41d0817 --- /dev/null +++ b/examples/c_api/field_schema_example.c @@ -0,0 +1,283 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? error_msg : "Unknown error"); + free(error_msg); + } + return error; +} + +/** + * @brief Field schema creation and management example + */ +int main() { + printf("=== ZVec Field Schema Example ===\n\n"); + + ZVecErrorCode error; + + // 1. Create collection schema + ZVecCollectionSchema *schema = + zvec_collection_schema_create("field_example_collection"); + if (!schema) { + fprintf(stderr, "Failed to create collection schema\n"); + return -1; + } + printf("✓ Collection schema created successfully\n"); + + // 2. 
Create different types of index parameters + // clang-format off + ZVecIndexParams invert_params_val = ZVEC_INVERT_PARAMS(true, false); + // clang-format on + ZVecIndexParams *invert_params = &invert_params_val; + // clang-format off + ZVecIndexParams hnsw_params_val = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + ZVecIndexParams *hnsw_params = &hnsw_params_val; + // clang-format off + ZVecIndexParams flat_params_val = ZVEC_FLAT_PARAMS( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + ZVecIndexParams *flat_params = &flat_params_val; + + if (!invert_params || !hnsw_params || !flat_params) { + fprintf(stderr, "Failed to create index parameters\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + + // 3. Create scalar fields with different data types + printf("Creating scalar fields...\n"); + + // String field with inverted index + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + if (name_field) { + zvec_field_schema_set_invert_index(name_field, invert_params); + error = zvec_collection_schema_add_field(schema, name_field); + if (handle_error(error, "adding name field") == ZVEC_OK) { + printf("✓ String field 'name' with inverted index added\n"); + } + } + + // Integer field + ZVecFieldSchema *age_field = + zvec_field_schema_create("age", ZVEC_DATA_TYPE_INT32, true, 0); + if (age_field) { + error = zvec_collection_schema_add_field(schema, age_field); + if (handle_error(error, "adding age field") == ZVEC_OK) { + printf("✓ Integer field 'age' added\n"); + } + } + + // Float field + ZVecFieldSchema *score_field = + zvec_field_schema_create("score", ZVEC_DATA_TYPE_FLOAT, true, 0); + if (score_field) { + error = zvec_collection_schema_add_field(schema, score_field); + if (handle_error(error, "adding score field") == ZVEC_OK) { + printf("✓ Float field 'score' added\n"); + } + } + + // Boolean field + ZVecFieldSchema 
*active_field = + zvec_field_schema_create("active", ZVEC_DATA_TYPE_BOOL, false, 0); + if (active_field) { + error = zvec_collection_schema_add_field(schema, active_field); + if (handle_error(error, "adding active field") == ZVEC_OK) { + printf("✓ Boolean field 'active' added\n"); + } + } + + // 4. Create vector fields with different dimensions and indexes + printf("Creating vector fields...\n"); + + // Small dimension vector with HNSW index + ZVecFieldSchema *small_vector_field = zvec_field_schema_create( + "small_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 32); + if (small_vector_field) { + zvec_field_schema_set_hnsw_index(small_vector_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, small_vector_field); + if (handle_error(error, "adding small vector field") == ZVEC_OK) { + printf( + "✓ Small vector field 'small_vector' (32D) with HNSW index added\n"); + } + } + + // Medium dimension vector with Flat index + ZVecFieldSchema *medium_vector_field = zvec_field_schema_create( + "medium_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + if (medium_vector_field) { + zvec_field_schema_set_flat_index(medium_vector_field, flat_params); + error = zvec_collection_schema_add_field(schema, medium_vector_field); + if (handle_error(error, "adding medium vector field") == ZVEC_OK) { + printf( + "✓ Medium vector field 'medium_vector' (128D) with Flat index " + "added\n"); + } + } + + // Large dimension vector with HNSW index + ZVecFieldSchema *large_vector_field = zvec_field_schema_create( + "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 512); + if (large_vector_field) { + zvec_field_schema_set_hnsw_index(large_vector_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, large_vector_field); + if (handle_error(error, "adding large vector field") == ZVEC_OK) { + printf( + "✓ Large vector field 'large_vector' (512D) with HNSW index added\n"); + } + } + + // 5. 
Create collection with the schema + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + ZVecCollection *collection = NULL; + + error = zvec_collection_create_and_open("./field_example_collection", schema, + &options, &collection); + if (handle_error(error, "creating collection") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + return -1; + } + printf("✓ Collection created successfully\n"); + + // 6. Create test documents with various field types + printf("Creating test documents...\n"); + + ZVecDoc *doc1 = zvec_doc_create(); + ZVecDoc *doc2 = zvec_doc_create(); + + if (!doc1 || !doc2) { + fprintf(stderr, "Failed to create documents\n"); + goto cleanup; + } + + // Document 1 + zvec_doc_set_pk(doc1, "user1"); + zvec_doc_add_field_by_value(doc1, "name", ZVEC_DATA_TYPE_STRING, + "Alice Johnson", strlen("Alice Johnson")); + int32_t age1 = 28; + zvec_doc_add_field_by_value(doc1, "age", ZVEC_DATA_TYPE_INT32, &age1, + sizeof(age1)); + float score1 = 87.5f; + zvec_doc_add_field_by_value(doc1, "score", ZVEC_DATA_TYPE_FLOAT, &score1, + sizeof(score1)); + bool active1 = true; + zvec_doc_add_field_by_value(doc1, "active", ZVEC_DATA_TYPE_BOOL, &active1, + sizeof(active1)); + + // Add vector data + float small_vec1[32]; + float medium_vec1[128]; + float large_vec1[512]; + + for (int i = 0; i < 32; i++) small_vec1[i] = (float)i / 32.0f; + for (int i = 0; i < 128; i++) medium_vec1[i] = (float)i / 128.0f; + for (int i = 0; i < 512; i++) large_vec1[i] = (float)i / 512.0f; + + zvec_doc_add_field_by_value(doc1, "small_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + small_vec1, 32 * sizeof(float)); + zvec_doc_add_field_by_value(doc1, "medium_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + medium_vec1, 128 * sizeof(float)); + zvec_doc_add_field_by_value(doc1, "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + large_vec1, 512 * sizeof(float)); + + // Document 2 + zvec_doc_set_pk(doc2, "user2"); + zvec_doc_add_field_by_value(doc2, "name", ZVEC_DATA_TYPE_STRING, "Bob Smith", + strlen("Bob Smith")); 
+ int32_t age2 = 35; + zvec_doc_add_field_by_value(doc2, "age", ZVEC_DATA_TYPE_INT32, &age2, + sizeof(age2)); + float score2 = 92.0f; + zvec_doc_add_field_by_value(doc2, "score", ZVEC_DATA_TYPE_FLOAT, &score2, + sizeof(score2)); + bool active2 = false; + zvec_doc_add_field_by_value(doc2, "active", ZVEC_DATA_TYPE_BOOL, &active2, + sizeof(active2)); + + // Add vector data + float small_vec2[32]; + float medium_vec2[128]; + float large_vec2[512]; + + for (int i = 0; i < 32; i++) small_vec2[i] = (float)(32 - i) / 32.0f; + for (int i = 0; i < 128; i++) medium_vec2[i] = (float)(128 - i) / 128.0f; + for (int i = 0; i < 512; i++) large_vec2[i] = (float)(512 - i) / 512.0f; + + zvec_doc_add_field_by_value(doc2, "small_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + small_vec2, 32 * sizeof(float)); + zvec_doc_add_field_by_value(doc2, "medium_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + medium_vec2, 128 * sizeof(float)); + zvec_doc_add_field_by_value(doc2, "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + large_vec2, 512 * sizeof(float)); + + // 7. Insert documents + ZVecDoc *docs[] = {doc1, doc2}; + size_t success_count = 0, error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2, + &success_count, &error_count); + if (handle_error(error, "inserting documents") == ZVEC_OK) { + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + } + + // 8. 
Flush and test queries + zvec_collection_flush(collection); + printf("✓ Collection flushed\n"); + + // Test vector query on medium vector field + ZVecVectorQuery query = {0}; + query.field_name = + (ZVecString){.data = "medium_vector", .length = strlen("medium_vector")}; + query.query_vector = (ZVecByteArray){.data = (uint8_t *)medium_vec1, + .length = 128 * sizeof(float)}; + query.topk = 2; + query.filter = (ZVecString){.data = "", .length = 0}; + query.include_vector = false; + query.include_doc_id = true; + query.output_fields.strings = NULL; + query.output_fields.count = 0; + + ZVecDoc **results = NULL; + size_t result_count = 0; + error = zvec_collection_query(collection, &query, &results, &result_count); + if (error == ZVEC_OK) { + printf("✓ Vector query successful - Found %zu results\n", result_count); + zvec_docs_free(results, result_count); + } + + // 9. Cleanup +cleanup: + if (doc1) zvec_doc_destroy(doc1); + if (doc2) zvec_doc_destroy(doc2); + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + + printf("✓ Field schema example completed\n"); + return 0; +} \ No newline at end of file diff --git a/examples/c_api/index_example.c b/examples/c_api/index_example.c new file mode 100644 index 00000000..72877263 --- /dev/null +++ b/examples/c_api/index_example.c @@ -0,0 +1,327 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? error_msg : "Unknown error"); + free(error_msg); + } + return error; +} + +/** + * @brief Index creation and management example + */ +int main() { + printf("=== ZVec Index Example ===\n\n"); + + ZVecErrorCode error; + + // 1. Create collection schema + ZVecCollectionSchema *schema = + zvec_collection_schema_create("index_example_collection"); + if (!schema) { + fprintf(stderr, "Failed to create collection schema\n"); + return -1; + } + printf("✓ Collection schema created successfully\n"); + + // 2. Create different index parameter configurations + printf("Creating index parameters...\n"); + + // Inverted index parameters + // clang-format off + ZVecIndexParams invert_params_standard_val = ZVEC_INVERT_PARAMS(true, false); + ZVecIndexParams invert_params_extended_val = ZVEC_INVERT_PARAMS(true, true); + // clang-format on + ZVecIndexParams *invert_params_standard = &invert_params_standard_val; + ZVecIndexParams *invert_params_extended = &invert_params_extended_val; + + // HNSW index parameters with different configurations + // clang-format off + ZVecIndexParams hnsw_params_fast_val = ZVEC_HNSW_PARAMS( + ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + ZVecIndexParams hnsw_params_balanced_val = ZVEC_HNSW_PARAMS( + ZVEC_METRIC_TYPE_COSINE, 32, 200, 100, ZVEC_QUANTIZE_TYPE_UNDEFINED); + ZVecIndexParams hnsw_params_accurate_val = ZVEC_HNSW_PARAMS( + ZVEC_METRIC_TYPE_IP, 64, 400, 200, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + ZVecIndexParams *hnsw_params_fast = &hnsw_params_fast_val; + ZVecIndexParams *hnsw_params_balanced = &hnsw_params_balanced_val; + ZVecIndexParams 
*hnsw_params_accurate = &hnsw_params_accurate_val; + + // Flat index parameters + // clang-format off + ZVecIndexParams flat_params_l2_val = ZVEC_FLAT_PARAMS( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + ZVecIndexParams flat_params_cosine_val = ZVEC_FLAT_PARAMS( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + ZVecIndexParams *flat_params_l2 = &flat_params_l2_val; + ZVecIndexParams *flat_params_cosine = &flat_params_cosine_val; + + if (!invert_params_standard || !invert_params_extended || !hnsw_params_fast || + !hnsw_params_balanced || !hnsw_params_accurate || !flat_params_l2 || + !flat_params_cosine) { + fprintf(stderr, "Failed to create index parameters\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + + // 3. Create fields with different index types + printf("Creating fields with various index types...\n"); + + // Fields with inverted indexes + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + if (id_field) { + zvec_field_schema_set_invert_index(id_field, invert_params_standard); + error = zvec_collection_schema_add_field(schema, id_field); + if (handle_error(error, "adding ID field") == ZVEC_OK) { + printf("✓ ID field with standard inverted index added\n"); + } + } + + ZVecFieldSchema *category_field = + zvec_field_schema_create("category", ZVEC_DATA_TYPE_STRING, true, 0); + if (category_field) { + zvec_field_schema_set_invert_index(category_field, invert_params_extended); + error = zvec_collection_schema_add_field(schema, category_field); + if (handle_error(error, "adding category field") == ZVEC_OK) { + printf("✓ Category field with extended inverted index added\n"); + } + } + + // Vector fields with HNSW indexes (different configurations) + ZVecFieldSchema *fast_search_field = zvec_field_schema_create( + "fast_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 64); + if (fast_search_field) { + zvec_field_schema_set_hnsw_index(fast_search_field, 
hnsw_params_fast); + error = zvec_collection_schema_add_field(schema, fast_search_field); + if (handle_error(error, "adding fast search field") == ZVEC_OK) { + printf("✓ Fast search vector field (64D) with HNSW index added\n"); + } + } + + ZVecFieldSchema *balanced_field = zvec_field_schema_create( + "balanced_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + if (balanced_field) { + zvec_field_schema_set_hnsw_index(balanced_field, hnsw_params_balanced); + error = zvec_collection_schema_add_field(schema, balanced_field); + if (handle_error(error, "adding balanced field") == ZVEC_OK) { + printf("✓ Balanced vector field (128D) with HNSW index added\n"); + } + } + + ZVecFieldSchema *accurate_field = zvec_field_schema_create( + "accurate_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 256); + if (accurate_field) { + zvec_field_schema_set_hnsw_index(accurate_field, hnsw_params_accurate); + error = zvec_collection_schema_add_field(schema, accurate_field); + if (handle_error(error, "adding accurate field") == ZVEC_OK) { + printf("✓ Accurate vector field (256D) with HNSW index added\n"); + } + } + + // Vector field with Flat index + ZVecFieldSchema *exact_field = zvec_field_schema_create( + "exact_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 32); + if (exact_field) { + zvec_field_schema_set_flat_index(exact_field, flat_params_l2); + error = zvec_collection_schema_add_field(schema, exact_field); + if (handle_error(error, "adding exact field") == ZVEC_OK) { + printf("✓ Exact search vector field (32D) with Flat index added\n"); + } + } + + // 4. Create collection + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + ZVecCollection *collection = NULL; + + error = zvec_collection_create_and_open("./index_example_collection", schema, + &options, &collection); + if (handle_error(error, "creating collection") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + // Cleanup index parameters + return -1; + } + printf("✓ Collection created successfully\n"); + + // 5. 
Create test data + printf("Creating test documents...\n"); + + ZVecDoc *docs[3]; + for (int i = 0; i < 3; i++) { + docs[i] = zvec_doc_create(); + if (!docs[i]) { + fprintf(stderr, "Failed to create document %d\n", i); + // Cleanup + for (int j = 0; j < i; j++) { + zvec_doc_destroy(docs[j]); + } + goto cleanup; + } + } + + // Prepare vector data + float fast_vec[3][64]; + float balanced_vec[3][128]; + float accurate_vec[3][256]; + float exact_vec[3][32]; + + // Generate different vector patterns for testing + for (int doc_idx = 0; doc_idx < 3; doc_idx++) { + for (int i = 0; i < 64; i++) { + fast_vec[doc_idx][i] = (float)(doc_idx * 64 + i) / (64.0f * 3.0f); + } + for (int i = 0; i < 128; i++) { + balanced_vec[doc_idx][i] = (float)(doc_idx * 128 + i) / (128.0f * 3.0f); + } + for (int i = 0; i < 256; i++) { + accurate_vec[doc_idx][i] = (float)(doc_idx * 256 + i) / (256.0f * 3.0f); + } + for (int i = 0; i < 32; i++) { + exact_vec[doc_idx][i] = (float)(doc_idx * 32 + i) / (32.0f * 3.0f); + } + } + + // Populate documents + for (int i = 0; i < 3; i++) { + char pk[16]; + snprintf(pk, sizeof(pk), "doc%d", i + 1); + zvec_doc_set_pk(docs[i], pk); + + char id_val[16]; + snprintf(id_val, sizeof(id_val), "ID_%d", i + 1); + zvec_doc_add_field_by_value(docs[i], "id", ZVEC_DATA_TYPE_STRING, id_val, + strlen(id_val)); + + char category_val[16]; + snprintf(category_val, sizeof(category_val), "cat_%d", (i % 2) + 1); + zvec_doc_add_field_by_value(docs[i], "category", ZVEC_DATA_TYPE_STRING, + category_val, strlen(category_val)); + + zvec_doc_add_field_by_value(docs[i], "fast_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, fast_vec[i], + 64 * sizeof(float)); + zvec_doc_add_field_by_value(docs[i], "balanced_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, balanced_vec[i], + 128 * sizeof(float)); + zvec_doc_add_field_by_value(docs[i], "accurate_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, accurate_vec[i], + 256 * sizeof(float)); + zvec_doc_add_field_by_value(docs[i], "exact_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, 
exact_vec[i], + 32 * sizeof(float)); + } + + // 6. Insert documents + size_t success_count = 0, error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 3, + &success_count, &error_count); + if (handle_error(error, "inserting documents") == ZVEC_OK) { + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + } + + // Cleanup documents + for (int i = 0; i < 3; i++) { + zvec_doc_destroy(docs[i]); + } + + // 7. Flush collection to build indexes + error = zvec_collection_flush(collection); + if (handle_error(error, "flushing collection") == ZVEC_OK) { + printf("✓ Collection flushed - indexes built\n"); + } + + // 8. Test different query types + printf("Testing various index queries...\n"); + + // Test HNSW query (balanced) + ZVecVectorQuery hnsw_query = {0}; + hnsw_query.field_name = (ZVecString){.data = "balanced_vector", + .length = strlen("balanced_vector")}; + hnsw_query.query_vector = (ZVecByteArray){.data = (uint8_t *)balanced_vec[0], + .length = 128 * sizeof(float)}; + hnsw_query.topk = 2; + hnsw_query.filter = (ZVecString){.data = "", .length = 0}; + hnsw_query.include_vector = false; + hnsw_query.include_doc_id = true; + hnsw_query.output_fields.strings = NULL; + hnsw_query.output_fields.count = 0; + + ZVecDoc **hnsw_results = NULL; + size_t hnsw_result_count = 0; + error = zvec_collection_query(collection, &hnsw_query, &hnsw_results, + &hnsw_result_count); + if (error == ZVEC_OK) { + printf("✓ HNSW query successful - Found %zu results\n", hnsw_result_count); + zvec_docs_free(hnsw_results, hnsw_result_count); + } + + // Test Flat query (exact) + ZVecVectorQuery flat_query = {0}; + flat_query.field_name = + (ZVecString){.data = "exact_vector", .length = strlen("exact_vector")}; + flat_query.query_vector = (ZVecByteArray){.data = (uint8_t *)exact_vec[0], + .length = 32 * sizeof(float)}; + flat_query.topk = 2; + flat_query.filter = (ZVecString){.data = "", .length = 0}; + flat_query.include_vector 
= false; + flat_query.include_doc_id = true; + flat_query.output_fields.strings = NULL; + flat_query.output_fields.count = 0; + + ZVecDoc **flat_results = NULL; + size_t flat_result_count = 0; + error = zvec_collection_query(collection, &flat_query, &flat_results, + &flat_result_count); + if (error == ZVEC_OK) { + printf("✓ Flat (exact) query successful - Found %zu results\n", + flat_result_count); + zvec_docs_free(flat_results, flat_result_count); + } + + // 9. Performance comparison information + printf("\nIndex Performance Characteristics:\n"); + printf("- Inverted Index: Fast text search, supports filtering\n"); + printf( + "- HNSW Index: Approximate nearest neighbor search, good balance of " + "speed/accuracy\n"); + printf("- Flat Index: Exact search, slower but 100%% accurate\n"); + printf( + "- Trade-off: Speed vs Accuracy - choose based on your requirements\n"); + + // 10. Cleanup +cleanup: + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + + // Cleanup index parameters + + printf("✓ Index example completed\n"); + return 0; +} \ No newline at end of file diff --git a/examples/c_api/optimized_example.c b/examples/c_api/optimized_example.c new file mode 100644 index 00000000..2f87c93d --- /dev/null +++ b/examples/c_api/optimized_example.c @@ -0,0 +1,300 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
/**
 * @brief Create test vector data
 *
 * Allocates @p dimension floats with malloc() and fills each element with
 * rand() scaled by RAND_MAX, i.e. a value in [0, 1].
 *
 * @param dimension Number of float elements to allocate and fill.
 * @return Pointer to the new buffer, which the caller must release with
 *         free(); NULL when the byte size would overflow size_t or the
 *         allocation fails.
 */
static float *create_test_vector(size_t dimension) {
  // Guard the multiplication below: a wrapped byte count would make malloc()
  // return a buffer far smaller than the loop then writes to.
  if (dimension > (size_t)-1 / sizeof(float)) {
    return NULL;
  }

  // Explicit cast keeps the function valid when compiled as C++ as well.
  float *vector = (float *)malloc(dimension * sizeof(float));
  if (!vector) {
    return NULL;
  }

  for (size_t i = 0; i < dimension; i++) {
    vector[i] = (float)rand() / RAND_MAX;
  }

  return vector;
}
Create fields with optimized configuration + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + ZVecFieldSchema *text_field = + zvec_field_schema_create("text", ZVEC_DATA_TYPE_STRING, true, 0); + ZVecFieldSchema *embedding_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + + if (!id_field || !text_field || !embedding_field) { + fprintf(stderr, "Failed to create field schemas\n"); + goto cleanup_params; + } + + // Set indexes + zvec_field_schema_set_hnsw_index(embedding_field, hnsw_params); + + // Add fields to schema + error = zvec_collection_schema_add_field(schema, id_field); + if (handle_error(error, "adding ID field") != ZVEC_OK) goto cleanup_fields; + + error = zvec_collection_schema_add_field(schema, text_field); + if (handle_error(error, "adding text field") != ZVEC_OK) goto cleanup_fields; + + error = zvec_collection_schema_add_field(schema, embedding_field); + if (handle_error(error, "adding embedding field") != ZVEC_OK) + goto cleanup_fields; + + printf("✓ Fields configured with indexes\n"); + + // 4. Create collection with optimized options + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + options.enable_mmap = true; // Enable memory mapping for better performance + + ZVecCollection *collection = NULL; + error = zvec_collection_create_and_open("./optimized_example_collection", + schema, &options, &collection); + if (handle_error(error, "creating collection") != ZVEC_OK) { + goto cleanup_fields; + } + printf("✓ Collection created with optimized settings\n"); + + // 5. Bulk insert test data + const size_t DOC_COUNT = 1000; + const size_t BATCH_SIZE = 100; + + printf("Inserting %zu documents in batches of %zu...\n", DOC_COUNT, + BATCH_SIZE); + + clock_t start_time = clock(); + + for (size_t batch_start = 0; batch_start < DOC_COUNT; + batch_start += BATCH_SIZE) { + size_t current_batch_size = (batch_start + BATCH_SIZE > DOC_COUNT) + ? 
DOC_COUNT - batch_start + : BATCH_SIZE; + + ZVecDoc **batch_docs = malloc(current_batch_size * sizeof(ZVecDoc *)); + if (!batch_docs) { + fprintf(stderr, "Failed to allocate batch documents\n"); + break; + } + + // Create batch documents + for (size_t i = 0; i < current_batch_size; i++) { + batch_docs[i] = zvec_doc_create(); + if (!batch_docs[i]) { + fprintf(stderr, "Failed to create document\n"); + // Cleanup previous documents in batch + for (size_t j = 0; j < i; j++) { + zvec_doc_destroy(batch_docs[j]); + } + free(batch_docs); + goto cleanup_collection; + } + + size_t doc_id = batch_start + i; + char pk[32]; + snprintf(pk, sizeof(pk), "doc_%zu", doc_id); + zvec_doc_set_pk(batch_docs[i], pk); + + // Add ID field + char id_str[32]; + snprintf(id_str, sizeof(id_str), "ID_%zu", doc_id); + zvec_doc_add_field_by_value(batch_docs[i], "id", ZVEC_DATA_TYPE_STRING, + id_str, strlen(id_str)); + + // Add text field + char text_str[64]; + snprintf(text_str, sizeof(text_str), + "Document number %zu with sample text", doc_id); + zvec_doc_add_field_by_value(batch_docs[i], "text", ZVEC_DATA_TYPE_STRING, + text_str, strlen(text_str)); + + // Add vector field + float *vector = create_test_vector(128); + if (vector) { + zvec_doc_add_field_by_value(batch_docs[i], "embedding", + ZVEC_DATA_TYPE_VECTOR_FP32, vector, + 128 * sizeof(float)); + free(vector); + } + } + + // Insert batch + size_t success_count, error_count; + error = zvec_collection_insert(collection, (const ZVecDoc **)batch_docs, + current_batch_size, &success_count, + &error_count); + if (handle_error(error, "inserting batch") != ZVEC_OK) { + // Cleanup batch documents + for (size_t i = 0; i < current_batch_size; i++) { + zvec_doc_destroy(batch_docs[i]); + } + free(batch_docs); + goto cleanup_collection; + } + + printf(" Batch %zu-%zu: %zu successful, %zu failed\n", batch_start, + batch_start + current_batch_size - 1, success_count, error_count); + + // Cleanup batch documents + for (size_t i = 0; i < current_batch_size; 
i++) { + zvec_doc_destroy(batch_docs[i]); + } + free(batch_docs); + } + + clock_t insert_end_time = clock(); + double insert_time = + ((double)(insert_end_time - start_time)) / CLOCKS_PER_SEC; + printf("✓ Bulk insertion completed in %.3f seconds (%.0f docs/sec)\n", + insert_time, DOC_COUNT / insert_time); + + // 6. Flush and optimize collection + printf("Flushing and optimizing collection...\n"); + zvec_collection_flush(collection); + zvec_collection_optimize(collection); + printf("✓ Collection optimized\n"); + + // 7. Performance query test + printf("Testing query performance...\n"); + + float *query_vector = create_test_vector(128); + if (!query_vector) { + fprintf(stderr, "Failed to create query vector\n"); + goto cleanup_collection; + } + + ZVecVectorQuery query = {0}; + query.field_name = + (ZVecString){.data = "embedding", .length = strlen("embedding")}; + query.query_vector = (ZVecByteArray){.data = (uint8_t *)query_vector, + .length = 128 * sizeof(float)}; + query.topk = 10; + query.filter = (ZVecString){.data = "", .length = 0}; + query.include_vector = false; + query.include_doc_id = true; + query.output_fields.strings = NULL; + query.output_fields.count = 0; + + const int QUERY_COUNT = 100; + start_time = clock(); + + for (int q = 0; q < QUERY_COUNT; q++) { + ZVecDoc **results = NULL; + size_t result_count = 0; + + error = zvec_collection_query(collection, &query, &results, &result_count); + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + printf("Query %d failed: %s\n", q, + error_msg ? 
error_msg : "Unknown error"); + free(error_msg); + continue; + } + + if (results) { + zvec_docs_free(results, result_count); + } + } + + clock_t query_end_time = clock(); + double query_time = ((double)(query_end_time - start_time)) / CLOCKS_PER_SEC; + double avg_query_time = (query_time * 1000) / QUERY_COUNT; + + printf("✓ Performance test completed\n"); + printf(" Average query time: %.2f ms\n", avg_query_time); + printf(" Queries per second: %.0f\n", 1000.0 / avg_query_time); + + free(query_vector); + + // 8. Memory usage information + ZVecCollectionStats *stats = NULL; + error = zvec_collection_get_stats(collection, &stats); + if (error == ZVEC_OK && stats) { + printf("Collection Statistics:\n"); + printf(" Document count: %llu\n", (unsigned long long)stats->doc_count); + zvec_collection_stats_destroy(stats); + } + + // 9. Cleanup +cleanup_collection: + zvec_collection_destroy(collection); + +cleanup_fields: + // Field schemas are managed by the collection schema, no need to destroy + // individually + +cleanup_params: + zvec_collection_schema_destroy(schema); + + printf("✓ Optimized example completed\n"); + + return 0; +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 486b0b36..fe9c090f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,6 +109,14 @@ build-dir = "build" wheel.expand-macos-universal-tags = true wheel.packages = ["python/zvec"] +# Exclude unnecessary files from wheel +wheel.exclude = [ + "**/*.dylib", + "**/*.a", + "lib/cmake/**", + "lib/pkgconfig/**", +] + # Source distribution sdist.include = [ "README.md", diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 00383c99..0f3a85ee 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,6 +9,7 @@ cc_directory(ailego) cc_directory(turbo) cc_directory(core) cc_directory(db) +cc_directory(c_api) if(BUILD_PYTHON_BINDINGS) cc_directory(binding) endif() diff --git a/src/binding/python/CMakeLists.txt b/src/binding/python/CMakeLists.txt index 
160b25ea..c78aa033 100644 --- a/src/binding/python/CMakeLists.txt +++ b/src/binding/python/CMakeLists.txt @@ -56,4 +56,4 @@ elseif (APPLE) ) endif () -target_include_directories(_zvec PRIVATE ${PYBIND11_INCLUDE_DIR} ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/binding/python/include) +target_include_directories(_zvec PRIVATE ${PYBIND11_INCLUDE_DIR} ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/binding/python/include) \ No newline at end of file diff --git a/src/c_api/API_REFERENCE_CN.md b/src/c_api/API_REFERENCE_CN.md new file mode 100644 index 00000000..115313c7 --- /dev/null +++ b/src/c_api/API_REFERENCE_CN.md @@ -0,0 +1,1843 @@ +# ZVec C API 参考文档 + +**版本**: 0.3.0 +**许可**: Apache License 2.0 + +--- + +## 目录 + +1. [概述](#概述) +2. [快速开始](#快速开始) +3. [版本管理](#版本管理) +4. [错误处理](#错误处理) +5. [初始化与关闭](#初始化与关闭) +6. [配置管理](#配置管理) +7. [数据结构](#数据结构) +8. [Schema 管理](#schema-管理) +9. [Collection 管理](#collection-管理) +10. [索引管理](#索引管理) +11. [文档操作](#文档操作) +12. [数据增删改](#数据增删改) +13. [数据查询](#数据查询) +14. [工具函数](#工具函数) +15. [完整示例](#完整示例) + +--- + +## 概述 + +ZVec C API 是 ZVec 向量数据库的 C 语言接口,提供了完整的向量存储、索引和检索功能。本接口采用 C ABI,可与 C、C++、Rust、Go 等语言互操作。 + +### 核心概念 + +| 概念 | 说明 | +|------|------| +| **Collection** | 数据集合,类似数据库中的表 | +| **Schema** | 集合的结构定义,包含字段信息 | +| **Document** | 单条数据记录 | +| **Index** | 字段索引,加速查询 | +| **Field** | 字段,支持标量和向量类型 | + +--- + +## 快速开始 + +### 最小可用示例 + +```c +#include "zvec/c_api.h" +#include <stdio.h> + +int main() { + // 1. 初始化库 + zvec_initialize(NULL); + + // 2. 创建集合 Schema + ZVecCollectionSchema *schema = zvec_collection_schema_create("my_collection"); + ZVecFieldSchema *field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 3); + zvec_collection_schema_add_field(schema, field); + + // 3. 
创建并打开集合 + ZVecCollection *collection = NULL; + ZVecErrorCode rc = zvec_collection_create_and_open( + "./my_data", schema, NULL, &collection); + + if (rc != ZVEC_OK) { + char *err_msg; + zvec_get_last_error(&err_msg); + printf("Error: %s\n", err_msg); + return 1; + } + + // 4. 创建索引 + ZVecHnswIndexParams *params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); + zvec_collection_create_hnsw_index(collection, "embedding", params); + + // 5. 插入数据 + ZVecDoc *doc = zvec_doc_create(); + zvec_doc_set_pk(doc, "doc_001"); + float vec[] = {0.1f, 0.2f, 0.3f}; + zvec_doc_add_field_by_value(doc, "embedding", + ZVEC_DATA_TYPE_VECTOR_FP32, vec, sizeof(vec)); + + size_t success, errors; + zvec_collection_insert(collection, &doc, 1, &success, &errors); + zvec_doc_destroy(doc); + + // 6. 查询 + ZVecVectorQuery query = ZVEC_VECTOR_QUERY( + "embedding", ZVEC_FLOAT_ARRAY(vec, 3), 10, ""); + ZVecDoc **results; + size_t count; + zvec_collection_query(collection, &query, &results, &count); + + // 7. 
清理 + zvec_docs_free(results, count); + zvec_index_params_hnsw_destroy(params); + zvec_collection_close(collection); + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + zvec_shutdown(); + + return 0; +} +``` + +--- + +## 版本管理 + +### 获取版本信息 + +```c +// 获取完整版本字符串 +const char *version = zvec_get_version(); +// 输出示例:"0.3.0-g3f8a2b1 (built 2025-05-13 10:30:45)" + +// 获取各部分版本号 +int major = zvec_get_version_major(); // 0 +int minor = zvec_get_version_minor(); // 3 +int patch = zvec_get_version_patch(); // 0 +``` + +### 版本兼容性检查 + +```c +// 检查当前库版本是否满足最低要求 +bool compatible = zvec_check_version(0, 2, 0); +if (!compatible) { + printf("Library version too old!\n"); +} +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_get_version()` | 无 | `const char*` | 获取完整版本字符串 | +| `zvec_get_version_major()` | 无 | `int` | 获取主版本号 | +| `zvec_get_version_minor()` | 无 | `int` | 获取次版本号 | +| `zvec_get_version_patch()` | 无 | `int` | 获取补丁版本号 | +| `zvec_check_version()` | `major, minor, patch` | `bool` | 检查版本兼容性 | + +--- + +## 错误处理 + +### 错误码枚举 + +```c +typedef enum { + ZVEC_OK = 0, // 成功 + ZVEC_ERROR_NOT_FOUND = 1, // 资源未找到 + ZVEC_ERROR_ALREADY_EXISTS = 2, // 资源已存在 + ZVEC_ERROR_INVALID_ARGUMENT = 3, // 无效参数 + ZVEC_ERROR_PERMISSION_DENIED = 4, // 权限拒绝 + ZVEC_ERROR_FAILED_PRECONDITION = 5, // 前置条件失败 + ZVEC_ERROR_RESOURCE_EXHAUSTED = 6, // 资源耗尽 + ZVEC_ERROR_UNAVAILABLE = 7, // 服务不可用 + ZVEC_ERROR_INTERNAL_ERROR = 8, // 内部错误 + ZVEC_ERROR_NOT_SUPPORTED = 9, // 不支持的操作 + ZVEC_ERROR_UNKNOWN = 10 // 未知错误 +} ZVecErrorCode; +``` + +### 获取错误信息 + +```c +// 获取详细错误信息 +ZVecErrorDetails details; +zvec_get_last_error_details(&details); +printf("Error %d: %s\n", details.code, details.message); +printf(" at %s:%d in %s()\n", details.file, details.line, details.function); + +// 获取错误消息字符串 +char *error_msg; +ZVecErrorCode code = zvec_get_last_error(&error_msg); +if (code != ZVEC_OK) { + printf("Error: %s\n", error_msg); + free(error_msg); // 需要调用者释放 +} + +// 清除错误状态 
+zvec_clear_error(); + +// 错误码转字符串 +const char *err_str = zvec_error_code_to_string(ZVEC_ERROR_INVALID_ARGUMENT); +// 返回:"Invalid argument" +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_get_last_error(&msg)` | `char **msg` | `ZVecErrorCode` | 获取最后错误消息 | +| `zvec_get_last_error_details(&details)` | `ZVecErrorDetails*` | `ZVecErrorCode` | 获取详细错误信息 | +| `zvec_clear_error()` | 无 | void | 清除错误状态 | +| `zvec_error_code_to_string(code)` | `ZVecErrorCode` | `const char*` | 错误码转字符串 | + +--- + +## 初始化与关闭 + +### 初始化库 + +```c +// 使用默认配置初始化 +ZVecErrorCode rc = zvec_initialize(NULL); + +// 使用自定义配置初始化 +ZVecConfigData *config = zvec_config_data_create(); +zvec_config_data_set_memory_limit(config, 2UL * 1024 * 1024 * 1024); // 2GB +zvec_config_data_set_query_thread_count(config, 4); +rc = zvec_initialize(config); +zvec_config_data_destroy(config); + +if (rc != ZVEC_OK) { + // 处理初始化失败 +} +``` + +### 关闭库 + +```c +// 关闭前确保所有 Collection 已关闭 +zvec_collection_close(collection); +zvec_collection_destroy(collection); + +// 关闭库,释放所有资源 +ZVecErrorCode rc = zvec_shutdown(); +``` + +### 检查初始化状态 + +```c +bool initialized; +zvec_is_initialized(&initialized); +if (!initialized) { + zvec_initialize(NULL); +} +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_initialize(config)` | `const ZVecConfigData*` | `ZVecErrorCode` | 初始化库 | +| `zvec_shutdown()` | 无 | `ZVecErrorCode` | 关闭库 | +| `zvec_is_initialized(&initialized)` | `bool*` | `ZVecErrorCode` | 检查是否已初始化 | + +--- + +## 配置管理 + +### 配置数据结构 + +```c +typedef struct { + uint64_t memory_limit_bytes; // 内存限制(字节) + + // 日志配置 + ZVecLogType log_type; + void *log_config; // ZVecConsoleLogConfig 或 ZVecFileLogConfig + + // 查询配置 + uint32_t query_thread_count; // 查询线程数 + float invert_to_forward_scan_ratio; // 倒排转正扫比例 + float brute_force_by_keys_ratio; // 暴力检索比例 + + // 优化配置 + uint32_t optimize_thread_count; // 优化线程数 +} ZVecConfigData; +``` + +### 日志配置 + +```c +// 控制台日志配置 +typedef struct { + ZVecLogLevel level; // 
日志级别 +} ZVecConsoleLogConfig; + +// 文件日志配置 +typedef struct { + ZVecLogLevel level; // 日志级别 + ZVecString dir; // 日志目录 + ZVecString basename; // 日志文件基础名 + uint32_t file_size; // 文件大小 (MB) + uint32_t overdue_days; // 过期天数 +} ZVecFileLogConfig; +``` + +### 日志级别 + +```c +typedef enum { + ZVEC_LOG_LEVEL_DEBUG = 0, + ZVEC_LOG_LEVEL_INFO = 1, + ZVEC_LOG_LEVEL_WARN = 2, + ZVEC_LOG_LEVEL_ERROR = 3, + ZVEC_LOG_LEVEL_FATAL = 4 +} ZVecLogLevel; +``` + +### 配置创建与销毁 + +```c +// 创建配置 +ZVecConfigData *config = zvec_config_data_create(); + +// 创建控制台日志配置 +ZVecConsoleLogConfig *console_log = zvec_config_console_log_create( + ZVEC_LOG_LEVEL_INFO); + +// 创建文件日志配置 +ZVecFileLogConfig *file_log = zvec_config_file_log_create( + ZVEC_LOG_LEVEL_DEBUG, + "/var/log/zvec", // 日志目录 + "zvec", // 基础文件名 + 100, // 文件大小 100MB + 30 // 保留 30 天 +); + +// 设置配置 +zvec_config_data_set_memory_limit(config, 1024 * 1024 * 1024); +zvec_config_data_set_log_config(config, ZVEC_LOG_TYPE_CONSOLE, console_log); +zvec_config_data_set_query_thread_count(config, 8); +zvec_config_data_set_optimize_thread_count(config, 4); + +// 销毁配置 +zvec_config_console_log_destroy(console_log); +zvec_config_file_log_destroy(file_log); +zvec_config_data_destroy(config); +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_config_data_create()` | 无 | `ZVecConfigData*` | 创建配置数据 | +| `zvec_config_data_destroy(config)` | `ZVecConfigData*` | void | 销毁配置数据 | +| `zvec_config_data_set_memory_limit(config, bytes)` | config, 字节数 | `ZVecErrorCode` | 设置内存限制 | +| `zvec_config_data_set_log_config(config, type, cfg)` | config, 类型,配置 | `ZVecErrorCode` | 设置日志配置 | +| `zvec_config_data_set_query_thread_count(config, count)` | config, 线程数 | `ZVecErrorCode` | 设置查询线程数 | +| `zvec_config_data_set_optimize_thread_count(config, count)` | config, 线程数 | `ZVecErrorCode` | 设置优化线程数 | +| `zvec_config_console_log_create(level)` | 日志级别 | `ZVecConsoleLogConfig*` | 创建控制台日志配置 | +| `zvec_config_console_log_destroy(cfg)` | 配置指针 | void | 销毁控制台日志配置 | +| 
`zvec_config_file_log_create(...)` | 级别,目录,文件名,大小,天数 | `ZVecFileLogConfig*` | 创建文件日志配置 | +| `zvec_config_file_log_destroy(cfg)` | 配置指针 | void | 销毁文件日志配置 | + +--- + +## 数据结构 + +### 字符串类型 + +```c +// 字符串视图(不拥有内存) +typedef struct { + const char *data; + size_t length; +} ZVecStringView; + +// 可变字符串(拥有内存) +typedef struct { + char *data; + size_t length; + size_t capacity; +} ZVecString; + +// 字符串数组 +typedef struct { + ZVecString *strings; + size_t count; +} ZVecStringArray; +``` + +### 数组类型 + +```c +// Float 数组 +typedef struct { + const float *data; + size_t length; +} ZVecFloatArray; + +// Int64 数组 +typedef struct { + const int64_t *data; + size_t length; +} ZVecInt64Array; + +// 字节数组 +typedef struct { + const uint8_t *data; + size_t length; +} ZVecByteArray; + +// 可变字节数组 +typedef struct { + uint8_t *data; + size_t length; + size_t capacity; +} ZVecMutableByteArray; +``` + +### 字符串操作 + +```c +// 从 C 字符串创建 +ZVecString *str = zvec_string_create("Hello, World!"); + +// 从字符串视图创建 +ZVecStringView view = {"Hello", 5}; +ZVecString *str2 = zvec_string_create_from_view(&view); + +// 创建二进制安全字符串(可包含 null 字节) +uint8_t data[] = {0x00, 0x01, 0x02, 0x03}; +ZVecString *bin_str = zvec_bin_create(data, sizeof(data)); + +// 复制字符串 +ZVecString *copy = zvec_string_copy(str); + +// 获取 C 字符串 +const char *c_str = zvec_string_c_str(str); + +// 获取长度 +size_t len = zvec_string_length(str); + +// 比较字符串 +int cmp = zvec_string_compare(str1, str2); // 返回 -1, 0, 1 + +// 释放字符串 +zvec_free_string(str); +``` + +### 数组操作 + +```c +// 创建字符串数组 +ZVecStringArray *arr = zvec_string_array_create(10); + +// 添加字符串 +zvec_string_array_add(arr, 0, "first"); +zvec_string_array_add(arr, 1, "second"); + +// 销毁字符串数组 +zvec_string_array_destroy(arr); + +// 创建字节数组 +ZVecMutableByteArray *byte_arr = zvec_byte_array_create(1024); +zvec_byte_array_destroy(byte_arr); + +// 创建 float 数组 +ZVecFloatArray *float_arr = zvec_float_array_create(100); +zvec_float_array_destroy(float_arr); + +// 创建 int64 数组 +ZVecInt64Array *int_arr = 
zvec_int64_array_create(50); +zvec_int64_array_destroy(int_arr); + +// 释放 uint8 数组 +zvec_free_uint8_array(uint8_t *array); +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_string_create(str)` | `const char*` | `ZVecString*` | 从 C 字符串创建 | +| `zvec_string_create_from_view(view)` | `ZVecStringView*` | `ZVecString*` | 从视图创建字符串 | +| `zvec_bin_create(data, length)` | `uint8_t*`, size_t | `ZVecString*` | 创建二进制字符串 | +| `zvec_string_copy(str)` | `ZVecString*` | `ZVecString*` | 复制字符串 | +| `zvec_string_c_str(str)` | `ZVecString*` | `const char*` | 获取 C 字符串 | +| `zvec_string_length(str)` | `ZVecString*` | size_t | 获取长度 | +| `zvec_string_compare(s1, s2)` | 两个字符串 | int | 比较字符串 | +| `zvec_free_string(str)` | `ZVecString*` | void | 释放字符串 | +| `zvec_string_array_create(count)` | size_t | `ZVecStringArray*` | 创建字符串数组 | +| `zvec_string_array_add(arr, idx, str)` | arr, 索引,字符串 | void | 添加字符串 | +| `zvec_string_array_destroy(arr)` | `ZVecStringArray*` | void | 销毁字符串数组 | +| `zvec_byte_array_create(capacity)` | size_t | `ZVecMutableByteArray*` | 创建字节数组 | +| `zvec_byte_array_destroy(arr)` | `ZVecMutableByteArray*` | void | 销毁字节数组 | +| `zvec_float_array_create(count)` | size_t | `ZVecFloatArray*` | 创建 float 数组 | +| `zvec_float_array_destroy(arr)` | `ZVecFloatArray*` | void | 销毁 float 数组 | +| `zvec_int64_array_create(count)` | size_t | `ZVecInt64Array*` | 创建 int64 数组 | +| `zvec_int64_array_destroy(arr)` | `ZVecInt64Array*` | void | 销毁 int64 数组 | +| `zvec_free_uint8_array(arr)` | `uint8_t*` | void | 释放 uint8 数组 | + +--- + +## Schema 管理 + +### 数据类型 + +```c +typedef enum { + // 标量类型 + ZVEC_DATA_TYPE_UNDEFINED = 0, + ZVEC_DATA_TYPE_BINARY = 1, + ZVEC_DATA_TYPE_STRING = 2, + ZVEC_DATA_TYPE_BOOL = 3, + ZVEC_DATA_TYPE_INT32 = 4, + ZVEC_DATA_TYPE_INT64 = 5, + ZVEC_DATA_TYPE_UINT32 = 6, + ZVEC_DATA_TYPE_UINT64 = 7, + ZVEC_DATA_TYPE_FLOAT = 8, + ZVEC_DATA_TYPE_DOUBLE = 9, + + // 向量类型 + ZVEC_DATA_TYPE_VECTOR_BINARY32 = 20, + ZVEC_DATA_TYPE_VECTOR_BINARY64 = 21, + 
ZVEC_DATA_TYPE_VECTOR_FP16 = 22, + ZVEC_DATA_TYPE_VECTOR_FP32 = 23, + ZVEC_DATA_TYPE_VECTOR_FP64 = 24, + ZVEC_DATA_TYPE_VECTOR_INT4 = 25, + ZVEC_DATA_TYPE_VECTOR_INT8 = 26, + ZVEC_DATA_TYPE_VECTOR_INT16 = 27, + + // 稀疏向量类型 + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16 = 30, + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 = 31, + + // 数组类型 + ZVEC_DATA_TYPE_ARRAY_BINARY = 40, + ZVEC_DATA_TYPE_ARRAY_STRING = 41, + ZVEC_DATA_TYPE_ARRAY_BOOL = 42, + ZVEC_DATA_TYPE_ARRAY_INT32 = 43, + ZVEC_DATA_TYPE_ARRAY_INT64 = 44, + ZVEC_DATA_TYPE_ARRAY_UINT32 = 45, + ZVEC_DATA_TYPE_ARRAY_UINT64 = 46, + ZVEC_DATA_TYPE_ARRAY_FLOAT = 47, + ZVEC_DATA_TYPE_ARRAY_DOUBLE = 48 +} ZVecDataType; +``` + +### 字段 Schema + +```c +typedef struct { + ZVecString *name; // 字段名 + ZVecDataType data_type; // 数据类型 + bool nullable; // 是否可空 + uint32_t dimension; // 向量维度(仅向量类型使用) + ZVecIndexParams *index_params; // 索引参数 +} ZVecFieldSchema; +``` + +### 创建字段 Schema + +```c +// 创建标量字段 +ZVecFieldSchema *id_field = zvec_field_schema_create( + "id", ZVEC_DATA_TYPE_STRING, false, 0); + +// 创建向量字段(768 维) +ZVecFieldSchema *embedding_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 768); + +// 创建带索引的字段 +ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); +zvec_field_schema_set_hnsw_index(embedding_field, hnsw_params); + +// 或者使用专用函数 +zvec_field_schema_set_invert_index(field, invert_params); +zvec_field_schema_set_hnsw_index(field, hnsw_params); +zvec_field_schema_set_flat_index(field, flat_params); +zvec_field_schema_set_ivf_index(field, ivf_params); + +// 设置索引参数 +zvec_field_schema_set_index_params(field, index_params); + +// 销毁字段 Schema +zvec_field_schema_destroy(field); +zvec_free_field_schema(field); +``` + +### Collection Schema + +```c +typedef struct { + ZVecString *name; // 集合名 + ZVecFieldSchema **fields; // 字段数组 + size_t field_count; // 字段数量 + size_t field_capacity; // 字段容量 + uint64_t max_doc_count_per_segment; // 
每段最大文档数 +} ZVecCollectionSchema; +``` + +### 创建 Collection Schema + +```c +// 创建 Schema +ZVecCollectionSchema *schema = zvec_collection_schema_create("my_collection"); + +// 添加单个字段 +ZVecFieldSchema *field = zvec_field_schema_create( + "title", ZVEC_DATA_TYPE_STRING, false, 0); +zvec_collection_schema_add_field(schema, field); + +// 批量添加字段 +ZVecFieldSchema fields[3] = { + *zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0), + *zvec_field_schema_create("embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 768), + *zvec_field_schema_create("timestamp", ZVEC_DATA_TYPE_INT64, true, 0) +}; +zvec_collection_schema_add_fields(schema, fields, 3); + +// 获取字段数量 +size_t count = zvec_collection_schema_get_field_count(schema); + +// 按索引获取字段 +ZVecFieldSchema *f = zvec_collection_schema_get_field(schema, 0); + +// 按名称查找字段 +ZVecFieldSchema *f = zvec_collection_schema_find_field(schema, "embedding"); + +// 删除字段 +zvec_collection_schema_remove_field(schema, "title"); + +// 批量删除字段 +const char *field_names[] = {"field1", "field2"}; +zvec_collection_schema_remove_fields(schema, field_names, 2); + +// 设置每段最大文档数 +zvec_collection_schema_set_max_doc_count_per_segment(schema, 500000); + +// 获取每段最大文档数 +uint64_t max_docs = zvec_collection_schema_get_max_doc_count_per_segment(schema); + +// 验证 Schema +ZVecString *error_msg; +ZVecErrorCode rc = zvec_collection_schema_validate(schema, &error_msg); +if (rc != ZVEC_OK) { + printf("Invalid schema: %s\n", error_msg->data); + zvec_free_string(error_msg); +} + +// 销毁 Schema +zvec_collection_schema_destroy(schema); +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_field_schema_create(name, type, nullable, dim)` | 名,类型,是否可空,维度 | `ZVecFieldSchema*` | 创建字段 Schema | +| `zvec_field_schema_destroy(schema)` | `ZVecFieldSchema*` | void | 销毁字段 Schema | +| `zvec_field_schema_set_index_params(schema, params)` | schema, 索引参数 | `ZVecErrorCode` | 设置索引参数 | +| `zvec_field_schema_set_invert_index(schema, params)` | schema, 倒排参数 | void | 
设置倒排索引 | +| `zvec_field_schema_set_hnsw_index(schema, params)` | schema, HNSW 参数 | void | 设置 HNSW 索引 | +| `zvec_field_schema_set_flat_index(schema, params)` | schema, Flat 参数 | void | 设置 Flat 索引 | +| `zvec_field_schema_set_ivf_index(schema, params)` | schema, IVF 参数 | void | 设置 IVF 索引 | +| `zvec_free_field_schema(schema)` | `ZVecFieldSchema*` | void | 释放字段 Schema | +| `zvec_collection_schema_create(name)` | 集合名 | `ZVecCollectionSchema*` | 创建集合 Schema | +| `zvec_collection_schema_destroy(schema)` | `ZVecCollectionSchema*` | void | 销毁集合 Schema | +| `zvec_collection_schema_add_field(schema, field)` | schema, 字段 | `ZVecErrorCode` | 添加字段 | +| `zvec_collection_schema_add_fields(schema, fields, count)` | schema, 字段数组,数量 | `ZVecErrorCode` | 批量添加字段 | +| `zvec_collection_schema_remove_field(schema, name)` | schema, 字段名 | `ZVecErrorCode` | 删除字段 | +| `zvec_collection_schema_remove_fields(schema, names, count)` | schema, 字段名数组,数量 | `ZVecErrorCode` | 批量删除字段 | +| `zvec_collection_schema_get_field_count(schema)` | `ZVecCollectionSchema*` | size_t | 获取字段数量 | +| `zvec_collection_schema_get_field(schema, index)` | schema, 索引 | `ZVecFieldSchema*` | 按索引获取字段 | +| `zvec_collection_schema_find_field(schema, name)` | schema, 字段名 | `ZVecFieldSchema*` | 按名查找字段 | +| `zvec_collection_schema_validate(schema, &error)` | schema, 错误输出 | `ZVecErrorCode` | 验证 Schema | +| `zvec_collection_schema_set_max_doc_count_per_segment(schema, count)` | schema, 数量 | `ZVecErrorCode` | 设置段最大文档数 | +| `zvec_collection_schema_get_max_doc_count_per_segment(schema)` | `ZVecCollectionSchema*` | uint64_t | 获取段最大文档数 | + +--- + +## Collection 管理 + +### Collection 选项 + +```c +typedef struct { + bool enable_mmap; // 是否启用内存映射 + size_t max_buffer_size; // 最大缓冲区大小 + bool read_only; // 是否只读模式 + uint64_t max_doc_count_per_segment; // 每段最大文档数 +} ZVecCollectionOptions; +``` + +### 创建和打开 Collection + +```c +// 初始化默认选项 +ZVecCollectionOptions options; +zvec_collection_options_init_default(&options); + +// 或使用宏 +ZVecCollectionOptions 
options = ZVEC_DEFAULT_OPTIONS(); + +// 自定义选项 +options.enable_mmap = true; +options.max_buffer_size = 2 * 1024 * 1024; // 2MB +options.read_only = false; +options.max_doc_count_per_segment = 500000; + +// 创建并打开 +ZVecCollection *collection; +ZVecErrorCode rc = zvec_collection_create_and_open( + "/path/to/data", schema, &options, &collection); + +// 打开已有集合 +rc = zvec_collection_open("/path/to/data", &options, &collection); +``` + +### Collection 操作 + +```c +// 关闭集合 +rc = zvec_collection_close(collection); + +// 销毁集合 +rc = zvec_collection_destroy(collection); + +// 刷盘数据 +rc = zvec_collection_flush(collection); + +// 获取 Schema +ZVecCollectionSchema *schema; +rc = zvec_collection_get_schema(collection, &schema); +// 使用后销毁 +zvec_collection_schema_destroy(schema); + +// 获取选项 +ZVecCollectionOptions *options; +rc = zvec_collection_get_options(collection, &options); +// 使用后销毁 +free(options); + +// 获取统计信息 +typedef struct { + uint64_t doc_count; // 文档总数 + ZVecString **index_names; // 索引名数组 + float *index_completeness; // 索引完成度数组 + size_t index_count; // 索引数量 +} ZVecCollectionStats; + +ZVecCollectionStats *stats; +rc = zvec_collection_get_stats(collection, &stats); +printf("Documents: %lu\n", stats->doc_count); +printf("Indexes: %zu\n", stats->index_count); +zvec_collection_stats_destroy(stats); +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_collection_options_init_default(&opts)` | `ZVecCollectionOptions*` | void | 初始化默认选项 | +| `zvec_collection_create_and_open(path, schema, opts, &coll)` | 路径,Schema, 选项,输出 | `ZVecErrorCode` | 创建并打开集合 | +| `zvec_collection_open(path, opts, &coll)` | 路径,选项,输出 | `ZVecErrorCode` | 打开已有集合 | +| `zvec_collection_close(coll)` | `ZVecCollection*` | `ZVecErrorCode` | 关闭集合 | +| `zvec_collection_destroy(coll)` | `ZVecCollection*` | `ZVecErrorCode` | 销毁集合 | +| `zvec_collection_flush(coll)` | `ZVecCollection*` | `ZVecErrorCode` | 刷盘数据 | +| `zvec_collection_get_schema(coll, &schema)` | 集合,输出 | `ZVecErrorCode` | 获取 Schema | +| 
`zvec_collection_get_options(coll, &opts)` | 集合,输出 | `ZVecErrorCode` | 获取选项 | +| `zvec_collection_get_stats(coll, &stats)` | 集合,输出 | `ZVecErrorCode` | 获取统计信息 | +| `zvec_collection_stats_destroy(stats)` | `ZVecCollectionStats*` | void | 销毁统计信息 | + +--- + +## 索引管理 + +### 索引类型 + +```c +typedef enum { + ZVEC_INDEX_TYPE_UNDEFINED = 0, + ZVEC_INDEX_TYPE_HNSW = 1, // HNSW 图索引 + ZVEC_INDEX_TYPE_IVF = 3, // 倒排文件索引 + ZVEC_INDEX_TYPE_FLAT = 4, // 暴力检索 + ZVEC_INDEX_TYPE_INVERT = 10 // 标量倒排索引 +} ZVecIndexType; +``` + +### 距离度量类型 + +```c +typedef enum { + ZVEC_METRIC_TYPE_UNDEFINED = 0, + ZVEC_METRIC_TYPE_L2 = 1, // L2 距离 + ZVEC_METRIC_TYPE_IP = 2, // 内积 + ZVEC_METRIC_TYPE_COSINE = 3, // 余弦相似度 + ZVEC_METRIC_TYPE_MIPSL2 = 4 // L2 内积 +} ZVecMetricType; +``` + +### 量化类型 + +```c +typedef enum { + ZVEC_QUANTIZE_TYPE_UNDEFINED = 0, + ZVEC_QUANTIZE_TYPE_FP16 = 1, // FP16 量化 + ZVEC_QUANTIZE_TYPE_INT8 = 2, // INT8 量化 + ZVEC_QUANTIZE_TYPE_INT4 = 3 // INT4 量化 +} ZVecQuantizeType; +``` + +### HNSW 索引参数 + +```c +typedef struct { + ZVecVectorIndexParams base; // 基类参数 + int m; // 图连接度参数 + int ef_construction; // 构建时探索因子 + int ef_search; // 搜索时探索因子 +} ZVecHnswIndexParams; + +// 创建 HNSW 参数 +ZVecHnswIndexParams *params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, // 距离类型 + ZVEC_QUANTIZE_TYPE_UNDEFINED, // 量化类型 + 16, // m: 图连接度 + 200, // ef_construction: 构建探索因子 + 50 // ef_search: 搜索探索因子 +); + +// 或使用初始化函数 +ZVecHnswIndexParams params; +zvec_index_params_hnsw_init(&params, + ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + +// 或使用宏 +ZVecHnswIndexParams params = ZVEC_HNSW_PARAMS( + ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + +zvec_index_params_hnsw_destroy(params); +``` + +### IVF 索引参数 + +```c +typedef struct { + ZVecVectorIndexParams base; // 基类参数 + int n_list; // 聚类中心数量 + int n_iters; // 迭代次数 + bool use_soar; // 是否使用 SOAR 算法 + int n_probe; // 搜索时探测的聚类数 +} ZVecIVFIndexParams; + +// 创建 IVF 参数 +ZVecIVFIndexParams *params = 
zvec_index_params_ivf_create( + ZVEC_METRIC_TYPE_L2, // 距离类型 + ZVEC_QUANTIZE_TYPE_INT8, // 量化类型 + 1024, // n_list: 聚类中心数 + 25, // n_iters: 迭代次数 + true, // use_soar: 使用 SOAR + 20 // n_probe: 探测聚类数 +); + +// 或使用宏 +ZVecIVFIndexParams params = ZVEC_IVF_PARAMS( + ZVEC_METRIC_TYPE_L2, 1024, 25, true, 20, ZVEC_QUANTIZE_TYPE_INT8); + +zvec_index_params_ivf_destroy(params); +``` + +### Flat 索引参数 + +```c +typedef struct { + ZVecVectorIndexParams base; // 基类参数 +} ZVecFlatIndexParams; + +// 创建 Flat 参数 +ZVecFlatIndexParams *params = zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED); + +// 或使用宏 +ZVecFlatIndexParams params = ZVEC_FLAT_PARAMS( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED); + +zvec_index_params_flat_destroy(params); +``` + +### 标量倒排索引参数 + +```c +typedef struct { + ZVecBaseIndexParams base; // 基类参数 + bool enable_range_optimization; // 是否启用范围优化 + bool enable_extended_wildcard; // 是否启用通配符 +} ZVecInvertIndexParams; + +// 创建倒排索引参数 +ZVecInvertIndexParams *params = zvec_index_params_invert_create( + true, // enable_range_optimization + false // enable_extended_wildcard +); + +// 或使用宏 +ZVecInvertIndexParams params = ZVEC_INVERT_PARAMS(true, false); + +// 或使用初始化函数 +ZVecInvertIndexParams params; +zvec_index_params_invert_init(&params, true, false); + +zvec_index_params_invert_destroy(params); +``` + +### 创建索引 + +```c +// 通用创建索引函数 +zvec_collection_create_index(collection, "embedding", index_params); + +// 类型安全的创建索引函数 +zvec_collection_create_hnsw_index(collection, "embedding", hnsw_params); +zvec_collection_create_ivf_index(collection, "embedding", ivf_params); +zvec_collection_create_flat_index(collection, "embedding", flat_params); +zvec_collection_create_invert_index(collection, "title", invert_params); + +// 删除索引 +zvec_collection_drop_index(collection, "embedding"); + +// 优化集合(重建索引、合并段) +zvec_collection_optimize(collection); +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_index_params_base_init(params, 
type)` | 参数,类型 | void | 初始化基础参数 | +| `zvec_index_params_invert_init(params, range_opt, wildcard)` | 参数,范围优化,通配符 | void | 初始化倒排参数 | +| `zvec_index_params_vector_init(params, idx, metric, quant)` | 参数,索引类型,度量,量化 | void | 初始化向量索引参数 | +| `zvec_index_params_hnsw_init(params, metric, m, ef_c, ef_s, quant)` | 参数,度量,m, ef_construction, ef_search, 量化 | void | 初始化 HNSW 参数 | +| `zvec_index_params_ivf_init(params, metric, nlist, niters, soar, nprobe, quant)` | 参数,度量,nlist, niters, soar, nprobe, 量化 | void | 初始化 IVF 参数 | +| `zvec_index_params_flat_init(params, metric, quant)` | 参数,度量,量化 | void | 初始化 Flat 参数 | +| `zvec_index_params_invert_create(range_opt, wildcard)` | 范围优化,通配符 | `ZVecInvertIndexParams*` | 创建倒排参数 | +| `zvec_index_params_vector_create(type, metric, quant)` | 类型,度量,量化 | `ZVecVectorIndexParams*` | 创建向量索引参数 | +| `zvec_index_params_hnsw_create(metric, quant, m, ef_c, ef_s)` | 度量,量化,m, ef_construction, ef_search | `ZVecHnswIndexParams*` | 创建 HNSW 参数 | +| `zvec_index_params_ivf_create(metric, quant, nlist, niters, soar, nprobe)` | 度量,量化,nlist, niters, soar, nprobe | `ZVecIVFIndexParams*` | 创建 IVF 参数 | +| `zvec_index_params_flat_create(metric, quant)` | 度量,量化 | `ZVecFlatIndexParams*` | 创建 Flat 参数 | +| `zvec_index_params_invert_destroy(params)` | 参数 | void | 销毁倒排参数 | +| `zvec_index_params_vector_destroy(params)` | 参数 | void | 销毁向量索引参数 | +| `zvec_index_params_hnsw_destroy(params)` | 参数 | void | 销毁 HNSW 参数 | +| `zvec_index_params_ivf_destroy(params)` | 参数 | void | 销毁 IVF 参数 | +| `zvec_index_params_flat_destroy(params)` | 参数 | void | 销毁 Flat 参数 | +| `zvec_collection_create_index(coll, field, params)` | 集合,字段,参数 | `ZVecErrorCode` | 创建索引 | +| `zvec_collection_create_hnsw_index(...)` | 集合,字段,HNSW 参数 | `ZVecErrorCode` | 创建 HNSW 索引 | +| `zvec_collection_create_ivf_index(...)` | 集合,字段,IVF 参数 | `ZVecErrorCode` | 创建 IVF 索引 | +| `zvec_collection_create_flat_index(...)` | 集合,字段,Flat 参数 | `ZVecErrorCode` | 创建 Flat 索引 | +| `zvec_collection_create_invert_index(...)` | 集合,字段,倒排参数 | 
`ZVecErrorCode` | 创建倒排索引 | +| `zvec_collection_drop_index(coll, field)` | 集合,字段名 | `ZVecErrorCode` | 删除索引 | +| `zvec_collection_optimize(coll)` | 集合 | `ZVecErrorCode` | 优化集合 | + +--- + +## 文档操作 + +### 文档结构 + +```c +typedef struct ZVecDoc ZVecDoc; // 不透明指针 + +// 字段值联合 +typedef union { + bool bool_value; + int32_t int32_value; + int64_t int64_value; + uint32_t uint32_value; + uint64_t uint64_value; + float float_value; + double double_value; + ZVecString string_value; + ZVecFloatArray vector_value; + ZVecByteArray binary_value; +} ZVecFieldValue; + +// 文档字段 +typedef struct { + ZVecString name; + ZVecDataType data_type; + ZVecFieldValue value; +} ZVecDocField; +``` + +### 创建和销毁文档 + +```c +// 创建文档 +ZVecDoc *doc = zvec_doc_create(); + +// 清空文档 +zvec_doc_clear(doc); + +// 销毁文档 +zvec_doc_destroy(doc); +``` + +### 设置文档属性 + +```c +// 设置主键 +zvec_doc_set_pk(doc, "doc_001"); + +// 设置文档 ID +zvec_doc_set_doc_id(doc, 12345); + +// 设置分数 +zvec_doc_set_score(doc, 0.95f); + +// 设置操作类型 +typedef enum { + ZVEC_DOC_OP_INSERT = 0, // 插入 + ZVEC_DOC_OP_UPDATE = 1, // 更新 + ZVEC_DOC_OP_UPSERT = 2, // 插入或更新 + ZVEC_DOC_OP_DELETE = 3 // 删除 +} ZVecDocOperator; + +zvec_doc_set_operator(doc, ZVEC_DOC_OP_INSERT); +``` + +### 获取文档属性 + +```c +// 获取文档 ID +uint64_t id = zvec_doc_get_doc_id(doc); + +// 获取分数 +float score = zvec_doc_get_score(doc); + +// 获取操作类型 +ZVecDocOperator op = zvec_doc_get_operator(doc); + +// 获取主键指针(不复制) +const char *pk = zvec_doc_get_pk_pointer(doc); + +// 获取主键副本(需手动释放) +const char *pk = zvec_doc_get_pk_copy(doc); +free((void*)pk); + +// 获取字段数量 +size_t count = zvec_doc_get_field_count(doc); + +// 检查文档是否为空 +bool empty = zvec_doc_is_empty(doc); + +// 检查是否包含字段 +bool has = zvec_doc_has_field(doc, "embedding"); + +// 检查字段是否有值 +bool has_value = zvec_doc_has_field_value(doc, "embedding"); + +// 检查字段是否为 null +bool is_null = zvec_doc_is_field_null(doc, "optional_field"); +``` + +### 添加字段 + +```c +// 按值添加字段 +float embedding[768] = {0.1f, 0.2f, ...}; +zvec_doc_add_field_by_value(doc, 
"embedding", + ZVEC_DATA_TYPE_VECTOR_FP32, embedding, sizeof(embedding)); + +// 添加字符串字段 +const char *title = "Hello World"; +zvec_doc_add_field_by_value(doc, "title", + ZVEC_DATA_TYPE_STRING, title, strlen(title) + 1); + +// 添加整数字段 +int64_t timestamp = 1234567890; +zvec_doc_add_field_by_value(doc, "timestamp", + ZVEC_DATA_TYPE_INT64, &timestamp, sizeof(timestamp)); + +// 按结构添加字段 +ZVecDocField field; +field.name = ZVEC_STRING("score"); +field.data_type = ZVEC_DATA_TYPE_FLOAT; +field.value.float_value = 0.95f; +zvec_doc_add_field_by_struct(doc, &field); + +// 删除字段 +zvec_doc_remove_field(doc, "title"); +``` + +### 获取字段值 + +```c +// 获取基本类型值 +float float_val; +zvec_doc_get_field_value_basic(doc, "score", + ZVEC_DATA_TYPE_FLOAT, &float_val, sizeof(float_val)); + +int64_t int_val; +zvec_doc_get_field_value_basic(doc, "timestamp", + ZVEC_DATA_TYPE_INT64, &int_val, sizeof(int_val)); + +// 获取字段值副本(需手动释放) +void *value; +size_t value_size; + +// 获取字符串 +zvec_doc_get_field_value_copy(doc, "title", ZVEC_DATA_TYPE_STRING, &value, &value_size); +printf("Title: %s\n", (char*)value); +free(value); + +// 获取向量 +zvec_doc_get_field_value_copy(doc, "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, &value, &value_size); +float *vec = (float*)value; +// 使用... 
+free(value); + +// 获取二进制数据 +zvec_doc_get_field_value_copy(doc, "data", ZVEC_DATA_TYPE_BINARY, &value, &value_size); +zvec_free_uint8_array((uint8_t*)value); + +// 获取字段值指针(无需释放,数据在文档内) +const void *value; +size_t value_size; +zvec_doc_get_field_value_pointer(doc, "score", ZVEC_DATA_TYPE_FLOAT, &value, &value_size); +float score = *(float*)value; +``` + +### 获取所有字段名 + +```c +char **field_names; +size_t count; +zvec_doc_get_field_names(doc, &field_names, &count); + +for (size_t i = 0; i < count; i++) { + printf("Field %zu: %s\n", i, field_names[i]); +} + +// 释放 +zvec_free_str_array(field_names, count); +``` + +### 序列化/反序列化 + +```c +// 序列化 +uint8_t *data; +size_t size; +ZVecErrorCode rc = zvec_doc_serialize(doc, &data, &size); + +// 保存到文件 +FILE *f = fopen("doc.bin", "wb"); +fwrite(data, 1, size, f); +fclose(f); +zvec_free_uint8_array(data); + +// 从文件读取 +FILE *f = fopen("doc.bin", "rb"); +fseek(f, 0, SEEK_END); +size_t file_size = ftell(f); +fseek(f, 0, SEEK_SET); +uint8_t *buffer = malloc(file_size); +fread(buffer, 1, file_size, f); +fclose(f); + +// 反序列化 +ZVecDoc *new_doc; +rc = zvec_doc_deserialize(buffer, file_size, &new_doc); +free(buffer); + +// 使用... +zvec_doc_destroy(new_doc); +``` + +### 文档合并 + +```c +// 合并两个文档 +ZVecDoc *doc1 = zvec_doc_create(); +ZVecDoc *doc2 = zvec_doc_create(); + +// 设置字段... +zvec_doc_merge(doc1, doc2); // 将 doc2 的字段合并到 doc1 +``` + +### 内存使用 + +```c +size_t bytes = zvec_doc_memory_usage(doc); +printf("Document uses %zu bytes\n", bytes); +``` + +### 验证文档 + +```c +char *error_msg; +ZVecErrorCode rc = zvec_doc_validate(doc, schema, false, &error_msg); +if (rc != ZVEC_OK) { + printf("Invalid document: %s\n", error_msg); + free(error_msg); +} +``` + +### 文档详细信息 + +```c +char *detail_str; +zvec_doc_to_detail_string(doc, &detail_str); +printf("Document: %s\n", detail_str); +free(detail_str); +``` + +### 批量释放文档 + +```c +ZVecDoc **docs = malloc(count * sizeof(ZVecDoc*)); +// 填充 docs... 
+ +// 批量释放 +zvec_docs_free(docs, count); +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_doc_create()` | 无 | `ZVecDoc*` | 创建文档 | +| `zvec_doc_destroy(doc)` | `ZVecDoc*` | void | 销毁文档 | +| `zvec_doc_clear(doc)` | `ZVecDoc*` | void | 清空文档 | +| `zvec_doc_set_pk(doc, pk)` | doc, 主键 | void | 设置主键 | +| `zvec_doc_set_doc_id(doc, id)` | doc, ID | void | 设置文档 ID | +| `zvec_doc_set_score(doc, score)` | doc, 分数 | void | 设置分数 | +| `zvec_doc_set_operator(doc, op)` | doc, 操作类型 | void | 设置操作类型 | +| `zvec_doc_get_doc_id(doc)` | `ZVecDoc*` | uint64_t | 获取文档 ID | +| `zvec_doc_get_score(doc)` | `ZVecDoc*` | float | 获取分数 | +| `zvec_doc_get_operator(doc)` | `ZVecDoc*` | `ZVecDocOperator` | 获取操作类型 | +| `zvec_doc_get_pk_pointer(doc)` | `ZVecDoc*` | `const char*` | 获取主键指针 | +| `zvec_doc_get_pk_copy(doc)` | `ZVecDoc*` | `const char*` | 获取主键副本 | +| `zvec_doc_get_field_count(doc)` | `ZVecDoc*` | size_t | 获取字段数量 | +| `zvec_doc_is_empty(doc)` | `ZVecDoc*` | bool | 检查是否为空 | +| `zvec_doc_has_field(doc, name)` | doc, 字段名 | bool | 检查是否包含字段 | +| `zvec_doc_has_field_value(doc, name)` | doc, 字段名 | bool | 检查字段是否有值 | +| `zvec_doc_is_field_null(doc, name)` | doc, 字段名 | bool | 检查字段是否为 null | +| `zvec_doc_add_field_by_value(doc, name, type, value, size)` | doc, 名,类型,值,大小 | `ZVecErrorCode` | 添加字段 | +| `zvec_doc_add_field_by_struct(doc, field)` | doc, 字段结构 | `ZVecErrorCode` | 按结构添加字段 | +| `zvec_doc_remove_field(doc, name)` | doc, 字段名 | `ZVecErrorCode` | 删除字段 | +| `zvec_doc_get_field_value_basic(doc, name, type, buf, size)` | doc, 名,类型,缓冲区,大小 | `ZVecErrorCode` | 获取基本类型值 | +| `zvec_doc_get_field_value_copy(doc, name, type, &val, &size)` | doc, 名,类型,值输出,大小输出 | `ZVecErrorCode` | 获取字段值副本 | +| `zvec_doc_get_field_value_pointer(doc, name, type, &val, &size)` | doc, 名,类型,值输出,大小输出 | `ZVecErrorCode` | 获取字段值指针 | +| `zvec_doc_get_field_names(doc, &names, &count)` | doc, 名称输出,数量输出 | `ZVecErrorCode` | 获取所有字段名 | +| `zvec_doc_serialize(doc, &data, &size)` | doc, 数据输出,大小输出 | `ZVecErrorCode` | 序列化 | 
+| `zvec_doc_deserialize(data, size, &doc)` | 数据,大小,文档输出 | `ZVecErrorCode` | 反序列化 | +| `zvec_doc_merge(doc, other)` | doc, 源文档 | void | 合并文档 | +| `zvec_doc_memory_usage(doc)` | `ZVecDoc*` | size_t | 获取内存使用 | +| `zvec_doc_validate(doc, schema, is_update, &err)` | doc, schema, 是否更新,错误输出 | `ZVecErrorCode` | 验证文档 | +| `zvec_doc_to_detail_string(doc, &str)` | doc, 字符串输出 | `ZVecErrorCode` | 获取详细信息字符串 | +| `zvec_docs_free(docs, count)` | 文档数组,数量 | void | 批量释放文档 | +| `zvec_free_str_array(arr, count)` | 字符串数组,数量 | void | 释放字符串数组 | + +--- + +## 数据增删改 + +### 插入文档 + +```c +ZVecDoc *docs[3]; +docs[0] = zvec_doc_create(); +docs[1] = zvec_doc_create(); +docs[2] = zvec_doc_create(); + +zvec_doc_set_pk(docs[0], "doc_001"); +zvec_doc_set_pk(docs[1], "doc_002"); +zvec_doc_set_pk(docs[2], "doc_003"); + +// 添加字段... + +size_t success_count, error_count; +ZVecErrorCode rc = zvec_collection_insert(collection, + (const ZVecDoc**)docs, 3, &success_count, &error_count); + +printf("Inserted: %zu, Failed: %zu\n", success_count, error_count); + +// 清理 +zvec_docs_free(docs, 3); +``` + +### 更新文档 + +```c +ZVecDoc *doc = zvec_doc_create(); +zvec_doc_set_pk(doc, "doc_001"); + +// 设置要更新的字段 +float new_embedding[768] = {0.2f, 0.3f, ...}; +zvec_doc_add_field_by_value(doc, "embedding", + ZVEC_DATA_TYPE_VECTOR_FP32, new_embedding, sizeof(new_embedding)); + +size_t success_count, error_count; +ZVecErrorCode rc = zvec_collection_update(collection, + (const ZVecDoc**)&doc, 1, &success_count, &error_count); + +zvec_doc_destroy(doc); +``` + +### 插入或更新(Upsert) + +```c +ZVecDoc *doc = zvec_doc_create(); +zvec_doc_set_pk(doc, "doc_001"); +// 设置字段... 
+ +size_t success_count, error_count; +ZVecErrorCode rc = zvec_collection_upsert(collection, + (const ZVecDoc**)&doc, 1, &success_count, &error_count); + +zvec_doc_destroy(doc); +``` + +### 删除文档 + +```c +// 按主键删除 +const char *pks[] = {"doc_001", "doc_002", "doc_003"}; +size_t success_count, error_count; +ZVecErrorCode rc = zvec_collection_delete(collection, + pks, 3, &success_count, &error_count); + +// 按过滤条件删除 +rc = zvec_collection_delete_by_filter(collection, "category='spam'"); +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_collection_insert(coll, docs, count, &success, &error)` | 集合,文档数组,数量,成功数输出,错误数输出 | `ZVecErrorCode` | 插入文档 | +| `zvec_collection_update(coll, docs, count, &success, &error)` | 集合,文档数组,数量,成功数输出,错误数输出 | `ZVecErrorCode` | 更新文档 | +| `zvec_collection_upsert(coll, docs, count, &success, &error)` | 集合,文档数组,数量,成功数输出,错误数输出 | `ZVecErrorCode` | 插入或更新 | +| `zvec_collection_delete(coll, pks, count, &success, &error)` | 集合,主键数组,数量,成功数输出,错误数输出 | `ZVecErrorCode` | 按主键删除 | +| `zvec_collection_delete_by_filter(coll, filter)` | 集合,过滤表达式 | `ZVecErrorCode` | 按条件删除 | + +--- + +## 数据查询 + +### 向量查询参数 + +```c +typedef struct { + ZVecIndexType index_type; // 索引类型 + float radius; // 搜索半径 + bool is_linear; // 是否线性搜索 + bool is_using_refiner; // 是否使用优化器 +} ZVecQueryParams; +``` + +### HNSW 查询参数 + +```c +typedef struct { + ZVecQueryParams base; + int ef; // 搜索时探索因子 +} ZVecHnswQueryParams; + +// 创建 +ZVecHnswQueryParams *params = zvec_query_params_hnsw_create( + ZVEC_INDEX_TYPE_HNSW, + 100, // ef + 0.0f, // radius + false, // is_linear + true // is_using_refiner +); + +zvec_query_params_hnsw_set_ef(params, 200); +zvec_query_params_hnsw_destroy(params); +``` + +### IVF 查询参数 + +```c +typedef struct { + ZVecQueryParams base; + int nprobe; // 探测聚类数 + float scale_factor; // 缩放因子 +} ZVecIVFQueryParams; + +// 创建 +ZVecIVFQueryParams *params = zvec_query_params_ivf_create( + ZVEC_INDEX_TYPE_IVF, + 20, // nprobe + true, // is_using_refiner + 1.0f // 
scale_factor +); + +zvec_query_params_ivf_set_nprobe(params, 50); +zvec_query_params_ivf_set_scale_factor(params, 1.5f); +zvec_query_params_ivf_destroy(params); +``` + +### Flat 查询参数 + +```c +typedef struct { + ZVecQueryParams base; + float scale_factor; // 缩放因子 +} ZVecFlatQueryParams; + +ZVecFlatQueryParams *params = zvec_query_params_flat_create( + ZVEC_INDEX_TYPE_FLAT, + false, // is_using_refiner + 1.0f // scale_factor +); + +zvec_query_params_flat_destroy(params); +``` + +### 基础查询参数 + +```c +// 创建基础参数 +ZVecQueryParams *params = zvec_query_params_create(ZVEC_INDEX_TYPE_HNSW); + +// 设置属性 +zvec_query_params_set_index_type(params, ZVEC_INDEX_TYPE_HNSW); +zvec_query_params_set_radius(params, 0.5f); +zvec_query_params_set_is_linear(params, true); +zvec_query_params_set_is_using_refiner(params, true); + +zvec_query_params_destroy(params); +``` + +### 向量查询 + +```c +typedef struct { + int topk; // 返回结果数 + ZVecString field_name; // 查询字段名 + ZVecByteArray query_vector; // 查询向量 + ZVecByteArray query_sparse_indices; // 稀疏向量索引 + ZVecByteArray query_sparse_values; // 稀疏向量值 + ZVecString filter; // 过滤表达式 + bool include_vector; // 是否返回向量 + bool include_doc_id; // 是否返回文档 ID + ZVecStringArray output_fields; // 输出字段列表 + ZVecQueryParamsUnion *query_params; // 查询参数 +} ZVecVectorQuery; + +// 使用宏快速创建 +float query_vec[768] = {0.1f, 0.2f, ...}; +ZVecVectorQuery query = ZVEC_VECTOR_QUERY( + "embedding", // 字段名 + ZVEC_FLOAT_ARRAY(query_vec, 768), + 10, // topK + "category='news'" // 过滤条件 +); + +// 手动创建 +ZVecVectorQuery query = { + .topk = 10, + .field_name = ZVEC_STRING("embedding"), + .query_vector = ZVEC_FLOAT_ARRAY(query_vec, 768), + .filter = ZVEC_STRING(""), + .include_vector = true, + .include_doc_id = true, + .output_fields.strings = NULL, + .output_fields.count = 0, + .query_params = NULL +}; + +// 执行查询 +ZVecDoc **results; +size_t result_count; +ZVecErrorCode rc = zvec_collection_query(collection, &query, &results, &result_count); + +if (rc == ZVEC_OK) { + for (size_t i = 0; i < 
result_count; i++) { + const char *pk = zvec_doc_get_pk_pointer(results[i]); + float score = zvec_doc_get_score(results[i]); + printf("Result %zu: pk=%s, score=%f\n", i, pk, score); + } +} + +// 释放结果 +zvec_docs_free(results, result_count); +``` + +### 分组向量查询 + +```c +typedef struct { + ZVecString field_name; // 查询字段名 + ZVecByteArray query_vector; // 查询向量 + ZVecByteArray query_sparse_indices; // 稀疏向量索引 + ZVecByteArray query_sparse_values; // 稀疏向量值 + ZVecString filter; // 过滤表达式 + bool include_vector; // 是否返回向量 + ZVecStringArray output_fields; // 输出字段列表 + ZVecString group_by_field_name; // 分组字段名 + uint32_t group_count; // 分组数量 + uint32_t group_topk; // 每组返回结果数 + ZVecQueryParamsUnion *query_params; // 查询参数 +} ZVecGroupByVectorQuery; + +// 创建分组查询 +ZVecGroupByVectorQuery query = { + .field_name = ZVEC_STRING("embedding"), + .query_vector = ZVEC_FLOAT_ARRAY(query_vec, 768), + .filter = ZVEC_STRING(""), + .include_vector = false, + .group_by_field_name = ZVEC_STRING("category"), + .group_count = 5, + .group_topk = 3, + .query_params = NULL +}; + +// 执行查询 +ZVecDoc **results; +ZVecString **group_values; +size_t result_count; + +ZVecErrorCode rc = zvec_collection_query_by_group( + collection, &query, &results, &group_values, &result_count); + +if (rc == ZVEC_OK) { + for (size_t i = 0; i < result_count; i++) { + printf("Group: %s\n", group_values[i]->data); + // 处理结果... + } +} + +// 释放结果 +zvec_docs_free(results, result_count); +zvec_string_array_destroy((ZVecStringArray*)group_values); +``` + +### 按主键获取 + +```c +const char *pks[] = {"doc_001", "doc_002", "doc_003"}; +ZVecDoc **documents; +size_t found_count; + +ZVecErrorCode rc = zvec_collection_fetch(collection, + pks, 3, &documents, &found_count); + +printf("Found %zu documents\n", found_count); + +// 使用... 
+zvec_docs_free(documents, found_count); +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_query_params_create(type)` | 索引类型 | `ZVecQueryParams*` | 创建查询参数 | +| `zvec_query_params_hnsw_create(type, ef, radius, linear, refiner)` | 类型,ef, 半径,线性,优化器 | `ZVecHnswQueryParams*` | 创建 HNSW 查询参数 | +| `zvec_query_params_ivf_create(type, nprobe, refiner, scale)` | 类型,nprobe, 优化器,缩放因子 | `ZVecIVFQueryParams*` | 创建 IVF 查询参数 | +| `zvec_query_params_flat_create(type, refiner, scale)` | 类型,优化器,缩放因子 | `ZVecFlatQueryParams*` | 创建 Flat 查询参数 | +| `zvec_query_params_union_create(type)` | 索引类型 | `ZVecQueryParamsUnion*` | 创建查询参数联合 | +| `zvec_query_params_destroy(params)` | 参数 | void | 销毁查询参数 | +| `zvec_query_params_hnsw_destroy(params)` | 参数 | void | 销毁 HNSW 查询参数 | +| `zvec_query_params_ivf_destroy(params)` | 参数 | void | 销毁 IVF 查询参数 | +| `zvec_query_params_flat_destroy(params)` | 参数 | void | 销毁 Flat 查询参数 | +| `zvec_query_params_union_destroy(params)` | 参数 | void | 销毁查询参数联合 | +| `zvec_query_params_set_index_type(params, type)` | 参数,类型 | `ZVecErrorCode` | 设置索引类型 | +| `zvec_query_params_set_radius(params, radius)` | 参数,半径 | `ZVecErrorCode` | 设置搜索半径 | +| `zvec_query_params_set_is_linear(params, linear)` | 参数,是否线性 | `ZVecErrorCode` | 设置线性搜索 | +| `zvec_query_params_set_is_using_refiner(params, refiner)` | 参数,是否优化器 | `ZVecErrorCode` | 设置优化器 | +| `zvec_query_params_hnsw_set_ef(params, ef)` | 参数,ef | `ZVecErrorCode` | 设置 ef | +| `zvec_query_params_ivf_set_nprobe(params, nprobe)` | 参数,nprobe | `ZVecErrorCode` | 设置 nprobe | +| `zvec_query_params_ivf_set_scale_factor(params, scale)` | 参数,缩放因子 | `ZVecErrorCode` | 设置缩放因子 | +| `zvec_collection_query(coll, query, &results, &count)` | 集合,查询,结果输出,数量输出 | `ZVecErrorCode` | 向量查询 | +| `zvec_collection_query_by_group(coll, query, &results, &groups, &count)` | 集合,分组查询,结果输出,分组值输出,数量输出 | `ZVecErrorCode` | 分组向量查询 | +| `zvec_collection_fetch(coll, pks, count, &docs, &found)` | 集合,主键数组,数量,文档输出,找到数量 | `ZVecErrorCode` | 按主键获取 | + +--- + +## 工具函数 + 
+### 类型转字符串 + +```c +// 数据类型转字符串 +const char *type_str = zvec_data_type_to_string(ZVEC_DATA_TYPE_VECTOR_FP32); +// 返回:"VECTOR_FP32" + +// 索引类型转字符串 +const char *idx_str = zvec_index_type_to_string(ZVEC_INDEX_TYPE_HNSW); +// 返回:"HNSW" + +// 距离类型转字符串 +const char *metric_str = zvec_metric_type_to_string(ZVEC_METRIC_TYPE_COSINE); +// 返回:"COSINE" + +// 错误码转字符串 +const char *err_str = zvec_error_code_to_string(ZVEC_ERROR_INVALID_ARGUMENT); +// 返回:"Invalid argument" +``` + +| 函数 | 参数 | 返回值 | 说明 | +|------|------|--------|------| +| `zvec_data_type_to_string(type)` | `ZVecDataType` | `const char*` | 数据类型转字符串 | +| `zvec_index_type_to_string(type)` | `ZVecIndexType` | `const char*` | 索引类型转字符串 | +| `zvec_metric_type_to_string(type)` | `ZVecMetricType` | `const char*` | 距离类型转字符串 | +| `zvec_error_code_to_string(code)` | `ZVecErrorCode` | `const char*` | 错误码转字符串 | + +--- + +## 完整示例 + +### 构建可搜索的向量数据库 + +```c +#include "zvec/c_api.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define DIM 768 +#define DOC_COUNT 1000 + +// 生成随机向量 +void generate_vector(float *vec, size_t dim) { + for (size_t i = 0; i < dim; i++) { + vec[i] = (float)rand() / RAND_MAX; + } +} + +int main() { + ZVecErrorCode rc; + + // ========== 1. 初始化 ========== + printf("Initializing ZVec...\n"); + rc = zvec_initialize(NULL); + if (rc != ZVEC_OK) { + fprintf(stderr, "Failed to initialize: %s\n", + zvec_error_code_to_string(rc)); + return 1; + } + printf("Version: %s\n", zvec_get_version()); + + // ========== 2. 
创建 Schema ========== + printf("Creating schema...\n"); + ZVecCollectionSchema *schema = zvec_collection_schema_create("documents"); + + // ID 字段 + ZVecFieldSchema *id_field = zvec_field_schema_create( + "id", ZVEC_DATA_TYPE_STRING, false, 0); + zvec_collection_schema_add_field(schema, id_field); + + // 向量字段 + ZVecFieldSchema *embedding_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, DIM); + zvec_collection_schema_add_field(schema, embedding_field); + + // 标题字段 + ZVecFieldSchema *title_field = zvec_field_schema_create( + "title", ZVEC_DATA_TYPE_STRING, true, 0); + ZVecInvertIndexParams *invert_params = zvec_index_params_invert_create( + true, true); // 启用范围优化和通配符 + zvec_field_schema_set_invert_index(title_field, invert_params); + zvec_collection_schema_add_field(schema, title_field); + + // 时间戳字段 + ZVecFieldSchema *ts_field = zvec_field_schema_create( + "timestamp", ZVEC_DATA_TYPE_INT64, true, 0); + zvec_collection_schema_add_field(schema, ts_field); + + // 验证 Schema + ZVecString *error_msg; + rc = zvec_collection_schema_validate(schema, &error_msg); + if (rc != ZVEC_OK) { + fprintf(stderr, "Invalid schema: %s\n", error_msg->data); + zvec_free_string(error_msg); + return 1; + } + + // ========== 3. 创建 Collection ========== + printf("Creating collection...\n"); + ZVecCollection *collection; + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + + rc = zvec_collection_create_and_open( + "./my_vector_db", schema, &options, &collection); + if (rc != ZVEC_OK) { + fprintf(stderr, "Failed to create collection: %s\n", + zvec_error_code_to_string(rc)); + return 1; + } + + // ========== 4. 
创建索引 ========== + printf("Creating HNSW index...\n"); + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, + ZVEC_QUANTIZE_TYPE_UNDEFINED, + 16, // m + 200, // ef_construction + 50 // ef_search + ); + rc = zvec_collection_create_hnsw_index(collection, "embedding", hnsw_params); + zvec_index_params_hnsw_destroy(hnsw_params); + + // ========== 5. 批量插入数据 ========== + printf("Inserting %d documents...\n", DOC_COUNT); + + ZVecDoc **docs = malloc(DOC_COUNT * sizeof(ZVecDoc*)); + float vectors[DOC_COUNT][DIM]; + + for (int i = 0; i < DOC_COUNT; i++) { + docs[i] = zvec_doc_create(); + + // 设置主键 + char pk[32]; + snprintf(pk, sizeof(pk), "doc_%06d", i); + zvec_doc_set_pk(docs[i], pk); + + // 生成随机向量 + generate_vector(vectors[i], DIM); + zvec_doc_add_field_by_value(docs[i], "embedding", + ZVEC_DATA_TYPE_VECTOR_FP32, vectors[i], sizeof(float) * DIM); + + // 添加标题 + char title[64]; + snprintf(title, sizeof(title), "Document Title %d", i); + zvec_doc_add_field_by_value(docs[i], "title", + ZVEC_DATA_TYPE_STRING, title, strlen(title) + 1); + + // 添加时间戳 + int64_t ts = 1700000000 + i * 1000; + zvec_doc_add_field_by_value(docs[i], "timestamp", + ZVEC_DATA_TYPE_INT64, &ts, sizeof(ts)); + } + + size_t success_count, error_count; + rc = zvec_collection_insert(collection, + (const ZVecDoc**)docs, DOC_COUNT, &success_count, &error_count); + printf("Inserted: %zu, Failed: %zu\n", success_count, error_count); + + // 清理文档 + zvec_docs_free(docs, DOC_COUNT); + free(docs); + + // 刷盘 + zvec_collection_flush(collection); + + // ========== 6. 
查询 ========== + printf("\nPerforming vector search...\n"); + + // 生成查询向量 + float query_vec[DIM]; + generate_vector(query_vec, DIM); + + // 创建查询 + ZVecVectorQuery query = ZVEC_VECTOR_QUERY( + "embedding", + ZVEC_FLOAT_ARRAY(query_vec, DIM), + 10, // topK + "timestamp > 1700500000" // 过滤条件 + ); + + // 执行查询 + ZVecDoc **results; + size_t result_count; + rc = zvec_collection_query(collection, &query, &results, &result_count); + + if (rc == ZVEC_OK) { + printf("Found %zu results:\n", result_count); + for (size_t i = 0; i < result_count; i++) { + const char *pk = zvec_doc_get_pk_pointer(results[i]); + float score = zvec_doc_get_score(results[i]); + + // 获取标题 + const char *title; + size_t title_size; + zvec_doc_get_field_value_copy(results[i], "title", + ZVEC_DATA_TYPE_STRING, (void**)&title, &title_size); + + printf(" [%zu] %s - score: %.4f - title: %s\n", + i, pk, score, title); + free((void*)title); + } + } + + // 释放结果 + zvec_docs_free(results, result_count); + + // ========== 7. 获取统计信息 ========== + printf("\nCollection statistics:\n"); + ZVecCollectionStats *stats; + rc = zvec_collection_get_stats(collection, &stats); + if (rc == ZVEC_OK) { + printf(" Total documents: %lu\n", stats->doc_count); + printf(" Index count: %zu\n", stats->index_count); + for (size_t i = 0; i < stats->index_count; i++) { + printf(" Index %zu: %s (%.1f%% complete)\n", + i, stats->index_names[i]->data, + stats->index_completeness[i] * 100); + } + zvec_collection_stats_destroy(stats); + } + + // ========== 8. 
清理 ========== + printf("\nCleaning up...\n"); + zvec_collection_close(collection); + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + zvec_shutdown(); + + printf("Done!\n"); + return 0; +} +``` + +### 编译示例 + +```bash +gcc -o example example.c -lzvec_c_api -I./include -L./lib +./example +``` + +--- + +## 附录 + +### 内存管理约定 + +| 创建函数 | 释放函数 | 说明 | +|----------|----------|------| +| `zvec_*_create()` | `zvec_*_destroy()` | 需要成对调用 | +| `zvec_collection_create_and_open()` | `zvec_collection_close()` + `zvec_collection_destroy()` | Collection 生命周期 | +| `zvec_doc_create()` | `zvec_doc_destroy()` | 文档生命周期 | +| `zvec_get_last_error(&msg)` | `free(msg)` | 错误消息需手动释放 | +| `zvec_doc_get_field_value_copy()` | `free()` 或 `zvec_free_uint8_array()` | 字段值副本需释放 | +| 查询返回的 `results` | `zvec_docs_free()` | 查询结果批量释放 | + +### 宏定义速查 + +```c +// 索引参数宏 +ZVEC_HNSW_PARAMS(metric, m, ef_construction, ef_search, quant) +ZVEC_IVF_PARAMS(metric, nlist, niters, soar, nprobe, quant) +ZVEC_FLAT_PARAMS(metric, quant) +ZVEC_INVERT_PARAMS(range_opt, wildcard) + +// 数据结构宏 +ZVEC_STRING(str) +ZVEC_STRING_VIEW(str) +ZVEC_FLOAT_ARRAY(data_ptr, len) +ZVEC_INT64_ARRAY(data_ptr, len) + +// 选项宏 +ZVEC_DEFAULT_OPTIONS() + +// 查询宏 +ZVEC_VECTOR_QUERY(field_name, query_vec, top_k, filter) + +// 文档字段宏 +ZVEC_DOC_FIELD(name, type, value_union) +``` + +### 最佳实践 + +1. **初始化检查**: 总是检查 `zvec_initialize()` 的返回值 +2. **错误处理**: 每次 API 调用后检查返回值,使用 `zvec_get_last_error()` 获取详情 +3. **资源释放**: 确保所有创建的资源都被正确释放 +4. **批量操作**: 使用批量插入/更新/删除提高性能 +5. **索引选择**: + - 小规模数据 (< 10 万): 使用 Flat 索引 + - 中等规模 (10 万 - 1000 万): 使用 HNSW 索引 + - 大规模 (> 1000 万): 使用 IVF 索引 +6. 
**查询优化**: 合理使用过滤条件减少扫描范围 diff --git a/src/c_api/CMakeLists.txt b/src/c_api/CMakeLists.txt new file mode 100644 index 00000000..565479ab --- /dev/null +++ b/src/c_api/CMakeLists.txt @@ -0,0 +1,168 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) +include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +include(GNUInstallDirs) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +# C API library source files +set(ZVEC_C_API_SOURCES + c_api.cc +) + +# C API library header files +set(ZVEC_C_API_HEADERS + ${PROJECT_SOURCE_DIR}/src/include/zvec/c_api.h +) + +# ============================================================================= +# Build FAT Shared Library (zvec_c_api.so) +# ============================================================================= +# BUILD_RELEASE_FAT_LIBS=ON: Fully self-contained, zero external dependencies +# Users only need -lzvec_c_api +# BUILD_RELEASE_FAT_LIBS=OFF: Development mode, third-party libs linked normally +# Allows parallel test execution without symbol conflicts +# +# Implementation: +# - Always embeds zvec_db, zvec_core, zvec_ailego via --whole-archive +# - For release: also embeds all third-party libs (rocksdb, glog, protobuf, etc.) 
+# - Uses --exclude-libs,ALL to hide third-party symbols from export +# ============================================================================= +add_library(zvec_c_api SHARED + ${ZVEC_C_API_SOURCES} + ${ZVEC_C_API_HEADERS} +) + +# Set library properties +set_target_properties(zvec_c_api PROPERTIES + OUTPUT_NAME "zvec_c_api" + POSITION_INDEPENDENT_CODE ON + # Hide all symbols by default, only export C API + CXX_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN ON +) + +find_package(Threads REQUIRED) + +# FAT mode: embed ALL libraries (including third-party) statically +# This creates a truly self-contained library with zero external dependencies +# Users only need to link libzvec_c_api.so without installing any dependencies +if(APPLE) + # Combine all libraries in a single target_link_libraries call + target_link_libraries(zvec_c_api + PRIVATE + # zvec static libraries + zvec_db + zvec_core + zvec_ailego + # Third-party libraries + roaring + Arrow::arrow_static + Arrow::parquet_static + Arrow::arrow_compute + Arrow::arrow_dataset + Arrow::arrow_acero + rocksdb + glog + libprotobuf + antlr4 + sparsehash + magic_enum + Threads::Threads + ${CMAKE_DL_LIBS} + ) + + # Then use target_link_libraries with -force_load on macOS + # This ensures all symbols from static libraries are included + # Note: sparsehash and magic_enum are header-only, skip them + target_link_libraries(zvec_c_api PRIVATE + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + -Wl,-force_load,$ + ) + +else() + target_link_libraries(zvec_c_api + PRIVATE + # Force load all zvec static libraries (extract all objects) + "-Wl,--whole-archive" + zvec_db + zvec_core + zvec_ailego + "-Wl,--no-whole-archive" + # Force load ALL third-party libraries for zero-dependency deployment + "-Wl,--whole-archive" + roaring + 
Arrow::arrow_static + Arrow::parquet_static + Arrow::arrow_compute + Arrow::arrow_dataset + Arrow::arrow_acero + rocksdb + glog + libprotobuf + antlr4 + sparsehash + magic_enum + "-Wl,--no-whole-archive" + Threads::Threads + ${CMAKE_DL_LIBS} + ) +endif() + +# Include directories +target_include_directories(zvec_c_api + PUBLIC + $ + $ + PRIVATE + ${PROJECT_SOURCE_DIR}/src +) + +# Compile options +target_compile_options(zvec_c_api PRIVATE + $<$:-Wall -Wextra -Wpedantic> + $<$:-Wall -Wextra -Wpedantic> +) + +# ============================================================================= +# Installation Rules +# ============================================================================= + +# Install shared library +install(TARGETS zvec_c_api + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} +) + +# Install headers +install(FILES ${PROJECT_SOURCE_DIR}/src/include/zvec/c_api.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/zvec +) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc new file mode 100644 index 00000000..20834a20 --- /dev/null +++ b/src/c_api/c_api.cc @@ -0,0 +1,5650 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "zvec/c_api.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// ============================================================================= +// RAII Helpers and Error Handling Macros +// ============================================================================= + +namespace { + +// RAII guard for malloc-allocated memory +template +struct MallocGuard { + T *ptr; + explicit MallocGuard(T *p = nullptr) : ptr(p) {} + ~MallocGuard() { + if (ptr) std::free(ptr); + } + MallocGuard(const MallocGuard &) = delete; + MallocGuard &operator=(const MallocGuard &) = delete; + MallocGuard(MallocGuard &&other) noexcept : ptr(other.ptr) { + other.ptr = nullptr; + } + MallocGuard &operator=(MallocGuard &&other) noexcept { + if (this != &other) { + if (ptr) std::free(ptr); + ptr = other.ptr; + other.ptr = nullptr; + } + return *this; + } + T *get() const { + return ptr; + } + T *release() { + T *p = ptr; + ptr = nullptr; + return p; + } + T **ptr_ptr() { + return &ptr; + } +}; + +// RAII guard for C++ objects allocated with new +template +struct DeleteGuard { + T *ptr; + explicit DeleteGuard(T *p = nullptr) : ptr(p) {} + ~DeleteGuard() { + delete ptr; + } + DeleteGuard(const DeleteGuard &) = delete; + DeleteGuard &operator=(const DeleteGuard &) = delete; + DeleteGuard(DeleteGuard &&other) noexcept : ptr(other.ptr) { + other.ptr = nullptr; + } + T *get() const { + return ptr; + } + T *release() { + T *p = ptr; + ptr = nullptr; + return p; + } +}; + +// RAII guard for array allocated with new[] +template +struct DeleteArrayGuard { + T *ptr; + explicit DeleteArrayGuard(T *p = nullptr) : ptr(p) {} + ~DeleteArrayGuard() { + delete[] ptr; + } + DeleteArrayGuard(const DeleteArrayGuard &) = delete; + DeleteArrayGuard &operator=(const DeleteArrayGuard &) = delete; + DeleteArrayGuard(DeleteArrayGuard &&other) noexcept : 
ptr(other.ptr) { + other.ptr = nullptr; + } + T *get() const { + return ptr; + } + T *release() { + T *p = ptr; + ptr = nullptr; + return p; + } +}; + +} // namespace + +// Error checking macros - these preserve __LINE__ accuracy +#define ZVEC_CHECK_NOTNULL(ptr, error_code, msg) \ + if (!(ptr)) { \ + set_last_error_details(error_code, msg, __FILE__, __LINE__, __FUNCTION__); \ + return nullptr; \ + } + +#define ZVEC_CHECK_NOTNULL_ERRCODE(ptr, error_code, msg) \ + if (!(ptr)) { \ + set_last_error_details(error_code, msg, __FILE__, __LINE__, __FUNCTION__); \ + return (error_code); \ + } + +#define ZVEC_CHECK_COND(cond, error_code, msg) \ + if (cond) { \ + set_last_error_details(error_code, msg, __FILE__, __LINE__, __FUNCTION__); \ + return nullptr; \ + } + +#define ZVEC_CHECK_COND_ERRCODE(cond, error_code, msg) \ + if (cond) { \ + set_last_error_details(error_code, msg, __FILE__, __LINE__, __FUNCTION__); \ + return (error_code); \ + } + +// For void functions (no return value): +#define ZVEC_TRY_BEGIN_VOID try { +#define ZVEC_CATCH_END_VOID \ + } \ + catch (const std::exception &e) { \ + set_last_error(std::string("Exception: ") + e.what()); \ + } + +// For functions returning pointer - complete try-catch wrapper +// Usage: ZVEC_TRY_RETURN_NULL("error msg", code...) +// Note: Use variadic macro to handle commas in template arguments +#define ZVEC_TRY_RETURN_NULL(msg, ...) \ + try { \ + { __VA_ARGS__ } \ + } catch (const std::bad_alloc &e) { \ + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ + std::string(msg) + ": " + e.what(), __FILE__, \ + __LINE__, __FUNCTION__); \ + return nullptr; \ + } catch (const std::exception &e) { \ + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, \ + std::string(msg) + ": " + e.what(), __FILE__, \ + __LINE__, __FUNCTION__); \ + return nullptr; \ + } + +// For functions returning ErrorCode +// Usage: ZVEC_TRY_RETURN_ERROR("error msg", code...) 
+// Note: Use variadic macro to handle commas in template arguments +#define ZVEC_TRY_RETURN_ERROR(msg, ...) \ + try { \ + { __VA_ARGS__ } \ + } catch (const std::bad_alloc &e) { \ + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ + std::string(msg) + ": " + e.what(), __FILE__, \ + __LINE__, __FUNCTION__); \ + return ZVEC_ERROR_RESOURCE_EXHAUSTED; \ + } catch (const std::exception &e) { \ + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, \ + std::string(msg) + ": " + e.what(), __FILE__, \ + __LINE__, __FUNCTION__); \ + return ZVEC_ERROR_INTERNAL_ERROR; \ + } + +// For functions returning scalar values (int, float, size_t, etc.) +// Usage: ZVEC_TRY_RETURN_SCALAR("error msg", error_value, code...) +// Note: Use variadic macro to handle commas in template arguments +#define ZVEC_TRY_RETURN_SCALAR(msg, error_val, ...) \ + try { \ + { __VA_ARGS__ } \ + } catch (const std::bad_alloc &e) { \ + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, \ + std::string(msg) + ": " + e.what(), __FILE__, \ + __LINE__, __FUNCTION__); \ + return (error_val); \ + } catch (const std::exception &e) { \ + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, \ + std::string(msg) + ": " + e.what(), __FILE__, \ + __LINE__, __FUNCTION__); \ + return (error_val); \ + } + +// Global status flags +static std::atomic g_initialized{false}; +static std::mutex g_init_mutex; + +// Thread-local storage for error information +static thread_local std::string last_error_message; +static thread_local ZVecErrorDetails last_error_details; + +// Helper function: set error information +static void set_last_error(const std::string &msg) { + last_error_message = msg; + + last_error_details.code = ZVEC_ERROR_UNKNOWN; + last_error_details.message = last_error_message.c_str(); + last_error_details.file = nullptr; + last_error_details.line = 0; + last_error_details.function = nullptr; +} + +// Error setting function with detailed information +static void set_last_error_details(ZVecErrorCode code, const 
std::string &msg, + const char *file = nullptr, int line = 0, + const char *function = nullptr) { + last_error_message = msg; + last_error_details.code = code; + last_error_details.message = last_error_message.c_str(); + last_error_details.file = file; + last_error_details.line = line; + last_error_details.function = function; +} + +// ============================================================================= +// Version information interface implementation +// ============================================================================= + +// Store dynamically generated version information +static std::string g_version_info; +static std::mutex g_version_mutex; + +const char *zvec_get_version(void) { + std::lock_guard lock(g_version_mutex); + + if (g_version_info.empty()) { + ZVEC_TRY_BEGIN_VOID + std::string version = ZVEC_VERSION_STRING; + + // Try to get Git information + std::string git_info; +#ifdef ZVEC_GIT_DESCRIBE + git_info = ZVEC_GIT_DESCRIBE; +#elif defined(ZVEC_GIT_COMMIT_HASH) + git_info = std::string("g") + ZVEC_GIT_COMMIT_HASH; +#endif + + if (!git_info.empty()) { + version += "-" + git_info; + } + + version += + " (built " + std::string(__DATE__) + " " + std::string(__TIME__) + ")"; + + g_version_info = version; + ZVEC_CATCH_END_VOID + } + + return g_version_info.c_str(); +} + +bool zvec_check_version(int major, int minor, int patch) { + if (major < 0 || minor < 0 || patch < 0) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Version numbers must be non-negative", __FILE__, + __LINE__, __FUNCTION__); + return false; + } + + if (ZVEC_VERSION_MAJOR > major) return true; + if (ZVEC_VERSION_MAJOR < major) return false; + + if (ZVEC_VERSION_MINOR > minor) return true; + if (ZVEC_VERSION_MINOR < minor) return false; + + return ZVEC_VERSION_PATCH >= patch; +} + +int zvec_get_version_major(void) { + return ZVEC_VERSION_MAJOR; +} + +int zvec_get_version_minor(void) { + return ZVEC_VERSION_MINOR; +} + +int zvec_get_version_patch(void) { + return 
ZVEC_VERSION_PATCH; +} + +// ============================================================================= +// String management functions implementation +// ============================================================================= + +ZVecString *zvec_string_create(const char *str) { + if (!str) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + size_t len = strlen(str); + ZVecString *zstr = static_cast(malloc(sizeof(ZVecString))); + if (!zstr) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecString", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + char *data_buffer = static_cast(malloc(len + 1)); + if (!data_buffer) { + free(zstr); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for string data", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + memcpy(data_buffer, str, len + 1); + zstr->data = data_buffer; + zstr->length = len; + zstr->capacity = len + 1; + return zstr; +} + +ZVecString *zvec_string_create_from_view(const ZVecStringView *view) { + if (!view || !view->data) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String view or data cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + ZVecString *zstr = static_cast(malloc(sizeof(ZVecString))); + if (!zstr) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecString", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + char *data_buffer = static_cast(malloc(view->length + 1)); + if (!data_buffer) { + free(zstr); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for string data", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + memcpy(data_buffer, view->data, view->length); + data_buffer[view->length] = '\0'; + zstr->data = data_buffer; + zstr->length = 
view->length; + zstr->capacity = view->length + 1; + + return zstr; +} + +ZVecString *zvec_bin_create(const uint8_t *data, size_t length) { + if (!data) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Binary data pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + ZVecString *zstr = static_cast(malloc(sizeof(ZVecString))); + if (!zstr) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecString", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + char *data_buffer = static_cast(malloc(length + 1)); + if (!data_buffer) { + free(zstr); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for binary data", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + memcpy(data_buffer, data, length); + data_buffer[length] = '\0'; + zstr->data = data_buffer; + zstr->length = length; + zstr->capacity = length + 1; + + return zstr; +} + +ZVecString *zvec_string_copy(const ZVecString *str) { + if (!str || !str->data) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Source string or data cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + return zvec_string_create(str->data); +} + +const char *zvec_string_c_str(const ZVecString *str) { + if (!str) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + return str->data; +} + +size_t zvec_string_length(const ZVecString *str) { + if (!str) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return 0; + } + + return str->length; +} + +int zvec_string_compare(const ZVecString *str1, const ZVecString *str2) { + if (!str1 || !str2) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointers cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return -1; + } + + if 
(!str1->data || !str2->data) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String data cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return -1; + } + + return strcmp(str1->data, str2->data); +} + +// ============================================================================= +// Configuration-related functions implementation +// ============================================================================= + +ZVecConsoleLogConfig *zvec_config_console_log_create(ZVecLogLevel level) { + ZVecConsoleLogConfig *config = + static_cast(malloc(sizeof(ZVecConsoleLogConfig))); + if (!config) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecConsoleLogConfig", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + config->level = level; + return config; +} + +ZVecFileLogConfig *zvec_config_file_log_create(ZVecLogLevel level, + const char *dir, + const char *basename, + uint32_t file_size, + uint32_t overdue_days) { + if (!dir || !basename) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Directory or basename cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + ZVecFileLogConfig *config = + static_cast(malloc(sizeof(ZVecFileLogConfig))); + if (!config) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecFileLogConfig", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + config->level = level; + ZVecString *dir_str = zvec_string_create(dir); + ZVecString *basename_str = zvec_string_create(basename); + + if (!dir_str || !basename_str) { + if (dir_str) zvec_free_string(dir_str); + if (basename_str) zvec_free_string(basename_str); + free(config); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create strings for file log config", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + config->dir = *dir_str; + config->basename = *basename_str; + config->file_size = file_size; + 
config->overdue_days = overdue_days; + + // Free the temporary string wrappers (data is copied by value) + free(dir_str); + free(basename_str); + + return config; +} + +ZVecConfigData *zvec_config_data_create(void) { + ZVecConfigData *config = + static_cast(malloc(sizeof(ZVecConfigData))); + if (!config) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecConfigData", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + ZVecConsoleLogConfig *log_config = + zvec_config_console_log_create(ZVEC_LOG_LEVEL_WARN); + if (!log_config) { + free(config); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create console log config", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + config->log_config = log_config; + config->log_type = ZVEC_LOG_TYPE_CONSOLE; + + // Set default values from C++ ConfigData + zvec::GlobalConfig::ConfigData config_data; + config->memory_limit_bytes = config_data.memory_limit_bytes; + config->query_thread_count = config_data.query_thread_count; + config->invert_to_forward_scan_ratio = + config_data.invert_to_forward_scan_ratio; + config->brute_force_by_keys_ratio = config_data.brute_force_by_keys_ratio; + config->optimize_thread_count = config_data.optimize_thread_count; + + return config; +} + +void zvec_config_console_log_destroy(ZVecConsoleLogConfig *config) { + if (config) { + free(config); + } +} + +void zvec_config_file_log_destroy(ZVecFileLogConfig *config) { + if (config) { + if (config->dir.data) free((void *)config->dir.data); + if (config->basename.data) free((void *)config->basename.data); + free(config); + } +} + +void zvec_config_data_destroy(ZVecConfigData *config) { + if (config->log_config) { + if (config->log_type == ZVEC_LOG_TYPE_CONSOLE) { + zvec_config_console_log_destroy( + (ZVecConsoleLogConfig *)config->log_config); + } else { + zvec_config_file_log_destroy((ZVecFileLogConfig *)config->log_config); + } + } + free(config); +} + +ZVecErrorCode 
zvec_config_data_set_memory_limit(ZVecConfigData *config, + uint64_t memory_limit_bytes) { + if (!config) { + set_last_error("Config data pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + config->memory_limit_bytes = memory_limit_bytes; + return ZVEC_OK; +} + +ZVecErrorCode zvec_config_data_set_log_config(ZVecConfigData *config, + ZVecLogType log_type, + void *log_config) { + if (!config || !log_config) { + set_last_error("Config data pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (config->log_config) { + if (config->log_type == ZVEC_LOG_TYPE_CONSOLE) { + zvec_config_console_log_destroy( + (ZVecConsoleLogConfig *)config->log_config); + } else { + zvec_config_file_log_destroy((ZVecFileLogConfig *)config->log_config); + } + } + + config->log_type = log_type; + config->log_config = log_config; + return ZVEC_OK; +} + +ZVecErrorCode zvec_config_data_set_query_thread_count(ZVecConfigData *config, + uint32_t thread_count) { + if (!config) { + set_last_error("Config data pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + config->query_thread_count = thread_count; + return ZVEC_OK; +} + +ZVecErrorCode zvec_config_data_set_optimize_thread_count( + ZVecConfigData *config, uint32_t thread_count) { + if (!config) { + set_last_error("Config data pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + config->optimize_thread_count = thread_count; + return ZVEC_OK; +} + + +// ============================================================================= +// Initialization and cleanup interface implementation +// ============================================================================= + +ZVecErrorCode zvec_initialize(const ZVecConfigData *config) { + std::lock_guard lock(g_init_mutex); + + if (g_initialized.load()) { + set_last_error_details(ZVEC_ERROR_ALREADY_EXISTS, + "Library already initialized"); + return ZVEC_ERROR_ALREADY_EXISTS; + } + + ZVEC_TRY_RETURN_ERROR( + "Initialization failed", + // Convert to C++ 
configuration object + if (config) { + zvec::GlobalConfig::ConfigData cpp_config{}; + cpp_config.memory_limit_bytes = config->memory_limit_bytes; + cpp_config.query_thread_count = config->query_thread_count; + cpp_config.invert_to_forward_scan_ratio = + config->invert_to_forward_scan_ratio; + cpp_config.brute_force_by_keys_ratio = + config->brute_force_by_keys_ratio; + cpp_config.optimize_thread_count = config->optimize_thread_count; + + // Set log configuration + if (config->log_config) { + std::shared_ptr log_config; + + switch (config->log_type) { + case ZVEC_LOG_TYPE_CONSOLE: { + ZVecConsoleLogConfig *console_config = + (ZVecConsoleLogConfig *)config->log_config; + auto console_level = static_cast( + console_config->level); + log_config = + std::make_shared( + console_level); + break; + } + case ZVEC_LOG_TYPE_FILE: { + ZVecFileLogConfig *file_config = + (ZVecFileLogConfig *)config->log_config; + auto file_level = + static_cast(file_config->level); + std::string dir(file_config->dir.data, file_config->dir.length); + std::string basename(file_config->basename.data, + file_config->basename.length); + log_config = std::make_shared( + file_level, dir, basename); + break; + } + default: + throw std::runtime_error("Unknown log type"); + } + cpp_config.log_config = log_config; + } + // Initialize global configuration + auto status = zvec::GlobalConfig::Instance().Initialize(cpp_config); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + } else { + // Initialize with default configuration + zvec::GlobalConfig::ConfigData default_config; + auto status = zvec::GlobalConfig::Instance().Initialize(default_config); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + } g_initialized.store(true); + return ZVEC_OK;) +} + +ZVecErrorCode zvec_shutdown(void) { + std::lock_guard lock(g_init_mutex); + + if (!g_initialized.load()) { + set_last_error_details(ZVEC_ERROR_FAILED_PRECONDITION, 
+ "Library not initialized"); + return ZVEC_ERROR_FAILED_PRECONDITION; + } + + ZVEC_TRY_RETURN_ERROR("Shutdown failed", g_initialized.store(false); + return ZVEC_OK;) +} + +ZVecErrorCode zvec_is_initialized(bool *initialized) { + if (!initialized) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Initialized flag pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *initialized = g_initialized.load(); + return ZVEC_OK; +} + +// ============================================================================= +// Error handling interface implementation +// ============================================================================= + +ZVecErrorCode zvec_get_last_error_details(ZVecErrorDetails *error_details) { + if (!error_details) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Error details pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *error_details = last_error_details; + return ZVEC_OK; +} + +void zvec_clear_error(void) { + last_error_message.clear(); + last_error_details = {}; +} + +// Helper functions: convert internal status to error code +static ZVecErrorCode status_to_error_code(const zvec::Status &status) { + if (status.code() < zvec::StatusCode::OK || + status.code() > zvec::StatusCode::UNKNOWN) { + set_last_error("Unexpected status code: " + + std::to_string(static_cast(status.code()))); + return ZVEC_ERROR_UNKNOWN; + } + + return static_cast(status.code()); +} + +// Helper function: handle Expected results +template +static ZVecErrorCode handle_expected_result( + const tl::expected &result, T *out_value = nullptr) { + if (result.has_value()) { + if (out_value) { + *out_value = result.value(); + } + return ZVEC_OK; + } else { + set_last_error(result.error().message()); + return status_to_error_code(result.error()); + } +} + +// Helper function: copy strings +static char *copy_string(const std::string &str) { + if 
(str.empty()) return nullptr; + char *copy = static_cast(malloc(str.length() + 1)); + strcpy(copy, str.c_str()); + return copy; +} + +// Helper function: free write results returned by detailed DML APIs. +static void free_write_results_internal(ZVecWriteResult *results, + size_t result_count) { + if (!results) { + return; + } + for (size_t i = 0; i < result_count; ++i) { + if (results[i].pk) { + free((void *)results[i].pk); + results[i].pk = nullptr; + } + if (results[i].message) { + free((void *)results[i].message); + results[i].message = nullptr; + } + } + free(results); +} + +// Helper function: convert per-doc statuses to C API write result array. +static ZVecErrorCode build_write_results( + const std::vector &statuses, + const std::vector &pks, ZVecWriteResult **results, + size_t *result_count) { + if (!results || !result_count) { + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *result_count = statuses.size(); + if (*result_count == 0) { + *results = nullptr; + return ZVEC_OK; + } + + *results = static_cast( + calloc(*result_count, sizeof(ZVecWriteResult))); + if (!*results) { + set_last_error("Failed to allocate memory for write results"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + for (size_t i = 0; i < *result_count; ++i) { + const std::string pk = i < pks.size() ? 
pks[i] : std::string(); + const std::string message = statuses[i].message(); + (*results)[i].pk = copy_string(pk); + (*results)[i].message = copy_string(message); + (*results)[i].code = status_to_error_code(statuses[i]); + } + + return ZVEC_OK; +} + +static std::vector collect_doc_pks(const ZVecDoc **docs, + size_t doc_count) { + std::vector pks; + pks.reserve(doc_count); + for (size_t i = 0; i < doc_count; ++i) { + if (!docs[i]) { + pks.emplace_back(""); + continue; + } + auto doc_ptr = + reinterpret_cast *>(docs[i]); + pks.emplace_back((*doc_ptr)->pk_ref()); + } + return pks; +} + +static zvec::DataType convert_data_type(ZVecDataType zvec_type) { + if (zvec_type < ZVEC_DATA_TYPE_UNDEFINED || + zvec_type > ZVEC_DATA_TYPE_ARRAY_DOUBLE) { + return zvec::DataType::UNDEFINED; + } + + return static_cast(zvec_type); +} + +static ZVecDataType convert_zvec_data_type(zvec::DataType cpp_type) { + if (cpp_type < zvec::DataType::UNDEFINED || + cpp_type > zvec::DataType::ARRAY_DOUBLE) { + return ZVEC_DATA_TYPE_UNDEFINED; + } + + return static_cast(cpp_type); +} + +// Helper function: convert metric type +static zvec::MetricType convert_metric_type(ZVecMetricType metric_type) { + if (metric_type < ZVEC_METRIC_TYPE_UNDEFINED || + metric_type > ZVEC_METRIC_TYPE_MIPSL2) { + return zvec::MetricType::UNDEFINED; + } + + return static_cast(metric_type); +} + +// Helper function: convert ZVecIndexType to internal IndexType +static zvec::IndexType convert_index_type(ZVecIndexType zvec_type) { + if (zvec_type < ZVEC_INDEX_TYPE_UNDEFINED || + zvec_type > ZVEC_INDEX_TYPE_INVERT) { + return zvec::IndexType::UNDEFINED; + } + + return static_cast(zvec_type); +} + +// Helper function: convert ZVecQuantizeType to internal QuantizeType +static zvec::QuantizeType convert_quantize_type(ZVecQuantizeType zvec_type) { + if (zvec_type < ZVEC_QUANTIZE_TYPE_UNDEFINED || + zvec_type > ZVEC_QUANTIZE_TYPE_INT4) { + return zvec::QuantizeType::UNDEFINED; + } + + return static_cast(zvec_type); +} + +// Helper 
function: set field index params +static zvec::Status set_field_index_params(zvec::FieldSchema::Ptr &field_schema, + const ZVecFieldSchema *zvec_field) { + if (!zvec_field->has_index) { + return zvec::Status::OK(); + } + + const ZVecIndexParams *params = &zvec_field->index_params; + + switch (params->index_type) { + case ZVEC_INDEX_TYPE_HNSW: { + auto metric = convert_metric_type(params->metric_type); + auto quantize = convert_quantize_type(params->quantize_type); + auto index_params = std::make_shared( + metric, params->hnsw.m, params->hnsw.ef_construction, quantize); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + auto metric = convert_metric_type(params->metric_type); + auto quantize = convert_quantize_type(params->quantize_type); + auto index_params = + std::make_shared(metric, quantize); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_INVERT: { + auto index_params = std::make_shared( + params->invert.enable_range_optimization, + params->invert.enable_extended_wildcard); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_IVF: { + auto metric = convert_metric_type(params->metric_type); + auto quantize = convert_quantize_type(params->quantize_type); + auto index_params = std::make_shared( + metric, params->ivf.n_list, params->ivf.n_iters, params->ivf.use_soar, + quantize); + field_schema->set_index_params(index_params); + break; + } + default: + break; + } + + return zvec::Status::OK(); +} + +// ============================================================================= +// Memory Management interface implementation +// ============================================================================= + +void zvec_free_string(ZVecString *str) { + if (str) { + if (str->data) { + free((void *)str->data); + } + free(str); + } +} + +ZVecStringArray *zvec_string_array_create(size_t count) { + ZVecStringArray *array = (ZVecStringArray 
*)malloc(sizeof(ZVecStringArray)); + array->count = count; + array->strings = (ZVecString *)malloc(sizeof(ZVecString) * count); + memset(array->strings, 0, sizeof(ZVecString) * count); + return array; +} + +void zvec_string_array_add(ZVecStringArray *array, size_t idx, + const char *str) { + if (idx >= array->count) return; + size_t len = strlen(str); + array->strings[idx].data = (char *)malloc(len + 1); + memcpy(array->strings[idx].data, str, len + 1); + array->strings[idx].length = len; + array->strings[idx].capacity = len + 1; +} + +void zvec_string_array_destroy(ZVecStringArray *array) { + if (!array) return; + for (size_t i = 0; i < array->count; i++) { + free((void *)array->strings[i].data); + } + free(array->strings); + free(array); +} + + +// Byte array helper functions +ZVecMutableByteArray *zvec_byte_array_create(size_t capacity) { + ZVecMutableByteArray *array = + (ZVecMutableByteArray *)malloc(sizeof(ZVecMutableByteArray)); + if (!array) return nullptr; + + array->data = (uint8_t *)malloc(capacity); + if (!array->data) { + free(array); + return nullptr; + } + + array->length = 0; + array->capacity = capacity; + memset(array->data, 0, capacity); + return array; +} + +void zvec_byte_array_destroy(ZVecMutableByteArray *array) { + if (!array) return; + if (array->data) { + free(array->data); + } + free(array); +} + +// Float array helper functions +ZVecFloatArray *zvec_float_array_create(size_t count) { + ZVecFloatArray *array = (ZVecFloatArray *)malloc(sizeof(ZVecFloatArray)); + if (!array) return nullptr; + + array->data = (const float *)malloc(sizeof(float) * count); + if (!array->data) { + free(array); + return nullptr; + } + + array->length = count; + memset((void *)array->data, 0, sizeof(float) * count); + return array; +} + +void zvec_float_array_destroy(ZVecFloatArray *array) { + if (!array) return; + if (array->data) { + free((void *)array->data); + } + free(array); +} + +// Int64 array helper functions +ZVecInt64Array 
*zvec_int64_array_create(size_t count) { + ZVecInt64Array *array = (ZVecInt64Array *)malloc(sizeof(ZVecInt64Array)); + if (!array) return nullptr; + + array->data = (const int64_t *)malloc(sizeof(int64_t) * count); + if (!array->data) { + free(array); + return nullptr; + } + + array->length = count; + memset((void *)array->data, 0, sizeof(int64_t) * count); + return array; +} + +void zvec_int64_array_destroy(ZVecInt64Array *array) { + if (!array) return; + if (array->data) { + free((void *)array->data); + } + free(array); +} + +void zvec_free_float_array(float *array) { + if (array) { + free(array); + } +} + +void zvec_free_str_array(char **array, size_t count) { + if (!array) return; + + // If count is 0, only free the string array itself, don't process internal + // strings + if (count == 0) { + free(array); + return; + } + + for (size_t i = 0; i < count; ++i) { + if (array[i]) { // Only free when string pointer is not null + free(array[i]); + } + } + free(array); +} + +ZVecErrorCode zvec_get_last_error(char **error_msg) { + if (!error_msg) { + set_last_error("Invalid argument: error_msg cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *error_msg = copy_string(last_error_message); + return ZVEC_OK; +} + +void zvec_free_uint8_array(uint8_t *array) { + if (array) { + free(array); + } +} + +void zvec_free_ptr(void *ptr) { + if (ptr) { + free(ptr); + } +} + +void zvec_free_field_schema(ZVecFieldSchema *field_schema) { + if (field_schema) { + // index_params is embedded, no need to free + free(field_schema); + } +} + +// ============================================================================= +// Index parameters management interface implementation +// ============================================================================= + +void zvec_index_params_init(ZVecIndexParams *params, ZVecIndexType index_type, + ZVecMetricType metric_type) { + if (!params) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Index params pointer cannot be 
null", __FILE__, + __LINE__, __FUNCTION__); + return; + } + + // Zero-initialize the entire structure + memset(params, 0, sizeof(ZVecIndexParams)); + + params->index_type = index_type; + params->metric_type = metric_type; + params->quantize_type = ZVEC_QUANTIZE_TYPE_UNDEFINED; + + // Set default values based on index type + switch (index_type) { + case ZVEC_INDEX_TYPE_INVERT: + params->invert.enable_range_optimization = false; + params->invert.enable_extended_wildcard = false; + break; + + case ZVEC_INDEX_TYPE_HNSW: + params->hnsw.m = 16; + params->hnsw.ef_construction = 200; + params->hnsw.ef_search = 50; + break; + + case ZVEC_INDEX_TYPE_FLAT: + // No additional parameters for Flat + break; + + case ZVEC_INDEX_TYPE_IVF: + params->ivf.n_list = 100; + params->ivf.n_iters = 10; + params->ivf.use_soar = false; + params->ivf.n_probe = 10; + break; + + default: + set_last_error_details(ZVEC_ERROR_NOT_SUPPORTED, "Unsupported index type", + __FILE__, __LINE__, __FUNCTION__); + break; + } +} + +void zvec_index_params_set_hnsw(ZVecIndexParams *params, int m, + int ef_construction, int ef_search) { + if (!params || params->index_type != ZVEC_INDEX_TYPE_HNSW) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not HNSW index type", __FILE__, + __LINE__, __FUNCTION__); + return; + } + params->hnsw.m = m; + params->hnsw.ef_construction = ef_construction; + params->hnsw.ef_search = ef_search; +} + +void zvec_index_params_set_ivf(ZVecIndexParams *params, int n_list, int n_iters, + bool use_soar, int n_probe) { + if (!params || params->index_type != ZVEC_INDEX_TYPE_IVF) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not IVF index type", __FILE__, + __LINE__, __FUNCTION__); + return; + } + params->ivf.n_list = n_list; + params->ivf.n_iters = n_iters; + params->ivf.use_soar = use_soar; + params->ivf.n_probe = n_probe; +} + +void zvec_index_params_set_invert(ZVecIndexParams *params, + bool enable_range_opt, bool enable_wildcard) 
{ + if (!params || params->index_type != ZVEC_INDEX_TYPE_INVERT) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not INVERT index type", __FILE__, + __LINE__, __FUNCTION__); + return; + } + params->invert.enable_range_optimization = enable_range_opt; + params->invert.enable_extended_wildcard = enable_wildcard; +} + +// ============================================================================= +// FieldSchema management interface implementation +// ============================================================================= + +ZVecFieldSchema *zvec_field_schema_create(const char *name, + ZVecDataType data_type, bool nullable, + uint32_t dimension) { + if (!name) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field name cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + ZVecFieldSchema *schema = + static_cast(malloc(sizeof(ZVecFieldSchema))); + if (!schema) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecFieldSchema", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + schema->name = zvec_string_create(name); + if (!schema->name) { + free(schema); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create string for field name", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + schema->data_type = data_type; + schema->nullable = nullable; + schema->dimension = dimension; + memset(&schema->index_params, 0, sizeof(ZVecIndexParams)); + schema->has_index = false; + + return schema; +} + +void zvec_field_schema_destroy(ZVecFieldSchema *schema) { + if (schema) { + zvec_free_string(schema->name); + // index_params is embedded, no need to free + free(schema); + } +} + +ZVecErrorCode zvec_field_schema_set_index_params( + ZVecFieldSchema *schema, const ZVecIndexParams *index_params) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null", __FILE__, + __LINE__, 
__FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!index_params) { + memset(&schema->index_params, 0, sizeof(ZVecIndexParams)); + schema->has_index = false; + return ZVEC_OK; + } + + schema->index_params = *index_params; + schema->has_index = true; + + return ZVEC_OK; +} + +void zvec_field_schema_set_invert_index(ZVecFieldSchema *field_schema, + const ZVecIndexParams *invert_params) { + if (field_schema && invert_params) { + field_schema->index_params = *invert_params; + field_schema->index_params.index_type = ZVEC_INDEX_TYPE_INVERT; + field_schema->has_index = true; + } +} + +void zvec_field_schema_set_hnsw_index(ZVecFieldSchema *field_schema, + const ZVecIndexParams *hnsw_params) { + if (field_schema && hnsw_params) { + field_schema->index_params = *hnsw_params; + field_schema->index_params.index_type = ZVEC_INDEX_TYPE_HNSW; + field_schema->has_index = true; + } +} + +void zvec_field_schema_set_flat_index(ZVecFieldSchema *field_schema, + const ZVecIndexParams *flat_params) { + if (field_schema && flat_params) { + field_schema->index_params = *flat_params; + field_schema->index_params.index_type = ZVEC_INDEX_TYPE_FLAT; + field_schema->has_index = true; + } +} + +void zvec_field_schema_set_ivf_index(ZVecFieldSchema *field_schema, + const ZVecIndexParams *ivf_params) { + if (field_schema && ivf_params) { + field_schema->index_params = *ivf_params; + field_schema->index_params.index_type = ZVEC_INDEX_TYPE_IVF; + field_schema->has_index = true; + } +} + +static void zvec_field_schema_cleanup(ZVecFieldSchema *field_schema) { + if (!field_schema) return; + + // index_params is embedded, no need to free + zvec_free_string(field_schema->name); + field_schema->name = nullptr; +} + +// ============================================================================= +// CollectionOptions management interface implementation +// ============================================================================= + +void 
zvec_collection_options_init_default(ZVecCollectionOptions *options) { + if (!options) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection options pointer cannot be null", + __FILE__, __LINE__, __FUNCTION__); + return; + } + + options->enable_mmap = true; + options->max_buffer_size = zvec::DEFAULT_MAX_BUFFER_SIZE; + options->read_only = false; + options->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; +} + +// ============================================================================= +// CollectionSchema management interface implementation +// ============================================================================= + +ZVecCollectionSchema *zvec_collection_schema_create(const char *name) { + if (!name) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection name cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + ZVecCollectionSchema *schema = + static_cast(malloc(sizeof(ZVecCollectionSchema))); + if (!schema) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecCollectionSchema", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + schema->name = zvec_string_create(name); + if (!schema->name) { + free(schema); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to create string for collection name", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + schema->fields = nullptr; + schema->field_count = 0; + schema->field_capacity = 0; + schema->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; + + return schema; +} + +void zvec_collection_schema_destroy(ZVecCollectionSchema *schema) { + if (schema) { + zvec_free_string(schema->name); + + if (schema->fields) { + for (size_t i = 0; i < schema->field_count; ++i) { + zvec_field_schema_destroy(schema->fields[i]); + } + free(schema->fields); + } + + free(schema); + } +} + +ZVecErrorCode zvec_collection_schema_add_field(ZVecCollectionSchema *schema, + 
ZVecFieldSchema *field) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field || !field->name) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field or field name cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + if (schema->fields[i]->name && field->name && + zvec_string_compare(schema->fields[i]->name, field->name) == 0) { + set_last_error_details( + ZVEC_ERROR_ALREADY_EXISTS, + std::string("Field '") + field->name->data + "' already exists", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_ALREADY_EXISTS; + } + } + + if (schema->field_count >= schema->field_capacity) { + size_t new_capacity = + schema->field_capacity == 0 ? 8 : schema->field_capacity * 2; + ZVecFieldSchema **new_fields = static_cast( + malloc(new_capacity * sizeof(ZVecFieldSchema *))); + if (!new_fields) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for fields", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + new_fields[i] = schema->fields[i]; + } + + free(schema->fields); + schema->fields = new_fields; + schema->field_capacity = new_capacity; + } + + schema->fields[schema->field_count] = field; + schema->field_count++; + + return ZVEC_OK; +} + +ZVecErrorCode zvec_collection_schema_add_fields(ZVecCollectionSchema *schema, + const ZVecFieldSchema *fields, + size_t field_count) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!fields && field_count > 0) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Fields array cannot be null 
when field_count > 0", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (field_count == 0) { + return ZVEC_OK; + } + + for (size_t i = 0; i < field_count; ++i) { + const ZVecFieldSchema &field = fields[i]; + if (!field.name || !field.name->data || field.name->length == 0) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + std::string("Field at index ") + + std::to_string(i) + " has invalid name", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + size_t total_needed = schema->field_count + field_count; + if (total_needed > schema->field_capacity) { + size_t new_capacity = schema->field_capacity; + while (new_capacity < total_needed) { + new_capacity = new_capacity == 0 ? 8 : new_capacity * 2; + } + + ZVecFieldSchema **new_fields = static_cast( + malloc(new_capacity * sizeof(ZVecFieldSchema *))); + if (!new_fields) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for fields", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + new_fields[i] = schema->fields[i]; + } + + free(schema->fields); + schema->fields = new_fields; + schema->field_capacity = new_capacity; + } + + for (size_t i = 0; i < field_count; ++i) { + const ZVecFieldSchema &src_field = fields[i]; + + ZVecFieldSchema *new_field = + static_cast(malloc(sizeof(ZVecFieldSchema))); + if (!new_field) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for new field", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + new_field->name = zvec_string_copy(src_field.name); + if (!new_field->name) { + free(new_field); + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to copy field name", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + new_field->data_type = src_field.data_type; + 
new_field->nullable = src_field.nullable; + new_field->dimension = src_field.dimension; + new_field->index_params = src_field.index_params; + new_field->has_index = src_field.has_index; + + schema->fields[schema->field_count] = new_field; + schema->field_count++; + } + + return ZVEC_OK; +} + +ZVecErrorCode zvec_collection_schema_remove_field(ZVecCollectionSchema *schema, + const char *field_name) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field_name) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field name cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + if (schema->fields[i]->name && + strcmp(schema->fields[i]->name->data, field_name) == 0) { + zvec_field_schema_destroy(schema->fields[i]); + + for (size_t j = i; j < schema->field_count - 1; ++j) { + schema->fields[j] = schema->fields[j + 1]; + } + + schema->field_count--; + return ZVEC_OK; + } + } + + set_last_error_details(ZVEC_ERROR_NOT_FOUND, + std::string("Field '") + field_name + "' not found", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_NOT_FOUND; +} + +ZVecErrorCode zvec_collection_schema_remove_fields( + ZVecCollectionSchema *schema, const char *const *field_names, + size_t field_count) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field_names && field_count > 0) { + set_last_error_details( + ZVEC_ERROR_INVALID_ARGUMENT, + "Field names array cannot be null when field_count > 0", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (field_count == 0) { + return ZVEC_OK; + } + + for (size_t i = 0; i < field_count; ++i) { + if 
(!field_names[i]) { + set_last_error_details( + ZVEC_ERROR_INVALID_ARGUMENT, + std::string("Field name at index ") + std::to_string(i) + " is null", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + std::vector remove_indices; + std::vector not_found_fields; + + for (size_t field_idx = 0; field_idx < field_count; ++field_idx) { + std::string target_name(field_names[field_idx]); + bool found = false; + + for (size_t i = 0; i < schema->field_count; ++i) { + if (schema->fields[i]->name && + strcmp(schema->fields[i]->name->data, target_name.c_str()) == 0) { + remove_indices.push_back(i); + found = true; + break; + } + } + + if (!found) { + not_found_fields.push_back(target_name); + } + } + + + if (!not_found_fields.empty()) { + std::string error_msg = "Fields not found: "; + for (size_t i = 0; i < not_found_fields.size(); ++i) { + error_msg += "'" + not_found_fields[i] + "'"; + if (i < not_found_fields.size() - 1) { + error_msg += ", "; + } + } + set_last_error_details(ZVEC_ERROR_NOT_FOUND, error_msg, __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_NOT_FOUND; + } + + std::sort(remove_indices.begin(), remove_indices.end(), + std::greater()); + + for (size_t remove_index : remove_indices) { + zvec_field_schema_destroy(schema->fields[remove_index]); + + for (size_t j = remove_index; j < schema->field_count - 1; ++j) { + schema->fields[j] = schema->fields[j + 1]; + } + + schema->field_count--; + } + + return ZVEC_OK; +} + +ZVecFieldSchema *zvec_collection_schema_find_field( + const ZVecCollectionSchema *schema, const char *field_name) { + if (!schema || !field_name) { + return nullptr; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + if (schema->fields[i]->name && + strcmp(schema->fields[i]->name->data, field_name) == 0) { + return schema->fields[i]; + } + } + + return nullptr; +} + +size_t zvec_collection_schema_get_field_count( + const ZVecCollectionSchema *schema) { + if (!schema) { + 
set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return 0; + } + + return schema->field_count; +} + +ZVecFieldSchema *zvec_collection_schema_get_field( + const ZVecCollectionSchema *schema, size_t index) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + if (index >= schema->field_count) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field index out of bounds", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + return schema->fields[index]; +} + +ZVecErrorCode zvec_collection_schema_set_max_doc_count_per_segment( + ZVecCollectionSchema *schema, uint64_t max_doc_count) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + schema->max_doc_count_per_segment = max_doc_count; + return ZVEC_OK; +} + +uint64_t zvec_collection_schema_get_max_doc_count_per_segment( + const ZVecCollectionSchema *schema) { + if (!schema) return 0; + return schema->max_doc_count_per_segment; +} + +ZVecErrorCode zvec_collection_schema_validate( + const ZVecCollectionSchema *schema, ZVecString **error_msg) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (error_msg) { + *error_msg = nullptr; + } + + if (!schema->name) { + if (error_msg) { + *error_msg = zvec_string_create("Collection name is required"); + } + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection name is required", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (schema->field_count == 0) { + if (error_msg) { + *error_msg = 
zvec_string_create("At least one field is required"); + } + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "At least one field is required", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + auto field = schema->fields[i]; + if (!field) { + if (error_msg) { + *error_msg = zvec_string_create("Null field found"); + } + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, "Null field found", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field->name) { + if (error_msg) { + *error_msg = zvec_string_create("Field name is required"); + } + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field name is required", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK; +} + +void zvec_collection_schema_cleanup(ZVecCollectionSchema *schema) { + if (!schema) return; + + ZVEC_TRY_BEGIN_VOID + if (schema->name) { + zvec_free_string(schema->name); + } + + if (schema->fields) { + for (size_t i = 0; i < schema->field_count; ++i) { + zvec_field_schema_cleanup(schema->fields[i]); + } + delete[] schema->fields; + schema->fields = nullptr; + schema->field_count = 0; + } + + schema->max_doc_count_per_segment = 0; + ZVEC_CATCH_END_VOID +} + +// ============================================================================= +// Helper functions +// ============================================================================= + +const char *zvec_error_code_to_string(ZVecErrorCode error_code) { + switch (error_code) { + case ZVEC_OK: + return "OK"; + case ZVEC_ERROR_NOT_FOUND: + return "NOT_FOUND"; + case ZVEC_ERROR_ALREADY_EXISTS: + return "ALREADY_EXISTS"; + case ZVEC_ERROR_INVALID_ARGUMENT: + return "INVALID_ARGUMENT"; + case ZVEC_ERROR_PERMISSION_DENIED: + return "PERMISSION_DENIED"; + case ZVEC_ERROR_FAILED_PRECONDITION: + return "FAILED_PRECONDITION"; + case ZVEC_ERROR_RESOURCE_EXHAUSTED: + return 
"RESOURCE_EXHAUSTED"; + case ZVEC_ERROR_UNAVAILABLE: + return "UNAVAILABLE"; + case ZVEC_ERROR_INTERNAL_ERROR: + return "INTERNAL_ERROR"; + case ZVEC_ERROR_NOT_SUPPORTED: + return "NOT_SUPPORTED"; + case ZVEC_ERROR_UNKNOWN: + return "UNKNOWN"; + default: + return "UNKNOWN_ERROR_CODE"; + } +} + +const char *zvec_data_type_to_string(ZVecDataType data_type) { + switch (data_type) { + case ZVEC_DATA_TYPE_UNDEFINED: + return "UNDEFINED"; + case ZVEC_DATA_TYPE_BINARY: + return "BINARY"; + case ZVEC_DATA_TYPE_STRING: + return "STRING"; + case ZVEC_DATA_TYPE_BOOL: + return "BOOL"; + case ZVEC_DATA_TYPE_INT32: + return "INT32"; + case ZVEC_DATA_TYPE_INT64: + return "INT64"; + case ZVEC_DATA_TYPE_UINT32: + return "UINT32"; + case ZVEC_DATA_TYPE_UINT64: + return "UINT64"; + case ZVEC_DATA_TYPE_FLOAT: + return "FLOAT"; + case ZVEC_DATA_TYPE_DOUBLE: + return "DOUBLE"; + case ZVEC_DATA_TYPE_VECTOR_BINARY32: + return "VECTOR_BINARY32"; + case ZVEC_DATA_TYPE_VECTOR_BINARY64: + return "VECTOR_BINARY64"; + case ZVEC_DATA_TYPE_VECTOR_FP16: + return "VECTOR_FP16"; + case ZVEC_DATA_TYPE_VECTOR_FP32: + return "VECTOR_FP32"; + case ZVEC_DATA_TYPE_VECTOR_FP64: + return "VECTOR_FP64"; + case ZVEC_DATA_TYPE_VECTOR_INT4: + return "VECTOR_INT4"; + case ZVEC_DATA_TYPE_VECTOR_INT8: + return "VECTOR_INT8"; + case ZVEC_DATA_TYPE_VECTOR_INT16: + return "VECTOR_INT16"; + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: + return "SPARSE_VECTOR_FP16"; + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: + return "SPARSE_VECTOR_FP32"; + case ZVEC_DATA_TYPE_ARRAY_BINARY: + return "ARRAY_BINARY"; + case ZVEC_DATA_TYPE_ARRAY_STRING: + return "ARRAY_STRING"; + case ZVEC_DATA_TYPE_ARRAY_BOOL: + return "ARRAY_BOOL"; + case ZVEC_DATA_TYPE_ARRAY_INT32: + return "ARRAY_INT32"; + case ZVEC_DATA_TYPE_ARRAY_INT64: + return "ARRAY_INT64"; + case ZVEC_DATA_TYPE_ARRAY_UINT32: + return "ARRAY_UINT32"; + case ZVEC_DATA_TYPE_ARRAY_UINT64: + return "ARRAY_UINT64"; + case ZVEC_DATA_TYPE_ARRAY_FLOAT: + return "ARRAY_FLOAT"; + case 
ZVEC_DATA_TYPE_ARRAY_DOUBLE: + return "ARRAY_DOUBLE"; + default: + return "UNKNOWN_DATA_TYPE"; + } +} + +const char *zvec_index_type_to_string(ZVecIndexType index_type) { + switch (index_type) { + case ZVEC_INDEX_TYPE_UNDEFINED: + return "UNDEFINED"; + case ZVEC_INDEX_TYPE_HNSW: + return "HNSW"; + case ZVEC_INDEX_TYPE_IVF: + return "IVF"; + case ZVEC_INDEX_TYPE_FLAT: + return "FLAT"; + case ZVEC_INDEX_TYPE_INVERT: + return "INVERT"; + default: + return "UNKNOWN_INDEX_TYPE"; + } +} + +const char *zvec_metric_type_to_string(ZVecMetricType metric_type) { + switch (metric_type) { + case ZVEC_METRIC_TYPE_UNDEFINED: + return "UNDEFINED"; + case ZVEC_METRIC_TYPE_L2: + return "L2"; + case ZVEC_METRIC_TYPE_IP: + return "IP"; + case ZVEC_METRIC_TYPE_COSINE: + return "COSINE"; + case ZVEC_METRIC_TYPE_MIPSL2: + return "MIPSL2"; + default: + return "UNKNOWN_METRIC_TYPE"; + } +} + +bool check_is_vector_field(const ZVecFieldSchema &zvec_field) { + bool is_vector_field = + (zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP32 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP64 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP16 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_BINARY32 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_BINARY64 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_INT4 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_INT8 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_INT16 || + zvec_field.data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 || + zvec_field.data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16); + return is_vector_field; +} + +// ============================================================================= +// Doc functions implementation +// ============================================================================= + +ZVecDoc *zvec_doc_create(void) { + ZVEC_TRY_RETURN_NULL("Failed to create document", { + auto doc_ptr = + new std::shared_ptr(std::make_shared()); + return reinterpret_cast(doc_ptr); + }) +} + +void 
zvec_doc_destroy(ZVecDoc *doc) { + if (doc) { + delete reinterpret_cast *>(doc); + } +} + +void zvec_doc_clear(ZVecDoc *doc) { + if (!doc) return; + + ZVEC_TRY_BEGIN_VOID + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->clear(); + ZVEC_CATCH_END_VOID +} + +void zvec_docs_free(ZVecDoc **docs, size_t count) { + if (!docs) return; + + for (size_t i = 0; i < count; ++i) { + zvec_doc_destroy(docs[i]); + } + + free(docs); +} + +void zvec_write_results_free(ZVecWriteResult *results, size_t result_count) { + free_write_results_internal(results, result_count); +} + +void zvec_doc_set_pk(ZVecDoc *doc, const char *pk) { + if (!doc || !pk) return; + + ZVEC_TRY_BEGIN_VOID + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_pk(std::string(pk)); + ZVEC_CATCH_END_VOID +} + +void zvec_doc_set_doc_id(ZVecDoc *doc, uint64_t doc_id) { + if (!doc) return; + + ZVEC_TRY_BEGIN_VOID + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_doc_id(doc_id); + ZVEC_CATCH_END_VOID +} + +void zvec_doc_set_score(ZVecDoc *doc, float score) { + if (!doc) return; + + ZVEC_TRY_BEGIN_VOID + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_score(score); + ZVEC_CATCH_END_VOID +} + +void zvec_doc_set_operator(ZVecDoc *doc, ZVecDocOperator op) { + if (!doc) return; + + ZVEC_TRY_BEGIN_VOID + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_operator(static_cast(op)); + ZVEC_CATCH_END_VOID +} + +ZVecErrorCode zvec_doc_set_field_null(ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to set null field", + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_null(std::string(field_name)); return ZVEC_OK;) +} + +// ============================================================================= +// Document interface implementation +// ============================================================================= + +// 
Helper function to extract scalar values from raw data +template +T extract_scalar_value(const void *value, size_t value_size, + ZVecErrorCode *error_code) { + if (value_size != sizeof(T)) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return T{}; + } + return *static_cast(value); +} + +// Helper function to extract vector values from raw data +template +std::vector extract_vector_values(const void *value, size_t value_size, + ZVecErrorCode *error_code) { + if (value_size % sizeof(T) != 0) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return std::vector(); + } + size_t count = value_size / sizeof(T); + const T *vals = static_cast(value); + return std::vector(vals, vals + count); +} + +// Helper function to extract array values from raw data +template +std::vector extract_array_values(const void *value, size_t value_size, + ZVecErrorCode *error_code) { + if (value_size % sizeof(T) != 0) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return std::vector(); + } + size_t count = value_size / sizeof(T); + const T *vals = static_cast(value); + return std::vector(vals, vals + count); +} + +// Helper function to handle sparse vector extraction +template +std::pair, std::vector> extract_sparse_vector( + const void *value, size_t value_size, ZVecErrorCode *error_code) { + if (value_size < sizeof(uint32_t)) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return std::make_pair(std::vector(), std::vector()); + } + + const uint32_t *data = static_cast(value); + uint32_t nnz = data[0]; + + size_t required_size = + sizeof(uint32_t) + nnz * (sizeof(uint32_t) + sizeof(T)); + if (value_size < required_size) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return std::make_pair(std::vector(), std::vector()); + } + + const uint32_t *indices = data + 1; + const T *values = reinterpret_cast(indices + nnz); + + std::vector index_vec(indices, indices + nnz); + 
std::vector value_vec(values, values + nnz); + + return std::make_pair(std::move(index_vec), std::move(value_vec)); +} + +// Helper function to extract string array from raw data (C-string array) +std::vector extract_string_array(const void *value, + size_t value_size) { + std::vector string_array; + const char *data = static_cast(value); + size_t pos = 0; + + while (pos < value_size) { + size_t str_len = strlen(data + pos); + if (pos + str_len >= value_size) { + break; + } + string_array.emplace_back(data + pos, str_len); + pos += str_len + 1; + } + return string_array; +} + +// Helper function to extract string array from ZVecString** array +std::vector extract_string_array_from_zvec( + ZVecString **zvec_strings, size_t count) { + std::vector string_array; + string_array.reserve(count); + + for (size_t i = 0; i < count; ++i) { + if (zvec_strings[i] && zvec_strings[i]->data) { + string_array.emplace_back(zvec_strings[i]->data, zvec_strings[i]->length); + } else { + string_array.emplace_back("", 0); + } + } + + return string_array; +} + +// Helper function to extract binary array from raw data +std::vector extract_binary_array(const void *value, + size_t value_size) { + std::vector binary_array; + const char *data = static_cast(value); + size_t pos = 0; + + while (pos < value_size) { + if (pos + sizeof(uint32_t) > value_size) { + break; + } + uint32_t bin_len = *reinterpret_cast(data + pos); + pos += sizeof(uint32_t); + + if (pos + bin_len > value_size) { + break; + } + binary_array.emplace_back(data + pos, bin_len); + pos += bin_len; + } + return binary_array; +} + +static std::vector convert_zvec_docs_to_internal( + const ZVecDoc **zvec_docs, size_t doc_count) { + std::vector docs; + docs.reserve(doc_count); + + for (size_t i = 0; i < doc_count; ++i) { + docs.push_back( + *(*reinterpret_cast *>(zvec_docs[i]))); + } + + return docs; +} + + +static zvec::Status convert_zvec_collection_schema_to_internal( + const ZVecCollectionSchema *schema, + 
zvec::CollectionSchema::Ptr &collection_schema) { + std::string coll_name(schema->name->data, schema->name->length); + collection_schema = std::make_shared(coll_name); + collection_schema->set_max_doc_count_per_segment( + schema->max_doc_count_per_segment); + + for (size_t i = 0; i < schema->field_count; ++i) { + const ZVecFieldSchema &zvec_field = *schema->fields[i]; + zvec::DataType data_type = convert_data_type(zvec_field.data_type); + std::string field_name = + std::string(zvec_field.name->data, zvec_field.name->length); + zvec::FieldSchema::Ptr field_schema; + + bool is_vector_field = check_is_vector_field(zvec_field); + + if (is_vector_field) { + field_schema = std::make_shared( + field_name, data_type, zvec_field.dimension, zvec_field.nullable); + } else { + field_schema = std::make_shared(field_name, data_type, + zvec_field.nullable); + } + + if (zvec_field.has_index) { + zvec::Status status = set_field_index_params(field_schema, &zvec_field); + if (!status.ok()) { + return status; + } + } + + zvec::Status status = collection_schema->add_field(field_schema); + if (!status.ok()) { + return status; + } + } + + return zvec::Status::OK(); +} + +static zvec::Status convert_zvec_field_schema_to_internal( + const ZVecFieldSchema &zvec_field, zvec::FieldSchema::Ptr &field_schema) { + // Validate input + if (!zvec_field.name) { + return zvec::Status::InvalidArgument("Field name cannot be null"); + } + + zvec::DataType data_type = convert_data_type(zvec_field.data_type); + if (data_type == zvec::DataType::UNDEFINED) { + return zvec::Status::InvalidArgument("Invalid data type"); + } + + std::string field_name(zvec_field.name->data, zvec_field.name->length); + bool is_vector_field = check_is_vector_field(zvec_field); + + if (is_vector_field) { + field_schema = std::make_shared( + field_name, data_type, zvec_field.dimension, zvec_field.nullable); + + if (zvec_field.has_index) { + switch (zvec_field.index_params.index_type) { + case ZVEC_INDEX_TYPE_HNSW: { + auto metric 
= + convert_metric_type(zvec_field.index_params.metric_type); + auto quantize = + convert_quantize_type(zvec_field.index_params.quantize_type); + auto index_params = std::make_shared( + metric, zvec_field.index_params.hnsw.m, + zvec_field.index_params.hnsw.ef_construction, quantize); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + auto metric = + convert_metric_type(zvec_field.index_params.metric_type); + auto quantize = + convert_quantize_type(zvec_field.index_params.quantize_type); + auto index_params = + std::make_shared(metric, quantize); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_IVF: { + auto metric = + convert_metric_type(zvec_field.index_params.metric_type); + auto quantize = + convert_quantize_type(zvec_field.index_params.quantize_type); + auto index_params = std::make_shared( + metric, zvec_field.index_params.ivf.n_list, + zvec_field.index_params.ivf.n_iters, + zvec_field.index_params.ivf.use_soar, quantize); + field_schema->set_index_params(index_params); + break; + } + default: + field_schema->set_index_params( + std::make_shared(zvec::MetricType::L2)); + break; + } + } else { + field_schema->set_index_params( + std::make_shared(zvec::MetricType::L2)); + } + } else { + field_schema = std::make_shared(field_name, data_type, + zvec_field.nullable); + + if (zvec_field.has_index && + zvec_field.index_params.index_type == ZVEC_INDEX_TYPE_INVERT) { + auto index_params = std::make_shared( + zvec_field.index_params.invert.enable_range_optimization, + zvec_field.index_params.invert.enable_extended_wildcard); + field_schema->set_index_params(index_params); + } + } + + return zvec::Status::OK(); +} + +ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, + ZVecDataType data_type, + const void *value, + size_t value_size) { + if (!doc || !field_name || !value) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + 
} + + ZVEC_TRY_RETURN_ERROR( + "Failed to add field", + auto doc_ptr = reinterpret_cast *>(doc); + std::string name(field_name); ZVecErrorCode error_code = ZVEC_OK; + + switch (data_type) { + // Scalar types + case ZVEC_DATA_TYPE_BINARY: + case ZVEC_DATA_TYPE_STRING: { + std::string val(static_cast(value), value_size); + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_BOOL: { + bool val = extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for bool type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_INT32: { + int32_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for int32 type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_INT64: { + int64_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for int64 type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + uint32_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for uint32 type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + uint64_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for uint64 type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + float val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for float type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + double val = + extract_scalar_value(value, value_size, 
&error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for double type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + + // Vector types + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_binary32 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_binary64 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_fp32 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + auto vec = extract_vector_values(value, value_size, + &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_fp16 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_fp64 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_int8 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_int16 
type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: { + // INT4 vectors are packed - each byte contains 2 int4 values + size_t count = value_size * 2; + const int8_t *packed_vals = static_cast(value); + std::vector vec; + vec.reserve(count); + + // Unpack int4 values + for (size_t i = 0; i < value_size; ++i) { + int8_t byte_val = packed_vals[i]; + // Extract lower 4 bits + vec.push_back(byte_val & 0x0F); + // Extract upper 4 bits + vec.push_back((byte_val >> 4) & 0x0F); + } + (*doc_ptr)->set(name, vec); + break; + } + + // Sparse vector types + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + auto sparse_vec = extract_sparse_vector( + value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid sparse vector data size"); + return error_code; + } + (*doc_ptr)->set(name, sparse_vec); + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + auto sparse_vec = + extract_sparse_vector(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid sparse vector data size"); + return error_code; + } + (*doc_ptr)->set(name, sparse_vec); + break; + } + + // Array types + case ZVEC_DATA_TYPE_ARRAY_BINARY: { + auto binary_array = extract_binary_array(value, value_size); + (*doc_ptr)->set(name, binary_array); + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + // Check if this is a ZVecString** array or a C-string array + // ZVecString** array has pointer-sized elements + constexpr size_t ptr_size = sizeof(void *); + if (value_size % ptr_size == 0) { + // Likely a ZVecString** array + size_t count = value_size / ptr_size; + ZVecString **zvec_str_array = + reinterpret_cast(const_cast(value)); + auto string_array = + extract_string_array_from_zvec(zvec_str_array, count); + (*doc_ptr)->set(name, string_array); + } else { + // C-string array (null-terminated strings) + auto string_array = extract_string_array(value, value_size); + (*doc_ptr)->set(name, string_array); + } 
+ break; + } + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + auto vec = extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_bool type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_int32 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_int64 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_uint32 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_uint64 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_float type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_double type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + + default: + set_last_error("Unsupported data type: " + std::to_string(data_type)); + return 
ZVEC_ERROR_INVALID_ARGUMENT; + } + + return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, + const ZVecDocField *field) { + if (!doc || !field) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to add field", + auto doc_ptr = reinterpret_cast *>(doc); + + std::string name(field->name.data, field->name.length); + + switch (field->data_type) { + // Scalar types (in ZVecDataType order: BINARY, STRING, BOOL, INT32, + // INT64, UINT32, UINT64, FLOAT, DOUBLE) + case ZVEC_DATA_TYPE_BINARY: { + std::string val( + reinterpret_cast(field->value.binary_value.data), + field->value.binary_value.length); + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_STRING: { + std::string val(field->value.string_value.data, + field->value.string_value.length); + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_BOOL: { + (*doc_ptr)->set(name, field->value.bool_value); + break; + } + case ZVEC_DATA_TYPE_INT32: { + (*doc_ptr)->set(name, field->value.int32_value); + break; + } + case ZVEC_DATA_TYPE_INT64: { + (*doc_ptr)->set(name, field->value.int64_value); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + (*doc_ptr)->set(name, field->value.uint32_value); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + (*doc_ptr)->set(name, field->value.uint64_value); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + (*doc_ptr)->set(name, field->value.float_value); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + (*doc_ptr)->set(name, field->value.double_value); + break; + } + + // Vector types (in ZVecDataType order: BINARY32, BINARY64, FP16, FP32, + // FP64, INT4, INT8, INT16) + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + std::vector vec(reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + std::vector 
vec(reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + std::vector vec( + reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + std::vector vec(field->value.vector_value.data, + field->value.vector_value.data + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: { + size_t byte_count = (field->value.vector_value.length + 1) / 2; + const int8_t *packed_data = + reinterpret_cast(field->value.vector_value.data); + std::vector vec; + vec.reserve(field->value.vector_value.length); + + for (size_t i = 0; + i < byte_count && vec.size() < field->value.vector_value.length; + ++i) { + int8_t byte_val = packed_data[i]; + // Extract lower 4 bits + vec.push_back(byte_val & 0x0F); + // Extract upper 4 bits + if (vec.size() < field->value.vector_value.length) { + vec.push_back((byte_val >> 4) & 0x0F); + } + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + + // 
Sparse vector types (in ZVecDataType order: FP16, FP32) + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + std::vector vec( + reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + std::vector vec(field->value.vector_value.data, + field->value.vector_value.data + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + + // Array types (in ZVecDataType order: BINARY, STRING, BOOL, INT32, + // INT64, UINT32, UINT64, FLOAT, DOUBLE) + case ZVEC_DATA_TYPE_ARRAY_BINARY: { + std::vector array_values; + const uint8_t *data_ptr = field->value.binary_value.data; + size_t total_length = field->value.binary_value.length; + size_t offset = 0; + + while (offset + sizeof(uint32_t) <= total_length) { + uint32_t elem_length = + *reinterpret_cast(data_ptr + offset); + offset += sizeof(uint32_t); + + if (offset + elem_length <= total_length) { + std::string elem( + reinterpret_cast(data_ptr + offset), + elem_length); + array_values.push_back(elem); + offset += elem_length; + } else { + break; + } + } + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + std::vector array_values; + const char *data_ptr = field->value.string_value.data; + size_t total_length = field->value.string_value.length; + size_t offset = 0; + + while (offset < total_length) { + size_t str_len = strlen(data_ptr + offset); + if (str_len > 0 && offset + str_len <= total_length) { + array_values.emplace_back(data_ptr + offset, str_len); + offset += str_len + 1; + } else { + break; + } + } + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + std::vector array_values( + reinterpret_cast(field->value.binary_value.data), + reinterpret_cast(field->value.binary_value.data) + + field->value.binary_value.length); + (*doc_ptr)->set(name, array_values); + 
break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + std::vector array_values( + reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + std::vector array_values( + reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + std::vector array_values(field->value.vector_value.data, + field->value.vector_value.data + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + + default: + set_last_error("Unsupported data type: " + + std::to_string(field->data_type)); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return ZVEC_OK;) +} + +const char *zvec_doc_get_pk_pointer(const ZVecDoc *doc) { + if (!doc) return nullptr; + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->pk_ref().data(); +} + +const char *zvec_doc_get_pk_copy(const ZVecDoc *doc) { + if (!doc) return nullptr; + auto doc_ptr = 
reinterpret_cast *>(doc); + const std::string &pk = (*doc_ptr)->pk_ref(); + if (pk.empty()) return nullptr; + + char *result = static_cast(malloc(pk.length() + 1)); + strcpy(result, pk.c_str()); + return result; +} + +uint64_t zvec_doc_get_doc_id(const ZVecDoc *doc) { + if (!doc) return 0; + + ZVEC_TRY_RETURN_SCALAR( + "Failed to get document ID", 0, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->doc_id();) +} + +float zvec_doc_get_score(const ZVecDoc *doc) { + if (!doc) return 0.0f; + + ZVEC_TRY_RETURN_SCALAR( + "Failed to get document score", 0.0f, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->score();) +} + +ZVecDocOperator zvec_doc_get_operator(const ZVecDoc *doc) { + if (!doc) return ZVEC_DOC_OP_INSERT; // default + ZVEC_TRY_RETURN_SCALAR( + "Failed to get document operator", ZVEC_DOC_OP_INSERT, + auto doc_ptr = reinterpret_cast *>(doc); + zvec::Operator op = (*doc_ptr)->get_operator(); + return static_cast(op);) +} + +size_t zvec_doc_get_field_count(const ZVecDoc *doc) { + if (!doc) return 0; + + ZVEC_TRY_RETURN_SCALAR( + "Failed to get field count", 0, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->field_names().size();) +} + +ZVecErrorCode zvec_doc_get_field_value_basic(const ZVecDoc *doc, + const char *field_name, + ZVecDataType field_type, + void *value_buffer, + size_t buffer_size) { + if (!doc || !field_name || !value_buffer) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get field value", + auto doc_ptr = reinterpret_cast *>(doc); + + // Check if field exists + if (!(*doc_ptr)->has(field_name)) { + set_last_error("Field not found in document"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Handle basic data types that return values directly + switch (field_type) { + case ZVEC_DATA_TYPE_BOOL: { + if (buffer_size < sizeof(bool)) { + set_last_error("Buffer too small for bool value"); + return 
ZVEC_ERROR_INVALID_ARGUMENT; + } + const bool val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_INT32: { + if (buffer_size < sizeof(int32_t)) { + set_last_error("Buffer too small for int32 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const int32_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_INT64: { + if (buffer_size < sizeof(int64_t)) { + set_last_error("Buffer too small for int64 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const int64_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_UINT32: { + if (buffer_size < sizeof(uint32_t)) { + set_last_error("Buffer too small for uint32 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const uint32_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_UINT64: { + if (buffer_size < sizeof(uint64_t)) { + set_last_error("Buffer too small for uint64 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const uint64_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + if (buffer_size < sizeof(float)) { + set_last_error("Buffer too small for float value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const float val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + if (buffer_size < sizeof(double)) { + set_last_error("Buffer too small for double value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const double val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + default: { + set_last_error("Data type not supported for basic value return"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_get_field_value_copy(const ZVecDoc *doc, + const char 
*field_name, + ZVecDataType field_type, + void **value, size_t *value_size) { + if (!doc || !field_name || !value || !value_size) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get field value copy", + auto doc_ptr = reinterpret_cast *>(doc); + + // Check if field exists + if (!(*doc_ptr)->has(field_name)) { + set_last_error("Field not found in document"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Handle copy-returning data types (allocate new memory) + switch (field_type) { + // Basic types - copy the actual values + case ZVEC_DATA_TYPE_BOOL: { + const bool val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(bool)); + if (!buffer) { + set_last_error("Memory allocation failed for bool"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(bool); + break; + } + case ZVEC_DATA_TYPE_INT32: { + const int32_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(int32_t)); + if (!buffer) { + set_last_error("Memory allocation failed for int32"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(int32_t); + break; + } + case ZVEC_DATA_TYPE_INT64: { + const int64_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(int64_t)); + if (!buffer) { + set_last_error("Memory allocation failed for int64"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(int64_t); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + const uint32_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(uint32_t)); + if (!buffer) { + set_last_error("Memory allocation failed for uint32"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + const 
uint64_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(uint64_t)); + if (!buffer) { + set_last_error("Memory allocation failed for uint64"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + const float val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(float)); + if (!buffer) { + set_last_error("Memory allocation failed for float"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(float); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + const double val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(double)); + if (!buffer) { + set_last_error("Memory allocation failed for double"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(double); + break; + } + + // String and binary types - copy the data + case ZVEC_DATA_TYPE_BINARY: + case ZVEC_DATA_TYPE_STRING: { + const std::string &val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(val.length()); + if (!buffer) { + set_last_error("Memory allocation failed for string/binary"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), val.length()); + *value = buffer; + *value_size = val.length(); + break; + } + + // Vector types - copy the data + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(uint32_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint32 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * 
sizeof(uint64_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint64 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(zvec::float16_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for fp16 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(float); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for fp32 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(double); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for fp64 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: + case ZVEC_DATA_TYPE_VECTOR_INT8: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(int8_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int8 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + 
const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(int16_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int16 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + + // Sparse vector types - create flattened representation + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + using SparseVecFP16 = + std::pair, std::vector>; + const SparseVecFP16 &sparse_vec = + (*doc_ptr)->get_ref(field_name); + size_t nnz = sparse_vec.first.size(); + size_t total_size = sizeof(size_t) + nnz * (sizeof(uint32_t) + + sizeof(zvec::float16_t)); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for sparse vector FP16"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + *reinterpret_cast(ptr) = nnz; + ptr += sizeof(size_t); + + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = sparse_vec.first[i]; + ptr += sizeof(uint32_t); + } + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = sparse_vec.second[i]; + ptr += sizeof(zvec::float16_t); + } + + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + using SparseVecFP32 = + std::pair, std::vector>; + const SparseVecFP32 &sparse_vec = + (*doc_ptr)->get_ref(field_name); + size_t nnz = sparse_vec.first.size(); + size_t total_size = + sizeof(size_t) + nnz * (sizeof(uint32_t) + sizeof(float)); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for sparse vector FP32"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + *reinterpret_cast(ptr) = nnz; + ptr += sizeof(size_t); + + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = sparse_vec.first[i]; + ptr += sizeof(uint32_t); + } + for (size_t i = 0; i < nnz; ++i) { + 
*reinterpret_cast(ptr) = sparse_vec.second[i]; + ptr += sizeof(float); + } + + *value = buffer; + *value_size = total_size; + break; + } + + // Array types - create serialized representations + case ZVEC_DATA_TYPE_ARRAY_BINARY: { + using BinaryArray = std::vector; + const BinaryArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = 0; + for (const auto &bin_val : array_vals) { + total_size += bin_val.length(); + } + + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for binary array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + for (const auto &bin_val : array_vals) { + memcpy(ptr, bin_val.data(), bin_val.length()); + ptr += bin_val.length(); + } + + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + using StringArray = std::vector; + const StringArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = 0; + for (const auto &str_val : array_vals) { + total_size += str_val.length() + 1; // +1 for null terminator + } + + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for string array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + for (const auto &str_val : array_vals) { + memcpy(ptr, str_val.c_str(), str_val.length()); + ptr += str_val.length(); + *ptr = '\0'; + ptr++; + } + + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + using BoolArray = std::vector; + const BoolArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t byte_count = (array_vals.size() + 7) / 8; + void *buffer = malloc(byte_count); + if (!buffer) { + set_last_error("Memory allocation failed for bool array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + uint8_t *bytes = static_cast(buffer); + memset(bytes, 0, byte_count); + + for (size_t i = 0; i < array_vals.size(); ++i) { + if (array_vals[i]) { + 
bytes[i / 8] |= (1 << (i % 8)); + } + } + + *value = buffer; + *value_size = byte_count; + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + using Int32Array = std::vector; + const Int32Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(int32_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int32 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + using Int64Array = std::vector; + const Int64Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(int64_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int64 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + using UInt32Array = std::vector; + const UInt32Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(uint32_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint32 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + using UInt64Array = std::vector; + const UInt64Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(uint64_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint64 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case 
ZVEC_DATA_TYPE_ARRAY_FLOAT: { + using FloatArray = std::vector; + const FloatArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(float); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for float array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + using DoubleArray = std::vector; + const DoubleArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(double); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for double array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + default: { + set_last_error("Unknown data type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_get_field_value_pointer(const ZVecDoc *doc, + const char *field_name, + ZVecDataType field_type, + const void **value, + size_t *value_size) { + if (!doc || !field_name || !value || !value_size) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get field value pointer", + auto doc_ptr = reinterpret_cast *>(doc); + + // Check if field exists + if (!(*doc_ptr)->has(field_name)) { + set_last_error("Field not found in document"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Get field value based on data type + switch (field_type) { + case ZVEC_DATA_TYPE_BINARY: { + const std::string &val = (*doc_ptr)->get_ref(field_name); + *value = val.data(); + *value_size = val.length(); + break; + } + case ZVEC_DATA_TYPE_STRING: { + const std::string &val = (*doc_ptr)->get_ref(field_name); + *value = val.c_str(); + *value_size = 
val.length(); + break; + } + case ZVEC_DATA_TYPE_BOOL: { + const bool &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(bool); + break; + } + case ZVEC_DATA_TYPE_INT32: { + const int32_t &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(int32_t); + break; + } + case ZVEC_DATA_TYPE_INT64: { + const int64_t &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(int64_t); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + const uint32_t &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + const uint64_t &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + const float &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(float); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + const double &val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(double); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + // FP16 vectors typically stored as uint16_t + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(zvec::float16_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(float); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size 
= val.size() * sizeof(double); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: { + // INT4 vectors typically stored as int8_t with 2 values per byte + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(int8_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(int8_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(int16_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(int32_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(int64_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(float); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(double); + break; + } + default: { + set_last_error("Unknown data type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK;) +} + +bool zvec_doc_is_empty(const ZVecDoc *doc) { + if (!doc) { + 
set_last_error("Document pointer is null"); + return true; + } + + ZVEC_TRY_RETURN_SCALAR( + "Failed to check if document is empty", true, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->is_empty();) +} + +ZVecErrorCode zvec_doc_remove_field(ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to remove field", + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->remove(std::string(field_name)); return ZVEC_OK;) +} + + +bool zvec_doc_has_field(const ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return false; + } + + ZVEC_TRY_RETURN_SCALAR( + "Failed to check field existence", false, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->has(std::string(field_name));) +} + +bool zvec_doc_has_field_value(const ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return false; + } + + ZVEC_TRY_RETURN_SCALAR( + "Failed to check field value existence", false, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->has_value(std::string(field_name));) +} + +bool zvec_doc_is_field_null(const ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return false; + } + + ZVEC_TRY_RETURN_SCALAR( + "Failed to check if field is null", false, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->is_null(std::string(field_name));) +} + +ZVecErrorCode zvec_doc_get_field_names(const ZVecDoc *doc, char ***field_names, + size_t *count) { + if (!doc || !field_names || !count) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get field names", + auto doc_ptr = reinterpret_cast 
*>(doc); + std::vector names = (*doc_ptr)->field_names(); + + *count = names.size(); + if (*count == 0) { + *field_names = nullptr; + return ZVEC_OK; + } + + *field_names = static_cast(malloc(*count * sizeof(char *))); + if (!*field_names) { + set_last_error("Failed to allocate memory for field names"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + for (size_t i = 0; i < *count; ++i) { + (*field_names)[i] = copy_string(names[i]); + if (!(*field_names)[i]) { + for (size_t j = 0; j < i; ++j) { + free((*field_names)[j]); + } + free(*field_names); + *field_names = nullptr; + set_last_error("Failed to copy field name"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + } + + return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_serialize(const ZVecDoc *doc, uint8_t **data, + size_t *size) { + if (!doc || !data || !size) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to serialize document", + auto doc_ptr = reinterpret_cast *>(doc); + std::vector serialized_data = (*doc_ptr)->serialize(); + + *size = serialized_data.size(); + if (*size == 0) { + *data = nullptr; + return ZVEC_OK; + } + + *data = static_cast(malloc(*size)); + if (!*data) { + set_last_error("Failed to allocate memory for serialized data"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(*data, serialized_data.data(), *size); + return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_deserialize(const uint8_t *data, size_t size, + ZVecDoc **doc) { + if (!data || !doc || size == 0) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to deserialize document", + auto deserialized_doc = zvec::Doc::deserialize(data, size); + if (!deserialized_doc) { + set_last_error("Failed to deserialize document"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + auto doc_ptr = new std::shared_ptr(deserialized_doc); + *doc = reinterpret_cast(doc_ptr); return ZVEC_OK;) +} + +void zvec_doc_merge(ZVecDoc *doc, const 
ZVecDoc *other) { + if (!doc || !other) { + set_last_error("Document pointers are null"); + return; + } + + ZVEC_TRY_BEGIN_VOID + auto doc_ptr = reinterpret_cast *>(doc); + auto other_ptr = reinterpret_cast *>(other); + (*doc_ptr)->merge(**other_ptr); + ZVEC_CATCH_END_VOID +} + +size_t zvec_doc_memory_usage(const ZVecDoc *doc) { + if (!doc) { + set_last_error("Document pointer is null"); + return 0; + } + + ZVEC_TRY_RETURN_SCALAR( + "Failed to get document memory usage", 0, + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->memory_usage();) +} + +ZVecErrorCode zvec_doc_validate(const ZVecDoc *doc, + const ZVecCollectionSchema *schema, + bool is_update, char **error_msg) { + if (!doc || !schema) { + set_last_error("Document or schema pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to validate document", + std::shared_ptr schema_ptr = nullptr; + auto status = + convert_zvec_collection_schema_to_internal(schema, schema_ptr); + if (!status.ok()) { + if (error_msg) { + *error_msg = copy_string(status.message()); + } + return status_to_error_code(status); + } + + auto doc_ptr = reinterpret_cast *>(doc); + status = (*doc_ptr)->validate(schema_ptr, is_update); if (!status.ok()) { + if (error_msg) { + *error_msg = copy_string(status.message()); + } + return status_to_error_code(status); + } + + if (error_msg) { *error_msg = nullptr; } return ZVEC_OK;) +} + +ZVecErrorCode zvec_doc_to_detail_string(const ZVecDoc *doc, char **detail_str) { + if (!doc || !detail_str) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get document detail string", + auto doc_ptr = reinterpret_cast *>(doc); + std::string detail = (*doc_ptr)->to_detail_string(); + *detail_str = copy_string(detail); + + if (!*detail_str && !detail.empty()) { + set_last_error("Failed to copy detail string"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + return ZVEC_OK;) +} + +// 
============================================================================= +// Collection functions implementation +// ============================================================================= + +ZVecErrorCode zvec_collection_create_and_open( + const char *path, const ZVecCollectionSchema *schema, + const ZVecCollectionOptions *options, ZVecCollection **collection) { + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_create_and_open_with_schema", + if (!path || !schema || !collection) { + set_last_error("Path, schema, or collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + std::shared_ptr + schema_ptr = nullptr; + auto status = + convert_zvec_collection_schema_to_internal(schema, schema_ptr); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + zvec::CollectionOptions collection_options; + if (options) { + collection_options.enable_mmap_ = options->enable_mmap; + collection_options.max_buffer_size_ = options->max_buffer_size; + collection_options.read_only_ = options->read_only; + } + + auto result = zvec::Collection::CreateAndOpen(path, *schema_ptr, + collection_options); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *collection = reinterpret_cast( + new std::shared_ptr(std::move(result.value()))); + } + + return error_code;) +} + +ZVecErrorCode zvec_collection_open(const char *path, + const ZVecCollectionOptions *options, + ZVecCollection **collection) { + if (!path || !collection) { + set_last_error("Invalid arguments: path and collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", zvec::CollectionOptions collection_options; + if (options) { + collection_options.enable_mmap_ = options->enable_mmap; + collection_options.max_buffer_size_ = options->max_buffer_size; + collection_options.read_only_ = options->read_only; + } + + auto result = 
zvec::Collection::Open(path, collection_options); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *collection = reinterpret_cast( + new std::shared_ptr(std::move(result.value()))); + } + + return error_code;) +} + +ZVecErrorCode zvec_collection_close(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + delete reinterpret_cast *>(collection); + return ZVEC_OK;) +} + +ZVecErrorCode zvec_collection_destroy(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto &coll = + *reinterpret_cast *>(collection); + zvec::Status status = coll->Destroy(); + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) +} + +ZVecErrorCode zvec_collection_flush(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto &coll = + *reinterpret_cast *>(collection); + zvec::Status status = coll->Flush(); + + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) +} + +ZVecErrorCode zvec_collection_get_schema(const ZVecCollection *collection, + ZVecCollectionSchema **schema) { + if (!collection || !schema) { + set_last_error("Invalid arguments: collection and schema cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto &coll = *reinterpret_cast *>( + collection); + auto result = coll->Schema(); + + ZVecErrorCode error_code = handle_expected_result(result); + if (error_code == ZVEC_OK) { + const auto &cpp_schema = result.value(); + + 
// Create new schema structure + ZVecCollectionSchema *c_schema = static_cast( + malloc(sizeof(ZVecCollectionSchema))); + if (!c_schema) { + set_last_error("Failed to allocate memory for schema"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Initialize the schema structure + c_schema->name = nullptr; + c_schema->fields = nullptr; + c_schema->field_count = 0; + c_schema->field_capacity = 0; + c_schema->max_doc_count_per_segment = + cpp_schema.max_doc_count_per_segment(); + + // Set collection name + c_schema->name = zvec_string_create(cpp_schema.name().c_str()); + if (!c_schema->name) { + free(c_schema); + set_last_error("Failed to allocate memory for collection name"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Convert and copy fields + const auto &cpp_fields = cpp_schema.fields(); + c_schema->field_count = cpp_fields.size(); + c_schema->field_capacity = cpp_fields.size(); + + if (c_schema->field_count > 0) { + // Allocate array of field pointers + c_schema->fields = static_cast( + malloc(c_schema->field_count * sizeof(ZVecFieldSchema *))); + if (!c_schema->fields) { + zvec_collection_schema_destroy(c_schema); + set_last_error("Failed to allocate memory for fields"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Initialize all field pointers to nullptr + for (size_t i = 0; i < c_schema->field_count; ++i) { + c_schema->fields[i] = nullptr; + } + + size_t i = 0; + for (const auto &cpp_field : cpp_fields) { + try { + // Create new field schema + c_schema->fields[i] = static_cast( + malloc(sizeof(ZVecFieldSchema))); + if (!c_schema->fields[i]) { + throw std::bad_alloc(); + } + + // Copy field name using zvec_string_create + c_schema->fields[i]->name = + zvec_string_create(cpp_field->name().c_str()); + if (!c_schema->fields[i]->name) { + throw std::bad_alloc(); + } + + // Convert data type + c_schema->fields[i]->data_type = + convert_zvec_data_type(cpp_field->data_type()); + + // Copy dimension for vector fields + c_schema->fields[i]->dimension = 
cpp_field->dimension(); + + // Copy nullable flag + c_schema->fields[i]->nullable = cpp_field->nullable(); + + // Initialize index parameters (embedded, not pointer) + memset(&c_schema->fields[i]->index_params, 0, + sizeof(ZVecIndexParams)); + c_schema->fields[i]->has_index = false; + + // Convert index parameters based on the actual type + auto index_params = cpp_field->index_params(); + if (index_params) { + switch (index_params->type()) { + case zvec::IndexType::HNSW: { + auto hnsw_params = + std::dynamic_pointer_cast( + index_params); + if (hnsw_params) { + c_schema->fields[i]->index_params.index_type = + ZVEC_INDEX_TYPE_HNSW; + c_schema->fields[i]->index_params.metric_type = + static_cast( + hnsw_params->metric_type()); + c_schema->fields[i]->index_params.quantize_type = + static_cast( + hnsw_params->quantize_type()); + c_schema->fields[i]->index_params.hnsw.m = + hnsw_params->m(); + c_schema->fields[i]->index_params.hnsw.ef_construction = + hnsw_params->ef_construction(); + c_schema->fields[i]->has_index = true; + } + break; + } + + case zvec::IndexType::IVF: { + auto ivf_params = + std::dynamic_pointer_cast( + index_params); + if (ivf_params) { + c_schema->fields[i]->index_params.index_type = + ZVEC_INDEX_TYPE_IVF; + c_schema->fields[i]->index_params.metric_type = + static_cast( + ivf_params->metric_type()); + c_schema->fields[i]->index_params.quantize_type = + static_cast( + ivf_params->quantize_type()); + c_schema->fields[i]->index_params.ivf.n_list = + ivf_params->n_list(); + c_schema->fields[i]->index_params.ivf.n_iters = + ivf_params->n_iters(); + c_schema->fields[i]->index_params.ivf.use_soar = + ivf_params->use_soar(); + c_schema->fields[i]->has_index = true; + } + break; + } + + case zvec::IndexType::FLAT: { + auto flat_params = + std::dynamic_pointer_cast( + index_params); + if (flat_params) { + c_schema->fields[i]->index_params.index_type = + ZVEC_INDEX_TYPE_FLAT; + c_schema->fields[i]->index_params.metric_type = + static_cast( + 
flat_params->metric_type()); + c_schema->fields[i]->index_params.quantize_type = + static_cast( + flat_params->quantize_type()); + c_schema->fields[i]->has_index = true; + } + break; + } + + case zvec::IndexType::INVERT: { + auto invert_params = + std::dynamic_pointer_cast( + index_params); + if (invert_params) { + c_schema->fields[i]->index_params.index_type = + ZVEC_INDEX_TYPE_INVERT; + c_schema->fields[i] + ->index_params.invert.enable_range_optimization = + invert_params->enable_range_optimization(); + c_schema->fields[i] + ->index_params.invert.enable_extended_wildcard = + invert_params->enable_extended_wildcard(); + c_schema->fields[i]->has_index = true; + } + break; + } + + default: + // For undefined or unsupported index types + c_schema->fields[i]->has_index = false; + break; + } + } else { + // No index parameters + c_schema->fields[i]->has_index = false; + } + } catch (const std::bad_alloc &) { + // Clean up already allocated fields + for (size_t j = 0; j <= i; ++j) { + if (c_schema->fields[j]) { + zvec_field_schema_destroy(c_schema->fields[j]); + } + } + free(c_schema->fields); + zvec_free_string(c_schema->name); + free(c_schema); + set_last_error("Failed to allocate memory for field"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + ++i; + } + } + + *schema = c_schema; + } + + return error_code;) +} + +ZVecErrorCode zvec_collection_get_options(const ZVecCollection *collection, + ZVecCollectionOptions **options) { + if (!collection || !options) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get collection options", + auto collection_ptr = + reinterpret_cast *>( + collection); + auto result = (*collection_ptr)->Options(); + + if (!result.has_value()) { + set_last_error("Failed to get collection option: " + + result.error().message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Create and initialize options structure + *options = static_cast( + 
malloc(sizeof(ZVecCollectionOptions))); + if (!*options) { + set_last_error("Failed to allocate memory for options"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + (*options) + ->enable_mmap = result.value().enable_mmap_; + (*options)->max_buffer_size = result.value().max_buffer_size_; + (*options)->read_only = result.value().read_only_; + (*options)->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; + + return ZVEC_OK;) +} + +ZVecErrorCode zvec_collection_get_stats(const ZVecCollection *collection, + ZVecCollectionStats **stats) { + if (!collection || !stats) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Failed to get detailed collection stats", + auto collection_ptr = + reinterpret_cast *>( + collection); + auto result = (*collection_ptr)->Stats(); + + if (!result.has_value()) { + set_last_error("Failed to get collection stats: " + + result.error().message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + *stats = static_cast( + malloc(sizeof(ZVecCollectionStats))); + if (!*stats) { + set_last_error("Failed to allocate memory for stats"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + ZVecErrorCode error_code = handle_expected_result(result); + if (error_code == ZVEC_OK) { + (*stats)->doc_count = result.value().doc_count; + (*stats)->index_count = result.value().index_completeness.size(); + if ((*stats)->index_count > 0) { + (*stats)->index_completeness = static_cast( + malloc((*stats)->index_count * sizeof(float))); + (*stats)->index_names = static_cast( + malloc((*stats)->index_count * sizeof(ZVecString *))); + int i = 0; + for (auto &[name, completeness] : result.value().index_completeness) { + (*stats)->index_completeness[i] = completeness; + (*stats)->index_names[i] = zvec_string_create(name.c_str()); + i++; + } + } + } else { + (*stats)->index_completeness = nullptr; + (*stats)->index_names = nullptr; + } + + return error_code;) +} + +ZVecCollectionStats 
*zvec_collection_stats_create(void) { + ZVecCollectionStats *stats = + static_cast(malloc(sizeof(ZVecCollectionStats))); + if (!stats) { + return nullptr; + } + stats->doc_count = 0; + stats->index_count = 0; + stats->index_completeness = nullptr; + stats->index_names = nullptr; + return stats; +} + +void zvec_collection_stats_destroy(ZVecCollectionStats *stats) { + if (stats) { + if (stats->index_names) { + for (size_t i = 0; i < stats->index_count; ++i) { + zvec_free_string(stats->index_names[i]); + } + free(stats->index_names); + } + + if (stats->index_completeness) { + free(stats->index_completeness); + } + + free(stats); + } +} + +// ============================================================================= +// QueryParams functions implementation +// ============================================================================= + +ZVecQueryParams *zvec_query_params_create(ZVecIndexType index_type) { + ZVecQueryParams *params = + static_cast(malloc(sizeof(ZVecQueryParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecQueryParams", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + params->index_type = index_type; + params->radius = 0.0f; + params->is_linear = false; + params->is_using_refiner = false; + return params; +} + +ZVecHnswQueryParams *zvec_query_params_hnsw_create(ZVecIndexType index_type, + int ef, float radius, + bool is_linear, + bool is_using_refiner) { + ZVecHnswQueryParams *params = + static_cast(malloc(sizeof(ZVecHnswQueryParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecHnswQueryParams", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + params->base.index_type = index_type; + params->base.radius = radius; + params->base.is_linear = is_linear; + params->base.is_using_refiner = is_using_refiner; + params->ef = ef; + return params; +} + +ZVecIVFQueryParams 
*zvec_query_params_ivf_create(ZVecIndexType index_type, + int nprobe, + bool is_using_refiner, + float scale_factor) { + ZVecIVFQueryParams *params = + static_cast(malloc(sizeof(ZVecIVFQueryParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecIVFQueryParams", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + params->base.index_type = index_type; + params->base.is_using_refiner = is_using_refiner; + params->nprobe = nprobe; + params->scale_factor = scale_factor; + return params; +} + +ZVecFlatQueryParams *zvec_query_params_flat_create(ZVecIndexType index_type, + bool is_using_refiner, + float scale_factor) { + ZVecFlatQueryParams *params = + static_cast(malloc(sizeof(ZVecFlatQueryParams))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecFlatQueryParams", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + params->base.index_type = index_type; + params->base.is_using_refiner = is_using_refiner; + params->scale_factor = scale_factor; + return params; +} + +ZVecQueryParamsUnion *zvec_query_params_union_create(ZVecIndexType index_type) { + ZVecQueryParamsUnion *params = + static_cast(malloc(sizeof(ZVecQueryParamsUnion))); + if (!params) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + "Failed to allocate memory for ZVecQueryParamsUnion", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + params->index_type = index_type; + + switch (index_type) { + case ZVEC_INDEX_TYPE_HNSW: + params->params.hnsw_params.base.index_type = index_type; + params->params.hnsw_params.ef = + zvec::core_interface::kDefaultHnswEfSearch; + break; + case ZVEC_INDEX_TYPE_IVF: + params->params.ivf_params.base.index_type = index_type; + params->params.ivf_params.nprobe = 10; + params->params.ivf_params.scale_factor = 10.0f; + break; + case ZVEC_INDEX_TYPE_FLAT: + params->params.flat_params.base.index_type = index_type; + 
params->params.flat_params.scale_factor = 10.0f; + break; + default: + params->params.base_params.index_type = index_type; + break; + } + + return params; +} + +void zvec_query_params_destroy(ZVecQueryParams *params) { + if (params) { + free(params); + } +} + +void zvec_query_params_hnsw_destroy(ZVecHnswQueryParams *params) { + if (params) { + free(params); + } +} + +void zvec_query_params_ivf_destroy(ZVecIVFQueryParams *params) { + if (params) { + free(params); + } +} + +void zvec_query_params_flat_destroy(ZVecFlatQueryParams *params) { + if (params) { + free(params); + } +} + +void zvec_query_params_union_destroy(ZVecQueryParamsUnion *params) { + if (params) { + free(params); + } +} + +ZVecErrorCode zvec_query_params_set_index_type(ZVecQueryParams *params, + ZVecIndexType index_type) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->index_type = index_type; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_set_radius(ZVecQueryParams *params, + float radius) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->radius = radius; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_set_is_linear(ZVecQueryParams *params, + bool is_linear) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->is_linear = is_linear; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_set_is_using_refiner(ZVecQueryParams *params, + bool is_using_refiner) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->is_using_refiner = is_using_refiner; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_hnsw_set_ef(ZVecHnswQueryParams *params, + int ef) { + if (!params) { + set_last_error("HNSW query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->ef = ef; + return 
ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_ivf_set_nprobe(ZVecIVFQueryParams *params, + int nprobe) { + if (!params) { + set_last_error("IVF query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->nprobe = nprobe; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_ivf_set_scale_factor(ZVecIVFQueryParams *params, + float scale_factor) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->scale_factor = scale_factor; + return ZVEC_OK; +} + + +// ============================================================================= +// Index Interface Implementation +// ============================================================================= + +ZVecErrorCode zvec_collection_create_index( + ZVecCollection *collection, const char *column_name, + const ZVecIndexParams *index_params) { + if (!collection || !column_name || !index_params) { + set_last_error( + "Invalid arguments: collection, column_name, and index_params cannot " + "be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR("Exception in zvec_collection_create_index", + auto coll_ptr = + reinterpret_cast *>(collection); + std::string field_name_str(column_name); + + switch (index_params->index_type) { + case ZVEC_INDEX_TYPE_INVERT: { + auto cpp_params = std::make_shared( + index_params->invert.enable_range_optimization, + index_params->invert.enable_extended_wildcard); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); +} + +case ZVEC_INDEX_TYPE_HNSW: { + auto metric = convert_metric_type(index_params->metric_type); + auto quantize = convert_quantize_type(index_params->quantize_type); + auto cpp_params = std::make_shared( + metric, index_params->hnsw.m, index_params->hnsw.ef_construction, + quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); +} + +case 
ZVEC_INDEX_TYPE_FLAT: { + auto metric = convert_metric_type(index_params->metric_type); + auto quantize = convert_quantize_type(index_params->quantize_type); + auto cpp_params = std::make_shared(metric, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); +} + +case ZVEC_INDEX_TYPE_IVF: { + auto metric = convert_metric_type(index_params->metric_type); + auto quantize = convert_quantize_type(index_params->quantize_type); + auto cpp_params = std::make_shared( + metric, index_params->ivf.n_list, index_params->ivf.n_iters, + index_params->ivf.use_soar, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); +} + +default: { + set_last_error("Unsupported index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; +} + } + ) + } + + // Legacy function - kept for backward compatibility, just calls + // zvec_collection_create_index + ZVecErrorCode zvec_collection_create_hnsw_index( + ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *hnsw_params) { + if (!hnsw_params) { + set_last_error("Invalid HNSW parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + return zvec_collection_create_index(collection, field_name, hnsw_params); + } + + ZVecErrorCode zvec_collection_create_flat_index( + ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *flat_params) { + if (!flat_params) { + set_last_error("Invalid Flat parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + return zvec_collection_create_index(collection, field_name, flat_params); + } + + ZVecErrorCode zvec_collection_create_ivf_index( + ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *ivf_params) { + if (!ivf_params) { + set_last_error("Invalid IVF parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + return zvec_collection_create_index(collection, field_name, ivf_params); + } + + ZVecErrorCode 
zvec_collection_create_invert_index( + ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *invert_params) { + if (!invert_params) { + set_last_error("Invalid Invert parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + return zvec_collection_create_index(collection, field_name, invert_params); + } + + ZVecErrorCode zvec_collection_drop_index(ZVecCollection *collection, + const char *column_name) { + if (!collection || !column_name) { + set_last_error( + "Invalid arguments: collection and column_name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + zvec::Status status = (*coll_ptr)->DropIndex(column_name); + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) + } + + ZVecErrorCode zvec_collection_optimize(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + zvec::Status status = (*coll_ptr)->Optimize(); + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) + } + + + // ============================================================================= + // Column Interface Implementation + // ============================================================================= + + ZVecErrorCode zvec_collection_add_column(ZVecCollection *collection, + const ZVecFieldSchema *field_schema, + const char *expression) { + if (!collection || !field_schema) { + set_last_error( + "Invalid arguments: collection and field_schema cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + + zvec::DataType data_type = 
convert_data_type(field_schema->data_type); + if (data_type == zvec::DataType::UNDEFINED) { + set_last_error("Invalid data type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + std::string field_name(field_schema->name->data, + field_schema->name->length); + bool is_vector_field = check_is_vector_field(*field_schema); + zvec::FieldSchema::Ptr schema; + if (is_vector_field) { + schema = std::make_shared(field_name, data_type, + field_schema->dimension, + field_schema->nullable); + } else { + schema = std::make_shared(field_name, data_type, + field_schema->nullable); + } + + std::string expr = expression ? expression : ""; + zvec::Status status = (*coll_ptr)->AddColumn(schema, expr); + + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) + } + + ZVecErrorCode zvec_collection_drop_column(ZVecCollection *collection, + const char *column_name) { + if (!collection || !column_name) { + set_last_error( + "Invalid arguments: collection and column_name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + zvec::Status status = (*coll_ptr)->DropColumn(column_name); + + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) + } + + ZVecErrorCode zvec_collection_alter_column( + ZVecCollection *collection, const char *column_name, const char *new_name, + const ZVecFieldSchema *new_schema) { + if (!collection || !column_name) { + set_last_error( + "Invalid arguments: collection and column_name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + std::string rename = new_name ? 
new_name : ""; + + zvec::FieldSchema::Ptr schema = nullptr; + if (new_schema) { + auto status = + convert_zvec_field_schema_to_internal(*new_schema, schema); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + zvec::Status status = + (*coll_ptr)->AlterColumn(column_name, rename, schema); + if (!status.ok()) { set_last_error(status.message()); } + + return status_to_error_code(status);) + } + + // ============================================================================= + // DML Interface Implementation + // ============================================================================= + + ZVecErrorCode zvec_collection_insert(ZVecCollection *collection, + const ZVecDoc **docs, size_t doc_count, + size_t *success_count, + size_t *error_count) { + if (!collection || !docs || doc_count == 0 || !success_count || + !error_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, success_count and " + "error_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_insert_docs", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + + auto result = (*coll_ptr)->Insert(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *success_count = 0; + *error_count = 0; + for (const auto &status : result.value()) { + if (status.ok()) { + (*success_count)++; + } else { + (*error_count)++; + } + } + } else { + *success_count = 0; + *error_count = doc_count; + } + + return error_code;) + } + + ZVecErrorCode zvec_collection_insert_with_results(ZVecCollection *collection, + const ZVecDoc **docs, + size_t doc_count, + ZVecWriteResult **results, + size_t *result_count) { + if (!collection || !docs || doc_count == 0 || !results || !result_count) { + set_last_error( + "Invalid arguments: collection, docs, 
doc_count, results and " + "result_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *results = nullptr; + *result_count = 0; + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_insert_with_results", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + std::vector pks = collect_doc_pks(docs, doc_count); + + auto result = (*coll_ptr)->Insert(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code != ZVEC_OK) { return error_code; } + + return build_write_results(result.value(), pks, results, result_count);) + }
+
+// Updates `doc_count` documents and reports how many succeeded / failed.
+//
+// When Update() succeeds as a whole, the per-document statuses it returns are
+// tallied into *success_count / *error_count. On any other exit taken after
+// argument validation the counters hold 0 successes and `doc_count` errors,
+// the same values zvec_collection_insert reports when Insert() fails.
+// Returns ZVEC_ERROR_INVALID_ARGUMENT (counters untouched) for null/zero
+// arguments, otherwise the code produced by handle_expected_result().
+ZVecErrorCode zvec_collection_update(ZVecCollection *collection,
+                                     const ZVecDoc **docs, size_t doc_count,
+                                     size_t *success_count,
+                                     size_t *error_count) {
+  if (!collection || !docs || doc_count == 0 || !success_count ||
+      !error_count) {
+    set_last_error(
+        "Invalid arguments: collection, docs, doc_count, success_count and "
+        "error_count cannot be null/zero");
+    return ZVEC_ERROR_INVALID_ARGUMENT;
+  }
+
+  // Pessimistic defaults so the out-parameters are never left unwritten;
+  // they are replaced below once per-document statuses are available.
+  *success_count = 0;
+  *error_count = doc_count;
+
+  ZVEC_TRY_RETURN_ERROR(
+      "Exception occurred",
+      auto coll_ptr =
+          reinterpret_cast *>(collection);
+
+      std::vector internal_docs =
+          convert_zvec_docs_to_internal(docs, doc_count);
+
+      auto result = (*coll_ptr)->Update(internal_docs);
+      ZVecErrorCode error_code = handle_expected_result(result);
+
+      if (error_code == ZVEC_OK) {
+        *success_count = 0;
+        *error_count = 0;
+        for (const auto &status : result.value()) {
+          if (status.ok()) {
+            (*success_count)++;
+          } else {
+            (*error_count)++;
+          }
+        }
+      }
+
+      return error_code;)
+}
+
+ ZVecErrorCode zvec_collection_update_with_results(ZVecCollection *collection, + const ZVecDoc **docs, + size_t doc_count, + ZVecWriteResult **results, + size_t *result_count) { + if (!collection || !docs || doc_count == 0 || !results || !result_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, results and " + "result_count cannot be
null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *results = nullptr; + *result_count = 0; + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_update_with_results", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + std::vector pks = collect_doc_pks(docs, doc_count); + + auto result = (*coll_ptr)->Update(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code != ZVEC_OK) { return error_code; } + + return build_write_results(result.value(), pks, results, result_count);) + }
+
+// Upserts `doc_count` documents and reports how many succeeded / failed.
+//
+// When Upsert() succeeds as a whole, the per-document statuses it returns are
+// tallied into *success_count / *error_count. On any other exit taken after
+// argument validation the counters hold 0 successes and `doc_count` errors,
+// the same values zvec_collection_insert reports when Insert() fails.
+// Returns ZVEC_ERROR_INVALID_ARGUMENT (counters untouched) for null/zero
+// arguments, otherwise the code produced by handle_expected_result().
+ZVecErrorCode zvec_collection_upsert(ZVecCollection *collection,
+                                     const ZVecDoc **docs, size_t doc_count,
+                                     size_t *success_count,
+                                     size_t *error_count) {
+  if (!collection || !docs || doc_count == 0 || !success_count ||
+      !error_count) {
+    set_last_error(
+        "Invalid arguments: collection, docs, doc_count, success_count and "
+        "error_count cannot be null/zero");
+    return ZVEC_ERROR_INVALID_ARGUMENT;
+  }
+
+  // Pessimistic defaults so the out-parameters are never left unwritten;
+  // they are replaced below once per-document statuses are available.
+  *success_count = 0;
+  *error_count = doc_count;
+
+  ZVEC_TRY_RETURN_ERROR(
+      "Exception occurred",
+      auto coll_ptr =
+          reinterpret_cast *>(collection);
+
+      std::vector internal_docs =
+          convert_zvec_docs_to_internal(docs, doc_count);
+
+      auto result = (*coll_ptr)->Upsert(internal_docs);
+      ZVecErrorCode error_code = handle_expected_result(result);
+
+      if (error_code == ZVEC_OK) {
+        *success_count = 0;
+        *error_count = 0;
+        for (const auto &status : result.value()) {
+          if (status.ok()) {
+            (*success_count)++;
+          } else {
+            (*error_count)++;
+          }
+        }
+      }
+
+      return error_code;)
+}
+
+ ZVecErrorCode zvec_collection_upsert_with_results(ZVecCollection *collection, + const ZVecDoc **docs, + size_t doc_count, + ZVecWriteResult **results, + size_t *result_count) { + if (!collection || !docs || doc_count == 0 || !results || !result_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, results and " + "result_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + +
*results = nullptr; + *result_count = 0; + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_upsert_with_results", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + std::vector pks = collect_doc_pks(docs, doc_count); + + auto result = (*coll_ptr)->Upsert(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code != ZVEC_OK) { return error_code; } + + return build_write_results(result.value(), pks, results, result_count);) + }
+
+// Deletes documents by primary key and reports how many succeeded / failed.
+//
+// Null entries in `pks` are skipped and never passed to Delete(), so after a
+// successful call the two counters can add up to less than `pk_count`.
+// When Delete() succeeds as a whole, the per-key statuses it returns are
+// tallied into *success_count / *error_count. On any other exit taken after
+// argument validation the counters hold 0 successes and `pk_count` errors.
+// Returns ZVEC_ERROR_INVALID_ARGUMENT (counters untouched) for null/zero
+// arguments, otherwise the code produced by handle_expected_result().
+ZVecErrorCode zvec_collection_delete(ZVecCollection *collection,
+                                     const char *const *pks, size_t pk_count,
+                                     size_t *success_count,
+                                     size_t *error_count) {
+  if (!collection || !pks || pk_count == 0 || !success_count ||
+      !error_count) {
+    set_last_error(
+        "Invalid arguments: collection, pks, pk_count, success_count and "
+        "error_count cannot be null/zero");
+    return ZVEC_ERROR_INVALID_ARGUMENT;
+  }
+
+  // Pessimistic defaults so the out-parameters are never left unwritten;
+  // they are replaced below once per-key statuses are available.
+  *success_count = 0;
+  *error_count = pk_count;
+
+  ZVEC_TRY_RETURN_ERROR(
+      "Exception occurred",
+      auto coll_ptr =
+          reinterpret_cast *>(collection);
+
+      std::vector primary_keys; primary_keys.reserve(pk_count);
+      for (size_t i = 0; i < pk_count; ++i) {
+        if (pks[i]) {
+          primary_keys.emplace_back(pks[i]);
+        }
+      }
+
+      auto result = (*coll_ptr)->Delete(primary_keys);
+      ZVecErrorCode error_code = handle_expected_result(result);
+
+      if (error_code == ZVEC_OK) {
+        *success_count = 0;
+        *error_count = 0;
+        for (const auto &status : result.value()) {
+          if (status.ok()) {
+            (*success_count)++;
+          } else {
+            (*error_count)++;
+          }
+        }
+      }
+
+      return error_code;)
+}
+
+ ZVecErrorCode zvec_collection_delete_with_results(ZVecCollection *collection, + const char *const *pks, + size_t pk_count, + ZVecWriteResult **results, + size_t *result_count) { + if (!collection || !pks || pk_count == 0 || !results || !result_count) { + set_last_error( + "Invalid arguments: collection, pks, pk_count, results and " + "result_count cannot be null/zero"); + return
ZVEC_ERROR_INVALID_ARGUMENT; + } + + *results = nullptr; + *result_count = 0; + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_delete_with_results", + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector primary_keys; primary_keys.reserve(pk_count); + for (size_t i = 0; i < pk_count; ++i) { + if (pks[i]) { + primary_keys.emplace_back(pks[i]); + } else { + primary_keys.emplace_back(""); + } + } + + auto result = (*coll_ptr)->Delete(primary_keys); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code != ZVEC_OK) { return error_code; } + + return build_write_results(result.value(), primary_keys, results, + result_count);) + } + + ZVecErrorCode zvec_collection_delete_by_filter(ZVecCollection *collection, + const char *filter) { + if (!collection || !filter) { + set_last_error("Invalid arguments: collection,filter cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>(collection); + + auto status = (*coll_ptr)->DeleteByFilter(filter); if (!status.ok()) { + set_last_error(status.message()); + return status_to_error_code(status); + } return ZVEC_OK;) + } + + // ============================================================================= + // Data query interface implementation + // ============================================================================= + + // Helper function to convert common query parameters + void convert_common_query_params(zvec::VectorQuery &internal_query, + const ZVecVectorQuery *query) { + internal_query.topk_ = query->topk; + internal_query.field_name_ = + std::string(query->field_name.data, query->field_name.length); + internal_query.filter_ = + std::string(query->filter.data, query->filter.length); + internal_query.include_vector_ = query->include_vector; + internal_query.include_doc_id_ = query->include_doc_id; + + // Binary data conversion (query_vector) + if (query->query_vector.data && 
query->query_vector.length > 0) { + internal_query.query_vector_.assign( + reinterpret_cast(query->query_vector.data), + query->query_vector.length); + } + + // Sparse vector data conversion + if (query->query_sparse_indices.data && + query->query_sparse_indices.length > 0) { + internal_query.query_sparse_indices_.assign( + reinterpret_cast(query->query_sparse_indices.data), + query->query_sparse_indices.length); + } + + if (query->query_sparse_values.data && + query->query_sparse_values.length > 0) { + internal_query.query_sparse_values_.assign( + reinterpret_cast(query->query_sparse_values.data), + query->query_sparse_values.length); + } + + // Output fields conversion + if (query->output_fields.count > 0) { + internal_query.output_fields_ = std::vector(); + for (size_t i = 0; i < query->output_fields.count; ++i) { + internal_query.output_fields_->emplace_back( + query->output_fields.strings[i].data, + query->output_fields.strings[i].length); + } + } + } + + // Helper function to convert query parameters + void convert_query_params(zvec::VectorQuery &internal_query, + const ZVecVectorQuery *query) { + convert_common_query_params(internal_query, query); + + // QueryParams conversion + if (query->query_params) { + auto query_params = std::make_shared( + static_cast(query->query_params->index_type)); + + switch (query->query_params->index_type) { + case ZVEC_INDEX_TYPE_HNSW: { + auto hnsw_params = std::make_shared( + query->query_params->params.hnsw_params.ef, + query->query_params->params.hnsw_params.base.radius, + query->query_params->params.hnsw_params.base.is_linear, + query->query_params->params.hnsw_params.base.is_using_refiner); + internal_query.query_params_ = hnsw_params; + break; + } + case ZVEC_INDEX_TYPE_IVF: { + auto ivf_params = std::make_shared( + query->query_params->params.ivf_params.nprobe, + query->query_params->params.ivf_params.base.is_using_refiner, + query->query_params->params.ivf_params.scale_factor); + internal_query.query_params_ = 
ivf_params; + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + auto flat_params = std::make_shared( + query->query_params->params.flat_params.base.is_using_refiner, + query->query_params->params.flat_params.scale_factor); + internal_query.query_params_ = flat_params; + break; + } + default: { + query_params->set_radius( + query->query_params->params.base_params.radius); + query_params->set_is_linear( + query->query_params->params.base_params.is_linear); + query_params->set_is_using_refiner( + query->query_params->params.base_params.is_using_refiner); + internal_query.query_params_ = query_params; + break; + } + } + } + } + + // Helper function to convert group by query parameters + void convert_groupby_query_params(zvec::GroupByVectorQuery &internal_query, + const ZVecGroupByVectorQuery *query) { + internal_query.field_name_ = + std::string(query->field_name.data, query->field_name.length); + internal_query.filter_ = + std::string(query->filter.data, query->filter.length); + internal_query.include_vector_ = query->include_vector; + internal_query.group_by_field_name_ = std::string( + query->group_by_field_name.data, query->group_by_field_name.length); + internal_query.group_count_ = query->group_count; + internal_query.group_topk_ = query->group_topk; + + if (query->query_vector.data && query->query_vector.length > 0) { + internal_query.query_vector_.assign( + reinterpret_cast(query->query_vector.data), + query->query_vector.length); + } + + if (query->query_sparse_indices.data && + query->query_sparse_indices.length > 0) { + internal_query.query_sparse_indices_.assign( + reinterpret_cast(query->query_sparse_indices.data), + query->query_sparse_indices.length); + } + + if (query->query_sparse_values.data && + query->query_sparse_values.length > 0) { + internal_query.query_sparse_values_.assign( + reinterpret_cast(query->query_sparse_values.data), + query->query_sparse_values.length); + } + + if (query->output_fields.count > 0) { + if 
(!internal_query.output_fields_.has_value()) { + internal_query.output_fields_ = std::vector(); + } + for (size_t i = 0; i < query->output_fields.count; ++i) { + internal_query.output_fields_->push_back( + std::string(query->output_fields.strings[i].data, + query->output_fields.strings[i].length)); + } + } + + if (query->query_params) { + auto query_params = std::make_shared( + static_cast(query->query_params->index_type)); + + switch (query->query_params->index_type) { + case ZVEC_INDEX_TYPE_HNSW: { + auto hnsw_params = std::make_shared( + query->query_params->params.hnsw_params.ef, + query->query_params->params.hnsw_params.base.radius, + query->query_params->params.hnsw_params.base.is_linear, + query->query_params->params.hnsw_params.base.is_using_refiner); + internal_query.query_params_ = hnsw_params; + break; + } + case ZVEC_INDEX_TYPE_IVF: { + auto ivf_params = std::make_shared( + query->query_params->params.ivf_params.nprobe, + query->query_params->params.ivf_params.base.is_using_refiner, + query->query_params->params.ivf_params.scale_factor); + internal_query.query_params_ = ivf_params; + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + auto flat_params = std::make_shared( + query->query_params->params.flat_params.base.is_using_refiner, + query->query_params->params.flat_params.scale_factor); + internal_query.query_params_ = flat_params; + break; + } + default: { + query_params->set_radius( + query->query_params->params.base_params.radius); + query_params->set_is_linear( + query->query_params->params.base_params.is_linear); + query_params->set_is_using_refiner( + query->query_params->params.base_params.is_using_refiner); + internal_query.query_params_ = query_params; + break; + } + } + } + } + + // Helper function to convert document results to C API format + ZVecErrorCode convert_document_results( + const std::vector> &query_results, + ZVecDoc ***results, size_t *result_count) { + *result_count = query_results.size(); + *results = + 
static_cast(malloc(*result_count * sizeof(ZVecDoc *))); + + if (!*results) { + set_last_error("Failed to allocate memory for query results"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + for (size_t i = 0; i < *result_count; ++i) { + const auto &internal_doc = query_results[i]; + // Create new document wrapper + ZVecDoc *c_doc = zvec_doc_create(); + if (!c_doc) { + // Clean up previously allocated documents + for (size_t j = 0; j < i; ++j) { + zvec_doc_destroy((*results)[j]); + } + free(*results); + *results = nullptr; + *result_count = 0; + set_last_error("Failed to create document wrapper"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Copy the C++ document to our wrapper + auto doc_ptr = + reinterpret_cast *>(c_doc); + *(*doc_ptr) = *internal_doc; // Copy assignment + (*results)[i] = c_doc; // Store the pointer, not dereference + } + + return ZVEC_OK; + }
+
+// Helper function to convert grouped document results to C API format.
+//
+// Flattens the documents of all groups into two parallel arrays with
+// *result_count entries each: (*results)[i] is a copy of the i-th document and
+// (*group_by_values)[i] is a newly created string holding the group-by value
+// of the group that document came from. When there are no documents both
+// arrays are nullptr and *result_count is 0. On failure everything allocated
+// here is released, the three outputs are reset to nullptr / 0, the last
+// error is set and ZVEC_ERROR_INTERNAL_ERROR is returned.
+ZVecErrorCode convert_grouped_document_results(
+    const std::vector &group_results, ZVecDoc ***results,
+    ZVecString ***group_by_values, size_t *result_count) {
+  *results = nullptr;
+  *group_by_values = nullptr;
+  *result_count = 0;
+
+  // Undo helper for the failure paths: destroys the first `filled`
+  // document/string pairs, frees both arrays and resets the outputs.
+  auto release_outputs = [&](size_t filled) {
+    for (size_t j = 0; j < filled; ++j) {
+      zvec_doc_destroy((*results)[j]);
+      zvec_free_string((*group_by_values)[j]);
+    }
+    free(*results);
+    free(*group_by_values);
+    *results = nullptr;
+    *group_by_values = nullptr;
+    *result_count = 0;
+  };
+
+  // Calculate total document count across all groups
+  size_t total_docs = 0;
+  for (const auto &group_result : group_results) {
+    total_docs += group_result.docs_.size();
+  }
+
+  // Nothing to convert. This also avoids malloc(0), whose return value is
+  // implementation-defined and could be mistaken for an allocation failure.
+  if (total_docs == 0) {
+    return ZVEC_OK;
+  }
+
+  // Both arrays are indexed per document, so each needs total_docs slots.
+  // (Sizing group_by_values by the number of groups overflows the buffer as
+  // soon as any group holds more than one document.)
+  *results =
+      static_cast<ZVecDoc **>(malloc(total_docs * sizeof(ZVecDoc *)));
+  *group_by_values = static_cast<ZVecString **>(
+      malloc(total_docs * sizeof(ZVecString *)));
+
+  if (!*results || !*group_by_values) {
+    release_outputs(0);  // frees whichever of the two arrays was allocated
+    set_last_error("Failed to allocate memory for query results");
+    return ZVEC_ERROR_INTERNAL_ERROR;
+  }
+
+  // Convert C++ grouped results to C API format
+  size_t doc_index = 0;
+  for (const auto &group_result : group_results) {
+    for (const auto &internal_doc : group_result.docs_) {
+      // Create new document wrapper
+      ZVecDoc *c_doc = zvec_doc_create();
+      if (!c_doc) {
+        release_outputs(doc_index);
+        set_last_error("Failed to create document wrapper");
+        return ZVEC_ERROR_INTERNAL_ERROR;
+      }
+
+      // Copy the C++ document to our wrapper
+      auto doc_ptr =
+          reinterpret_cast *>(c_doc);
+      *(*doc_ptr) = internal_doc;  // Copy assignment
+
+      ZVecString *c_group_value =
+          zvec_string_create(group_result.group_by_value_.c_str());
+      if (!c_group_value) {
+        // c_doc is not stored in *results yet, so release it separately
+        zvec_doc_destroy(c_doc);
+        release_outputs(doc_index);
+        set_last_error("Failed to create string wrapper");
+        return ZVEC_ERROR_INTERNAL_ERROR;
+      }
+
+      (*group_by_values)[doc_index] = c_group_value;
+      (*results)[doc_index] = c_doc;
+      ++doc_index;
+    }
+  }
+
+  // Publish the count only once every slot has been filled
+  *result_count = doc_index;
+  return ZVEC_OK;
+}
+
+ // Helper function to convert fetched document results to C API format + static void normalize_nullable_fields_for_fetch( + const zvec::CollectionSchema &schema, zvec::DocPtrMap &doc_map) { + std::vector nullable_fields; + nullable_fields.reserve(schema.fields().size()); + + for (const auto &field : schema.fields()) { + if (field && field->nullable()) { + nullable_fields.push_back(field->name()); + } + } + + if (nullable_fields.empty()) { + return; + } + + for (auto &[_, doc_ptr] : doc_map) { + if (!doc_ptr) { + continue; + } + + for (const auto &field_name : nullable_fields) { + if (!doc_ptr->has(field_name)) { + doc_ptr->set_null(field_name); + } + } + } + } + + ZVecErrorCode convert_fetched_document_results(const zvec::DocPtrMap &doc_map, + ZVecDoc ***results, + size_t *doc_count) { + // Calculate actual document count (some PKs might not exist) + size_t actual_count = 0; + for (const auto &[pk, doc_ptr] : doc_map) { + if (doc_ptr) { + actual_count++; + } + } + + // Allocate memory for document pointers + *doc_count =
actual_count; + if (*doc_count == 0) { + *results = nullptr; + return ZVEC_OK; + } + + *results = static_cast(malloc(*doc_count * sizeof(ZVecDoc *))); + if (!*results) { + set_last_error("Failed to allocate memory for document pointers"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Convert C++ DocPtrMap to C ZVecDoc pointer array + size_t index = 0; + for (const auto &[pk, doc_ptr] : doc_map) { + if (doc_ptr && index < *doc_count) { + // Create new document wrapper + ZVecDoc *c_doc = zvec_doc_create(); + if (!c_doc) { + // Clean up previously allocated documents + for (size_t j = 0; j < index; ++j) { + zvec_doc_destroy((*results)[j]); + } + free(*results); + *results = nullptr; + *doc_count = 0; + set_last_error("Failed to create document wrapper"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Copy the C++ document to our wrapper + auto cpp_doc_ptr = + reinterpret_cast *>(c_doc); + *(*cpp_doc_ptr) = *doc_ptr; // Copy assignment + + // Set the primary key explicitly + zvec_doc_set_pk(c_doc, pk.c_str()); + + (*results)[index] = c_doc; + ++index; + } + } + + return ZVEC_OK; + } + + ZVecErrorCode zvec_collection_query(const ZVecCollection *collection, + const ZVecVectorQuery *query, + ZVecDoc ***results, + size_t *result_count) { + if (!collection || !query || !results || !result_count) { + set_last_error( + "Invalid arguments: collection, query, results and result_count " + "cannot " + "be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>( + collection); + + // Convert query parameters using helper function + zvec::VectorQuery internal_query; + convert_query_params(internal_query, query); + + auto result = (*coll_ptr)->Query(internal_query); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + const auto &query_results = result.value(); + error_code = + convert_document_results(query_results, results, result_count); + } else { + 
*results = nullptr; + *result_count = 0; + } + + return error_code;) + } + + ZVecErrorCode zvec_collection_query_by_group( + const ZVecCollection *collection, const ZVecGroupByVectorQuery *query, + ZVecDoc ***results, ZVecString ***group_by_values, size_t *result_count) { + if (!collection || !query || !results || !group_by_values || + !result_count) { + set_last_error( + "Invalid arguments: collection, query, results, group_by_values and " + "result_count cannot " + "be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception occurred", + auto coll_ptr = + reinterpret_cast *>( + collection); + + zvec::GroupByVectorQuery internal_query; + convert_groupby_query_params(internal_query, query); + + auto result = (*coll_ptr)->GroupByQuery(internal_query); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + const auto &group_results = result.value(); + error_code = convert_grouped_document_results( + group_results, results, group_by_values, result_count); + } else { + *results = nullptr; + *group_by_values = nullptr; + *result_count = 0; + } + + return error_code;) + } + + ZVecErrorCode zvec_collection_fetch(ZVecCollection *collection, + const char *const *pks, size_t pk_count, + ZVecDoc ***results, size_t *doc_count) { + if (!collection || !pks || !results || !doc_count) { + set_last_error( + "Invalid arguments: collection, pks, results and doc_count cannot " + "be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Handle empty case + if (pk_count == 0) { + *results = nullptr; + *doc_count = 0; + return ZVEC_OK; + } + + ZVEC_TRY_RETURN_ERROR( + "Exception in zvec_collection_fetch", + auto coll_ptr = + reinterpret_cast *>( + collection); + + // Convert C array to C++ vector + std::vector pk_vector; pk_vector.reserve(pk_count); + for (size_t i = 0; i < pk_count; ++i) { + if (pks[i]) { + pk_vector.emplace_back(pks[i]); + } else { + set_last_error("Null primary key at index " + 
std::to_string(i)); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + // Call C++ fetch method + auto result = (*coll_ptr)->Fetch(pk_vector); + if (!result.has_value()) { + set_last_error("Failed to fetch documents: " + + result.error().message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + auto doc_map = result.value(); + auto schema_result = (*coll_ptr)->Schema(); + if (schema_result.has_value()) { + normalize_nullable_fields_for_fetch(schema_result.value(), doc_map); + } return convert_fetched_document_results(doc_map, results, doc_count);) + } diff --git a/src/db/CMakeLists.txt b/src/db/CMakeLists.txt index 765a1b4a..0384659b 100644 --- a/src/db/CMakeLists.txt +++ b/src/db/CMakeLists.txt @@ -14,11 +14,10 @@ cc_directory(sqlengine) file(GLOB_RECURSE ALL_DB_SRCS *.cc *.c *.h) cc_library( - NAME zvec_db STATIC STRICT SRCS_NO_GLOB + NAME zvec_db STATIC STRICT SRCS_NO_GLOB PACKED SRCS ${ALL_DB_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/proto/zvec.pb.cc INCS . ${CMAKE_CURRENT_BINARY_DIR} - PUBINCS ${PROJECT_ROOT_DIR}/src/include - LIBS + LIBS zvec_ailego zvec_core glog diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h new file mode 100644 index 00000000..e6496734 --- /dev/null +++ b/src/include/zvec/c_api.h @@ -0,0 +1,2274 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ZVEC_C_API_H +#define ZVEC_C_API_H + +#include +#include +#include +#include + +// ============================================================================= +// API Export Control +// ============================================================================= + +#if defined(_WIN32) || defined(__CYGWIN__) +#ifdef ZVEC_BUILD_SHARED +#define ZVEC_EXPORT __declspec(dllexport) +#elif defined(ZVEC_USE_SHARED) +#define ZVEC_EXPORT __declspec(dllimport) +#else +#define ZVEC_EXPORT +#endif +#define ZVEC_CALL __cdecl +#else +#if __GNUC__ >= 4 +#define ZVEC_EXPORT __attribute__((visibility("default"))) +#else +#define ZVEC_EXPORT +#endif +#define ZVEC_CALL +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + +// ============================================================================= +// Version Information +// ============================================================================= + +/** @brief Major version number */ +#define ZVEC_VERSION_MAJOR 0 + +/** @brief Minor version number */ +#define ZVEC_VERSION_MINOR 3 + +/** @brief Patch version number */ +#define ZVEC_VERSION_PATCH 0 + +/** @brief Full version string */ +#define ZVEC_VERSION_STRING "0.3.0" + +/** + * @brief Get library version information + * + * Return format: "{base_version}[-{git_info}] (built {build_time})" + * Example: "0.3.0-g3f8a2b1 (built 2025-05-13 10:30:45)" + * + * @return const char* Version string, managed internally by the library, caller + * should not free + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_get_version(void); + +/** + * @brief Check API version compatibility + * + * Check if the current library version meets the specified minimum version + * requirements Following semantic versioning specification: MAJOR.MINOR.PATCH + * + * @param major Required major version number + * @param minor Required minor version number + * @param patch Required patch version number + * @return bool Returns true if compatible, false otherwise + */ +ZVEC_EXPORT bool ZVEC_CALL 
zvec_check_version(int major, int minor, int patch); + +/** + * @brief Get major version number + * + * @return int Major version number + */ +ZVEC_EXPORT int ZVEC_CALL zvec_get_version_major(void); + +/** + * @brief Get minor version number + * + * @return int Minor version number + */ +ZVEC_EXPORT int ZVEC_CALL zvec_get_version_minor(void); + + +/** + * @brief Get patch version number + * + * @return int Patch version number + */ +ZVEC_EXPORT int ZVEC_CALL zvec_get_version_patch(void); + + +// ============================================================================= +// Error Code Definitions +// ============================================================================= + +/** + * @brief ZVec C API error code enumeration + */ +typedef enum { + ZVEC_OK = 0, /**< Success */ + ZVEC_ERROR_NOT_FOUND = 1, /**< Resource not found */ + ZVEC_ERROR_ALREADY_EXISTS = 2, /**< Resource already exists */ + ZVEC_ERROR_INVALID_ARGUMENT = 3, /**< Invalid argument */ + ZVEC_ERROR_PERMISSION_DENIED = 4, /**< Permission denied */ + ZVEC_ERROR_FAILED_PRECONDITION = 5, /**< Failed precondition */ + ZVEC_ERROR_RESOURCE_EXHAUSTED = 6, /**< Resource exhausted */ + ZVEC_ERROR_UNAVAILABLE = 7, /**< Unavailable */ + ZVEC_ERROR_INTERNAL_ERROR = 8, /**< Internal error */ + ZVEC_ERROR_NOT_SUPPORTED = 9, /**< Unsupported operation */ + ZVEC_ERROR_UNKNOWN = 10 /**< Unknown error */ +} ZVecErrorCode; + +/** + * @brief Error details structure + */ +typedef struct { + ZVecErrorCode code; /**< Error code */ + const char *message; /**< Error message */ + const char *file; /**< File where error occurred */ + int line; /**< Line number where error occurred */ + const char *function; /**< Function where error occurred */ +} ZVecErrorDetails; + +/** + * @brief Get detailed information of the last error + * @param[out] error_details Pointer to error details structure + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_get_last_error_details(ZVecErrorDetails 
*error_details); + +/** + * @brief Get last error message + * @param[out] error_msg Returned error message string (needs to be freed by + * calling free) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_get_last_error(char **error_msg); + +/** + * @brief Clear error status + */ +ZVEC_EXPORT void ZVEC_CALL zvec_clear_error(void); + + +// ============================================================================= +// Basic Data Structures +// ============================================================================= + +/** + * @brief String view structure (does not own memory) + */ +typedef struct { + const char *data; /**< String data pointer */ + size_t length; /**< String length */ +} ZVecStringView; + +/** + * @brief Mutable string structure (owns memory) + */ +typedef struct { + char *data; /**< String data pointer */ + size_t length; /**< String length */ + size_t capacity; /**< Allocated capacity */ +} ZVecString; + +/** + * @brief String array structure + */ +typedef struct { + ZVecString *strings; /**< String array */ + size_t count; /**< String count */ +} ZVecStringArray; + +/** + * @brief Float array structure + */ +typedef struct { + const float *data; + size_t length; +} ZVecFloatArray; + +/** + * @brief Integer array structure + */ +typedef struct { + const int64_t *data; + size_t length; +} ZVecInt64Array; + +/** + * @brief Byte array structure + */ +typedef struct { + const uint8_t *data; /**< Byte data pointer */ + size_t length; /**< Array length */ +} ZVecByteArray; + +/** + * @brief Mutable byte array structure + */ +typedef struct { + uint8_t *data; /**< Byte data pointer */ + size_t length; /**< Current length */ + size_t capacity; /**< Allocated capacity */ +} ZVecMutableByteArray; + +// ============================================================================= +// String management functions +// ============================================================================= + +/** + * @brief Create 
string from C string + * @param str C string + * @return ZVecString* Pointer to the newly created string + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_string_create(const char *str); + +/** + * @brief Create string from string view + * + * Creates a new ZVecString by copying data from a ZVecStringView. + * The created string owns its memory and must be freed with zvec_free_string(). + * + * @param view Pointer to source string view (must not be NULL) + * @return ZVecString* New string instance on success, NULL on error + * @note Caller is responsible for freeing the returned string + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL +zvec_string_create_from_view(const ZVecStringView *view); + +/** + * @brief Create binary-safe string from raw data + * + * Creates a new ZVecString from raw binary data that may contain null bytes. + * Unlike zvec_string_create(), this function takes explicit length parameter + * and doesn't rely on null-termination. + * The created string owns its memory and must be freed with zvec_free_string(). + * + * @param data Raw binary data pointer (must not be NULL) + * @param length Length of data in bytes + * @return ZVecString* New string instance on success, NULL on error + * @note Caller is responsible for freeing the returned string + * @note This function is suitable for binary data containing null bytes + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_bin_create(const uint8_t *data, + size_t length); + +/** + * @brief Copy string + * + * Creates a new ZVecString by copying an existing string. + * The created string owns its memory and must be freed with zvec_free_string(). 
+ * + * @param str Pointer to source string (must not be NULL) + * @return ZVecString* New string instance on success, NULL on error + * @note Caller is responsible for freeing the returned string + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_string_copy(const ZVecString *str); + +/** + * @brief Get C string from ZVecString + * @param str ZVecString pointer + * @return const char* C string + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_string_c_str(const ZVecString *str); + +/** + * @brief Get string length + * @param str ZVecString pointer + * @return size_t String length + */ +ZVEC_EXPORT size_t ZVEC_CALL zvec_string_length(const ZVecString *str); + +/** + * @brief Compare two strings + * @param str1 First string + * @param str2 Second string + * @return int Comparison result (-1, 0, or 1) + */ +ZVEC_EXPORT int ZVEC_CALL zvec_string_compare(const ZVecString *str1, + const ZVecString *str2); + +/** + * @brief Free string memory + * @param str String pointer to free + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_string(ZVecString *str); + + +// ============================================================================= +// Array Memory management functions +// ============================================================================= + +/** + * @brief Create a new string array + * @param count Initial number of strings to allocate space for + * @return Pointer to the newly created string array, or NULL on failure + */ +ZVEC_EXPORT ZVecStringArray *ZVEC_CALL zvec_string_array_create(size_t count); + +/** + * @brief Add a string to the string array at specified index + * @param array String array pointer + * @param idx Index position where the string should be added + * @param str Null-terminated C string to add + */ +ZVEC_EXPORT void ZVEC_CALL zvec_string_array_add(ZVecStringArray *array, + size_t idx, const char *str); + +/** + * @brief Destroy string array and free all associated memory + * @param array String array pointer to destroy + */ +ZVEC_EXPORT void 
ZVEC_CALL zvec_string_array_destroy(ZVecStringArray *array); + +/** + * @brief Create a new mutable byte array + * @param capacity Initial capacity in bytes + * @return Pointer to the newly created byte array, or NULL on failure + */ +ZVEC_EXPORT ZVecMutableByteArray *ZVEC_CALL +zvec_byte_array_create(size_t capacity); + + +/** + * @brief Destroy byte array and free all associated memory + * @param array Byte array pointer to destroy + */ +ZVEC_EXPORT void ZVEC_CALL zvec_byte_array_destroy(ZVecMutableByteArray *array); + +/** + * @brief Create a new float array + * @param count Number of floats to allocate space for + * @return Pointer to the newly created float array, or NULL on failure + */ +ZVEC_EXPORT ZVecFloatArray *ZVEC_CALL zvec_float_array_create(size_t count); + +/** + * @brief Destroy float array and free all associated memory + * @param array Float array pointer to destroy + */ +ZVEC_EXPORT void ZVEC_CALL zvec_float_array_destroy(ZVecFloatArray *array); + +/** + * @brief Create a new int64 array + * @param count Number of int64 values to allocate space for + * @return Pointer to the newly created int64 array, or NULL on failure + */ +ZVEC_EXPORT ZVecInt64Array *ZVEC_CALL zvec_int64_array_create(size_t count); + +/** + * @brief Destroy int64 array and free all associated memory + * @param array Int64 array pointer to destroy + */ +ZVEC_EXPORT void ZVEC_CALL zvec_int64_array_destroy(ZVecInt64Array *array); + +/** + * @brief Release uint8_t array memory + * + * @param array uint8_t array pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_uint8_array(uint8_t *array); + +/** + * @brief Free heap memory allocated by zvec C API. + * + * Use this helper for pointer-returning APIs that document malloc-allocated + * buffers. This avoids allocator mismatch across DLL boundaries. 
+ * + * @param ptr Memory pointer returned by zvec C API + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_ptr(void *ptr); + + +// ============================================================================= +// Configuration and Options Structures +// ============================================================================= + +/** + * @brief Log level enumeration + */ +typedef enum { + ZVEC_LOG_LEVEL_DEBUG = 0, + ZVEC_LOG_LEVEL_INFO = 1, + ZVEC_LOG_LEVEL_WARN = 2, + ZVEC_LOG_LEVEL_ERROR = 3, + ZVEC_LOG_LEVEL_FATAL = 4 +} ZVecLogLevel; + +/** + * @brief Log type enumeration + */ +typedef enum { ZVEC_LOG_TYPE_CONSOLE = 0, ZVEC_LOG_TYPE_FILE = 1 } ZVecLogType; + +/** + * @brief Console log configuration structure + */ +typedef struct { + ZVecLogLevel level; /**< Log level */ +} ZVecConsoleLogConfig; + +/** + * @brief File log configuration structure + */ +typedef struct { + ZVecLogLevel level; /**< Log level */ + ZVecString dir; /**< Log directory */ + ZVecString basename; /**< Log file base name */ + uint32_t file_size; /**< Log file size (MB) */ + uint32_t overdue_days; /**< Log expiration days */ +} ZVecFileLogConfig; + +/** + * @brief ZVec configuration data structure (corresponds to zvec::ConfigData) + */ +typedef struct { + uint64_t memory_limit_bytes; /**< Memory limit in bytes */ + + // log + ZVecLogType log_type; + void *log_config; /**< Log configuration (ZVecConsoleLogConfig or + ZVecFileLogConfig) */ + + // query + uint32_t query_thread_count; /**< Query thread count */ + float invert_to_forward_scan_ratio; /**< Inverted to forward scan ratio */ + float brute_force_by_keys_ratio; /**< Brute force by keys ratio */ + + // optimize + uint32_t optimize_thread_count; /**< Optimize thread count */ +} ZVecConfigData; + +/** + * @brief Create console log configuration + * @param level Log level + * @return ZVecConsoleLogConfig* Pointer to the newly created console log + * configuration + */ +ZVEC_EXPORT ZVecConsoleLogConfig *ZVEC_CALL 
+zvec_config_console_log_create(ZVecLogLevel level); + +/** + * @brief Create file log configuration + * @param level Log level + * @param dir Log directory + * @param basename Log file base name + * @param file_size Log file size (MB) + * @param overdue_days Log expiration days + * @return ZVecFileLogConfig* Pointer to the newly created file log + * configuration + */ +ZVEC_EXPORT ZVecFileLogConfig *ZVEC_CALL zvec_config_file_log_create( + ZVecLogLevel level, const char *dir, const char *basename, + uint32_t file_size, uint32_t overdue_days); + +/** + * @brief Destroy console log configuration + * @param config Console log configuration pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_config_console_log_destroy(ZVecConsoleLogConfig *config); + +/** + * @brief Destroy file log configuration + * @param config File log configuration pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_config_file_log_destroy(ZVecFileLogConfig *config); + + +/** + * @brief Create configuration data + * @return ZVecConfigData* Pointer to the newly created configuration data + */ +ZVEC_EXPORT ZVecConfigData *ZVEC_CALL zvec_config_data_create(void); + +/** + * @brief Destroy configuration data + * @param config Configuration data pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_config_data_destroy(ZVecConfigData *config); + +/** + * @brief Set memory limit in configuration data + * @param config Configuration data pointer + * @param memory_limit_bytes Memory limit in bytes + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_memory_limit( + ZVecConfigData *config, uint64_t memory_limit_bytes); + +/** + * @brief Set log configuration in configuration data + * @param config Configuration data pointer + * @param log_type Log type (ZVEC_LOG_TYPE_CONSOLE or ZVEC_LOG_TYPE_FILE); + * presumably selects whether log_config is read as ZVecConsoleLogConfig or + * ZVecFileLogConfig - TODO confirm + * @param log_config Log configuration pointer (ownership is transferred to + * config, do not free separately) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_log_config( + ZVecConfigData *config, 
ZVecLogType log_type, void *log_config); + +/** + * @brief Set query thread count in configuration data + * @param config Configuration data pointer + * @param thread_count Query thread count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_query_thread_count( + ZVecConfigData *config, uint32_t thread_count); + +/** + * @brief Set optimize thread count in configuration data + * @param config Configuration data pointer + * @param thread_count Optimize thread count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_optimize_thread_count( + ZVecConfigData *config, uint32_t thread_count); + +// ============================================================================= +// Initialization and Cleanup Interface +// ============================================================================= + +/** + * @brief Initialize ZVec library + * @param config Configuration data (optional, NULL means using default + * configuration) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_initialize(const ZVecConfigData *config); + +/** + * @brief Clean up ZVec library resources + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_shutdown(void); + +/** + * @brief Check if library is initialized + * @param[out] initialized Whether initialized + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_is_initialized(bool *initialized); + +// ============================================================================= +// Data Type Enumerations +// ============================================================================= + +/** + * @brief Data type enumeration + */ +typedef enum { + ZVEC_DATA_TYPE_UNDEFINED = 0, + + ZVEC_DATA_TYPE_BINARY = 1, + ZVEC_DATA_TYPE_STRING = 2, + ZVEC_DATA_TYPE_BOOL = 3, + ZVEC_DATA_TYPE_INT32 = 4, + ZVEC_DATA_TYPE_INT64 = 5, + ZVEC_DATA_TYPE_UINT32 = 6, + 
ZVEC_DATA_TYPE_UINT64 = 7, + ZVEC_DATA_TYPE_FLOAT = 8, + ZVEC_DATA_TYPE_DOUBLE = 9, + + ZVEC_DATA_TYPE_VECTOR_BINARY32 = 20, + ZVEC_DATA_TYPE_VECTOR_BINARY64 = 21, + ZVEC_DATA_TYPE_VECTOR_FP16 = 22, + ZVEC_DATA_TYPE_VECTOR_FP32 = 23, + ZVEC_DATA_TYPE_VECTOR_FP64 = 24, + ZVEC_DATA_TYPE_VECTOR_INT4 = 25, + ZVEC_DATA_TYPE_VECTOR_INT8 = 26, + ZVEC_DATA_TYPE_VECTOR_INT16 = 27, + + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16 = 30, + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 = 31, + + ZVEC_DATA_TYPE_ARRAY_BINARY = 40, + ZVEC_DATA_TYPE_ARRAY_STRING = 41, + ZVEC_DATA_TYPE_ARRAY_BOOL = 42, + ZVEC_DATA_TYPE_ARRAY_INT32 = 43, + ZVEC_DATA_TYPE_ARRAY_INT64 = 44, + ZVEC_DATA_TYPE_ARRAY_UINT32 = 45, + ZVEC_DATA_TYPE_ARRAY_UINT64 = 46, + ZVEC_DATA_TYPE_ARRAY_FLOAT = 47, + ZVEC_DATA_TYPE_ARRAY_DOUBLE = 48 +} ZVecDataType; + +/** + * @brief Index type enumeration + */ +typedef enum { + ZVEC_INDEX_TYPE_UNDEFINED = 0, + ZVEC_INDEX_TYPE_HNSW = 1, + ZVEC_INDEX_TYPE_IVF = 3, + ZVEC_INDEX_TYPE_FLAT = 4, + ZVEC_INDEX_TYPE_INVERT = 10 +} ZVecIndexType; + +/** + * @brief Distance metric type enumeration + */ +typedef enum { + ZVEC_METRIC_TYPE_UNDEFINED = 0, + ZVEC_METRIC_TYPE_L2 = 1, + ZVEC_METRIC_TYPE_IP = 2, + ZVEC_METRIC_TYPE_COSINE = 3, + ZVEC_METRIC_TYPE_MIPSL2 = 4 +} ZVecMetricType; + +/** + * @brief Quantization type enumeration + */ +typedef enum { + ZVEC_QUANTIZE_TYPE_UNDEFINED = 0, + ZVEC_QUANTIZE_TYPE_FP16 = 1, + ZVEC_QUANTIZE_TYPE_INT8 = 2, + ZVEC_QUANTIZE_TYPE_INT4 = 3 +} ZVecQuantizeType; + +// ============================================================================= +// Forward Declarations +// ============================================================================= + +typedef struct ZVecCollection ZVecCollection; + +// ============================================================================= +// Index Parameters Structures +// ============================================================================= + +/** + * @brief Flattened index parameters structure + * + * Uses a union to 
store specific parameters for different index types, + * avoiding C++-style inheritance nesting. Supports stack allocation, + * reducing malloc/free overhead. + */ +typedef struct { + ZVecIndexType index_type; /**< Index type */ + ZVecMetricType metric_type; /**< Distance metric type (for vector indexes) */ + ZVecQuantizeType quantize_type; /**< Quantization type (for vector indexes) */ + + union { + /** @brief Inverted index specific parameters */ + struct { + bool enable_range_optimization; /**< Whether to enable range optimization + */ + bool enable_extended_wildcard; /**< Whether to enable extended wildcard */ + } invert; + + /** @brief HNSW index specific parameters */ + struct { + int m; /**< Graph connectivity parameter */ + int ef_construction; /**< Exploration factor during construction */ + int ef_search; /**< Exploration factor during search */ + } hnsw; + + /** @brief IVF index specific parameters */ + struct { + int n_list; /**< Number of cluster centers */ + int n_iters; /**< Number of iterations */ + bool use_soar; /**< Whether to use SOAR algorithm */ + int n_probe; /**< Number of clusters to probe during search */ + } ivf; + + /** @brief Flat index has no additional parameters, + * reserved for alignment */ + struct { + int _reserved; + } flat; + }; +} ZVecIndexParams; + +// ============================================================================= +// Field Schema Structures +// ============================================================================= + +/** + * @brief Field schema structure + */ +typedef struct { + ZVecString *name; /**< Field name */ + ZVecDataType data_type; /**< Data type */ + bool nullable; /**< Whether nullable */ + uint32_t dimension; /**< Vector dimension (only used for vector fields) */ + ZVecIndexParams index_params; /**< Index parameters (embedded, not pointer) */ + bool has_index; /**< Whether this field has an index */ +} ZVecFieldSchema; + + +// 
============================================================================= +// Index Parameters Interface +// ============================================================================= + +/** + * @brief Initialize index parameters with default values based on index type + * @param params Index parameters structure pointer + * @param index_type Index type + * @param metric_type Metric type (for vector indexes) + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_init(ZVecIndexParams *params, + ZVecIndexType index_type, + ZVecMetricType metric_type); + +/** + * @brief Set HNSW specific parameters + * @param params Index parameters structure pointer (must be HNSW type) + * @param m Graph connectivity parameter + * @param ef_construction Construction exploration factor + * @param ef_search Search exploration factor + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_set_hnsw(ZVecIndexParams *params, + int m, + int ef_construction, + int ef_search); + +/** + * @brief Set IVF specific parameters + * @param params Index parameters structure pointer (must be IVF type) + * @param n_list Number of cluster centers + * @param n_iters Number of iterations + * @param use_soar Whether to use SOAR algorithm + * @param n_probe Search probe count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_set_ivf(ZVecIndexParams *params, + int n_list, int n_iters, + bool use_soar, + int n_probe); + +/** + * @brief Set invert index specific parameters + * @param params Index parameters structure pointer (must be INVERT type) + * @param enable_range_opt Whether to enable range optimization + * @param enable_wildcard Whether to enable extended wildcard + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_set_invert(ZVecIndexParams *params, + bool enable_range_opt, + bool enable_wildcard); + +// ============================================================================= +// Query Parameters Structures +// ============================================================================= + +/** + * 
@brief Base query parameters structure (corresponds to zvec::QueryParams) + */ +typedef struct { + ZVecIndexType index_type; /**< Index type */ + float radius; /**< Search radius */ + bool is_linear; /**< Whether linear search */ + bool is_using_refiner; /**< Whether using refiner */ +} ZVecQueryParams; + +/** + * @brief HNSW query parameters structure (corresponds to zvec::HnswQueryParams) + */ +typedef struct { + ZVecQueryParams base; /**< Inherit base query parameters */ + int ef; /**< Exploration factor during search */ +} ZVecHnswQueryParams; + +/** + * @brief IVF query parameters structure (corresponds to zvec::IVFQueryParams) + */ +typedef struct { + ZVecQueryParams base; /**< Inherit base query parameters */ + int nprobe; /**< Number of clusters to probe during search */ + float scale_factor; /**< Scale factor */ +} ZVecIVFQueryParams; + +/** + * @brief Flat query parameters structure (corresponds to zvec::FlatQueryParams) + */ +typedef struct { + ZVecQueryParams base; /**< Inherit base query parameters */ + float scale_factor; /**< Scale factor */ +} ZVecFlatQueryParams; + +/** + * @brief Query parameters union (supports query parameters for different index + * types) + */ +typedef struct { + ZVecIndexType index_type; /**< Index type, used to distinguish the parameter + type stored in the union */ + union { + ZVecQueryParams base_params; /**< Base query parameters */ + ZVecHnswQueryParams hnsw_params; /**< HNSW query parameters */ + ZVecIVFQueryParams ivf_params; /**< IVF query parameters */ + ZVecFlatQueryParams flat_params; /**< Flat query parameters */ + } params; +} ZVecQueryParamsUnion; + +// ============================================================================= +// Query Structures (Updated Version, Including QueryParams) +// ============================================================================= + +/** + * @brief Vector query structure (aligned with zvec::VectorQuery, includes + * QueryParams) + */ +typedef struct { + int topk; /**< 
Number of results to return */ + ZVecString field_name; /**< Query field name */ + ZVecByteArray query_vector; /**< Query vector (binary data) */ + ZVecByteArray + query_sparse_indices; /**< Sparse vector indices (binary data) */ + ZVecByteArray query_sparse_values; /**< Sparse vector values (binary data) */ + ZVecString filter; /**< Filter expression */ + bool include_vector; /**< Whether to include vector data */ + bool include_doc_id; /**< Whether to include document ID */ + ZVecStringArray output_fields; /**< Output field list (NULL means all) */ + ZVecQueryParamsUnion *query_params; /**< Query parameters (optional, NULL + means using default parameters) */ +} ZVecVectorQuery; + +/** + * @brief Grouped vector query structure (aligned with zvec::GroupByVectorQuery, + * includes QueryParams) + */ +typedef struct { + ZVecString field_name; /**< Query field name */ + ZVecByteArray query_vector; /**< Query vector (binary data) */ + ZVecByteArray + query_sparse_indices; /**< Sparse vector indices (binary data) */ + ZVecByteArray query_sparse_values; /**< Sparse vector values (binary data) */ + ZVecString filter; /**< Filter expression */ + bool include_vector; /**< Whether to include vector data */ + ZVecStringArray output_fields; /**< Output field list */ + ZVecString group_by_field_name; /**< Group by field name */ + uint32_t group_count; /**< Number of groups */ + uint32_t group_topk; /**< Number of results to return per group */ + ZVecQueryParamsUnion *query_params; /**< Query parameters (optional, NULL + means using default parameters) */ +} ZVecGroupByVectorQuery; + + +// ============================================================================= +// Query Parameters Management Functions +// ============================================================================= + +/** + * @brief Create base query parameters + * @param index_type Index type + * @return ZVecQueryParams* Pointer to the newly created query parameters + */ +ZVEC_EXPORT ZVecQueryParams 
*ZVEC_CALL +zvec_query_params_create(ZVecIndexType index_type); + +/** + * @brief Create HNSW query parameters + * @param index_type Index type (should be ZVEC_INDEX_TYPE_HNSW) + * @param ef Exploration factor during search + * @param radius Search radius + * @param is_linear Whether linear search + * @param is_using_refiner Whether using refiner + * @return ZVecHnswQueryParams* Pointer to the newly created HNSW query + * parameters + */ +ZVEC_EXPORT ZVecHnswQueryParams *ZVEC_CALL +zvec_query_params_hnsw_create(ZVecIndexType index_type, int ef, float radius, + bool is_linear, bool is_using_refiner); + +/** + * @brief Create IVF query parameters + * @param index_type Index type (should be ZVEC_INDEX_TYPE_IVF) + * @param nprobe Number of clusters to probe during search + * @param is_using_refiner Whether using refiner + * @param scale_factor Scale factor + * @return ZVecIVFQueryParams* Pointer to the newly created IVF query parameters + */ +ZVEC_EXPORT ZVecIVFQueryParams *ZVEC_CALL +zvec_query_params_ivf_create(ZVecIndexType index_type, int nprobe, + bool is_using_refiner, float scale_factor); + +/** + * @brief Create Flat query parameters + * @param index_type Index type (should be ZVEC_INDEX_TYPE_FLAT) + * @param is_using_refiner Whether using refiner + * @param scale_factor Scale factor + * @return ZVecFlatQueryParams* Pointer to the newly created Flat query + * parameters + */ +ZVEC_EXPORT ZVecFlatQueryParams *ZVEC_CALL zvec_query_params_flat_create( + ZVecIndexType index_type, bool is_using_refiner, float scale_factor); + +/** + * @brief Create query parameters union + * @param index_type Index type + * @return ZVecQueryParamsUnion* Pointer to the newly created query parameters + * union + */ +ZVEC_EXPORT ZVecQueryParamsUnion *ZVEC_CALL +zvec_query_params_union_create(ZVecIndexType index_type); + + +/** + * @brief Destroy base query parameters + * @param params query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_query_params_destroy(ZVecQueryParams 
*params); + +/** + * @brief Destroy HNSW query parameters + * @param params HNSW query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_hnsw_destroy(ZVecHnswQueryParams *params); + +/** + * @brief Destroy IVF query parameters + * @param params IVF query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_ivf_destroy(ZVecIVFQueryParams *params); + +/** + * @brief Destroy Flat query parameters + * @param params Flat query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_flat_destroy(ZVecFlatQueryParams *params); + +/** + * @brief Destroy query parameters union + * @param params Query parameters union pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_union_destroy(ZVecQueryParamsUnion *params); + +/** + * @brief Set query parameters index type + * @param params Query parameters pointer + * @param index_type Index type + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_set_index_type( + ZVecQueryParams *params, ZVecIndexType index_type); + +/** + * @brief Set search radius for query parameters + * @param params Query parameters pointer + * @param radius Search radius + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_set_radius(ZVecQueryParams *params, float radius); + +/** + * @brief Set whether to use linear search for query parameters + * @param params Query parameters pointer + * @param is_linear Whether to use linear search + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_set_is_linear(ZVecQueryParams *params, bool is_linear); + +/** + * @brief Set whether to use refiner for query parameters + * @param params Query parameters pointer + * @param is_using_refiner Whether to use refiner + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_set_is_using_refiner( + ZVecQueryParams *params, bool is_using_refiner); + +/** + * @brief Set 
exploration factor for HNSW query parameters + * @param params HNSW query parameters pointer + * @param ef Exploration factor + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_hnsw_set_ef(ZVecHnswQueryParams *params, int ef); + +/** + * @brief Set number of probe clusters for IVF query parameters + * @param params IVF query parameters pointer + * @param nprobe Number of probe clusters + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_ivf_set_nprobe(ZVecIVFQueryParams *params, int nprobe); + +/** + * @brief Set scale factor for IVF query parameters + * @param params IVF query parameters pointer (NOTE(review): ZVecFlatQueryParams also has a scale_factor but no nprobe member before it, so the two layouts differ - confirm whether Flat parameters may be passed here) + * @param scale_factor Scale factor + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_ivf_set_scale_factor( + ZVecIVFQueryParams *params, float scale_factor); + +/** + * @brief Collection options structure + */ +typedef struct { + bool enable_mmap; /**< Whether to enable memory mapping */ + size_t max_buffer_size; /**< Maximum buffer size */ + bool read_only; /**< Whether read-only mode */ + uint64_t max_doc_count_per_segment; /**< Maximum document count per segment */ +} ZVecCollectionOptions; + + +/** + * @brief Collection statistics structure + */ +typedef struct { + uint64_t doc_count; /**< Total document count */ + ZVecString **index_names; /**< Index name array */ + float *index_completeness; /**< Index completeness array */ + size_t index_count; /**< Index name count */ +} ZVecCollectionStats; + + +/** + * @brief Create field schema + * @param name Field name + * @param data_type Data type + * @param nullable Whether nullable + * @param dimension Vector dimension + * @return ZVecFieldSchema* Pointer to the newly created field schema + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL +zvec_field_schema_create(const char *name, ZVecDataType data_type, + bool nullable, uint32_t dimension); + +/** + * @brief Destroy field 
schema + * @param schema Field schema pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_destroy(ZVecFieldSchema *schema); + +/** + * @brief Set index parameters for field + * @param schema Field schema pointer + * @param index_params Index parameters pointer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_field_schema_set_index_params( + ZVecFieldSchema *schema, const ZVecIndexParams *index_params); + + +/** + * @brief Set inverted index parameters for field schema + * @param field_schema Field schema pointer + * @param invert_params Inverted index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_invert_index( + ZVecFieldSchema *field_schema, const ZVecIndexParams *invert_params); + +/** + * @brief Set HNSW index parameters for field schema + * @param field_schema Field schema pointer + * @param hnsw_params HNSW index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_hnsw_index( + ZVecFieldSchema *field_schema, const ZVecIndexParams *hnsw_params); + +/** + * @brief Set Flat index parameters for field schema + * @param field_schema Field schema pointer + * @param flat_params Flat index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_flat_index( + ZVecFieldSchema *field_schema, const ZVecIndexParams *flat_params); + +/** + * @brief Set IVF index parameters for field schema + * @param field_schema Field schema pointer + * @param ivf_params IVF index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_set_ivf_index( + ZVecFieldSchema *field_schema, const ZVecIndexParams *ivf_params); + + +// ============================================================================= +// Collection Schema Structures +// ============================================================================= + +/** + * @brief Collection schema structure + */ +typedef struct { + ZVecString *name; /**< Collection name */ + ZVecFieldSchema **fields; /**< Field 
array */ + size_t field_count; /**< Field count */ + size_t field_capacity; /**< Field array capacity */ + uint64_t max_doc_count_per_segment; /**< Maximum document count per segment */ +} ZVecCollectionSchema; + +/** + * @brief Create collection schema + * @param name Collection name + * @return ZVecCollectionSchema* Pointer to the newly created collection schema + */ +ZVEC_EXPORT ZVecCollectionSchema *ZVEC_CALL +zvec_collection_schema_create(const char *name); + +/** + * @brief Destroy collection schema + * @param schema Collection schema pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_collection_schema_destroy(ZVecCollectionSchema *schema); + +/** + * @brief Add field to collection schema + * @param schema Collection schema pointer + * @param field Field schema pointer (function takes ownership) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_add_field( + ZVecCollectionSchema *schema, ZVecFieldSchema *field); + +/** + * @brief Add multiple fields to collection schema at once + * + * @param schema Collection schema pointer + * @param fields Array of fields to add + * @param field_count Number of fields to add + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_add_fields( + ZVecCollectionSchema *schema, const ZVecFieldSchema *fields, + size_t field_count); + +/** + * @brief Remove field + * @param schema Collection schema pointer + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_remove_field( + ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Remove multiple fields from collection schema at once + * + * @param schema Collection schema pointer + * @param field_names Array of field names to remove + * @param field_count Number of fields to remove + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL 
zvec_collection_schema_remove_fields( + ZVecCollectionSchema *schema, const char *const *field_names, + size_t field_count); + +/** + * @brief Get field count + * + * @param schema Collection schema pointer + * @return size_t Field count + */ +ZVEC_EXPORT size_t ZVEC_CALL +zvec_collection_schema_get_field_count(const ZVecCollectionSchema *schema); + +/** + * @brief Find field + * @param schema Collection schema pointer + * @param field_name Field name + * @return ZVecFieldSchema* Field schema pointer, returns NULL if not found + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL zvec_collection_schema_find_field( + const ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Validate collection schema + * @param schema Collection schema pointer + * @param[out] error_msg Error message (needs to be freed by calling + * zvec_free_string) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_validate( + const ZVecCollectionSchema *schema, ZVecString **error_msg); + + +/** + * @brief Get field by index + * @param schema Collection schema pointer + * @param index Field index + * @return ZVecFieldSchema* Field schema pointer + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL zvec_collection_schema_get_field( + const ZVecCollectionSchema *schema, size_t index); + +/** + * @brief Set maximum document count per segment + * @param schema Collection schema pointer + * @param max_doc_count Maximum document count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_schema_set_max_doc_count_per_segment( + ZVecCollectionSchema *schema, uint64_t max_doc_count); + +/** + * @brief Get maximum document count per segment of collection schema + * + * @param schema Collection schema pointer + * @return uint64_t Maximum document count per segment + */ +ZVEC_EXPORT uint64_t ZVEC_CALL +zvec_collection_schema_get_max_doc_count_per_segment( + const ZVecCollectionSchema *schema); + + +// 
============================================================================= +// Collection Management Functions +// ============================================================================= + +/** + * @brief Create and open collection + * @param path Collection path + * @param schema Collection schema pointer + * @param options Collection options pointer (NULL uses default options) + * @param[out] collection Returned collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_and_open( + const char *path, const ZVecCollectionSchema *schema, + const ZVecCollectionOptions *options, ZVecCollection **collection); + + +/** + * @brief Open existing collection + * @param path Collection path + * @param options Collection options pointer (NULL uses default options) + * @param[out] collection Returned collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_open(const char *path, const ZVecCollectionOptions *options, + ZVecCollection **collection); + + +/** + * @brief Close collection + * @param collection Collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_close(ZVecCollection *collection); + + +/** + * @brief Destroy collection + * + * @param collection Collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_destroy(ZVecCollection *collection); + +/** + * @brief Flush collection data to disk + * @param collection Collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_flush(ZVecCollection *collection); + +/** + * @brief Get collection schema + * @param collection Collection handle + * @param[out] schema + * Returned collection schema pointer (needs to be freed by calling + * zvec_collection_schema_destroy) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode 
ZVEC_CALL zvec_collection_get_schema( + const ZVecCollection *collection, ZVecCollectionSchema **schema); + + +/** + * @brief Initialize default collection options + * @param options Collection options structure pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_collection_options_init_default(ZVecCollectionOptions *options); + +/** + * @brief Get collection options + * @param collection Collection handle + * @param[out] options + * Returned collection options pointer (needs to be freed by calling + * zvec_collection_options_destroy) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_options( + const ZVecCollection *collection, ZVecCollectionOptions **options); + +/** + * @brief Get collection statistics + * @param collection Collection handle + * @param[out] stats + * Returned statistics pointer (needs to be freed by calling + * zvec_collection_stats_destroy) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_stats( + const ZVecCollection *collection, ZVecCollectionStats **stats); + +/** + * @brief Destroy collection statistics + * @param stats Statistics pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_collection_stats_destroy(ZVecCollectionStats *stats); + +/** + * @brief Free field schema memory + * + * @param field_schema Field schema pointer to be freed + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_free_field_schema(ZVecFieldSchema *field_schema); + + +// ============================================================================= +// Index Management Interface +// ============================================================================= + +/** + * @brief Create index + * + * @param collection Collection handle + * @param field_name Field name + * @param index_params Index parameters + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_create_index(ZVecCollection *collection, const char *field_name, + const ZVecIndexParams 
*index_params); + +/** + * @brief Create HNSW index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param hnsw_params HNSW index parameters + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_hnsw_index( + ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *hnsw_params); + +/** + * @brief Create Flat index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param flat_params Flat index parameters + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_flat_index( + ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *flat_params); + +/** + * @brief Create IVF index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param ivf_params IVF index parameters + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_ivf_index( + ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *ivf_params); + +/** + * @brief Create inverted (scalar) index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param invert_params Inverted index parameters + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_invert_index( + ZVecCollection *collection, const char *field_name, + const ZVecIndexParams *invert_params); + +/** + * @brief Drop index + * @param collection Collection handle + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_drop_index(ZVecCollection *collection, const char *field_name); + +/** + * @brief Optimize collection (rebuild indexes, merge segments, etc.) 
+ * @param collection Collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_optimize(ZVecCollection *collection); + +// ============================================================================= +// Column Management Interface (DDL) +// ============================================================================= + +/** + * @brief Add column + * @param collection Collection handle + * @param field_schema Field schema pointer + * @param default_expression Default value expression (can be NULL) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_add_column( + ZVecCollection *collection, const ZVecFieldSchema *field_schema, + const char *default_expression); + +/** + * @brief Drop column + * @param collection Collection handle + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_drop_column(ZVecCollection *collection, const char *field_name); + +/** + * @brief Alter column + * @param collection Collection handle + * @param old_name Original field name + * @param new_name New field name (can be NULL to indicate no renaming) + * @param new_schema New field schema (can be NULL to indicate no schema + * modification) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_alter_column( + ZVecCollection *collection, const char *old_name, const char *new_name, + const ZVecFieldSchema *new_schema); + + +/** + * @brief Document structure (opaque pointer mode) + * Internal implementation details are not visible to the outside, and + * operations are performed through API functions + */ +typedef struct ZVecDoc ZVecDoc; + +/** + * @brief Per-document status returned by detailed DML APIs. 
+ */ +typedef struct { + const char *pk; /**< Primary key (allocated by API) */ + ZVecErrorCode code; /**< Per-document status code */ + const char *message; /**< Per-document status message (allocated by API) */ +} ZVecWriteResult; + +// ============================================================================= +// Data Manipulation Interface (DML) +// ============================================================================= + +/** + * @brief Insert documents into collection + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] success_count Number of successfully inserted documents + * @param[out] error_count Number of failed insertions + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_insert( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Insert documents and return per-document statuses. 
+ * + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] results Per-document result array (free with + * zvec_write_results_free) + * @param[out] result_count Number of result entries + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_insert_with_results( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + ZVecWriteResult **results, size_t *result_count); + +/** + * @brief Update documents in collection + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] success_count Number of successfully updated documents + * @param[out] error_count Number of failed updates + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_update( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Update documents and return per-document statuses. 
+ * + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] results Per-document result array (free with + * zvec_write_results_free) + * @param[out] result_count Number of result entries + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_update_with_results( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + ZVecWriteResult **results, size_t *result_count); + +/** + * @brief Insert or update documents in collection (upsert operation) + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] success_count Number of successful operations + * @param[out] error_count Number of failed operations + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_upsert( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Upsert documents and return per-document statuses. 
+ * + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] results Per-document result array (free with + * zvec_write_results_free) + * @param[out] result_count Number of result entries + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_upsert_with_results( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + ZVecWriteResult **results, size_t *result_count); + +/** + * @brief Delete documents from collection + * @param collection Collection handle + * @param pks Primary key array + * @param pk_count Primary key count + * @param[out] success_count Number of successfully deleted documents + * @param[out] error_count Number of failed deletions + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_delete( + ZVecCollection *collection, const char *const *pks, size_t pk_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Delete documents by PK and return per-document statuses. + * + * @param collection Collection handle + * @param pks Primary key array + * @param pk_count Primary key count + * @param[out] results Per-document result array (free with + * zvec_write_results_free) + * @param[out] result_count Number of result entries + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_delete_with_results( + ZVecCollection *collection, const char *const *pks, size_t pk_count, + ZVecWriteResult **results, size_t *result_count); + +/** + * @brief Free result arrays returned by detailed DML APIs. 
+ * + * @param results Result array pointer + * @param result_count Number of entries in result array + */ +ZVEC_EXPORT void ZVEC_CALL zvec_write_results_free(ZVecWriteResult *results, + size_t result_count); + +/** + * @brief Delete documents by filter condition + * @param collection Collection handle + * @param filter Filter expression + * @note The number of deleted documents is not reported by this function + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_delete_by_filter( + ZVecCollection *collection, const char *filter); + +// ============================================================================= +// Data Query Interface (DQL) +// ============================================================================= + +/** + * @brief Vector similarity search + * @param collection Collection handle + * @param query Query parameters pointer + * @param[out] results Returned document array (needs to be freed by calling + * zvec_docs_free) + * @param[out] result_count Number of returned results + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_query( + const ZVecCollection *collection, const ZVecVectorQuery *query, + ZVecDoc ***results, size_t *result_count); + +/** + * @brief Grouped vector similarity search + * @param collection Collection handle + * @param query Grouped query parameters pointer + * @param[out] results Returned document array (needs to be freed by calling + * zvec_docs_free) + * @param[out] group_by_values Returned group by field values array (needs to be + * freed by calling zvec_string_array_destroy) + * @param[out] result_count Number of returned results + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_query_by_group( + const ZVecCollection *collection, const ZVecGroupByVectorQuery *query, + ZVecDoc ***results, ZVecString ***group_by_values, size_t *result_count); + +/** + * @brief Fetch documents by primary keys + * @param 
collection Collection handle + * @param primary_keys Primary key array + * @param count Number of primary keys + * @param[out] documents Returned document array (needs to be freed by calling + * zvec_docs_free) + * @param[out] found_count Number of found documents + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_fetch( + ZVecCollection *collection, const char *const *primary_keys, size_t count, + ZVecDoc ***documents, size_t *found_count); + +// ============================================================================= +// Document Related Structures +// ============================================================================= + +/** + * @brief Document field value union + */ +typedef union { + bool bool_value; /**< Boolean value */ + int32_t int32_value; /**< 32-bit signed integer value */ + int64_t int64_value; /**< 64-bit signed integer value */ + uint32_t uint32_value; /**< 32-bit unsigned integer value */ + uint64_t uint64_value; /**< 64-bit unsigned integer value */ + float float_value; /**< Single-precision float value */ + double double_value; /**< Double-precision float value */ + ZVecString string_value; /**< String value */ + ZVecFloatArray vector_value; /**< Float vector value */ + ZVecByteArray binary_value; /**< Binary data value */ +} ZVecFieldValue; + +/** + * @brief Document field structure + */ +typedef struct { + ZVecString name; ///< Field name + ZVecDataType data_type; ///< Data type + ZVecFieldValue value; ///< Field value +} ZVecDocField; + +/** + * @brief Document operator enumeration + */ +typedef enum { + ZVEC_DOC_OP_INSERT = 0, ///< Insert operation + ZVEC_DOC_OP_UPDATE = 1, ///< Update operation + ZVEC_DOC_OP_UPSERT = 2, ///< Insert or update operation + ZVEC_DOC_OP_DELETE = 3 ///< Delete operation +} ZVecDocOperator; + + +// ============================================================================= +// Document Operation Interface +// ============================================================================= + +/** + * @brief Create a new document object + * + * @return ZVecDoc* Pointer to the newly created document object, returns NULL + * on failure + */ +ZVEC_EXPORT ZVecDoc *ZVEC_CALL zvec_doc_create(void); + +/** + * @brief Destroy the document object and 
release all resources + * + * @param doc Pointer to the document object + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_destroy(ZVecDoc *doc); + +/** + * @brief Clear the document object + * + * @param doc Pointer to the document object + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_clear(ZVecDoc *doc); + +/** + * @brief Add field to document by value + * + * @param doc Document object pointer + * @param field_name Field name + * @param data_type Data type + * @param value Value pointer + * @param value_size Value size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_add_field_by_value( + ZVecDoc *doc, const char *field_name, ZVecDataType data_type, + const void *value, size_t value_size); + +/** + * @brief Add field to document by structure + * + * @param doc Document object pointer + * @param field Field structure pointer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_add_field_by_struct(ZVecDoc *doc, const ZVecDocField *field); + +/** + * @brief Remove field from document + * + * @param doc Document structure pointer + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_remove_field(ZVecDoc *doc, const char *field_name); + + +/** + * @brief Batch release document array + * + * @param documents Document pointer array + * @param count Document count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_docs_free(ZVecDoc **documents, size_t count); + +/** + * @brief Set document primary key + * + * @param doc Pointer to the document structure + * @param pk Primary key string + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_pk(ZVecDoc *doc, const char *pk); + +/** + * @brief Set document ID + * + * @param doc Document structure pointer + * @param doc_id Document ID + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_doc_id(ZVecDoc *doc, uint64_t doc_id); + +/** + * @brief Set document score + * + * @param doc Document structure pointer + * @param score 
Score value + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_score(ZVecDoc *doc, float score); + +/** + * @brief Set document operator + * + * @param doc Document structure pointer + * @param op Operator + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_operator(ZVecDoc *doc, + ZVecDocOperator op); + +/** + * @brief Explicitly mark a document field as null. + * + * @param doc Document structure pointer + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_set_field_null(ZVecDoc *doc, const char *field_name); + +/** + * @brief Get document ID + * + * @param doc Document structure pointer + * @return uint64_t Document ID + */ +ZVEC_EXPORT uint64_t ZVEC_CALL zvec_doc_get_doc_id(const ZVecDoc *doc); + +/** + * @brief Get document score + * + * @param doc Document structure pointer + * @return float Score value + */ +ZVEC_EXPORT float ZVEC_CALL zvec_doc_get_score(const ZVecDoc *doc); + +/** + * @brief Get document operator + * + * @param doc Document structure pointer + * @return ZVecDocOperator Operator + */ +ZVEC_EXPORT ZVecDocOperator ZVEC_CALL zvec_doc_get_operator(const ZVecDoc *doc); + +/** + * @brief Get document field count + * + * @param doc Document structure pointer + * @return size_t Field count + */ +ZVEC_EXPORT size_t ZVEC_CALL zvec_doc_get_field_count(const ZVecDoc *doc); + + +/** + * @brief Get document primary key pointer (no copy) + * + * @param doc Document object pointer + * @return const char* Primary key string pointer, returns NULL if not set + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_doc_get_pk_pointer(const ZVecDoc *doc); + +/** + * @brief Get document primary key copy (needs manual release) + * + * @param doc Document object pointer + * @return const char* Primary key string copy, needs to call free() to release, + * returns NULL if not set + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_doc_get_pk_copy(const ZVecDoc *doc); + +/** + * @brief Get field value (basic type returned 
directly) + * + * Supports basic numeric data types: BOOL, INT32, INT64, UINT32, UINT64, + * FLOAT, DOUBLE. The value is copied directly into the provided buffer. + * For STRING, BINARY, and VECTOR types, use zvec_doc_get_field_value_copy + * or zvec_doc_get_field_value_pointer instead. + * + * @param doc Document object pointer + * @param field_name Field name + * @param field_type Field type (must be a basic numeric type) + * @param value_buffer Output buffer to receive the value + * @param buffer_size Size of the output buffer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_value_basic( + const ZVecDoc *doc, const char *field_name, ZVecDataType field_type, + void *value_buffer, size_t buffer_size); + +/** + * @brief Get field value copy (allocate new memory) + * + * Supports all data types including: + * - Basic types: BOOL, INT32, INT64, UINT32, UINT64, FLOAT, DOUBLE + * - String types: STRING, BINARY + * - Vector types: VECTOR_FP32, VECTOR_FP64, VECTOR_FP16, VECTOR_INT4, + * VECTOR_INT8, VECTOR_INT16, VECTOR_BINARY32, VECTOR_BINARY64 + * - Sparse vector types: SPARSE_VECTOR_FP32, SPARSE_VECTOR_FP16 + * - Array types: ARRAY_STRING, ARRAY_BINARY, ARRAY_BOOL, ARRAY_INT32, + * ARRAY_INT64, ARRAY_UINT32, ARRAY_UINT64, ARRAY_FLOAT, ARRAY_DOUBLE + * + * The returned value pointer must be manually freed using appropriate + * deallocation functions (free() for basic types and strings, + * zvec_free_uint8_array() for binary data). 
+ * + * @param doc Document object pointer + * @param field_name Field name + * @param field_type Field type + * @param[out] value Returned value pointer (needs manual release) + * @param[out] value_size Returned value size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_value_copy( + const ZVecDoc *doc, const char *field_name, ZVecDataType field_type, + void **value, size_t *value_size); + +/** + * @brief Get field value pointer (data remains in document) + * + * Supports data types where direct pointer access is safe: + * - Basic types: BOOL, INT32, INT64, UINT32, UINT64, FLOAT, DOUBLE + * - String types: STRING (returns null-terminated C string), BINARY + * - Vector types: VECTOR_FP32, VECTOR_FP64, VECTOR_FP16, VECTOR_INT4, + * VECTOR_INT8, VECTOR_INT16, VECTOR_BINARY32, VECTOR_BINARY64 + * - Array types: ARRAY_INT32, ARRAY_INT64, ARRAY_UINT32, ARRAY_UINT64, + * ARRAY_FLOAT, ARRAY_DOUBLE + * + * The returned pointer points to data within the document object and + * does not require manual memory management. The pointer remains valid + * as long as the document exists. 
+ * + * @param doc Document object pointer + * @param field_name Field name + * @param field_type Field type + * @param[out] value Returned value pointer (points to document-internal data) + * @param[out] value_size Returned value size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_value_pointer( + const ZVecDoc *doc, const char *field_name, ZVecDataType field_type, + const void **value, size_t *value_size); + +/** + * @brief Check if document is empty + * + * @param doc Document object pointer + * @return bool Returns true if document is empty, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_is_empty(const ZVecDoc *doc); + +/** + * @brief Check if document contains specified field + * + * @param doc Document object pointer + * @param field_name Field name + * @return bool Returns true if field exists, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_has_field(const ZVecDoc *doc, + const char *field_name); + +/** + * @brief Check if document field has value + * + * @param doc Document object pointer + * @param field_name Field name + * @return bool Returns true if field has value, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_has_field_value(const ZVecDoc *doc, + const char *field_name); + +/** + * @brief Check if document field is null + * + * @param doc Document object pointer + * @param field_name Field name + * @return bool Returns true if field is null, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_is_field_null(const ZVecDoc *doc, + const char *field_name); + +/** + * @brief Get all field names of document + * + * @param doc Document object pointer + * @param[out] field_names + * Returned field name array (needs to call zvec_free_str_array to release) + * @param[out] count Returned field count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_names( + const ZVecDoc *doc, char 
***field_names, size_t *count); + +/** + * @brief Release string array memory + * + * @param array String array pointer + * @param count Array element count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_str_array(char **array, size_t count); + +/** + * @brief Serialize document + * + * @param doc Document object pointer + * @param[out] data Returned serialized data (needs to call + * zvec_free_uint8_array to release) + * @param[out] size Returned data size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_serialize(const ZVecDoc *doc, + uint8_t **data, + size_t *size); + +/** + * @brief Deserialize document + * + * @param data Serialized data + * @param size Data size + * @param[out] doc Returned document object pointer (needs to call + * zvec_doc_destroy to release) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_deserialize(const uint8_t *data, + size_t size, + ZVecDoc **doc); + +/** + * @brief Merge two documents + * + * @param doc Target document object pointer + * @param other Source document object pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_merge(ZVecDoc *doc, const ZVecDoc *other); + +/** + * @brief Get document memory usage + * + * @param doc Document object pointer + * @return size_t Memory usage (bytes) + */ +ZVEC_EXPORT size_t ZVEC_CALL zvec_doc_memory_usage(const ZVecDoc *doc); + +/** + * @brief Validate document against Schema + * + * @param doc Document object pointer + * @param schema Schema object pointer + * @param is_update Whether it's an update operation + * @param[out] error_msg Error message (needs manual release) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_validate(const ZVecDoc *doc, const ZVecCollectionSchema *schema, + bool is_update, char **error_msg); + +/** + * @brief Get detailed string representation of document + * + * @param doc Document object pointer + * @param[out] detail_str Returned detailed string 
(needs manual release) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_to_detail_string(const ZVecDoc *doc, char **detail_str); + +/** + * @brief Free docs array memory + * @param docs Document array pointer + * @param count Document count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_docs_free(ZVecDoc **docs, size_t count); + + +// ============================================================================= +// Utility Functions +// ============================================================================= + +/** + * @brief Convert error code to description string + * @param error_code Error code + * @return const char* Error description string + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_error_code_to_string(ZVecErrorCode error_code); + +/** + * @brief Convert data type to string + * @param data_type Data type + * @return const char* Data type string + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_data_type_to_string(ZVecDataType data_type); + +/** + * @brief Convert index type to string + * @param index_type Index type + * @return const char* Index type string + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_index_type_to_string(ZVecIndexType index_type); + +/** + * @brief Convert metric type to string + * @param metric_type Metric type + * @return const char* Metric type string + */ +const char *zvec_metric_type_to_string(ZVecMetricType metric_type); + + +// ============================================================================= +// Helper Functions +// ============================================================================= + +/** + * @brief Simplified HNSW index parameters initialization macro + * @param _metric Distance metric type + * @param _m Connectivity parameter + * @param _ef_construction Exploration factor during construction + * @param _ef_search Exploration factor during search + * @param _quant Quantization type + * + * Usage example: + * @code + * ZVecIndexParams params = ZVEC_HNSW_PARAMS( + * 
ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + * @endcode + */ +// clang-format off +#define ZVEC_HNSW_PARAMS(_metric, _m, _ef_construction, _ef_search, _quant) \ + ((ZVecIndexParams){ \ + .index_type = ZVEC_INDEX_TYPE_HNSW, \ + .metric_type = (_metric), \ + .quantize_type = (_quant), \ + .hnsw.m = (_m), \ + .hnsw.ef_construction = (_ef_construction), \ + .hnsw.ef_search = (_ef_search) }) +// clang-format on + +/** + * @brief Simplified inverted index parameters initialization macro + * @param _range_opt Whether to enable range optimization + * @param _wildcard Whether to enable extended wildcard matching + * + * Usage example: + * ZVecIndexParams params = ZVEC_INVERT_PARAMS(true, false); + */ +// clang-format off +#define ZVEC_INVERT_PARAMS(_range_opt, _wildcard) \ + ((ZVecIndexParams){ \ + .index_type = ZVEC_INDEX_TYPE_INVERT, \ + .invert.enable_range_optimization = (_range_opt), \ + .invert.enable_extended_wildcard = (_wildcard) }) +// clang-format on + +/** + * @brief Simplified Flat index parameters initialization macro + * @param _metric Distance metric type + * @param _quant Quantization type + */ +// clang-format off +#define ZVEC_FLAT_PARAMS(_metric, _quant) \ + ((ZVecIndexParams){ \ + .index_type = ZVEC_INDEX_TYPE_FLAT, \ + .metric_type = (_metric), \ + .quantize_type = (_quant) }) +// clang-format on + +/** + * @brief Simplified IVF index parameters initialization macro + * @param _metric Distance metric type + * @param _nlist Number of cluster centers + * @param _niters Number of iterations + * @param _soar Whether to use SOAR algorithm + * @param _nprobe Number of clusters to probe during search + * @param _quant Quantization type + */ +// clang-format off +#define ZVEC_IVF_PARAMS(_metric, _nlist, _niters, _soar, _nprobe, _quant) \ + ((ZVecIndexParams){ \ + .index_type = ZVEC_INDEX_TYPE_IVF, \ + .metric_type = (_metric), \ + .quantize_type = (_quant), \ + .ivf.n_list = (_nlist), \ + .ivf.n_iters = (_niters), \ + .ivf.use_soar = (_soar), \ + 
.ivf.n_probe = (_nprobe) }) +// clang-format on + +/** + * @brief Simplified string initialization macro (length computed with strlen) + * @param str NUL-terminated C string; expanded twice, so avoid side effects + * + * Usage example: + * ZVecString name = ZVEC_STRING("my_collection"); + */ +#define ZVEC_STRING(str) \ + (ZVecString) { \ + .data = str, .length = strlen(str) \ + } + +/** + * @brief Simplified string view initialization macro (length via strlen) + * @param str NUL-terminated C string; expanded twice, so avoid side effects + * + * Usage example: + * ZVecStringView name = ZVEC_STRING_VIEW("my_collection"); + */ +#define ZVEC_STRING_VIEW(str) \ + (ZVecStringView) { \ + .data = str, .length = strlen(str) \ + } + +// NOTE(review): the macro superseded by ZVEC_STRING_VIEW is not declared nearby; confirm this remark is still needed + +/** + * @brief Simplified float array initialization macro + * @param data_ptr Float array pointer + * @param len Array length + * + * Usage example: + * float vectors[] = {0.1f, 0.2f, 0.3f}; + * ZVecFloatArray vec_array = ZVEC_FLOAT_ARRAY(vectors, 3); + */ +#define ZVEC_FLOAT_ARRAY(data_ptr, len) \ + (ZVecFloatArray) { \ + .data = data_ptr, .length = len \ + } + +/** + * @brief Simplified int64 array (ZVecInt64Array) initialization macro + * @param data_ptr Pointer stored in the array's data member + * @param len Array length + */ +#define ZVEC_INT64_ARRAY(data_ptr, len) \ + (ZVecInt64Array) { \ + .data = data_ptr, .length = len \ + } + +/** + * @brief Simplified collection options initialization macro (using default + * values) + * + * Usage example: + * ZVecCollectionOptions opts = ZVEC_DEFAULT_OPTIONS(); + */ +#define ZVEC_DEFAULT_OPTIONS() \ + (ZVecCollectionOptions) { \ + .enable_mmap = true, .max_buffer_size = 1048576, .read_only = false, \ + .max_doc_count_per_segment = 1000000 \ + } + +/** + * @brief Simplified vector query initialization macro + * @param field_name_str Query field name + * @param query_vec Query vector array + * @param top_k Number of results to return + * @param filter_str Filter condition string + * + * Usage example: + * ZVecVectorQuery query = ZVEC_VECTOR_QUERY("embedding", query_vectors, 10, + * ""); + */ +#define 
ZVEC_VECTOR_QUERY(field_name_str, query_vec, top_k, filter_str) \ + (ZVecVectorQuery) { \ + .field_name = ZVEC_STRING(field_name_str), .query_vector = query_vec, \ + .topk = top_k, .filter = ZVEC_STRING(filter_str), .include_vector = 1, \ + .include_doc_id = 1 \ + } + +/** + * @brief Simplified document field initialization macro + * @param name_str Field name + * @param type Data type + * @param value_union Field value union + * + * Usage example: + * ZVecDocField field = ZVEC_DOC_FIELD("id", ZVEC_DATA_TYPE_STRING, + * {.string_value = ZVEC_STRING("doc1")}); + */ +#define ZVEC_DOC_FIELD(name_str, type, value_union) \ + (ZVecDocField) { \ + .name = ZVEC_STRING(name_str), .data_type = type, .value = value_union \ + } + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // ZVEC_C_API_H diff --git a/src/include/zvec/db/doc.h b/src/include/zvec/db/doc.h index fa056053..e6d13c86 100644 --- a/src/include/zvec/db/doc.h +++ b/src/include/zvec/db/doc.h @@ -68,6 +68,10 @@ class Doc { return pk_; } + const std::string &pk_ref() const { + return pk_; + } + void set_score(float score) { score_ = score; } @@ -103,6 +107,10 @@ class Doc { return op_; } + Operator get_operator() const { + return op_; + } + // Set field value template bool set(const std::string &field_name, T value) { @@ -232,6 +240,26 @@ class Doc { return std::nullopt; } + // Get field value as const reference, throws exception if field doesn't exist + // or type mismatches + template + const T &get_ref(const std::string &field_name) const { + auto it = fields_.find(field_name); + if (it == fields_.end()) { + throw std::runtime_error("Field '" + field_name + "' not found"); + } + + if (std::holds_alternative(it->second)) { + throw std::runtime_error("Field '" + field_name + "' is null"); + } + + try { + return std::get(it->second); + } catch (const std::bad_variant_access &) { + throw std::runtime_error("Field '" + field_name + "' type mismatch"); + } + } + void remove(const std::string &field_name) { 
fields_.erase(field_name); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 03250f1c..e1ffc326 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -4,3 +4,4 @@ include(${PROJECT_ROOT_DIR}/cmake/option.cmake) cc_directories(ailego) cc_directories(db) cc_directories(core) +cc_directories(c_api) \ No newline at end of file diff --git a/tests/c_api/CMakeLists.txt b/tests/c_api/CMakeLists.txt new file mode 100644 index 00000000..ad2f62e1 --- /dev/null +++ b/tests/c_api/CMakeLists.txt @@ -0,0 +1,28 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include(${CMAKE_SOURCE_DIR}/cmake/bazel.cmake) + +file(GLOB_RECURSE ALL_TEST_SRCS *_test.c) + +foreach(CC_SRCS ${ALL_TEST_SRCS}) + get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE) + cc_gtest( + NAME ${CC_TARGET} + STRICT + LIBS zvec_c_api + SRCS ${CC_SRCS} utils.c + INCS . .. ../../src + ) +endforeach() diff --git a/tests/c_api/c_api_test.c b/tests/c_api/c_api_test.c new file mode 100644 index 00000000..a442d191 --- /dev/null +++ b/tests/c_api/c_api_test.c @@ -0,0 +1,4586 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "zvec/c_api.h" +#include +#include +#include +#include +#include +#include +#ifdef _POSIX_C_SOURCE +#include +#endif +#include +#include "utils.h" + +// ============================================================================= +// Test helper macro definitions +// ============================================================================= + +static int test_count = 0; +static int passed_count = 0; +static int current_test_passed = 1; // Track if current test function passes + +#define TEST_START() \ + do { \ + printf("Running test: %s\n", __func__); \ + test_count++; \ + current_test_passed = 1; \ + } while (0) + +#define TEST_ASSERT(condition) \ + do { \ + if (condition) { \ + printf(" ✓ PASS\n"); \ + } else { \ + printf(" ✗ FAIL at line %d\n", __LINE__); \ + current_test_passed = 0; \ + } \ + } while (0) + +#define TEST_END() \ + do { \ + if (current_test_passed) { \ + passed_count++; \ + } \ + } while (0) + +// ============================================================================= +// Helper functions tests +// ============================================================================= + +void test_version_functions(void) { + TEST_START(); + + // Test version retrieval functions + const char *version = zvec_get_version(); + TEST_ASSERT(version != NULL); + + // Test version component retrieval + int major = zvec_get_version_major(); + int minor = zvec_get_version_minor(); + int patch = zvec_get_version_patch(); + + TEST_ASSERT(major >= 0); + TEST_ASSERT(minor >= 0); + TEST_ASSERT(patch >= 0); + + 
TEST_ASSERT(zvec_check_version(major, minor, patch)); + + // Test version checking functions + bool compatible = zvec_check_version(0, 3, 0); + TEST_ASSERT(compatible == true); + + bool not_compatible = zvec_check_version(99, 99, 99); + TEST_ASSERT(not_compatible == false); + + TEST_END(); +} + +void test_error_handling_functions(void) { + TEST_START(); + + char *error_msg = NULL; + ZVecErrorCode err = zvec_get_last_error(&error_msg); + TEST_ASSERT(err == ZVEC_OK); + + if (error_msg) { + free(error_msg); + } + + // Test error clearing + zvec_clear_error(); + + // Test error details retrieval + ZVecErrorDetails error_details = {0}; + err = zvec_get_last_error_details(&error_details); + TEST_ASSERT(err == ZVEC_OK); + + TEST_END(); +} + +void test_zvec_config() { + TEST_START(); + + // Test 1: Console log config creation and destruction + ZVecConsoleLogConfig *console_config = + zvec_config_console_log_create(ZVEC_LOG_LEVEL_INFO); + TEST_ASSERT(console_config != NULL); + if (console_config) { + TEST_ASSERT(console_config->level == ZVEC_LOG_LEVEL_INFO); + zvec_config_console_log_destroy(console_config); + } + + // Test 2: File log config creation and destruction + ZVecFileLogConfig *file_config = zvec_config_file_log_create( + ZVEC_LOG_LEVEL_WARN, "./logs", "test_log", 100, 7); + TEST_ASSERT(file_config != NULL); + if (file_config) { + TEST_ASSERT(file_config->level == ZVEC_LOG_LEVEL_WARN); + TEST_ASSERT(strcmp(file_config->dir.data, "./logs") == 0); + TEST_ASSERT(strcmp(file_config->basename.data, "test_log") == 0); + TEST_ASSERT(file_config->file_size == 100); + TEST_ASSERT(file_config->overdue_days == 7); + zvec_config_file_log_destroy(file_config); + } + + // Test 3: File log config edge cases + ZVecFileLogConfig *empty_file_config = + zvec_config_file_log_create(ZVEC_LOG_LEVEL_INFO, "", "", 0, 0); + TEST_ASSERT(empty_file_config != NULL); + if (empty_file_config) { + TEST_ASSERT(empty_file_config->level == ZVEC_LOG_LEVEL_INFO); + 
TEST_ASSERT(strcmp(empty_file_config->dir.data, "") == 0); + TEST_ASSERT(strcmp(empty_file_config->basename.data, "") == 0); + TEST_ASSERT(empty_file_config->file_size == 0); + TEST_ASSERT(empty_file_config->overdue_days == 0); + zvec_config_file_log_destroy(empty_file_config); + } + + // Test 4: Log config creation with console type + ZVecConsoleLogConfig *temp_console = + zvec_config_console_log_create(ZVEC_LOG_LEVEL_ERROR); + TEST_ASSERT(temp_console != NULL); + if (temp_console) { + zvec_config_console_log_destroy(temp_console); + } + + // Test 5: Log config creation with file type + ZVecFileLogConfig *temp_file = zvec_config_file_log_create( + ZVEC_LOG_LEVEL_DEBUG, "./logs", "app", 50, 30); + TEST_ASSERT(temp_file != NULL); + TEST_ASSERT(temp_file->level == ZVEC_LOG_LEVEL_DEBUG); + TEST_ASSERT(strcmp(temp_file->dir.data, "./logs") == 0); + TEST_ASSERT(strcmp(temp_file->basename.data, "app") == 0); + TEST_ASSERT(temp_file->file_size == 50); + TEST_ASSERT(temp_file->overdue_days == 30); + + zvec_config_file_log_destroy(temp_file); + + // Test 6: Config data creation and basic operations + ZVecConfigData *config_data = zvec_config_data_create(); + TEST_ASSERT(config_data != NULL); + if (config_data) { + // Test initial values + TEST_ASSERT(config_data->log_config != NULL); + TEST_ASSERT(config_data->log_type == ZVEC_LOG_TYPE_CONSOLE); + + // Test memory limit setting + ZVecErrorCode err = + zvec_config_data_set_memory_limit(config_data, 1024 * 1024 * 1024); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(config_data->memory_limit_bytes == 1024 * 1024 * 1024); + + // Test thread count settings + err = zvec_config_data_set_query_thread_count(config_data, 8); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(config_data->query_thread_count == 8); + + err = zvec_config_data_set_optimize_thread_count(config_data, 4); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(config_data->optimize_thread_count == 4); + + // Test log config replacement + TEST_ASSERT(config_data->log_type == 
ZVEC_LOG_TYPE_CONSOLE); + TEST_ASSERT(config_data->log_config != NULL); + + ZVecFileLogConfig *new_file = zvec_config_file_log_create( + ZVEC_LOG_LEVEL_DEBUG, "./logs", "app", 50, 30); + TEST_ASSERT(new_file != NULL); + zvec_config_data_set_log_config(config_data, ZVEC_LOG_TYPE_FILE, new_file); + TEST_ASSERT(config_data->log_type == ZVEC_LOG_TYPE_FILE); + TEST_ASSERT(config_data->log_config != NULL); + + zvec_config_data_destroy(config_data); + } + + // Test 7: Edge cases and error conditions + // Test NULL pointer handling + ZVecErrorCode err = zvec_config_data_set_memory_limit(NULL, 1024); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_config_data_set_log_config(NULL, ZVEC_LOG_TYPE_CONSOLE, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_config_data_set_query_thread_count(NULL, 1); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_config_data_set_optimize_thread_count(NULL, 1); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test boundary values + ZVecConfigData *boundary_config = zvec_config_data_create(); + if (boundary_config) { + // Test zero values + err = zvec_config_data_set_memory_limit(boundary_config, 0); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(boundary_config->memory_limit_bytes == 0); + + // Test maximum values + err = zvec_config_data_set_memory_limit(boundary_config, UINT64_MAX); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(boundary_config->memory_limit_bytes == UINT64_MAX); + + // Test zero thread counts + err = zvec_config_data_set_query_thread_count(boundary_config, 0); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(boundary_config->query_thread_count == 0); + + err = zvec_config_data_set_optimize_thread_count(boundary_config, 0); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(boundary_config->optimize_thread_count == 0); + + zvec_config_data_destroy(boundary_config); + } + + // Test 8: Memory leak prevention - double destroy safety + ZVecConfigData *double_destroy_test = 
zvec_config_data_create(); + if (double_destroy_test) { + zvec_config_data_destroy(double_destroy_test); + } + + TEST_END(); +} + +void test_zvec_initialize() { + TEST_START(); + + ZVecConfigData *config = zvec_config_data_create(); + TEST_ASSERT(config != NULL); + if (config) { + TEST_ASSERT(config->log_config != NULL); + TEST_ASSERT(config->log_type == ZVEC_LOG_TYPE_CONSOLE); + } + ZVecErrorCode err = zvec_initialize(config); + TEST_ASSERT(err == ZVEC_OK); + bool is_initialized = false; + zvec_is_initialized(&is_initialized); + TEST_ASSERT(is_initialized); + + TEST_END(); +} + +// ============================================================================= +// Schema-related tests +// ============================================================================= + +void test_schema_basic_operations(void) { + TEST_START(); + + // Test 1: Basic Schema creation and destruction + ZVecCollectionSchema *schema = zvec_collection_schema_create("demo"); + TEST_ASSERT(schema != NULL); + TEST_ASSERT(schema->name != NULL); + TEST_ASSERT(strcmp(schema->name->data, "demo") == 0); + TEST_ASSERT(schema->field_count == 0); + TEST_ASSERT(schema->fields == NULL); + TEST_ASSERT(schema->max_doc_count_per_segment > 0); + + // Test 2: Schema field count operations + size_t initial_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(initial_count == 0); + + // Test 3: Adding fields to schema + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + ZVecErrorCode err = zvec_collection_schema_add_field(schema, id_field); + TEST_ASSERT(err == ZVEC_OK); + + size_t count_after_add = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(count_after_add == 1); + + // Test 4: Finding fields in schema + const ZVecFieldSchema *found_field = + zvec_collection_schema_find_field(schema, "id"); + TEST_ASSERT(found_field != NULL); + TEST_ASSERT(strcmp(found_field->name->data, "id") == 0); + TEST_ASSERT(found_field->data_type == 
ZVEC_DATA_TYPE_INT64); + + // Test 5: Getting field by index + ZVecFieldSchema *indexed_field = zvec_collection_schema_get_field(schema, 0); + TEST_ASSERT(indexed_field != NULL); + TEST_ASSERT(strcmp(indexed_field->name->data, "id") == 0); + + // Test 6: Adding multiple fields + ZVecFieldSchema fields_to_add[2]; + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + ZVecFieldSchema *age_field = + zvec_field_schema_create("age", ZVEC_DATA_TYPE_INT32, true, 0); + + fields_to_add[0] = *name_field; + fields_to_add[1] = *age_field; + + err = zvec_collection_schema_add_fields(schema, fields_to_add, 2); + TEST_ASSERT(err == ZVEC_OK); + + size_t count_after_multi_add = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(count_after_multi_add == 3); + + // Test 7: Finding newly added fields + const ZVecFieldSchema *name_found = + zvec_collection_schema_find_field(schema, "name"); + TEST_ASSERT(name_found != NULL); + TEST_ASSERT(strcmp(name_found->name->data, "name") == 0); + + const ZVecFieldSchema *age_found = + zvec_collection_schema_find_field(schema, "age"); + TEST_ASSERT(age_found != NULL); + TEST_ASSERT(strcmp(age_found->name->data, "age") == 0); + + // Test 8: Setting and getting max doc count + err = zvec_collection_schema_set_max_doc_count_per_segment(schema, 10000); + TEST_ASSERT(err == ZVEC_OK); + + uint64_t max_doc_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == 10000); + + // Test 9: Schema validation + ZVecString *validation_error = NULL; + err = zvec_collection_schema_validate(schema, &validation_error); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(validation_error == NULL); + + // Test 10: Removing single field + err = zvec_collection_schema_remove_field(schema, "age"); + TEST_ASSERT(err == ZVEC_OK); + + size_t count_after_remove = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(count_after_remove == 2); + + const ZVecFieldSchema 
*removed_field = + zvec_collection_schema_find_field(schema, "age"); + TEST_ASSERT(removed_field == NULL); + + // Test 11: Removing multiple fields + const char *fields_to_remove[] = {"name", "id"}; + err = zvec_collection_schema_remove_fields(schema, fields_to_remove, 2); + TEST_ASSERT(err == ZVEC_OK); + + size_t final_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(final_count == 0); + + // Test 12: Schema cleanup + zvec_collection_schema_destroy(schema); + + TEST_END(); +} + +void test_schema_edge_cases(void) { + TEST_START(); + + // Test 1: NULL parameter handling for schema creation + ZVecCollectionSchema *null_schema = zvec_collection_schema_create(NULL); + TEST_ASSERT(null_schema == NULL); + + // Test 2: Empty string schema name + ZVecCollectionSchema *empty_schema = zvec_collection_schema_create(""); + TEST_ASSERT(empty_schema != NULL); + TEST_ASSERT(empty_schema->name != NULL); + TEST_ASSERT(strcmp(empty_schema->name->data, "") == 0); + zvec_collection_schema_destroy(empty_schema); + + // Test 3: Very long schema name + char long_name[1024]; + memset(long_name, 'a', 1023); + long_name[1023] = '\0'; + ZVecCollectionSchema *long_schema = zvec_collection_schema_create(long_name); + TEST_ASSERT(long_schema != NULL); + TEST_ASSERT(long_schema->name != NULL); + TEST_ASSERT(strlen(long_schema->name->data) == 1023); + zvec_collection_schema_destroy(long_schema); + + // Test 4: NULL schema parameter handling for all functions + ZVecErrorCode err; + size_t count = zvec_collection_schema_get_field_count(NULL); + TEST_ASSERT(count == 0); + + const ZVecFieldSchema *null_field = + zvec_collection_schema_find_field(NULL, "test"); + TEST_ASSERT(null_field == NULL); + + ZVecFieldSchema *null_indexed_field = + zvec_collection_schema_get_field(NULL, 0); + TEST_ASSERT(null_indexed_field == NULL); + + uint64_t null_max_doc_count = + zvec_collection_schema_get_max_doc_count_per_segment(NULL); + TEST_ASSERT(null_max_doc_count == 0); + + err = 
zvec_collection_schema_set_max_doc_count_per_segment(NULL, 1000); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + ZVecString *null_validation_error = NULL; + err = zvec_collection_schema_validate(NULL, &null_validation_error); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + TEST_ASSERT(null_validation_error == NULL); + + err = zvec_collection_schema_add_field(NULL, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_collection_schema_add_fields(NULL, NULL, 0); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_collection_schema_remove_field(NULL, "test"); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + const char *null_field_names[] = {NULL}; + err = zvec_collection_schema_remove_fields(NULL, null_field_names, 1); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 5: Working with valid schema for edge cases + ZVecCollectionSchema *schema = zvec_collection_schema_create("edge_test"); + TEST_ASSERT(schema != NULL); + + // Test 6: Adding NULL field to schema + err = zvec_collection_schema_add_field(schema, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 7: Adding fields with NULL array + err = zvec_collection_schema_add_fields(schema, NULL, 5); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 8: Adding zero fields + err = zvec_collection_schema_add_fields(schema, NULL, 0); + TEST_ASSERT(err == ZVEC_OK); + + // Test 9: Finding field with NULL name + const ZVecFieldSchema *null_name_field = + zvec_collection_schema_find_field(schema, NULL); + TEST_ASSERT(null_name_field == NULL); + + // Test 10: Finding non-existent field + const ZVecFieldSchema *nonexistent_field = + zvec_collection_schema_find_field(schema, "nonexistent"); + TEST_ASSERT(nonexistent_field == NULL); + + // Test 11: Getting field with invalid index + ZVecFieldSchema *invalid_index_field = + zvec_collection_schema_get_field(schema, 1000); + TEST_ASSERT(invalid_index_field == NULL); + + // Test 12: 
Getting field from empty schema with index 0 + ZVecFieldSchema *zero_index_field = + zvec_collection_schema_get_field(schema, 0); + TEST_ASSERT(zero_index_field == NULL); + + // Test 13: Removing field with NULL name + err = zvec_collection_schema_remove_field(schema, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 14: Removing non-existent field + err = zvec_collection_schema_remove_field(schema, "nonexistent"); + TEST_ASSERT(err == ZVEC_ERROR_NOT_FOUND); + + // Test 15: Removing fields with NULL array + err = zvec_collection_schema_remove_fields(schema, NULL, 5); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 16: Removing zero fields + err = zvec_collection_schema_remove_fields(schema, NULL, 0); + TEST_ASSERT(err == ZVEC_OK); + + // Test 17: Setting extremely large max doc count + err = + zvec_collection_schema_set_max_doc_count_per_segment(schema, UINT64_MAX); + TEST_ASSERT(err == ZVEC_OK); + uint64_t retrieved_max_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(retrieved_max_count == UINT64_MAX); + + // Test 18: Setting zero max doc count + err = zvec_collection_schema_set_max_doc_count_per_segment(schema, 0); + TEST_ASSERT(err == ZVEC_OK); + uint64_t zero_max_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(zero_max_count == 0); + + // Test 19: Schema validation with empty schema + ZVecString *empty_validation_error = NULL; + err = zvec_collection_schema_validate(schema, &empty_validation_error); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 20: Add duplicate field names + ZVecFieldSchema *first_id = + zvec_field_schema_create("duplicate_id", ZVEC_DATA_TYPE_INT64, false, 0); + ZVecFieldSchema *second_id = + zvec_field_schema_create("duplicate_id", ZVEC_DATA_TYPE_STRING, false, 0); + + err = zvec_collection_schema_add_field(schema, first_id); + TEST_ASSERT(err == ZVEC_OK); + + err = zvec_collection_schema_add_field(schema, 
second_id); + TEST_ASSERT(err == ZVEC_ERROR_ALREADY_EXISTS); + zvec_field_schema_destroy(second_id); + + // Verify fields + size_t field_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(field_count == 1); + + // Test 21: Cleanup + zvec_collection_schema_destroy(schema); + + TEST_END(); +} + +void test_schema_field_operations(void) { + TEST_START(); + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Test field count + size_t initial_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(initial_count == 5); + + // Test finding non-existent field + const ZVecFieldSchema *nonexistent = + zvec_collection_schema_find_field(schema, "nonexistent"); + TEST_ASSERT(nonexistent == NULL); + + // Test finding existing field + const ZVecFieldSchema *id_field = + zvec_collection_schema_find_field(schema, "id"); + TEST_ASSERT(id_field != NULL); + if (id_field) { + TEST_ASSERT(strcmp(id_field->name->data, "id") == 0); + TEST_ASSERT(id_field->data_type == ZVEC_DATA_TYPE_INT64); + } + + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +void test_normal_schema_creation(void) { + TEST_START(); + + ZVecCollectionSchema *schema = + zvec_test_create_normal_schema(false, "test_normal", NULL, NULL, 1000); + TEST_ASSERT(schema != NULL); + + if (schema) { + TEST_ASSERT(strcmp(schema->name->data, "test_normal") == 0); + + // Verify field count + size_t field_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(field_count > 0); + + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +void test_schema_with_indexes(void) { + TEST_START(); + + // Test Schema with scalar index + ZVecCollectionSchema *scalar_index_schema = + zvec_test_create_schema_with_scalar_index(true, true, + "scalar_index_test"); + TEST_ASSERT(scalar_index_schema != NULL); + if (scalar_index_schema) { + zvec_collection_schema_destroy(scalar_index_schema); + } + + // Test Schema 
with vector index + ZVecCollectionSchema *vector_index_schema = + zvec_test_create_schema_with_vector_index(false, "vector_index_test", + NULL); + TEST_ASSERT(vector_index_schema != NULL); + if (vector_index_schema) { + zvec_collection_schema_destroy(vector_index_schema); + } + + TEST_END(); +} + +void test_schema_max_doc_count(void) { + TEST_START(); + + // Test 1: Setting max doc count to a valid value + ZVecCollectionSchema *schema = zvec_collection_schema_create("max_doc_test"); + TEST_ASSERT(schema != NULL); + + ZVecErrorCode err = + zvec_collection_schema_set_max_doc_count_per_segment(schema, 1000); + TEST_ASSERT(err == ZVEC_OK); + + uint64_t max_doc_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == 1000); + + zvec_collection_schema_destroy(schema); + + // Test 2: Setting max doc count to zero + schema = zvec_collection_schema_create("max_doc_test"); + TEST_ASSERT(schema != NULL); + + err = zvec_collection_schema_set_max_doc_count_per_segment(schema, 0); + TEST_ASSERT(err == ZVEC_OK); + + max_doc_count = zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == 0); + + zvec_collection_schema_destroy(schema); + + // Test 3: Setting max doc count to maximum value + schema = zvec_collection_schema_create("max_doc_test"); + TEST_ASSERT(schema != NULL); + + err = + zvec_collection_schema_set_max_doc_count_per_segment(schema, UINT64_MAX); + TEST_ASSERT(err == ZVEC_OK); + + max_doc_count = zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == UINT64_MAX); + + zvec_collection_schema_destroy(schema); + + TEST_END(); +} + +// ============================================================================= +// Collection-related tests +// ============================================================================= + +void test_collection_basic_operations(void) { + TEST_START(); + + // Create temporary directory + char temp_dir[] = 
"/tmp/zvec_test_collection_basic_operations"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Test collection operations + ZVecDoc *doc1 = zvec_test_create_doc(1, schema, NULL); + ZVecDoc *doc2 = zvec_test_create_doc(2, schema, NULL); + ZVecDoc *doc3 = zvec_test_create_doc(3, schema, NULL); + + TEST_ASSERT(doc1 != NULL); + TEST_ASSERT(doc2 != NULL); + TEST_ASSERT(doc3 != NULL); + + if (doc1 && doc2 && doc3) { + ZVecDoc *docs[] = {doc1, doc2, doc3}; + size_t success_count, error_count; + + // Test insert operation + err = zvec_collection_insert(collection, (const ZVecDoc **)docs, 3, + &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 3); + TEST_ASSERT(error_count == 0); + + // Test update operation + zvec_doc_set_score(doc1, 0.95f); + ZVecDoc *update_docs[] = {doc1}; + err = zvec_collection_update(collection, (const ZVecDoc **)update_docs, + 1, &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 1); + TEST_ASSERT(error_count == 0); + + // Test upsert operation + zvec_doc_set_pk(doc3, "pk_3_modified"); + ZVecDoc *upsert_docs[] = {doc3}; + err = zvec_collection_upsert(collection, (const ZVecDoc **)upsert_docs, + 1, &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 1); + TEST_ASSERT(error_count == 0); + + // Test delete operation by primary keys + const char *pks[] = {"pk_1", "pk_2"}; + err = zvec_collection_delete(collection, pks, 2, &success_count, + &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 2); + TEST_ASSERT(error_count == 0); + + // Test delete by filter + err = zvec_collection_delete_by_filter(collection, "id > 0"); + 
TEST_ASSERT(err == ZVEC_OK); + + // Clean up documents + zvec_doc_destroy(doc1); + zvec_doc_destroy(doc2); + zvec_doc_destroy(doc3); + } + + // Test collection flush + err = zvec_collection_flush(collection); + TEST_ASSERT(err == ZVEC_OK); + + // Test collection optimization + err = zvec_collection_optimize(collection); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_edge_cases(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_edge_cases"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + + // Test empty name collection + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + if (collection) { + zvec_collection_destroy(collection); + collection = NULL; + } + + // Test long name collection + char long_name[256]; + memset(long_name, 'a', 255); + long_name[255] = '\0'; + + char long_path[512]; + snprintf(long_path, sizeof(long_path), "%s/%s", temp_dir, + "very_long_collection_name_that_tests_path_limits"); + + err = zvec_collection_create_and_open(long_path, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + if (collection) { + zvec_collection_destroy(collection); + collection = NULL; + } + + // Test NULL name集合 + err = zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err != ZVEC_OK); + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_delete_by_filter(void) { + TEST_START(); + + char temp_dir[] = 
"/tmp/zvec_test_collection_delete_by_filter"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + // Test normal deletion filtering + err = zvec_collection_delete_by_filter(collection, "id > 1"); + TEST_ASSERT(err == ZVEC_OK); + + // Test NULL filter + err = zvec_collection_delete_by_filter(collection, NULL); + TEST_ASSERT(err != ZVEC_OK); + + // Test empty string filter + err = zvec_collection_delete_by_filter(collection, ""); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_stats(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_stats"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + ZVecCollectionStats *stats = NULL; + err = zvec_collection_get_stats(collection, &stats); + TEST_ASSERT(err == ZVEC_OK); + + if (stats) { + // Basic validation of statistics + TEST_ASSERT(stats->doc_count == + 0); // New collection should have no documents + zvec_collection_stats_destroy(stats); + } + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +// ============================================================================= +// Field-related 
tests +// ============================================================================= + +void test_field_schema_functions(void) { + TEST_START(); + + // Test scalar field creation + ZVecFieldSchema scalar_field = {0}; + ZVecString name1 = {0}; + name1.data = "test_field"; + name1.length = 10; + scalar_field.name = &name1; + scalar_field.data_type = ZVEC_DATA_TYPE_STRING; + scalar_field.nullable = true; + scalar_field.dimension = 0; + + TEST_ASSERT(strcmp(scalar_field.name->data, "test_field") == 0); + TEST_ASSERT(scalar_field.data_type == ZVEC_DATA_TYPE_STRING); + TEST_ASSERT(scalar_field.nullable == true); + + // Test vector field creation + ZVecFieldSchema vector_field = {0}; + ZVecString name2 = {0}; + name2.data = "vec_field"; + name2.length = 9; + vector_field.name = &name2; + vector_field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32; + vector_field.nullable = false; + vector_field.dimension = 128; + + TEST_ASSERT(strcmp(vector_field.name->data, "vec_field") == 0); + TEST_ASSERT(vector_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP32); + TEST_ASSERT(vector_field.dimension == 128); + + // Test sparse vector field creation + ZVecFieldSchema sparse_field = {0}; + ZVecString name3 = {0}; + name3.data = "sparse_field"; + name3.length = 12; + sparse_field.name = &name3; + sparse_field.data_type = ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32; + sparse_field.nullable = false; + sparse_field.dimension = 0; + + TEST_ASSERT(strcmp(sparse_field.name->data, "sparse_field") == 0); + TEST_ASSERT(sparse_field.data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32); + + TEST_END(); +} + +void test_field_helper_functions(void) { + TEST_START(); + + // Test scalar field helper functions + ZVecIndexParams *invert_params = zvec_test_create_default_invert_params(true); + ZVecFieldSchema *scalar_field = zvec_test_create_scalar_field( + "test_scalar", ZVEC_DATA_TYPE_INT32, true, invert_params); + TEST_ASSERT(scalar_field != NULL); + if (scalar_field) { + TEST_ASSERT(strcmp(scalar_field->name->data, 
"test_scalar") == 0); + TEST_ASSERT(scalar_field->data_type == ZVEC_DATA_TYPE_INT32); + free(scalar_field); + } + if (invert_params) { + free(invert_params); + } + + // Test vector field helper functions + ZVecIndexParams *hnsw_params = zvec_test_create_default_hnsw_params(); + ZVecFieldSchema *vector_field = zvec_test_create_vector_field( + "test_vector", ZVEC_DATA_TYPE_VECTOR_FP32, 128, false, hnsw_params); + TEST_ASSERT(vector_field != NULL); + if (vector_field) { + TEST_ASSERT(strcmp(vector_field->name->data, "test_vector") == 0); + TEST_ASSERT(vector_field->data_type == ZVEC_DATA_TYPE_VECTOR_FP32); + TEST_ASSERT(vector_field->dimension == 128); + free(vector_field); + } + if (hnsw_params) { + free(hnsw_params); + } + + TEST_END(); +} + +// ============================================================================= +// Document-related tests +// ============================================================================= + +void test_doc_creation(void) { + TEST_START(); + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Test complete document creation + ZVecDoc *doc = zvec_test_create_doc(1, schema, NULL); + TEST_ASSERT(doc != NULL); + if (doc) { + zvec_doc_destroy(doc); + } + + // Test null value document creation + ZVecDoc *null_doc = zvec_test_create_doc_null(2, schema, NULL); + TEST_ASSERT(null_doc != NULL); + if (null_doc) { + zvec_doc_destroy(null_doc); + } + + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +void test_doc_primary_key(void) { + TEST_START(); + + // Test primary key generation + char *pk = zvec_test_make_pk(12345); + TEST_ASSERT(pk != NULL); + if (pk) { + TEST_ASSERT(strcmp(pk, "pk_12345") == 0); + free(pk); + } + + TEST_END(); +} + +// Test for zvec_doc_add_field_by_value - covers all data types +void test_doc_add_field_by_value(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + if (!doc) { + TEST_END(); + 
return; + } + + // Scalar types + // BINARY + const char *binary_data = "binary"; + ZVecErrorCode err = + zvec_doc_add_field_by_value(doc, "binary_field", ZVEC_DATA_TYPE_BINARY, + binary_data, strlen(binary_data)); + TEST_ASSERT(err == ZVEC_OK); + + // STRING + const char *string_data = "hello"; + err = zvec_doc_add_field_by_value(doc, "string_field", ZVEC_DATA_TYPE_STRING, + string_data, strlen(string_data)); + TEST_ASSERT(err == ZVEC_OK); + + // BOOL + bool bool_val = true; + err = zvec_doc_add_field_by_value(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, + &bool_val, sizeof(bool_val)); + TEST_ASSERT(err == ZVEC_OK); + + // INT32 + int32_t int32_val = -12345; + err = zvec_doc_add_field_by_value(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &int32_val, sizeof(int32_val)); + TEST_ASSERT(err == ZVEC_OK); + + // INT64 + int64_t int64_val = -9876543210LL; + err = zvec_doc_add_field_by_value(doc, "int64_field", ZVEC_DATA_TYPE_INT64, + &int64_val, sizeof(int64_val)); + TEST_ASSERT(err == ZVEC_OK); + + // UINT32 + uint32_t uint32_val = 4294967295U; + err = zvec_doc_add_field_by_value(doc, "uint32_field", ZVEC_DATA_TYPE_UINT32, + &uint32_val, sizeof(uint32_val)); + TEST_ASSERT(err == ZVEC_OK); + + // UINT64 + uint64_t uint64_val = 18446744073709551615ULL; + err = zvec_doc_add_field_by_value(doc, "uint64_field", ZVEC_DATA_TYPE_UINT64, + &uint64_val, sizeof(uint64_val)); + TEST_ASSERT(err == ZVEC_OK); + + // FLOAT + float float_val = 3.14159f; + err = zvec_doc_add_field_by_value(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &float_val, sizeof(float_val)); + TEST_ASSERT(err == ZVEC_OK); + + // DOUBLE + double double_val = 3.14159265358979; + err = zvec_doc_add_field_by_value(doc, "double_field", ZVEC_DATA_TYPE_DOUBLE, + &double_val, sizeof(double_val)); + TEST_ASSERT(err == ZVEC_OK); + + // Vector types + // VECTOR_BINARY32 + uint32_t binary32_vec[] = {0xFFFFFFFF, 0x00000000, 0xAAAAAAAA, 0x55555555}; + err = zvec_doc_add_field_by_value(doc, "binary32_vec_field", + 
ZVEC_DATA_TYPE_VECTOR_BINARY32, + binary32_vec, sizeof(binary32_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_BINARY64 + uint64_t binary64_vec[] = {0xFFFFFFFFFFFFFFFFULL, 0x0000000000000000ULL}; + err = zvec_doc_add_field_by_value(doc, "binary64_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY64, + binary64_vec, sizeof(binary64_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP16 + uint16_t fp16_vec[] = {0x3C00, 0x4000, 0xC000, 0x8000}; + err = zvec_doc_add_field_by_value(doc, "fp16_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP16, fp16_vec, + sizeof(fp16_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP32 + float fp32_vec[] = {1.0f, -2.0f, 3.5f, -4.5f}; + err = zvec_doc_add_field_by_value(doc, "fp32_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP32, fp32_vec, + sizeof(fp32_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP64 + double fp64_vec[] = {1.1, -2.2, 3.3, -4.4}; + err = zvec_doc_add_field_by_value(doc, "fp64_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP64, fp64_vec, + sizeof(fp64_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT4 (packed - each byte contains 2 values) + int8_t int4_vec[] = {0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0}; + err = zvec_doc_add_field_by_value(doc, "int4_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT4, int4_vec, + sizeof(int4_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT8 + int8_t int8_vec[] = {-128, -1, 0, 1, 127}; + err = zvec_doc_add_field_by_value(doc, "int8_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT8, int8_vec, + sizeof(int8_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT16 + int16_t int16_vec[] = {-32768, -1, 0, 1, 32767}; + err = zvec_doc_add_field_by_value(doc, "int16_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT16, int16_vec, + sizeof(int16_vec)); + TEST_ASSERT(err == ZVEC_OK); + + // Sparse vector types + // SPARSE_VECTOR_FP16 - format: [nnz(size_t)][indices...][values...] 
+ size_t sparse_fp16_nnz = 3; + uint32_t sparse_fp16_indices[] = {0, 5, 10}; + uint16_t sparse_fp16_values[] = {0x3C00, 0x4000, 0xC000}; + size_t sparse_fp16_size = sizeof(sparse_fp16_nnz) + + sizeof(sparse_fp16_indices) + + sizeof(sparse_fp16_values); + char *sparse_fp16_buffer = (char *)malloc(sparse_fp16_size); + memcpy(sparse_fp16_buffer, &sparse_fp16_nnz, sizeof(sparse_fp16_nnz)); + memcpy(sparse_fp16_buffer + sizeof(sparse_fp16_nnz), sparse_fp16_indices, + sizeof(sparse_fp16_indices)); + memcpy(sparse_fp16_buffer + sizeof(sparse_fp16_nnz) + + sizeof(sparse_fp16_indices), + sparse_fp16_values, sizeof(sparse_fp16_values)); + err = zvec_doc_add_field_by_value(doc, "sparse_fp16_field", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, + sparse_fp16_buffer, sparse_fp16_size); + TEST_ASSERT(err == ZVEC_OK); + free(sparse_fp16_buffer); + + // SPARSE_VECTOR_FP32 + size_t sparse_fp32_nnz = 3; + uint32_t sparse_fp32_indices[] = {2, 7, 15}; + float sparse_fp32_values[] = {1.5f, -2.5f, 3.5f}; + size_t sparse_fp32_size = sizeof(sparse_fp32_nnz) + + sizeof(sparse_fp32_indices) + + sizeof(sparse_fp32_values); + char *sparse_fp32_buffer = (char *)malloc(sparse_fp32_size); + memcpy(sparse_fp32_buffer, &sparse_fp32_nnz, sizeof(sparse_fp32_nnz)); + memcpy(sparse_fp32_buffer + sizeof(sparse_fp32_nnz), sparse_fp32_indices, + sizeof(sparse_fp32_indices)); + memcpy(sparse_fp32_buffer + sizeof(sparse_fp32_nnz) + + sizeof(sparse_fp32_indices), + sparse_fp32_values, sizeof(sparse_fp32_values)); + err = zvec_doc_add_field_by_value(doc, "sparse_fp32_field", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, + sparse_fp32_buffer, sparse_fp32_size); + TEST_ASSERT(err == ZVEC_OK); + free(sparse_fp32_buffer); + + // Array types + // ARRAY_BINARY - format: [length(uint32_t)][data][length][data]... 
+ uint8_t array_bin_data[] = { + 1, 0, 0, 0, 0x01, // length=1, data=0x01 + 2, 0, 0, 0, 0x02, 0x03, // length=2, data=0x02,0x03 + 2, 0, 0, 0, 0x04, 0x05 // length=2, data=0x04,0x05 + }; + err = zvec_doc_add_field_by_value(doc, "array_binary_field", + ZVEC_DATA_TYPE_ARRAY_BINARY, array_bin_data, + sizeof(array_bin_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_STRING - null-terminated strings + const char *array_str_data[] = {"str1", "str2", "str3"}; + ZVecString *array_zvec_str[3]; + for (int i = 0; i < 3; i++) { + array_zvec_str[i] = zvec_string_create(array_str_data[i]); + } + err = zvec_doc_add_field_by_value(doc, "array_string_field", + ZVEC_DATA_TYPE_ARRAY_STRING, array_zvec_str, + sizeof(array_zvec_str)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_BOOL + bool array_bool_data[] = {true, false, true, false}; + err = zvec_doc_add_field_by_value(doc, "array_bool_field", + ZVEC_DATA_TYPE_ARRAY_BOOL, array_bool_data, + sizeof(array_bool_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT32 + int32_t array_int32_data[] = {-100, -50, 0, 50, 100}; + err = zvec_doc_add_field_by_value(doc, "array_int32_field", + ZVEC_DATA_TYPE_ARRAY_INT32, + array_int32_data, sizeof(array_int32_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT64 + int64_t array_int64_data[] = {-1000000, -500000, 0, 500000, 1000000}; + err = zvec_doc_add_field_by_value(doc, "array_int64_field", + ZVEC_DATA_TYPE_ARRAY_INT64, + array_int64_data, sizeof(array_int64_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT32 + uint32_t array_uint32_data[] = {0, 100, 1000, 10000, 4294967295U}; + err = zvec_doc_add_field_by_value( + doc, "array_uint32_field", ZVEC_DATA_TYPE_ARRAY_UINT32, array_uint32_data, + sizeof(array_uint32_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT64 + uint64_t array_uint64_data[] = {0, 100, 1000, 10000, 18446744073709551615ULL}; + err = zvec_doc_add_field_by_value( + doc, "array_uint64_field", ZVEC_DATA_TYPE_ARRAY_UINT64, array_uint64_data, + 
sizeof(array_uint64_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_FLOAT + float array_float_data[] = {-1.5f, -0.5f, 0.0f, 0.5f, 1.5f}; + err = zvec_doc_add_field_by_value(doc, "array_float_field", + ZVEC_DATA_TYPE_ARRAY_FLOAT, + array_float_data, sizeof(array_float_data)); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_DOUBLE + double array_double_data[] = {-1.1, -0.1, 0.0, 0.1, 1.1}; + err = zvec_doc_add_field_by_value( + doc, "array_double_field", ZVEC_DATA_TYPE_ARRAY_DOUBLE, array_double_data, + sizeof(array_double_data)); + TEST_ASSERT(err == ZVEC_OK); + + // Verify we can retrieve some of the values + void *result = NULL; + size_t result_size = 0; + err = zvec_doc_get_field_value_copy(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &result, &result_size); + TEST_ASSERT(err == ZVEC_OK && result_size == sizeof(int32_t)); + if (result) { + TEST_ASSERT(*(int32_t *)result == -12345); + free(result); + } + + err = zvec_doc_get_field_value_copy(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &result, &result_size); + TEST_ASSERT(err == ZVEC_OK && result_size == sizeof(float)); + if (result) { + TEST_ASSERT(fabs(*(float *)result - 3.14159f) < 0.0001f); + free(result); + } + + zvec_doc_destroy(doc); + TEST_END(); +} + +// Test for zvec_doc_add_field_by_struct - covers all data types +void test_doc_add_field_by_struct(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + if (!doc) { + TEST_END(); + return; + } + + ZVecErrorCode err; + ZVecDocField field; + + // Scalar types + // BINARY + memset(&field, 0, sizeof(field)); + field.name.data = "binary_field"; + field.name.length = strlen("binary_field"); + field.data_type = ZVEC_DATA_TYPE_BINARY; + uint8_t binary_data[] = {0x01, 0x02, 0x03, 0x04}; + field.value.binary_value.data = binary_data; + field.value.binary_value.length = sizeof(binary_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // STRING + memset(&field, 0, sizeof(field)); + 
field.name.data = "string_field"; + field.name.length = strlen("string_field"); + field.data_type = ZVEC_DATA_TYPE_STRING; + const char *string_data = "hello world"; + field.value.string_value.data = (char *)string_data; + field.value.string_value.length = strlen(string_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // BOOL + memset(&field, 0, sizeof(field)); + field.name.data = "bool_field"; + field.name.length = strlen("bool_field"); + field.data_type = ZVEC_DATA_TYPE_BOOL; + field.value.bool_value = true; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // INT32 + memset(&field, 0, sizeof(field)); + field.name.data = "int32_field"; + field.name.length = strlen("int32_field"); + field.data_type = ZVEC_DATA_TYPE_INT32; + field.value.int32_value = -12345; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // INT64 + memset(&field, 0, sizeof(field)); + field.name.data = "int64_field"; + field.name.length = strlen("int64_field"); + field.data_type = ZVEC_DATA_TYPE_INT64; + field.value.int64_value = -9876543210LL; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // UINT32 + memset(&field, 0, sizeof(field)); + field.name.data = "uint32_field"; + field.name.length = strlen("uint32_field"); + field.data_type = ZVEC_DATA_TYPE_UINT32; + field.value.uint32_value = 4294967295U; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // UINT64 + memset(&field, 0, sizeof(field)); + field.name.data = "uint64_field"; + field.name.length = strlen("uint64_field"); + field.data_type = ZVEC_DATA_TYPE_UINT64; + field.value.uint64_value = 18446744073709551615ULL; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // FLOAT + memset(&field, 0, sizeof(field)); + field.name.data = "float_field"; + field.name.length = strlen("float_field"); + field.data_type = 
ZVEC_DATA_TYPE_FLOAT; + field.value.float_value = 3.14159f; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // DOUBLE + memset(&field, 0, sizeof(field)); + field.name.data = "double_field"; + field.name.length = strlen("double_field"); + field.data_type = ZVEC_DATA_TYPE_DOUBLE; + field.value.double_value = 3.14159265358979; + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_BINARY32 + memset(&field, 0, sizeof(field)); + field.name.data = "binary32_vec_field"; + field.name.length = strlen("binary32_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_BINARY32; + uint32_t binary32_vec[] = {0xFFFFFFFF, 0x00000000, 0xAAAAAAAA, 0x55555555}; + field.value.vector_value.data = (const float *)binary32_vec; + field.value.vector_value.length = sizeof(binary32_vec) / sizeof(uint32_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_BINARY64 + memset(&field, 0, sizeof(field)); + field.name.data = "binary64_vec_field"; + field.name.length = strlen("binary64_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_BINARY64; + uint64_t binary64_vec[] = {0xFFFFFFFFFFFFFFFFULL, 0x0000000000000000ULL}; + field.value.vector_value.data = (const float *)binary64_vec; + field.value.vector_value.length = sizeof(binary64_vec) / sizeof(uint64_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP16 + memset(&field, 0, sizeof(field)); + field.name.data = "fp16_vec_field"; + field.name.length = strlen("fp16_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_FP16; + uint16_t fp16_vec[] = {0x3C00, 0x4000, 0xC000, 0x8000}; + field.value.vector_value.data = (const float *)fp16_vec; + field.value.vector_value.length = sizeof(fp16_vec) / sizeof(uint16_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP32 + memset(&field, 0, sizeof(field)); + field.name.data = 
"fp32_vec_field"; + field.name.length = strlen("fp32_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32; + float fp32_vec[] = {1.0f, -2.0f, 3.5f, -4.5f}; + field.value.vector_value.data = fp32_vec; + field.value.vector_value.length = sizeof(fp32_vec) / sizeof(float); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_FP64 + memset(&field, 0, sizeof(field)); + field.name.data = "fp64_vec_field"; + field.name.length = strlen("fp64_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_FP64; + double fp64_vec[] = {1.1, -2.2, 3.3, -4.4}; + field.value.vector_value.data = (const float *)fp64_vec; + field.value.vector_value.length = sizeof(fp64_vec) / sizeof(double); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT4 + memset(&field, 0, sizeof(field)); + field.name.data = "int4_vec_field"; + field.name.length = strlen("int4_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_INT4; + int8_t int4_vec[] = {0x12, 0x34, 0x56, 0x78}; + field.value.vector_value.data = (const float *)int4_vec; + field.value.vector_value.length = + sizeof(int4_vec) * 2; // Each byte contains 2 values + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT8 + memset(&field, 0, sizeof(field)); + field.name.data = "int8_vec_field"; + field.name.length = strlen("int8_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_INT8; + int8_t int8_vec[] = {-128, -1, 0, 1, 127}; + field.value.vector_value.data = (const float *)int8_vec; + field.value.vector_value.length = sizeof(int8_vec) / sizeof(int8_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // VECTOR_INT16 + memset(&field, 0, sizeof(field)); + field.name.data = "int16_vec_field"; + field.name.length = strlen("int16_vec_field"); + field.data_type = ZVEC_DATA_TYPE_VECTOR_INT16; + int16_t int16_vec[] = {-32768, -1, 0, 1, 32767}; + field.value.vector_value.data = 
(const float *)int16_vec; + field.value.vector_value.length = sizeof(int16_vec) / sizeof(int16_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // Sparse vector types + // SPARSE_VECTOR_FP16 + memset(&field, 0, sizeof(field)); + field.name.data = "sparse_fp16_field"; + field.name.length = strlen("sparse_fp16_field"); + field.data_type = ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16; + uint16_t sparse_fp16_values[] = {0x3C00, 0x4000, 0xC000}; + field.value.vector_value.data = (const float *)sparse_fp16_values; + field.value.vector_value.length = + sizeof(sparse_fp16_values) / sizeof(uint16_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // SPARSE_VECTOR_FP32 + memset(&field, 0, sizeof(field)); + field.name.data = "sparse_fp32_field"; + field.name.length = strlen("sparse_fp32_field"); + field.data_type = ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32; + float sparse_fp32_values[] = {1.5f, -2.5f, 3.5f}; + field.value.vector_value.data = sparse_fp32_values; + field.value.vector_value.length = sizeof(sparse_fp32_values) / sizeof(float); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // Array types + // ARRAY_BINARY + memset(&field, 0, sizeof(field)); + field.name.data = "array_binary_field"; + field.name.length = strlen("array_binary_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_BINARY; + uint8_t array_bin_data[] = { + 1, 0, 0, 0, 0x01, // length=1, data=0x01 + 2, 0, 0, 0, 0x02, 0x03, // length=2, data=0x02,0x03 + 2, 0, 0, 0, 0x04, 0x05 // length=2, data=0x04,0x05 + }; + field.value.binary_value.data = array_bin_data; + field.value.binary_value.length = sizeof(array_bin_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_STRING + memset(&field, 0, sizeof(field)); + field.name.data = "array_string_field"; + field.name.length = strlen("array_string_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_STRING; + const char 
array_string_data[] = "str1\0str2\0str3\0"; + field.value.string_value.data = (char *)array_string_data; + field.value.string_value.length = sizeof(array_string_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_BOOL + memset(&field, 0, sizeof(field)); + field.name.data = "array_bool_field"; + field.name.length = strlen("array_bool_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_BOOL; + bool array_bool_data[] = {true, false, true, false}; + field.value.binary_value.data = (const uint8_t *)array_bool_data; + field.value.binary_value.length = sizeof(array_bool_data); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT32 + memset(&field, 0, sizeof(field)); + field.name.data = "array_int32_field"; + field.name.length = strlen("array_int32_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_INT32; + int32_t array_int32_data[] = {-100, -50, 0, 50, 100}; + field.value.vector_value.data = (const float *)array_int32_data; + field.value.vector_value.length = sizeof(array_int32_data) / sizeof(int32_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_INT64 + memset(&field, 0, sizeof(field)); + field.name.data = "array_int64_field"; + field.name.length = strlen("array_int64_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_INT64; + int64_t array_int64_data[] = {-1000000, -500000, 0, 500000, 1000000}; + field.value.vector_value.data = (const float *)array_int64_data; + field.value.vector_value.length = sizeof(array_int64_data) / sizeof(int64_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT32 + memset(&field, 0, sizeof(field)); + field.name.data = "array_uint32_field"; + field.name.length = strlen("array_uint32_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_UINT32; + uint32_t array_uint32_data[] = {0, 100, 1000, 10000, 4294967295U}; + field.value.vector_value.data = (const float 
*)array_uint32_data; + field.value.vector_value.length = + sizeof(array_uint32_data) / sizeof(uint32_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_UINT64 + memset(&field, 0, sizeof(field)); + field.name.data = "array_uint64_field"; + field.name.length = strlen("array_uint64_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_UINT64; + uint64_t array_uint64_data[] = {0, 100, 1000, 10000, 18446744073709551615ULL}; + field.value.vector_value.data = (const float *)array_uint64_data; + field.value.vector_value.length = + sizeof(array_uint64_data) / sizeof(uint64_t); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_FLOAT + memset(&field, 0, sizeof(field)); + field.name.data = "array_float_field"; + field.name.length = strlen("array_float_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_FLOAT; + float array_float_data[] = {-1.5f, -0.5f, 0.0f, 0.5f, 1.5f}; + field.value.vector_value.data = array_float_data; + field.value.vector_value.length = sizeof(array_float_data) / sizeof(float); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // ARRAY_DOUBLE + memset(&field, 0, sizeof(field)); + field.name.data = "array_double_field"; + field.name.length = strlen("array_double_field"); + field.data_type = ZVEC_DATA_TYPE_ARRAY_DOUBLE; + double array_double_data[] = {-1.1, -0.1, 0.0, 0.1, 1.1}; + field.value.vector_value.data = (const float *)array_double_data; + field.value.vector_value.length = sizeof(array_double_data) / sizeof(double); + err = zvec_doc_add_field_by_struct(doc, &field); + TEST_ASSERT(err == ZVEC_OK); + + // Verify we can retrieve some of the values + void *result = NULL; + size_t result_size = 0; + + err = zvec_doc_get_field_value_copy(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &result, &result_size); + TEST_ASSERT(err == ZVEC_OK && result_size == sizeof(int32_t)); + if (result) { + TEST_ASSERT(*(int32_t *)result == -12345); + 
free(result); + } + + err = zvec_doc_get_field_value_copy(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &result, &result_size); + TEST_ASSERT(err == ZVEC_OK && result_size == sizeof(float)); + if (result) { + TEST_ASSERT(fabs(*(float *)result - 3.14159f) < 0.0001f); + free(result); + } + + zvec_doc_destroy(doc); + TEST_END(); +} + +void test_doc_basic_operations(void); +void test_doc_null_field_api(void); +void test_doc_get_field_value_basic(void); +void test_doc_get_field_value_copy(void); +void test_doc_get_field_value_pointer(void); +void test_doc_field_operations(void); +void test_doc_error_conditions(void); +void test_doc_serialization(void); +void test_doc_add_field_by_value(void); +void test_doc_add_field_by_struct(void); + +void test_doc_functions(void) { + test_doc_basic_operations(); + test_doc_null_field_api(); + test_doc_get_field_value_basic(); + test_doc_get_field_value_copy(); + test_doc_get_field_value_pointer(); + test_doc_field_operations(); + test_doc_error_conditions(); + test_doc_serialization(); +} + +void test_doc_basic_operations(void) { + TEST_START(); + + // Create test document + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + // Test primary key operations + zvec_doc_set_pk(doc, "test_doc_complete"); + const char *pk = zvec_doc_get_pk_pointer(doc); + TEST_ASSERT(pk != NULL); + TEST_ASSERT(strcmp(pk, "test_doc_complete") == 0); + + // Test document ID and score operations + zvec_doc_set_doc_id(doc, 99999); + uint64_t doc_id = zvec_doc_get_doc_id(doc); + TEST_ASSERT(doc_id == 99999); + + zvec_doc_set_score(doc, 0.95f); + float score = zvec_doc_get_score(doc); + TEST_ASSERT(score == 0.95f); + + // Test operator operations + zvec_doc_set_operator(doc, ZVEC_DOC_OP_INSERT); + ZVecDocOperator op = zvec_doc_get_operator(doc); + TEST_ASSERT(op == ZVEC_DOC_OP_INSERT); + + zvec_doc_destroy(doc); + + TEST_END(); +} + +void test_doc_null_field_api(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != 
NULL); + if (!doc) { + TEST_END(); + return; + } + + ZVecErrorCode err = zvec_doc_set_field_null(doc, "nullable_field"); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_doc_has_field(doc, "nullable_field") == true); + TEST_ASSERT(zvec_doc_has_field_value(doc, "nullable_field") == false); + TEST_ASSERT(zvec_doc_is_field_null(doc, "nullable_field") == true); + + err = zvec_doc_set_field_null(NULL, "nullable_field"); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + err = zvec_doc_set_field_null(doc, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + zvec_doc_destroy(doc); + TEST_END(); +} + +void test_doc_get_field_value_basic(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + ZVecErrorCode err; + + printf( + "=== Testing zvec_doc_get_field_value_basic with all supported types " + "===\n"); + + // BOOL type + ZVecDocField bool_field; + bool_field.name.data = "bool_field"; + bool_field.name.length = strlen("bool_field"); + bool_field.data_type = ZVEC_DATA_TYPE_BOOL; + bool_field.value.bool_value = true; + err = zvec_doc_add_field_by_struct(doc, &bool_field); + TEST_ASSERT(err == ZVEC_OK); + + bool bool_result; + err = zvec_doc_get_field_value_basic(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, + &bool_result, sizeof(bool_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bool_result == true); + + // INT32 type + ZVecDocField int32_field; + int32_field.name.data = "int32_field"; + int32_field.name.length = strlen("int32_field"); + int32_field.data_type = ZVEC_DATA_TYPE_INT32; + int32_field.value.int32_value = -2147483648; // Min int32 + err = zvec_doc_add_field_by_struct(doc, &int32_field); + TEST_ASSERT(err == ZVEC_OK); + + int32_t int32_result; + err = zvec_doc_get_field_value_basic(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &int32_result, sizeof(int32_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int32_result == -2147483648); + + // INT64 type + ZVecDocField int64_field; + int64_field.name.data = 
"int64_field"; + int64_field.name.length = strlen("int64_field"); + int64_field.data_type = ZVEC_DATA_TYPE_INT64; + int64_field.value.int64_value = 9223372036854775807LL; // Max int64 + err = zvec_doc_add_field_by_struct(doc, &int64_field); + TEST_ASSERT(err == ZVEC_OK); + + int64_t int64_result; + err = zvec_doc_get_field_value_basic(doc, "int64_field", ZVEC_DATA_TYPE_INT64, + &int64_result, sizeof(int64_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int64_result == 9223372036854775807LL); + + // UINT32 type + ZVecDocField uint32_field; + uint32_field.name.data = "uint32_field"; + uint32_field.name.length = strlen("uint32_field"); + uint32_field.data_type = ZVEC_DATA_TYPE_UINT32; + uint32_field.value.uint32_value = 4294967295U; // Max uint32 + err = zvec_doc_add_field_by_struct(doc, &uint32_field); + TEST_ASSERT(err == ZVEC_OK); + + uint32_t uint32_result; + err = + zvec_doc_get_field_value_basic(doc, "uint32_field", ZVEC_DATA_TYPE_UINT32, + &uint32_result, sizeof(uint32_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint32_result == 4294967295U); + + // UINT64 type + ZVecDocField uint64_field; + uint64_field.name.data = "uint64_field"; + uint64_field.name.length = strlen("uint64_field"); + uint64_field.data_type = ZVEC_DATA_TYPE_UINT64; + uint64_field.value.uint64_value = 18446744073709551615ULL; // Max uint64 + err = zvec_doc_add_field_by_struct(doc, &uint64_field); + TEST_ASSERT(err == ZVEC_OK); + + uint64_t uint64_result; + err = + zvec_doc_get_field_value_basic(doc, "uint64_field", ZVEC_DATA_TYPE_UINT64, + &uint64_result, sizeof(uint64_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint64_result == 18446744073709551615ULL); + + // FLOAT type + ZVecDocField float_field; + float_field.name.data = "float_field"; + float_field.name.length = strlen("float_field"); + float_field.data_type = ZVEC_DATA_TYPE_FLOAT; + float_field.value.float_value = 3.14159265359f; + err = zvec_doc_add_field_by_struct(doc, &float_field); + TEST_ASSERT(err == 
ZVEC_OK); + + float float_result; + err = zvec_doc_get_field_value_basic(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &float_result, sizeof(float_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fabsf(float_result - 3.14159265359f) < 1e-6f); + + // DOUBLE type + ZVecDocField double_field; + double_field.name.data = "double_field"; + double_field.name.length = strlen("double_field"); + double_field.data_type = ZVEC_DATA_TYPE_DOUBLE; + double_field.value.double_value = 2.71828182845904523536; + err = zvec_doc_add_field_by_struct(doc, &double_field); + TEST_ASSERT(err == ZVEC_OK); + + double double_result; + err = + zvec_doc_get_field_value_basic(doc, "double_field", ZVEC_DATA_TYPE_DOUBLE, + &double_result, sizeof(double_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fabs(double_result - 2.71828182845904523536) < 1e-15); + + zvec_doc_destroy(doc); + + TEST_END(); +} + +void test_doc_get_field_value_copy(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + ZVecErrorCode err; + + printf( + "=== Testing zvec_doc_get_field_value_copy with all supported types " + "===\n"); + + // Basic scalar types first + bool bool_val = true; + err = zvec_doc_add_field_by_value(doc, "bool_field2", ZVEC_DATA_TYPE_BOOL, + &bool_val, sizeof(bool_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *bool_copy_result; + size_t bool_copy_size; + err = zvec_doc_get_field_value_copy(doc, "bool_field2", ZVEC_DATA_TYPE_BOOL, + &bool_copy_result, &bool_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bool_copy_result != NULL); + TEST_ASSERT(bool_copy_size == sizeof(bool)); + TEST_ASSERT(*(bool *)bool_copy_result == true); + free(bool_copy_result); + + int32_t int32_val = -12345; + err = zvec_doc_add_field_by_value(doc, "int32_field2", ZVEC_DATA_TYPE_INT32, + &int32_val, sizeof(int32_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *int32_copy_result; + size_t int32_copy_size; + err = zvec_doc_get_field_value_copy(doc, "int32_field2", 
ZVEC_DATA_TYPE_INT32, + &int32_copy_result, &int32_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int32_copy_result != NULL); + TEST_ASSERT(int32_copy_size == sizeof(int32_t)); + TEST_ASSERT(*(int32_t *)int32_copy_result == -12345); + free(int32_copy_result); + + int64_t int64_val = -9223372036854775807LL; + err = zvec_doc_add_field_by_value(doc, "int64_field2", ZVEC_DATA_TYPE_INT64, + &int64_val, sizeof(int64_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *int64_copy_result; + size_t int64_copy_size; + err = zvec_doc_get_field_value_copy(doc, "int64_field2", ZVEC_DATA_TYPE_INT64, + &int64_copy_result, &int64_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int64_copy_result != NULL); + TEST_ASSERT(int64_copy_size == sizeof(int64_t)); + TEST_ASSERT(*(int64_t *)int64_copy_result == -9223372036854775807LL); + free(int64_copy_result); + + uint32_t uint32_val = 4000000000U; + err = zvec_doc_add_field_by_value(doc, "uint32_field2", ZVEC_DATA_TYPE_UINT32, + &uint32_val, sizeof(uint32_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *uint32_copy_result; + size_t uint32_copy_size; + err = + zvec_doc_get_field_value_copy(doc, "uint32_field2", ZVEC_DATA_TYPE_UINT32, + &uint32_copy_result, &uint32_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint32_copy_result != NULL); + TEST_ASSERT(uint32_copy_size == sizeof(uint32_t)); + TEST_ASSERT(*(uint32_t *)uint32_copy_result == 4000000000U); + free(uint32_copy_result); + + uint64_t uint64_val = 18000000000000000000ULL; + err = zvec_doc_add_field_by_value(doc, "uint64_field2", ZVEC_DATA_TYPE_UINT64, + &uint64_val, sizeof(uint64_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *uint64_copy_result; + size_t uint64_copy_size; + err = + zvec_doc_get_field_value_copy(doc, "uint64_field2", ZVEC_DATA_TYPE_UINT64, + &uint64_copy_result, &uint64_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint64_copy_result != NULL); + TEST_ASSERT(uint64_copy_size == sizeof(uint64_t)); + TEST_ASSERT(*(uint64_t 
*)uint64_copy_result == 18000000000000000000ULL); + free(uint64_copy_result); + + float float_val = 3.14159265f; + err = zvec_doc_add_field_by_value(doc, "float_field2", ZVEC_DATA_TYPE_FLOAT, + &float_val, sizeof(float_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *float_copy_result; + size_t float_copy_size; + err = zvec_doc_get_field_value_copy(doc, "float_field2", ZVEC_DATA_TYPE_FLOAT, + &float_copy_result, &float_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(float_copy_result != NULL); + TEST_ASSERT(float_copy_size == sizeof(float)); + TEST_ASSERT(fabs(*(float *)float_copy_result - 3.14159265f) < 1e-6f); + free(float_copy_result); + + double double_val = 2.718281828459045; + err = zvec_doc_add_field_by_value(doc, "double_field2", ZVEC_DATA_TYPE_DOUBLE, + &double_val, sizeof(double_val)); + TEST_ASSERT(err == ZVEC_OK); + + void *double_copy_result; + size_t double_copy_size; + err = + zvec_doc_get_field_value_copy(doc, "double_field2", ZVEC_DATA_TYPE_DOUBLE, + &double_copy_result, &double_copy_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(double_copy_result != NULL); + TEST_ASSERT(double_copy_size == sizeof(double)); + TEST_ASSERT(fabs(*(double *)double_copy_result - 2.718281828459045) < 1e-15); + free(double_copy_result); + + // String and binary types + ZVecDocField string_field; + string_field.name.data = "string_field"; + string_field.name.length = strlen("string_field"); + string_field.data_type = ZVEC_DATA_TYPE_STRING; + string_field.value.string_value = *zvec_string_create("Hello, 世界!"); + err = zvec_doc_add_field_by_struct(doc, &string_field); + TEST_ASSERT(err == ZVEC_OK); + + void *string_result; + size_t string_size; + err = zvec_doc_get_field_value_copy( + doc, "string_field", ZVEC_DATA_TYPE_STRING, &string_result, &string_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(string_result != NULL); + TEST_ASSERT(string_size == strlen("Hello, 世界!")); + TEST_ASSERT(memcmp(string_result, "Hello, 世界!", string_size) == 0); + 
free(string_result); + + ZVecDocField binary_field; + binary_field.name.data = "binary_field"; + binary_field.name.length = strlen("binary_field"); + binary_field.data_type = ZVEC_DATA_TYPE_BINARY; + uint8_t binary_data[] = {0x00, 0x01, 0x02, 0xFF, 0xFE, 0xFD}; + binary_field.value.string_value = + *zvec_bin_create(binary_data, sizeof(binary_data)); + err = zvec_doc_add_field_by_struct(doc, &binary_field); + TEST_ASSERT(err == ZVEC_OK); + + void *binary_result; + size_t binary_size; + err = zvec_doc_get_field_value_copy( + doc, "binary_field", ZVEC_DATA_TYPE_BINARY, &binary_result, &binary_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(binary_result != NULL); + TEST_ASSERT(binary_size == 6); + TEST_ASSERT(memcmp(binary_result, "\x00\x01\x02\xFF\xFE\xFD", binary_size) == + 0); + free(binary_result); + + // VECTOR_FP32 type + float test_vector[] = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f}; + ZVecDocField fp32_vec_field; + fp32_vec_field.name.data = "fp32_vec_field"; + fp32_vec_field.name.length = strlen("fp32_vec_field"); + fp32_vec_field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32; + fp32_vec_field.value.vector_value.data = test_vector; + fp32_vec_field.value.vector_value.length = 5; + err = zvec_doc_add_field_by_struct(doc, &fp32_vec_field); + TEST_ASSERT(err == ZVEC_OK); + + void *fp32_vec_result; + size_t fp32_vec_size; + err = zvec_doc_get_field_value_copy(doc, "fp32_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP32, + &fp32_vec_result, &fp32_vec_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp32_vec_result != NULL); + TEST_ASSERT(fp32_vec_size == 5 * sizeof(float)); + TEST_ASSERT(memcmp(fp32_vec_result, test_vector, fp32_vec_size) == 0); + free(fp32_vec_result); + + // VECTOR_FP16 type (16-bit float vector) + uint16_t fp16_data[] = {0x3C00, 0x4000, 0x4200, + 0x4400}; // FP16: 1.0, 2.0, 3.0, 4.0 + err = zvec_doc_add_field_by_value(doc, "fp16_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP16, fp16_data, + sizeof(fp16_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *fp16_result; + 
size_t fp16_size; + err = zvec_doc_get_field_value_copy(doc, "fp16_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP16, &fp16_result, + &fp16_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp16_result != NULL); + TEST_ASSERT(fp16_size == sizeof(fp16_data)); + TEST_ASSERT(memcmp(fp16_result, fp16_data, fp16_size) == 0); + free(fp16_result); + + // VECTOR_INT8 type + int8_t int8_data[] = {-128, -1, 0, 1, 127}; + err = zvec_doc_add_field_by_value(doc, "int8_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT8, int8_data, + sizeof(int8_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *int8_result; + size_t int8_size; + err = zvec_doc_get_field_value_copy(doc, "int8_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT8, &int8_result, + &int8_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int8_result != NULL); + TEST_ASSERT(int8_size == sizeof(int8_data)); + TEST_ASSERT(memcmp(int8_result, int8_data, int8_size) == 0); + free(int8_result); + + // VECTOR_BINARY32 type (32-bit aligned binary vector) + uint8_t bin32_data[] = {0xAA, 0x55, 0xAA, 0x55}; + err = zvec_doc_add_field_by_value(doc, "bin32_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY32, bin32_data, + sizeof(bin32_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *bin32_result; + size_t bin32_size; + err = zvec_doc_get_field_value_copy(doc, "bin32_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY32, + &bin32_result, &bin32_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bin32_result != NULL); + TEST_ASSERT(bin32_size == sizeof(bin32_data)); + TEST_ASSERT(memcmp(bin32_result, bin32_data, bin32_size) == 0); + free(bin32_result); + + // VECTOR_BINARY64 type (64-bit aligned binary vector) + uint64_t bin64_data[] = {0xAA55AA55AA55AA55ULL, 0x55AA55AA55AA55AAULL}; + err = zvec_doc_add_field_by_value(doc, "bin64_vec_field", + ZVEC_DATA_TYPE_VECTOR_BINARY64, bin64_data, + sizeof(bin64_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *bin64_result; + size_t bin64_size; + err = zvec_doc_get_field_value_copy(doc, "bin64_vec_field", + 
ZVEC_DATA_TYPE_VECTOR_BINARY64, + &bin64_result, &bin64_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bin64_result != NULL); + TEST_ASSERT(bin64_size == sizeof(bin64_data)); + TEST_ASSERT(memcmp(bin64_result, bin64_data, bin64_size) == 0); + free(bin64_result); + + // VECTOR_FP64 type (double precision vector) + double fp64_data[] = {1.1, 2.2, 3.3, 4.4}; + err = zvec_doc_add_field_by_value(doc, "fp64_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP64, fp64_data, + sizeof(fp64_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *fp64_result; + size_t fp64_size; + err = zvec_doc_get_field_value_copy(doc, "fp64_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP64, &fp64_result, + &fp64_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp64_result != NULL); + TEST_ASSERT(fp64_size == sizeof(fp64_data)); + TEST_ASSERT(memcmp(fp64_result, fp64_data, fp64_size) == 0); + free(fp64_result); + + // VECTOR_INT16 type + int16_t int16_data[] = {-32768, -1, 0, 1, 32767}; + err = zvec_doc_add_field_by_value(doc, "int16_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT16, int16_data, + sizeof(int16_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *int16_result; + size_t int16_size; + err = zvec_doc_get_field_value_copy(doc, "int16_vec_field", + ZVEC_DATA_TYPE_VECTOR_INT16, + &int16_result, &int16_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int16_result != NULL); + TEST_ASSERT(int16_size == sizeof(int16_data)); + TEST_ASSERT(memcmp(int16_result, int16_data, int16_size) == 0); + free(int16_result); + + // SPARSE_VECTOR_FP16 type - format: [nnz(uint32_t)][indices...][values...] 
+ uint32_t sparse_fp16_nnz = 3; + size_t sparse_fp16_size_input = + sizeof(uint32_t) + + sparse_fp16_nnz * (sizeof(uint32_t) + sizeof(uint16_t)); + void *sparse_fp16_input = malloc(sparse_fp16_size_input); + uint32_t *fp16_nnz_ptr = (uint32_t *)sparse_fp16_input; + *fp16_nnz_ptr = sparse_fp16_nnz; + uint32_t *fp16_indices = + (uint32_t *)((char *)sparse_fp16_input + sizeof(uint32_t)); + uint16_t *fp16_values = + (uint16_t *)((char *)sparse_fp16_input + sizeof(uint32_t) + + sparse_fp16_nnz * sizeof(uint32_t)); + fp16_indices[0] = 0; + fp16_indices[1] = 5; + fp16_indices[2] = 10; + fp16_values[0] = 0x3C00; + fp16_values[1] = 0x4000; + fp16_values[2] = 0x4200; // FP16: 1.0, 2.0, 3.0 + err = zvec_doc_add_field_by_value(doc, "sparse_fp16_field", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, + sparse_fp16_input, sparse_fp16_size_input); + TEST_ASSERT(err == ZVEC_OK); + free(sparse_fp16_input); + + void *sparse_fp16_result; + size_t sparse_fp16_result_size; + err = zvec_doc_get_field_value_copy( + doc, "sparse_fp16_field", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, + &sparse_fp16_result, &sparse_fp16_result_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(sparse_fp16_result != NULL); + // Sparse vector format: [nnz(size_t)][indices...][values...] + size_t retrieved_nnz = *(size_t *)sparse_fp16_result; + TEST_ASSERT(retrieved_nnz == 3); + uint32_t *retrieved_fp16_indices = + (uint32_t *)((char *)sparse_fp16_result + sizeof(size_t)); + uint16_t *retrieved_fp16_vals = + (uint16_t *)((char *)sparse_fp16_result + sizeof(size_t) + + retrieved_nnz * sizeof(uint32_t)); + TEST_ASSERT(retrieved_fp16_indices[0] == 0); + TEST_ASSERT(retrieved_fp16_indices[1] == 5); + TEST_ASSERT(retrieved_fp16_indices[2] == 10); + TEST_ASSERT(retrieved_fp16_vals[0] == 0x3C00); + TEST_ASSERT(retrieved_fp16_vals[1] == 0x4000); + TEST_ASSERT(retrieved_fp16_vals[2] == 0x4200); + free(sparse_fp16_result); + + // SPARSE_VECTOR_FP32 type - format: [nnz(uint32_t)][indices...][values...] 
+ uint32_t sparse_fp32_nnz = 4; + size_t sparse_fp32_size_input = + sizeof(uint32_t) + sparse_fp32_nnz * (sizeof(uint32_t) + sizeof(float)); + void *sparse_fp32_input = malloc(sparse_fp32_size_input); + uint32_t *fp32_nnz_ptr = (uint32_t *)sparse_fp32_input; + *fp32_nnz_ptr = sparse_fp32_nnz; + uint32_t *fp32_indices = + (uint32_t *)((char *)sparse_fp32_input + sizeof(uint32_t)); + float *fp32_values = (float *)((char *)sparse_fp32_input + sizeof(uint32_t) + + sparse_fp32_nnz * sizeof(uint32_t)); + fp32_indices[0] = 2; + fp32_indices[1] = 7; + fp32_indices[2] = 15; + fp32_indices[3] = 20; + fp32_values[0] = 1.5f; + fp32_values[1] = 2.5f; + fp32_values[2] = 3.5f; + fp32_values[3] = 4.5f; + err = zvec_doc_add_field_by_value(doc, "sparse_fp32_field", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, + sparse_fp32_input, sparse_fp32_size_input); + TEST_ASSERT(err == ZVEC_OK); + free(sparse_fp32_input); + + void *sparse_fp32_result; + size_t sparse_fp32_result_size; + err = zvec_doc_get_field_value_copy( + doc, "sparse_fp32_field", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, + &sparse_fp32_result, &sparse_fp32_result_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(sparse_fp32_result != NULL); + retrieved_nnz = *(size_t *)sparse_fp32_result; + TEST_ASSERT(retrieved_nnz == 4); + uint32_t *retrieved_fp32_indices = + (uint32_t *)((char *)sparse_fp32_result + sizeof(size_t)); + float *retrieved_fp32_vals = + (float *)((char *)sparse_fp32_result + sizeof(size_t) + + retrieved_nnz * sizeof(uint32_t)); + TEST_ASSERT(retrieved_fp32_indices[0] == 2); + TEST_ASSERT(retrieved_fp32_indices[1] == 7); + TEST_ASSERT(retrieved_fp32_indices[2] == 15); + TEST_ASSERT(retrieved_fp32_indices[3] == 20); + TEST_ASSERT(fabs(retrieved_fp32_vals[0] - 1.5f) < 1e-5f); + TEST_ASSERT(fabs(retrieved_fp32_vals[1] - 2.5f) < 1e-5f); + TEST_ASSERT(fabs(retrieved_fp32_vals[2] - 3.5f) < 1e-5f); + TEST_ASSERT(fabs(retrieved_fp32_vals[3] - 4.5f) < 1e-5f); + free(sparse_fp32_result); + + // ARRAY_BINARY type + // Format: 
[length(uint32_t)][data][length][data]... + uint8_t array_bin_data[] = { + 1, 0, 0, 0, 0x01, // length=1, data=0x01 + 2, 0, 0, 0, 0x02, 0x03, // length=2, data=0x02,0x03 + 2, 0, 0, 0, 0x04, 0x05 // length=2, data=0x04,0x05 + }; + err = zvec_doc_add_field_by_value(doc, "array_binary_field", + ZVEC_DATA_TYPE_ARRAY_BINARY, array_bin_data, + sizeof(array_bin_data)); + TEST_ASSERT(err == ZVEC_OK); + void *array_binary_result; + size_t array_binary_size; + err = zvec_doc_get_field_value_copy(doc, "array_binary_field", + ZVEC_DATA_TYPE_ARRAY_BINARY, + &array_binary_result, &array_binary_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_binary_result != NULL); + // The result is a contiguous buffer of binary data without length prefixes + TEST_ASSERT(array_binary_size == 5); // 1 + 2 + 2 bytes + const uint8_t *result_bytes = (const uint8_t *)array_binary_result; + TEST_ASSERT(result_bytes[0] == 0x01); + TEST_ASSERT(result_bytes[1] == 0x02); + TEST_ASSERT(result_bytes[2] == 0x03); + TEST_ASSERT(result_bytes[3] == 0x04); + TEST_ASSERT(result_bytes[4] == 0x05); + free(array_binary_result); + + + // ARRAY_STRING type + const char *array_str_data[] = {"str1", "str2", "str3"}; + ZVecString *array_zvec_str[3]; + for (int i = 0; i < 3; i++) { + array_zvec_str[i] = zvec_string_create(array_str_data[i]); + } + err = zvec_doc_add_field_by_value(doc, "array_string_field", + ZVEC_DATA_TYPE_ARRAY_STRING, array_zvec_str, + sizeof(array_zvec_str)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_string_result; + size_t array_string_size; + err = zvec_doc_get_field_value_copy(doc, "array_string_field", + ZVEC_DATA_TYPE_ARRAY_STRING, + &array_string_result, &array_string_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_string_result != NULL); + free(array_string_result); + for (int i = 0; i < 3; i++) { + zvec_free_string(array_zvec_str[i]); + } + + free(string_field.value.string_value.data); + + // ARRAY_BOOL type + bool array_bool_data[] = {true, false, true, false, true}; 
+ err = zvec_doc_add_field_by_value(doc, "array_bool_field", + ZVEC_DATA_TYPE_ARRAY_BOOL, array_bool_data, + sizeof(array_bool_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_bool_result; + size_t array_bool_size; + err = zvec_doc_get_field_value_copy(doc, "array_bool_field", + ZVEC_DATA_TYPE_ARRAY_BOOL, + &array_bool_result, &array_bool_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_bool_result != NULL); + // Verify the bit-packed bool array + uint8_t *bool_bytes = (uint8_t *)array_bool_result; + TEST_ASSERT((bool_bytes[0] & 0x01) != 0); // index 0: true + TEST_ASSERT((bool_bytes[0] & 0x02) == 0); // index 1: false + TEST_ASSERT((bool_bytes[0] & 0x04) != 0); // index 2: true + TEST_ASSERT((bool_bytes[0] & 0x08) == 0); // index 3: false + TEST_ASSERT((bool_bytes[0] & 0x10) != 0); // index 4: true + free(array_bool_result); + + // ARRAY_INT32 type + int32_t array_int32_data[] = {100, 200, 300}; + err = zvec_doc_add_field_by_value(doc, "array_int32_field", + ZVEC_DATA_TYPE_ARRAY_INT32, + array_int32_data, sizeof(array_int32_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_int32_result; + size_t array_int32_size; + err = zvec_doc_get_field_value_copy(doc, "array_int32_field", + ZVEC_DATA_TYPE_ARRAY_INT32, + &array_int32_result, &array_int32_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_int32_result != NULL); + TEST_ASSERT(array_int32_size == sizeof(array_int32_data)); + TEST_ASSERT(((int32_t *)array_int32_result)[0] == 100); + TEST_ASSERT(((int32_t *)array_int32_result)[1] == 200); + TEST_ASSERT(((int32_t *)array_int32_result)[2] == 300); + free(array_int32_result); + + // ARRAY_INT64 type + int64_t array_int64_data[] = {-9223372036854775807LL, 0, + 9223372036854775807LL}; + err = zvec_doc_add_field_by_value(doc, "array_int64_field", + ZVEC_DATA_TYPE_ARRAY_INT64, + array_int64_data, sizeof(array_int64_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_int64_result; + size_t array_int64_size; + err = 
zvec_doc_get_field_value_copy(doc, "array_int64_field", + ZVEC_DATA_TYPE_ARRAY_INT64, + &array_int64_result, &array_int64_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_int64_result != NULL); + TEST_ASSERT(array_int64_size == sizeof(array_int64_data)); + TEST_ASSERT(((int64_t *)array_int64_result)[0] == -9223372036854775807LL); + TEST_ASSERT(((int64_t *)array_int64_result)[1] == 0); + TEST_ASSERT(((int64_t *)array_int64_result)[2] == 9223372036854775807LL); + free(array_int64_result); + + // ARRAY_UINT32 type + uint32_t array_uint32_data[] = {0U, 1000000U, 4000000000U}; + err = zvec_doc_add_field_by_value( + doc, "array_uint32_field", ZVEC_DATA_TYPE_ARRAY_UINT32, array_uint32_data, + sizeof(array_uint32_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_uint32_result; + size_t array_uint32_size; + err = zvec_doc_get_field_value_copy(doc, "array_uint32_field", + ZVEC_DATA_TYPE_ARRAY_UINT32, + &array_uint32_result, &array_uint32_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_uint32_result != NULL); + TEST_ASSERT(array_uint32_size == sizeof(array_uint32_data)); + TEST_ASSERT(((uint32_t *)array_uint32_result)[0] == 0U); + TEST_ASSERT(((uint32_t *)array_uint32_result)[1] == 1000000U); + TEST_ASSERT(((uint32_t *)array_uint32_result)[2] == 4000000000U); + free(array_uint32_result); + + // ARRAY_UINT64 type + uint64_t array_uint64_data[] = {0ULL, 1000000000000ULL, + 18000000000000000000ULL}; + err = zvec_doc_add_field_by_value( + doc, "array_uint64_field", ZVEC_DATA_TYPE_ARRAY_UINT64, array_uint64_data, + sizeof(array_uint64_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_uint64_result; + size_t array_uint64_size; + err = zvec_doc_get_field_value_copy(doc, "array_uint64_field", + ZVEC_DATA_TYPE_ARRAY_UINT64, + &array_uint64_result, &array_uint64_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_uint64_result != NULL); + TEST_ASSERT(array_uint64_size == sizeof(array_uint64_data)); + TEST_ASSERT(((uint64_t *)array_uint64_result)[0] == 
0ULL); + TEST_ASSERT(((uint64_t *)array_uint64_result)[1] == 1000000000000ULL); + TEST_ASSERT(((uint64_t *)array_uint64_result)[2] == 18000000000000000000ULL); + free(array_uint64_result); + + // ARRAY_FLOAT type + float array_float_data[] = {1.5f, 2.5f, 3.5f}; + err = zvec_doc_add_field_by_value(doc, "array_float_field", + ZVEC_DATA_TYPE_ARRAY_FLOAT, + array_float_data, sizeof(array_float_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_float_result; + size_t array_float_size; + err = zvec_doc_get_field_value_copy(doc, "array_float_field", + ZVEC_DATA_TYPE_ARRAY_FLOAT, + &array_float_result, &array_float_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_float_result != NULL); + TEST_ASSERT(array_float_size == sizeof(array_float_data)); + TEST_ASSERT(((float *)array_float_result)[0] == 1.5f); + TEST_ASSERT(((float *)array_float_result)[1] == 2.5f); + TEST_ASSERT(((float *)array_float_result)[2] == 3.5f); + free(array_float_result); + + // ARRAY_DOUBLE type + double array_double_data[] = {1.111111, 2.222222, 3.333333}; + err = zvec_doc_add_field_by_value( + doc, "array_double_field", ZVEC_DATA_TYPE_ARRAY_DOUBLE, array_double_data, + sizeof(array_double_data)); + TEST_ASSERT(err == ZVEC_OK); + + void *array_double_result; + size_t array_double_size; + err = zvec_doc_get_field_value_copy(doc, "array_double_field", + ZVEC_DATA_TYPE_ARRAY_DOUBLE, + &array_double_result, &array_double_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(array_double_result != NULL); + TEST_ASSERT(array_double_size == sizeof(array_double_data)); + TEST_ASSERT(fabs(((double *)array_double_result)[0] - 1.111111) < 1e-10); + TEST_ASSERT(fabs(((double *)array_double_result)[1] - 2.222222) < 1e-10); + TEST_ASSERT(fabs(((double *)array_double_result)[2] - 3.333333) < 1e-10); + free(array_double_result); + + + free(binary_field.value.string_value.data); + zvec_doc_destroy(doc); + + TEST_END(); +}
+
+// Verifies zvec_doc_get_field_value_pointer() for scalar, string/binary,
+// dense-vector and array fields: every lookup must return ZVEC_OK, a non-NULL
+// pointer, the expected byte size, and the value that was stored.
+void test_doc_get_field_value_pointer(void) {
+  TEST_START();
+
+  ZVecDoc *doc = zvec_doc_create();
+  TEST_ASSERT(doc != NULL);
+
+  ZVecErrorCode err;
+
+  // Add fields for pointer testing
+  ZVecDocField bool_field;
+  bool_field.name.data = "bool_field";
+  bool_field.name.length = strlen("bool_field");
+  bool_field.data_type = ZVEC_DATA_TYPE_BOOL;
+  bool_field.value.bool_value = true;
+  err = zvec_doc_add_field_by_struct(doc, &bool_field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  ZVecDocField int32_field;
+  int32_field.name.data = "int32_field";
+  int32_field.name.length = strlen("int32_field");
+  int32_field.data_type = ZVEC_DATA_TYPE_INT32;
+  // INT32_MIN instead of the literal -2147483648, which is a negated
+  // wider-than-int constant and triggers warnings on some compilers.
+  int32_field.value.int32_value = INT32_MIN;
+  err = zvec_doc_add_field_by_struct(doc, &int32_field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // Keep the pointer returned by zvec_string_create() so the wrapper can be
+  // released at the end of the test; dereferencing it in place would leak it.
+  // NOTE(review): assumes zvec_free_string() releases both the wrapper and
+  // its data buffer, as its use on zvec_string_create() results elsewhere in
+  // this file suggests - confirm.
+  ZVecString *hello_str = zvec_string_create("Hello, 世界!");
+  TEST_ASSERT(hello_str != NULL);
+  ZVecDocField string_field;
+  string_field.name.data = "string_field";
+  string_field.name.length = strlen("string_field");
+  string_field.data_type = ZVEC_DATA_TYPE_STRING;
+  string_field.value.string_value = *hello_str;
+  err = zvec_doc_add_field_by_struct(doc, &string_field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // NOTE(review): the wrapper returned by zvec_bin_create() is copied by
+  // value and never released; only its data buffer is freed at the end of
+  // the test. Confirm the matching release function and free the wrapper too.
+  ZVecDocField binary_field;
+  binary_field.name.data = "binary_field";
+  binary_field.name.length = strlen("binary_field");
+  binary_field.data_type = ZVEC_DATA_TYPE_BINARY;
+  uint8_t binary_data[] = {0x00, 0x01, 0x02, 0xFF, 0xFE, 0xFD};
+  binary_field.value.string_value =
+      *zvec_bin_create(binary_data, sizeof(binary_data));
+  err = zvec_doc_add_field_by_struct(doc, &binary_field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  float test_vector[] = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f};
+  ZVecDocField fp32_vec_field;
+  fp32_vec_field.name.data = "fp32_vec_field";
+  fp32_vec_field.name.length = strlen("fp32_vec_field");
+  fp32_vec_field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32;
+  fp32_vec_field.value.vector_value.data = test_vector;
+  fp32_vec_field.value.vector_value.length = 5;
+  err = zvec_doc_add_field_by_struct(doc, &fp32_vec_field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // Add more fields for comprehensive pointer testing
+  int64_t int64_val = -9223372036854775807LL;
+  err =
+      zvec_doc_add_field_by_value(doc, "int64_field_ptr", ZVEC_DATA_TYPE_INT64,
+                                  &int64_val, sizeof(int64_val));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  uint32_t uint32_val = 4000000000U;
+  err = zvec_doc_add_field_by_value(doc, "uint32_field_ptr",
+                                    ZVEC_DATA_TYPE_UINT32, &uint32_val,
+                                    sizeof(uint32_val));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  uint64_t uint64_val = 18000000000000000000ULL;
+  err = zvec_doc_add_field_by_value(doc, "uint64_field_ptr",
+                                    ZVEC_DATA_TYPE_UINT64, &uint64_val,
+                                    sizeof(uint64_val));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  float float_val = 3.14159265f;
+  err =
+      zvec_doc_add_field_by_value(doc, "float_field_ptr", ZVEC_DATA_TYPE_FLOAT,
+                                  &float_val, sizeof(float_val));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  double double_val = 2.718281828459045;
+  err = zvec_doc_add_field_by_value(doc, "double_field_ptr",
+                                    ZVEC_DATA_TYPE_DOUBLE, &double_val,
+                                    sizeof(double_val));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // VECTOR_BINARY64
+  uint64_t bin64_vec_data[] = {0xAA55AA55AA55AA55ULL, 0x55AA55AA55AA55AAULL};
+  err = zvec_doc_add_field_by_value(doc, "bin64_vec_field_ptr",
+                                    ZVEC_DATA_TYPE_VECTOR_BINARY64,
+                                    bin64_vec_data, sizeof(bin64_vec_data));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // VECTOR_FP16
+  uint16_t fp16_vec_data[] = {0x3C00, 0x4000, 0x4200, 0x4400};
+  err = zvec_doc_add_field_by_value(doc, "fp16_vec_field_ptr",
+                                    ZVEC_DATA_TYPE_VECTOR_FP16, fp16_vec_data,
+                                    sizeof(fp16_vec_data));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // VECTOR_FP64
+  double fp64_vec_data[] = {1.1, 2.2, 3.3, 4.4};
+  err = zvec_doc_add_field_by_value(doc, "fp64_vec_field_ptr",
+                                    ZVEC_DATA_TYPE_VECTOR_FP64, fp64_vec_data,
+                                    sizeof(fp64_vec_data));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // VECTOR_INT8
+  int8_t int8_vec_data[] = {-128, -1, 0, 1, 127};
+  err = zvec_doc_add_field_by_value(doc, "int8_vec_field_ptr",
+                                    ZVEC_DATA_TYPE_VECTOR_INT8, int8_vec_data,
+                                    sizeof(int8_vec_data));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // VECTOR_INT16
+  int16_t int16_vec_data[] = {-32768, -1, 0, 1, 32767};
+  err = zvec_doc_add_field_by_value(doc, "int16_vec_field_ptr",
+                                    ZVEC_DATA_TYPE_VECTOR_INT16, int16_vec_data,
+                                    sizeof(int16_vec_data));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // ARRAY_INT32
+  int32_t array_int32_data[] = {100, 200, 300};
+  err = zvec_doc_add_field_by_value(doc, "array_int32_field_ptr",
+                                    ZVEC_DATA_TYPE_ARRAY_INT32,
+                                    array_int32_data, sizeof(array_int32_data));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // ARRAY_INT64
+  int64_t array_int64_data[] = {-9223372036854775807LL, 0,
+                                9223372036854775807LL};
+  err = zvec_doc_add_field_by_value(doc, "array_int64_field_ptr",
+                                    ZVEC_DATA_TYPE_ARRAY_INT64,
+                                    array_int64_data, sizeof(array_int64_data));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // ARRAY_UINT32
+  uint32_t array_uint32_data[] = {0U, 1000000U, 4000000000U};
+  err = zvec_doc_add_field_by_value(
+      doc, "array_uint32_field_ptr", ZVEC_DATA_TYPE_ARRAY_UINT32,
+      array_uint32_data, sizeof(array_uint32_data));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // ARRAY_UINT64
+  uint64_t array_uint64_data[] = {0ULL, 1000000000000ULL,
+                                  18000000000000000000ULL};
+  err = zvec_doc_add_field_by_value(
+      doc, "array_uint64_field_ptr", ZVEC_DATA_TYPE_ARRAY_UINT64,
+      array_uint64_data, sizeof(array_uint64_data));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // ARRAY_FLOAT
+  float array_float_data[] = {1.5f, 2.5f, 3.5f};
+  err = zvec_doc_add_field_by_value(doc, "array_float_field_ptr",
+                                    ZVEC_DATA_TYPE_ARRAY_FLOAT,
+                                    array_float_data, sizeof(array_float_data));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // ARRAY_DOUBLE
+  double array_double_data[] = {1.111111, 2.222222, 3.333333};
+  err = zvec_doc_add_field_by_value(
+      doc, "array_double_field_ptr", ZVEC_DATA_TYPE_ARRAY_DOUBLE,
+      array_double_data, sizeof(array_double_data));
+  TEST_ASSERT(err == ZVEC_OK);
+
+  printf(
+      "=== Testing zvec_doc_get_field_value_pointer with all supported types "
+      "===\n");
+
+  // Test pointer access to BOOL
+  const void *bool_ptr;
+  size_t bool_ptr_size;
+  err = zvec_doc_get_field_value_pointer(doc, "bool_field", ZVEC_DATA_TYPE_BOOL,
+                                         &bool_ptr, &bool_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(bool_ptr != NULL);
+  TEST_ASSERT(bool_ptr_size == sizeof(bool));
+  TEST_ASSERT(*(const bool *)bool_ptr == true);
+
+  // Test pointer access to INT32
+  const void *int32_ptr;
+  size_t int32_ptr_size;
+  err = zvec_doc_get_field_value_pointer(
+      doc, "int32_field", ZVEC_DATA_TYPE_INT32, &int32_ptr, &int32_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(int32_ptr != NULL);
+  TEST_ASSERT(int32_ptr_size == sizeof(int32_t));
+  TEST_ASSERT(*(const int32_t *)int32_ptr == INT32_MIN);
+
+  // Test pointer access to STRING
+  const void *string_ptr;
+  size_t string_ptr_size;
+  err = zvec_doc_get_field_value_pointer(doc, "string_field",
+                                         ZVEC_DATA_TYPE_STRING, &string_ptr,
+                                         &string_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(string_ptr != NULL);
+  TEST_ASSERT(string_ptr_size == strlen("Hello, 世界!"));
+  TEST_ASSERT(memcmp(string_ptr, "Hello, 世界!", string_ptr_size) == 0);
+
+  // Test pointer access to BINARY
+  const void *binary_ptr;
+  size_t binary_ptr_size;
+  err = zvec_doc_get_field_value_pointer(doc, "binary_field",
+                                         ZVEC_DATA_TYPE_BINARY, &binary_ptr,
+                                         &binary_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(binary_ptr != NULL);
+  TEST_ASSERT(binary_ptr_size == 6);
+  TEST_ASSERT(memcmp(binary_ptr, "\x00\x01\x02\xFF\xFE\xFD", binary_ptr_size) ==
+              0);
+
+  // Test pointer access to VECTOR_FP32
+  const void *fp32_vec_ptr;
+  size_t fp32_vec_ptr_size;
+  err = zvec_doc_get_field_value_pointer(doc, "fp32_vec_field",
+                                         ZVEC_DATA_TYPE_VECTOR_FP32,
+                                         &fp32_vec_ptr, &fp32_vec_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(fp32_vec_ptr != NULL);
+  TEST_ASSERT(fp32_vec_ptr_size == 5 * sizeof(float));
+  TEST_ASSERT(memcmp(fp32_vec_ptr, test_vector, fp32_vec_ptr_size) == 0);
+
+  // Test pointer access to INT64
+  const void *int64_ptr;
+  size_t int64_ptr_size;
+  err = zvec_doc_get_field_value_pointer(doc, "int64_field_ptr",
+                                         ZVEC_DATA_TYPE_INT64, &int64_ptr,
+                                         &int64_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(int64_ptr != NULL);
+  TEST_ASSERT(int64_ptr_size == sizeof(int64_t));
+  TEST_ASSERT(*(const int64_t *)int64_ptr == -9223372036854775807LL);
+
+  // Test pointer access to UINT32
+  const void *uint32_ptr;
+  size_t uint32_ptr_size;
+  err = zvec_doc_get_field_value_pointer(doc, "uint32_field_ptr",
+                                         ZVEC_DATA_TYPE_UINT32, &uint32_ptr,
+                                         &uint32_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(uint32_ptr != NULL);
+  TEST_ASSERT(uint32_ptr_size == sizeof(uint32_t));
+  TEST_ASSERT(*(const uint32_t *)uint32_ptr == 4000000000U);
+
+  // Test pointer access to UINT64
+  const void *uint64_ptr;
+  size_t uint64_ptr_size;
+  err = zvec_doc_get_field_value_pointer(doc, "uint64_field_ptr",
+                                         ZVEC_DATA_TYPE_UINT64, &uint64_ptr,
+                                         &uint64_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(uint64_ptr != NULL);
+  TEST_ASSERT(uint64_ptr_size == sizeof(uint64_t));
+  TEST_ASSERT(*(const uint64_t *)uint64_ptr == 18000000000000000000ULL);
+
+  // Test pointer access to FLOAT
+  const void *float_ptr;
+  size_t float_ptr_size;
+  err = zvec_doc_get_field_value_pointer(doc, "float_field_ptr",
+                                         ZVEC_DATA_TYPE_FLOAT, &float_ptr,
+                                         &float_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(float_ptr != NULL);
+  TEST_ASSERT(float_ptr_size == sizeof(float));
+  TEST_ASSERT(fabs(*(const float *)float_ptr - 3.14159265f) < 1e-6f);
+
+  // Test pointer access to DOUBLE
+  const void *double_ptr;
+  size_t double_ptr_size;
+  err = zvec_doc_get_field_value_pointer(doc, "double_field_ptr",
+                                         ZVEC_DATA_TYPE_DOUBLE, &double_ptr,
+                                         &double_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(double_ptr != NULL);
+  TEST_ASSERT(double_ptr_size == sizeof(double));
+  TEST_ASSERT(fabs(*(const double *)double_ptr - 2.718281828459045) < 1e-15);
+
+  // Test pointer access to VECTOR_BINARY64
+  const void *bin64_vec_ptr;
+  size_t bin64_vec_ptr_size;
+  err = zvec_doc_get_field_value_pointer(doc, "bin64_vec_field_ptr",
+                                         ZVEC_DATA_TYPE_VECTOR_BINARY64,
+                                         &bin64_vec_ptr, &bin64_vec_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(bin64_vec_ptr != NULL);
+  TEST_ASSERT(bin64_vec_ptr_size == sizeof(bin64_vec_data));
+  TEST_ASSERT(memcmp(bin64_vec_ptr, bin64_vec_data, bin64_vec_ptr_size) == 0);
+
+  // Test pointer access to VECTOR_FP16
+  const void *fp16_vec_ptr;
+  size_t fp16_vec_ptr_size;
+  err = zvec_doc_get_field_value_pointer(doc, "fp16_vec_field_ptr",
+                                         ZVEC_DATA_TYPE_VECTOR_FP16,
+                                         &fp16_vec_ptr, &fp16_vec_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(fp16_vec_ptr != NULL);
+  TEST_ASSERT(fp16_vec_ptr_size == sizeof(fp16_vec_data));
+  TEST_ASSERT(memcmp(fp16_vec_ptr, fp16_vec_data, fp16_vec_ptr_size) == 0);
+
+  // Test pointer access to VECTOR_FP64
+  const void *fp64_vec_ptr;
+  size_t fp64_vec_ptr_size;
+  err = zvec_doc_get_field_value_pointer(doc, "fp64_vec_field_ptr",
+                                         ZVEC_DATA_TYPE_VECTOR_FP64,
+                                         &fp64_vec_ptr, &fp64_vec_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(fp64_vec_ptr != NULL);
+  TEST_ASSERT(fp64_vec_ptr_size == sizeof(fp64_vec_data));
+  TEST_ASSERT(memcmp(fp64_vec_ptr, fp64_vec_data, fp64_vec_ptr_size) == 0);
+
+  // Test pointer access to VECTOR_INT8
+  const void *int8_vec_ptr;
+  size_t int8_vec_ptr_size;
+  err = zvec_doc_get_field_value_pointer(doc, "int8_vec_field_ptr",
+                                         ZVEC_DATA_TYPE_VECTOR_INT8,
+                                         &int8_vec_ptr, &int8_vec_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(int8_vec_ptr != NULL);
+  TEST_ASSERT(int8_vec_ptr_size == sizeof(int8_vec_data));
+  TEST_ASSERT(memcmp(int8_vec_ptr, int8_vec_data, int8_vec_ptr_size) == 0);
+
+  // Test pointer access to VECTOR_INT16
+  const void *int16_vec_ptr;
+  size_t int16_vec_ptr_size;
+  err = zvec_doc_get_field_value_pointer(doc, "int16_vec_field_ptr",
+                                         ZVEC_DATA_TYPE_VECTOR_INT16,
+                                         &int16_vec_ptr, &int16_vec_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(int16_vec_ptr != NULL);
+  TEST_ASSERT(int16_vec_ptr_size == sizeof(int16_vec_data));
+  TEST_ASSERT(memcmp(int16_vec_ptr, int16_vec_data, int16_vec_ptr_size) == 0);
+
+  // Test pointer access to ARRAY_INT32
+  const void *array_int32_ptr;
+  size_t array_int32_ptr_size;
+  err = zvec_doc_get_field_value_pointer(
+      doc, "array_int32_field_ptr", ZVEC_DATA_TYPE_ARRAY_INT32,
+      &array_int32_ptr, &array_int32_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(array_int32_ptr != NULL);
+  TEST_ASSERT(array_int32_ptr_size == sizeof(array_int32_data));
+  TEST_ASSERT(((const int32_t *)array_int32_ptr)[0] == 100);
+  TEST_ASSERT(((const int32_t *)array_int32_ptr)[1] == 200);
+  TEST_ASSERT(((const int32_t *)array_int32_ptr)[2] == 300);
+
+  // Test pointer access to ARRAY_INT64
+  const void *array_int64_ptr;
+  size_t array_int64_ptr_size;
+  err = zvec_doc_get_field_value_pointer(
+      doc, "array_int64_field_ptr", ZVEC_DATA_TYPE_ARRAY_INT64,
+      &array_int64_ptr, &array_int64_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(array_int64_ptr != NULL);
+  TEST_ASSERT(array_int64_ptr_size == sizeof(array_int64_data));
+  TEST_ASSERT(((const int64_t *)array_int64_ptr)[0] == -9223372036854775807LL);
+  TEST_ASSERT(((const int64_t *)array_int64_ptr)[1] == 0);
+  TEST_ASSERT(((const int64_t *)array_int64_ptr)[2] == 9223372036854775807LL);
+
+  // Test pointer access to ARRAY_UINT32
+  const void *array_uint32_ptr;
+  size_t array_uint32_ptr_size;
+  err = zvec_doc_get_field_value_pointer(
+      doc, "array_uint32_field_ptr", ZVEC_DATA_TYPE_ARRAY_UINT32,
+      &array_uint32_ptr, &array_uint32_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(array_uint32_ptr != NULL);
+  TEST_ASSERT(array_uint32_ptr_size == sizeof(array_uint32_data));
+  TEST_ASSERT(((const uint32_t *)array_uint32_ptr)[0] == 0U);
+  TEST_ASSERT(((const uint32_t *)array_uint32_ptr)[1] == 1000000U);
+  TEST_ASSERT(((const uint32_t *)array_uint32_ptr)[2] == 4000000000U);
+
+  // Test pointer access to ARRAY_UINT64
+  const void *array_uint64_ptr;
+  size_t array_uint64_ptr_size;
+  err = zvec_doc_get_field_value_pointer(
+      doc, "array_uint64_field_ptr", ZVEC_DATA_TYPE_ARRAY_UINT64,
+      &array_uint64_ptr, &array_uint64_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(array_uint64_ptr != NULL);
+  TEST_ASSERT(array_uint64_ptr_size == sizeof(array_uint64_data));
+  TEST_ASSERT(((const uint64_t *)array_uint64_ptr)[0] == 0ULL);
+  TEST_ASSERT(((const uint64_t *)array_uint64_ptr)[1] == 1000000000000ULL);
+  TEST_ASSERT(((const uint64_t *)array_uint64_ptr)[2] ==
+              18000000000000000000ULL);
+
+  // Test pointer access to ARRAY_FLOAT
+  const void *array_float_ptr;
+  size_t array_float_ptr_size;
+  err = zvec_doc_get_field_value_pointer(
+      doc, "array_float_field_ptr", ZVEC_DATA_TYPE_ARRAY_FLOAT,
+      &array_float_ptr, &array_float_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(array_float_ptr != NULL);
+  TEST_ASSERT(array_float_ptr_size == sizeof(array_float_data));
+  TEST_ASSERT(((const float *)array_float_ptr)[0] == 1.5f);
+  TEST_ASSERT(((const float *)array_float_ptr)[1] == 2.5f);
+  TEST_ASSERT(((const float *)array_float_ptr)[2] == 3.5f);
+
+  // Test pointer access to ARRAY_DOUBLE
+  const void *array_double_ptr;
+  size_t array_double_ptr_size;
+  err = zvec_doc_get_field_value_pointer(
+      doc, "array_double_field_ptr", ZVEC_DATA_TYPE_ARRAY_DOUBLE,
+      &array_double_ptr, &array_double_ptr_size);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(array_double_ptr != NULL);
+  TEST_ASSERT(array_double_ptr_size == sizeof(array_double_data));
+  TEST_ASSERT(fabs(((const double *)array_double_ptr)[0] - 1.111111) < 1e-10);
+  TEST_ASSERT(fabs(((const double *)array_double_ptr)[1] - 2.222222) < 1e-10);
+  TEST_ASSERT(fabs(((const double *)array_double_ptr)[2] - 3.333333) < 1e-10);
+
+  // Release the string wrapper (and its buffer) only after the last read of
+  // the document, in the same place the data buffer used to be freed.
+  zvec_free_string(hello_str);
+  free(binary_field.value.string_value.data);
+  zvec_doc_destroy(doc);
+
+  TEST_END();
+}
+
+// Exercises document field bookkeeping: field count, existence / value /
+// null queries, and enumeration of field names.
+void test_doc_field_operations(void) {
+  TEST_START();
+
+  ZVecDoc *doc = zvec_doc_create();
+  TEST_ASSERT(doc != NULL);
+
+  ZVecErrorCode err;
+
+  // Add some fields
+  ZVecDocField bool_field;
+  bool_field.name.data = "bool_field";
+  bool_field.name.length = strlen("bool_field");
+  bool_field.data_type = ZVEC_DATA_TYPE_BOOL;
+  bool_field.value.bool_value = true;
+  err = zvec_doc_add_field_by_struct(doc, &bool_field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  ZVecDocField int32_field;
+  int32_field.name.data = "int32_field";
+  int32_field.name.length = strlen("int32_field");
+  int32_field.data_type = ZVEC_DATA_TYPE_INT32;
+  int32_field.value.int32_value = INT32_MIN;
+  err = zvec_doc_add_field_by_struct(doc, &int32_field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // Keep the wrapper pointer so it can be released instead of leaked.
+  // NOTE(review): assumes zvec_free_string() releases both the wrapper and
+  // its data buffer - confirm.
+  ZVecString *hello_str = zvec_string_create("Hello");
+  TEST_ASSERT(hello_str != NULL);
+  ZVecDocField string_field;
+  string_field.name.data = "string_field";
+  string_field.name.length = strlen("string_field");
+  string_field.data_type = ZVEC_DATA_TYPE_STRING;
+  string_field.value.string_value = *hello_str;
+  err = zvec_doc_add_field_by_struct(doc, &string_field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  // Test field count
+  size_t field_count = zvec_doc_get_field_count(doc);
+  TEST_ASSERT(field_count >= 3);
+
+  // Test field existence checks
+  TEST_ASSERT(zvec_doc_has_field(doc, "bool_field") == true);
+  TEST_ASSERT(zvec_doc_has_field(doc, "int32_field") == true);
+  TEST_ASSERT(zvec_doc_has_field(doc, "string_field") == true);
+  TEST_ASSERT(zvec_doc_has_field(doc, "nonexistent") == false);
+
+  TEST_ASSERT(zvec_doc_has_field_value(doc, "bool_field") == true);
+  TEST_ASSERT(zvec_doc_is_field_null(doc, "bool_field") == false);
+  TEST_ASSERT(zvec_doc_is_field_null(doc, "nonexistent") == false);
+
+  // Test field names retrieval
+  char **field_names;
+  size_t name_count;
+  err = zvec_doc_get_field_names(doc, &field_names, &name_count);
+  TEST_ASSERT(err == ZVEC_OK);
+  TEST_ASSERT(name_count >= 3);
+  TEST_ASSERT(field_names != NULL);
+
+  // Every field added above must be listed; finding just one of them is not
+  // enough to show that name enumeration works.
+  bool found_bool_field = false;
+  bool found_int32_field = false;
+  bool found_string_field = false;
+  for (size_t i = 0; i < name_count; i++) {
+    if (strcmp(field_names[i], "bool_field") == 0) {
+      found_bool_field = true;
+    } else if (strcmp(field_names[i], "int32_field") == 0) {
+      found_int32_field = true;
+    } else if (strcmp(field_names[i], "string_field") == 0) {
+      found_string_field = true;
+    }
+  }
+  TEST_ASSERT(found_bool_field == true);
+  TEST_ASSERT(found_int32_field == true);
+  TEST_ASSERT(found_string_field == true);
+
+  zvec_free_str_array(field_names, name_count);
+  zvec_free_string(hello_str);
+  zvec_doc_destroy(doc);
+
+  TEST_END();
+}
+
+// Checks that all three getters (basic, copy, pointer) report an error for a
+// field that does not exist and for a field read with the wrong data type.
+void test_doc_error_conditions(void) {
+  TEST_START();
+
+  ZVecDoc *doc = zvec_doc_create();
+  TEST_ASSERT(doc != NULL);
+
+  // Add a field for error testing
+  ZVecDocField bool_field;
+  bool_field.name.data = "bool_field";
+  bool_field.name.length = strlen("bool_field");
+  bool_field.data_type = ZVEC_DATA_TYPE_BOOL;
+  bool_field.value.bool_value = true;
+  // The setup must succeed: otherwise the "wrong data type" checks below
+  // would pass merely because "bool_field" is missing.
+  ZVecErrorCode err = zvec_doc_add_field_by_struct(doc, &bool_field);
+  TEST_ASSERT(err == ZVEC_OK);
+
+  const void *dummy_ptr;
+  size_t dummy_ptr_size;
+  int32_t int32_result;
+  void *string_result;
+  size_t string_size;
+
+  printf("=== Testing error conditions ===\n");
+
+  // Test non-existent field
+  err =
+      zvec_doc_get_field_value_basic(doc, "missing_field", ZVEC_DATA_TYPE_INT32,
+                                     &int32_result, sizeof(int32_result));
+  TEST_ASSERT(err != ZVEC_OK);
+
+  err =
+      zvec_doc_get_field_value_copy(doc, "missing_field", ZVEC_DATA_TYPE_STRING,
+                                    &string_result, &string_size);
+  TEST_ASSERT(err != ZVEC_OK);
+
+  err = zvec_doc_get_field_value_pointer(
+      doc, "missing_field", ZVEC_DATA_TYPE_FLOAT, &dummy_ptr, &dummy_ptr_size);
+  TEST_ASSERT(err != ZVEC_OK);
+
+  // Test wrong data type access
+  err = zvec_doc_get_field_value_basic(doc, "bool_field", ZVEC_DATA_TYPE_INT32,
+                                       &int32_result, sizeof(int32_result));
+  TEST_ASSERT(err != ZVEC_OK);
+
+  err = zvec_doc_get_field_value_copy(doc, "bool_field", ZVEC_DATA_TYPE_STRING,
+                                      &string_result, &string_size);
+  TEST_ASSERT(err != ZVEC_OK);
+
+  err = zvec_doc_get_field_value_pointer(
+      doc, "bool_field", ZVEC_DATA_TYPE_FLOAT, &dummy_ptr, &dummy_ptr_size);
+  TEST_ASSERT(err != ZVEC_OK);
+
+  zvec_doc_destroy(doc);
+
+  TEST_END();
+}
+
+void test_doc_serialization(void) { + TEST_START(); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + ZVecErrorCode err; + + // Add
fields for serialization testing + ZVecDocField int32_field; + int32_field.name.data = "int32_field"; + int32_field.name.length = strlen("int32_field"); + int32_field.data_type = ZVEC_DATA_TYPE_INT32; + int32_field.value.int32_value = -2147483648; + err = zvec_doc_add_field_by_struct(doc, &int32_field); + TEST_ASSERT(err == ZVEC_OK); + + ZVecDocField string_field; + string_field.name.data = "string_field"; + string_field.name.length = strlen("string_field"); + string_field.data_type = ZVEC_DATA_TYPE_STRING; + string_field.value.string_value = *zvec_string_create("Serialization Test"); + err = zvec_doc_add_field_by_struct(doc, &string_field); + TEST_ASSERT(err == ZVEC_OK); + + printf("=== Testing document serialization ===\n"); + + uint8_t *serialized_data; + size_t data_size; + err = zvec_doc_serialize(doc, &serialized_data, &data_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(serialized_data != NULL); + TEST_ASSERT(data_size > 0); + + ZVecDoc *deserialized_doc; + err = zvec_doc_deserialize(serialized_data, data_size, &deserialized_doc); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(deserialized_doc != NULL); + + // Verify deserialized document has same field count + size_t field_count = zvec_doc_get_field_count(doc); + size_t deserialized_field_count = zvec_doc_get_field_count(deserialized_doc); + TEST_ASSERT(deserialized_field_count == field_count); + + // Test a field from deserialized document + int32_t deserialized_int32; + err = zvec_doc_get_field_value_basic( + deserialized_doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &deserialized_int32, sizeof(deserialized_int32)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(deserialized_int32 == -2147483648); + + zvec_free_uint8_array(serialized_data); + free(string_field.value.string_value.data); + zvec_doc_destroy(deserialized_doc); + zvec_doc_destroy(doc); + + TEST_END(); +} + +// ============================================================================= +// Index parameter tests +// 
============================================================================= + +void test_index_params(void) { + TEST_START(); + + // Test HNSW parameter creation + ZVecIndexParams *hnsw_params = zvec_test_create_default_hnsw_params(); + TEST_ASSERT(hnsw_params != NULL); + if (hnsw_params) { + free(hnsw_params); + } + + // Test Flat parameter creation + ZVecIndexParams *flat_params = zvec_test_create_default_flat_params(); + TEST_ASSERT(flat_params != NULL); + if (flat_params) { + free(flat_params); + } + + // Test scalar index parameter creation + ZVecIndexParams *invert_params = zvec_test_create_default_invert_params(true); + TEST_ASSERT(invert_params != NULL); + if (invert_params) { + free(invert_params); + } + + TEST_END(); +} + +// ============================================================================= +// Memory management tests +// ============================================================================= +void test_zvec_string_functions(void) { + TEST_START(); + + // Test string creation and basic operations + ZVecString *str1 = zvec_string_create("Hello World"); + TEST_ASSERT(str1 != NULL); + TEST_ASSERT(zvec_string_length(str1) == 11); + TEST_ASSERT(strcmp(zvec_string_c_str(str1), "Hello World") == 0); + + // Test string copy + ZVecString *str2 = zvec_string_copy(str1); + TEST_ASSERT(str2 != NULL); + TEST_ASSERT(zvec_string_length(str2) == 11); + TEST_ASSERT(strcmp(zvec_string_c_str(str2), "Hello World") == 0); + + // Test string comparison + int cmp_result = zvec_string_compare(str1, str2); + TEST_ASSERT(cmp_result == 0); + + ZVecString *str3 = zvec_string_create("Hello"); + TEST_ASSERT(zvec_string_compare(str1, str3) > 0); + + // Test string creation from view + ZVecStringView view = {"Hello View", 10}; + ZVecString *str4 = zvec_string_create_from_view(&view); + TEST_ASSERT(str4 != NULL); + TEST_ASSERT(zvec_string_length(str4) == 10); + TEST_ASSERT(strcmp(zvec_string_c_str(str4), "Hello View") == 0); + + // Test string view with embedded null 
bytes + char binary_data[] = {'H', 'e', 'l', 'l', 'o', '\0', 'W', 'o', 'r', 'l', 'd'}; + ZVecStringView binary_view = {binary_data, 11}; + ZVecString *str5 = zvec_string_create_from_view(&binary_view); + TEST_ASSERT(str5 != NULL); + TEST_ASSERT(zvec_string_length(str5) == 11); + // Note: strcmp will stop at first null byte, so we need to compare manually + TEST_ASSERT(memcmp(zvec_string_c_str(str5), binary_data, 11) == 0); + + // Cleanup + zvec_free_string(str1); + zvec_free_string(str2); + zvec_free_string(str3); + zvec_free_string(str4); + zvec_free_string(str5); + + TEST_END(); +} + +void test_index_params_functions(void) { + TEST_START(); + + // Test index params with new flat structure + // clang-format off + ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + TEST_ASSERT(hnsw_params.index_type == ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params.metric_type == ZVEC_METRIC_TYPE_COSINE); + TEST_ASSERT(hnsw_params.hnsw.m == 16); + TEST_ASSERT(hnsw_params.hnsw.ef_construction == 200); + TEST_ASSERT(hnsw_params.hnsw.ef_search == 50); + + // Test invert index params + // clang-format off + ZVecIndexParams invert_params = ZVEC_INVERT_PARAMS(true, false); + // clang-format on + TEST_ASSERT(invert_params.index_type == ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(invert_params.invert.enable_range_optimization == true); + TEST_ASSERT(invert_params.invert.enable_extended_wildcard == false); + + // Test flat index params + // clang-format off + ZVecIndexParams flat_params = + ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + TEST_ASSERT(flat_params.index_type == ZVEC_INDEX_TYPE_FLAT); + TEST_ASSERT(flat_params.metric_type == ZVEC_METRIC_TYPE_IP); + + // Test IVF index params + // clang-format off + ZVecIndexParams ivf_params = ZVEC_IVF_PARAMS(ZVEC_METRIC_TYPE_L2, 100, 10, true, 5, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + 
TEST_ASSERT(ivf_params.index_type == ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params.metric_type == ZVEC_METRIC_TYPE_L2); + TEST_ASSERT(ivf_params.ivf.n_list == 100); + TEST_ASSERT(ivf_params.ivf.n_iters == 10); + TEST_ASSERT(ivf_params.ivf.use_soar == true); + TEST_ASSERT(ivf_params.ivf.n_probe == 5); + + TEST_END(); +} + +void test_index_params_api_functions(void) { + TEST_START(); + + ZVecIndexParams params; + ZVecErrorCode error; + + // Test zvec_index_params_init for HNSW + zvec_index_params_init(¶ms, ZVEC_INDEX_TYPE_HNSW, + ZVEC_METRIC_TYPE_COSINE); + TEST_ASSERT(params.index_type == ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(params.metric_type == ZVEC_METRIC_TYPE_COSINE); + + // Test zvec_index_params_set_hnsw + zvec_index_params_set_hnsw(¶ms, 32, 300, 150); + TEST_ASSERT(params.hnsw.m == 32); + TEST_ASSERT(params.hnsw.ef_construction == 300); + TEST_ASSERT(params.hnsw.ef_search == 150); + + // Test zvec_index_params_init for IVF + zvec_index_params_init(¶ms, ZVEC_INDEX_TYPE_IVF, ZVEC_METRIC_TYPE_L2); + TEST_ASSERT(params.index_type == ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(params.metric_type == ZVEC_METRIC_TYPE_L2); + + // Test zvec_index_params_set_ivf + zvec_index_params_set_ivf(¶ms, 200, 20, true, 10); + TEST_ASSERT(params.ivf.n_list == 200); + TEST_ASSERT(params.ivf.n_iters == 20); + TEST_ASSERT(params.ivf.use_soar == true); + TEST_ASSERT(params.ivf.n_probe == 10); + + // Test zvec_index_params_init for INVERT + zvec_index_params_init(¶ms, ZVEC_INDEX_TYPE_INVERT, + ZVEC_METRIC_TYPE_UNDEFINED); + TEST_ASSERT(params.index_type == ZVEC_INDEX_TYPE_INVERT); + + // Test zvec_index_params_set_invert + zvec_index_params_set_invert(¶ms, true, true); + TEST_ASSERT(params.invert.enable_range_optimization == true); + TEST_ASSERT(params.invert.enable_extended_wildcard == true); + + // Test zvec_index_params_init for FLAT + zvec_index_params_init(¶ms, ZVEC_INDEX_TYPE_FLAT, ZVEC_METRIC_TYPE_IP); + TEST_ASSERT(params.index_type == ZVEC_INDEX_TYPE_FLAT); + 
TEST_ASSERT(params.metric_type == ZVEC_METRIC_TYPE_IP); + + TEST_END(); +} + +void test_utility_functions(void) { + TEST_START(); + + // Test error code to string conversion + const char *error_str = zvec_error_code_to_string(ZVEC_OK); + TEST_ASSERT(error_str != NULL); + TEST_ASSERT(strlen(error_str) > 0); + + error_str = zvec_error_code_to_string(ZVEC_ERROR_INVALID_ARGUMENT); + TEST_ASSERT(error_str != NULL); + + // Test data type to string conversion + const char *data_type_str = zvec_data_type_to_string(ZVEC_DATA_TYPE_INT32); + TEST_ASSERT(data_type_str != NULL); + TEST_ASSERT(strlen(data_type_str) > 0); + + data_type_str = zvec_data_type_to_string(ZVEC_DATA_TYPE_STRING); + TEST_ASSERT(data_type_str != NULL); + + // Test index type to string conversion + const char *index_type_str = zvec_index_type_to_string(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(index_type_str != NULL); + TEST_ASSERT(strlen(index_type_str) > 0); + + index_type_str = zvec_index_type_to_string(ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(index_type_str != NULL); + + TEST_END(); +} + +void test_memory_management_functions(void) { + TEST_START(); + + // Test string allocation and deallocation + ZVecString *str = zvec_string_create("Test String"); + TEST_ASSERT(str != NULL); + zvec_free_string(str); + + void *buffer = malloc(64); + TEST_ASSERT(buffer != NULL); + zvec_free_ptr(buffer); + + TEST_END(); +} + +void test_query_params_functions(void) { + TEST_START(); + + // Test basic query parameters creation and destruction + ZVecQueryParams *base_params = zvec_query_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(base_params != NULL); + + // Test union query parameters + ZVecQueryParamsUnion *union_params = + zvec_query_params_union_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(union_params != NULL); + + // Test HNSW query parameters + ZVecHnswQueryParams *hnsw_params = zvec_query_params_hnsw_create( + ZVEC_INDEX_TYPE_HNSW, 50, 0.5f, false, true); + TEST_ASSERT(hnsw_params != NULL); + + // Test IVF query 
parameters + ZVecIVFQueryParams *ivf_params = + zvec_query_params_ivf_create(ZVEC_INDEX_TYPE_IVF, 10, true, 1.5f); + TEST_ASSERT(ivf_params != NULL); + + // Test Flat query parameters + ZVecFlatQueryParams *flat_params = + zvec_query_params_flat_create(ZVEC_INDEX_TYPE_FLAT, false, 2.0f); + TEST_ASSERT(flat_params != NULL); + + // Test setting various parameters on base query params + ZVecErrorCode err; + + // Test index type setting + err = zvec_query_params_set_index_type(base_params, ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(err == ZVEC_OK); + + // Test radius setting + err = zvec_query_params_set_radius(base_params, 0.8f); + TEST_ASSERT(err == ZVEC_OK); + + // Test linear search setting + err = zvec_query_params_set_is_linear(base_params, false); + TEST_ASSERT(err == ZVEC_OK); + + // Test refiner setting + err = zvec_query_params_set_is_using_refiner(base_params, true); + TEST_ASSERT(err == ZVEC_OK); + + // Test HNSW-specific parameters + err = zvec_query_params_hnsw_set_ef(hnsw_params, 75); + TEST_ASSERT(err == ZVEC_OK); + + // Test IVF-specific parameters + err = zvec_query_params_ivf_set_nprobe(ivf_params, 15); + TEST_ASSERT(err == ZVEC_OK); + + // Test IVF scale factor setting + err = zvec_query_params_ivf_set_scale_factor(ivf_params, 2.5f); + TEST_ASSERT(err == ZVEC_OK); + + // Test destruction of valid parameters + zvec_query_params_destroy(base_params); + zvec_query_params_hnsw_destroy(hnsw_params); + zvec_query_params_ivf_destroy(ivf_params); + zvec_query_params_flat_destroy(flat_params); + zvec_query_params_union_destroy(union_params); + + + // Test boundary cases - null pointer handling + zvec_query_params_hnsw_destroy(NULL); + zvec_query_params_ivf_destroy(NULL); + zvec_query_params_flat_destroy(NULL); + zvec_query_params_union_destroy(NULL); + + + TEST_END(); +} + +void test_collection_stats_functions(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_stats_functions"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); 
+ TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + ZVecCollectionStats *stats = NULL; + + // Test normal statistics retrieval + err = zvec_collection_get_stats(collection, &stats); + TEST_ASSERT(err == ZVEC_OK); + + if (stats) { + TEST_ASSERT(stats->doc_count == 0); + zvec_collection_stats_destroy(stats); + } + + // Test NULL parameters + err = zvec_collection_get_stats(NULL, &stats); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_get_stats(collection, NULL); + TEST_ASSERT(err != ZVEC_OK); + + // Test statistics destruction boundary cases + zvec_collection_stats_destroy(NULL); + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_dml_functions(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_dml"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Test insertion function boundary cases + size_t success_count, error_count; + + // Test NULL collection + err = zvec_collection_insert(NULL, NULL, 0, &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test NULL document array + err = zvec_collection_insert(collection, NULL, 0, &success_count, + &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test zero document count + ZVecDoc *empty_docs[1]; + err = zvec_collection_insert(collection, (const ZVecDoc **)empty_docs, 0, + &success_count, &error_count); + 
TEST_ASSERT(err != ZVEC_OK); + + // Test NULL count pointer + err = zvec_collection_insert(collection, (const ZVecDoc **)empty_docs, 1, + NULL, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test update function boundary cases + err = zvec_collection_update(NULL, NULL, 0, &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_update(collection, NULL, 0, &success_count, + &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_update(collection, (const ZVecDoc **)empty_docs, 0, + NULL, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test upsert function boundary cases + err = zvec_collection_upsert(NULL, NULL, 0, &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_upsert(collection, NULL, 0, &success_count, + &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_upsert(collection, (const ZVecDoc **)empty_docs, 0, + NULL, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test deletion function boundary cases + const char *pks[1]; + err = zvec_collection_delete(NULL, NULL, 0, &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_delete(collection, NULL, 0, &success_count, + &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_delete(collection, pks, 0, NULL, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test deletion by filter boundary cases + err = zvec_collection_delete_by_filter(NULL, NULL); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_delete_by_filter(collection, NULL); + TEST_ASSERT(err != ZVEC_OK); + + // Test detailed DML result APIs + ZVecDoc *result_doc = zvec_test_create_doc(101, schema, NULL); + TEST_ASSERT(result_doc != NULL); + if (result_doc) { + ZVecDoc *result_docs[] = {result_doc}; + ZVecWriteResult *results = NULL; + size_t result_count = 0; + + err = zvec_collection_upsert_with_results(collection, + (const ZVecDoc **)result_docs, + 1, &results, &result_count); + 
TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(result_count == 1); + if (results && result_count == 1) { + TEST_ASSERT(results[0].pk != NULL); + if (results[0].pk) { + TEST_ASSERT(strcmp(results[0].pk, "pk_101") == 0); + } + TEST_ASSERT(results[0].code == ZVEC_OK); + zvec_write_results_free(results, result_count); + } + + const char *delete_pks[] = {"pk_101"}; + results = NULL; + result_count = 0; + err = zvec_collection_delete_with_results(collection, delete_pks, 1, + &results, &result_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(result_count == 1); + if (results && result_count == 1) { + TEST_ASSERT(results[0].pk != NULL); + if (results[0].pk) { + TEST_ASSERT(strcmp(results[0].pk, "pk_101") == 0); + } + zvec_write_results_free(results, result_count); + } + + zvec_doc_destroy(result_doc); + } + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_nullable_roundtrip(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_nullable_roundtrip"; + zvec_test_delete_dir(temp_dir); + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + if (!schema) { + TEST_END(); + return; + } + + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + if (doc) { + zvec_doc_set_pk(doc, "pk_nullable"); + + int64_t id = 77; + err = zvec_doc_add_field_by_value(doc, "id", ZVEC_DATA_TYPE_INT64, &id, + sizeof(id)); + TEST_ASSERT(err == ZVEC_OK); + + const char *name = "nullable"; + err = zvec_doc_add_field_by_value(doc, "name", ZVEC_DATA_TYPE_STRING, + name, strlen(name)); + TEST_ASSERT(err == ZVEC_OK); + + 
// "weight" in temp schema is nullable. + err = zvec_doc_set_field_null(doc, "weight"); + TEST_ASSERT(err == ZVEC_OK); + + float dense[128]; + for (size_t i = 0; i < 128; ++i) { + dense[i] = (float)i / 128.0f; + } + err = zvec_doc_add_field_by_value( + doc, "dense", ZVEC_DATA_TYPE_VECTOR_FP32, dense, sizeof(dense)); + TEST_ASSERT(err == ZVEC_OK); + + uint32_t nnz = 3; + uint32_t sparse_indices[] = {1, 5, 9}; + float sparse_values[] = {0.2f, 0.5f, 0.9f}; + char sparse_buffer[sizeof(nnz) + sizeof(sparse_indices) + + sizeof(sparse_values)]; + memcpy(sparse_buffer, &nnz, sizeof(nnz)); + memcpy(sparse_buffer + sizeof(nnz), sparse_indices, + sizeof(sparse_indices)); + memcpy(sparse_buffer + sizeof(nnz) + sizeof(sparse_indices), + sparse_values, sizeof(sparse_values)); + err = zvec_doc_add_field_by_value(doc, "sparse", + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, + sparse_buffer, sizeof(sparse_buffer)); + TEST_ASSERT(err == ZVEC_OK); + + ZVecDoc *docs[] = {doc}; + size_t success_count = 0; + size_t error_count = 0; + err = zvec_collection_upsert(collection, (const ZVecDoc **)docs, 1, + &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 1); + TEST_ASSERT(error_count == 0); + + const char *pks[] = {"pk_nullable"}; + ZVecDoc **fetched = NULL; + size_t fetched_count = 0; + err = zvec_collection_fetch(collection, pks, 1, &fetched, &fetched_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fetched_count == 1); + if (fetched && fetched_count == 1) { + TEST_ASSERT(zvec_doc_has_field(fetched[0], "weight") == true); + TEST_ASSERT(zvec_doc_has_field_value(fetched[0], "weight") == false); + TEST_ASSERT(zvec_doc_is_field_null(fetched[0], "weight") == true); + } + zvec_docs_free(fetched, fetched_count); + zvec_doc_destroy(doc); + } + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + zvec_test_delete_dir(temp_dir); + + TEST_END(); +} + +// 
============================================================================= +// Actual Query Execution Tests +// ============================================================================= + +void test_actual_vector_queries(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_actual_queries"; + + // Create schema with vector field + ZVecCollectionSchema *schema = zvec_collection_schema_create("query_test"); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Add ID field + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + zvec_collection_schema_add_field(schema, id_field); + + // Add vector field with HNSW index + // clang-format off + ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + ZVecFieldSchema *vec_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 4); + zvec_field_schema_set_hnsw_index(vec_field, &hnsw_params); + zvec_collection_schema_add_field(schema, vec_field); + + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Insert test documents + float vec1[] = {1.0f, 0.0f, 0.0f, 0.0f}; + float vec2[] = {0.0f, 1.0f, 0.0f, 0.0f}; + float vec3[] = {0.0f, 0.0f, 1.0f, 0.0f}; + float vec4[] = {0.7f, 0.7f, 0.0f, 0.0f}; // Similar to vec1 and vec2 + + ZVecDoc *docs[4]; + for (int i = 0; i < 4; i++) { + docs[i] = zvec_doc_create(); + zvec_doc_set_pk(docs[i], zvec_test_make_pk(i + 1)); + zvec_doc_add_field_by_value(docs[i], "id", ZVEC_DATA_TYPE_INT64, + &(int64_t){i + 1}, sizeof(int64_t)); + } + + zvec_doc_add_field_by_value( + docs[0], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec1, sizeof(vec1)); + zvec_doc_add_field_by_value( + docs[1], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec2, sizeof(vec2)); + 
zvec_doc_add_field_by_value( + docs[2], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec3, sizeof(vec3)); + zvec_doc_add_field_by_value( + docs[3], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec4, sizeof(vec4)); + + size_t success_count, error_count; + err = zvec_collection_insert(collection, (const ZVecDoc **)docs, 4, + &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 4); + TEST_ASSERT(error_count == 0); + + // Flush collection to build index + zvec_collection_flush(collection); + + // Test 1: Basic vector search + ZVecVectorQuery query1 = {0}; + query1.field_name = (ZVecString){.data = "embedding", .length = 9}; + query1.query_vector = + (ZVecByteArray){.data = (uint8_t *)vec1, .length = sizeof(vec1)}; + query1.topk = 3; + query1.include_vector = true; + query1.include_doc_id = true; + + ZVecDoc **results = NULL; + size_t result_count = 0; + err = zvec_collection_query(collection, &query1, &results, &result_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(result_count > 0); + TEST_ASSERT(results != NULL); + + // First result should be vec1 itself (distance ~0) + if (result_count > 0) { + float score = zvec_doc_get_score(results[0]); + TEST_ASSERT(score < 0.001f); // Very small distance + } + + zvec_docs_free(results, result_count); + + // Test 2: Search with filter + ZVecVectorQuery query2 = query1; + query2.filter = (ZVecString){.data = "id > 2", .length = 6}; + + err = zvec_collection_query(collection, &query2, &results, &result_count); + TEST_ASSERT(err == ZVEC_OK); + + // Should only return documents with id > 2 + for (size_t i = 0; i < result_count; i++) { + int64_t id; + zvec_doc_get_field_value_basic(results[i], "id", ZVEC_DATA_TYPE_INT64, + &id, sizeof(id)); + TEST_ASSERT(id > 2); + } + + zvec_docs_free(results, result_count); + + // Cleanup documents + for (int i = 0; i < 4; i++) { + zvec_doc_destroy(docs[i]); + } + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // 
Clean up + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_index_creation_and_management(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_index_management"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Test 1: Create HNSW index + // clang-format off + ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + + err = + zvec_collection_create_hnsw_index(collection, "dense", &hnsw_params); + TEST_ASSERT(err == ZVEC_OK); + + // Test 2: Create scalar index + // clang-format off + ZVecIndexParams invert_params = ZVEC_INVERT_PARAMS(true, false); + // clang-format on + + err = zvec_collection_create_invert_index(collection, "name", + &invert_params); + TEST_ASSERT(err == ZVEC_OK); + + err = zvec_collection_drop_index(collection, "name"); + TEST_ASSERT(err == ZVEC_OK); + + // Test 3: Optimize collection + err = zvec_collection_optimize(collection); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_ddl_operations(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_ddl"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + size_t field_count = zvec_collection_schema_get_field_count(schema); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + 
TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Test 1: Add new column + ZVecFieldSchema *new_field = + zvec_field_schema_create("new_int32", ZVEC_DATA_TYPE_INT32, true, 0); + TEST_ASSERT(new_field != NULL); + + err = zvec_collection_add_column(collection, new_field, NULL); + TEST_ASSERT(err == ZVEC_OK); + + // Test 2: Get collection schema and verify field count + ZVecCollectionSchema *retrieved_schema = NULL; + err = zvec_collection_get_schema(collection, &retrieved_schema); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(retrieved_schema != NULL); + + size_t new_field_count = + zvec_collection_schema_get_field_count(retrieved_schema); + TEST_ASSERT((field_count + 1) == new_field_count); + + // Test 3: Alter column + ZVecFieldSchema *alter_field = + zvec_field_schema_create("new_float", ZVEC_DATA_TYPE_FLOAT, true, 0); + TEST_ASSERT(alter_field != NULL); + + err = zvec_collection_alter_column(collection, "new_int32", "", + alter_field); + TEST_ASSERT(err == ZVEC_OK); + + // Test 4: Drop column + err = zvec_collection_drop_column(collection, "new_float"); + TEST_ASSERT(err == ZVEC_OK); + + // Test 5: Verify field count after drop + err = zvec_collection_get_schema(collection, &retrieved_schema); + TEST_ASSERT(err == ZVEC_OK); + new_field_count = + zvec_collection_schema_get_field_count(retrieved_schema); + TEST_ASSERT(new_field_count == field_count); + + zvec_collection_schema_destroy(retrieved_schema); + zvec_field_schema_destroy(new_field); + zvec_field_schema_destroy(alter_field); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_field_ddl_operations(void) { + TEST_START(); + + // Test field schema creation with various configurations + ZVecFieldSchema *field1 = + zvec_field_schema_create("test_field1", ZVEC_DATA_TYPE_STRING, false, 0); + 
TEST_ASSERT(field1 != NULL); + TEST_ASSERT(strcmp(field1->name->data, "test_field1") == 0); + TEST_ASSERT(field1->data_type == ZVEC_DATA_TYPE_STRING); + TEST_ASSERT(field1->nullable == false); + TEST_ASSERT(field1->dimension == 0); + + ZVecFieldSchema *field2 = zvec_field_schema_create( + "test_field2", ZVEC_DATA_TYPE_VECTOR_FP32, true, 128); + TEST_ASSERT(field2 != NULL); + TEST_ASSERT(field2->data_type == ZVEC_DATA_TYPE_VECTOR_FP32); + TEST_ASSERT(field2->nullable == true); + TEST_ASSERT(field2->dimension == 128); + + // Test index parameter setting + // clang-format off + ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + + ZVecErrorCode err = zvec_field_schema_set_index_params(field2, &hnsw_params); + TEST_ASSERT(err == ZVEC_OK); + + // Cleanup + zvec_field_schema_destroy(field1); + zvec_field_schema_destroy(field2); + + TEST_END(); +} + +void test_performance_benchmarks(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_performance"; + + ZVecCollectionSchema *schema = zvec_collection_schema_create("perf_test"); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Create simple schema for performance testing + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + zvec_collection_schema_add_field(schema, id_field); + + ZVecFieldSchema *vec_field = + zvec_field_schema_create("vec", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + // clang-format off + ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); /* NOLINT */ + // clang-format on + zvec_field_schema_set_hnsw_index(vec_field, &hnsw_params); + zvec_collection_schema_add_field(schema, vec_field); + + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + TEST_ASSERT(collection != NULL); + + if (collection) { + 
const size_t BATCH_SIZE = 1000; + const size_t TOTAL_DOCS = 10000; + + // Test bulk insertion performance +#ifdef _POSIX_C_SOURCE + struct timeval start_time, end_time; + gettimeofday(&start_time, NULL); +#else + clock_t start_clock = clock(); +#endif + + for (size_t batch_start = 0; batch_start < TOTAL_DOCS; + batch_start += BATCH_SIZE) { + ZVecDoc *batch_docs[BATCH_SIZE]; + size_t current_batch_size = (batch_start + BATCH_SIZE > TOTAL_DOCS) + ? TOTAL_DOCS - batch_start + : BATCH_SIZE; + + // Create batch of documents + for (size_t i = 0; i < current_batch_size; i++) { + batch_docs[i] = zvec_doc_create(); + zvec_doc_set_pk(batch_docs[i], zvec_test_make_pk(batch_start + i)); + + int64_t id = batch_start + i; + zvec_doc_add_field_by_value(batch_docs[i], "id", ZVEC_DATA_TYPE_INT64, + &id, sizeof(id)); + + // Create random vector + float vec[128]; + for (int j = 0; j < 128; j++) { + vec[j] = (float)rand() / RAND_MAX; + } + zvec_doc_add_field_by_value(batch_docs[i], "vec", + ZVEC_DATA_TYPE_VECTOR_FP32, vec, + sizeof(vec)); + } + + // Insert batch + size_t success_count, error_count; + err = zvec_collection_insert(collection, (const ZVecDoc **)batch_docs, + current_batch_size, &success_count, + &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == current_batch_size); + TEST_ASSERT(error_count == 0); + + // Cleanup batch documents + for (size_t i = 0; i < current_batch_size; i++) { + zvec_doc_destroy(batch_docs[i]); + } + } + +#ifdef _POSIX_C_SOURCE + gettimeofday(&end_time, NULL); + double insert_time = (end_time.tv_sec - start_time.tv_sec) + + (end_time.tv_usec - start_time.tv_usec) / 1000000.0; +#else + clock_t end_clock = clock(); + double insert_time = ((double)(end_clock - start_clock)) / CLOCKS_PER_SEC; +#endif + printf(" Inserted %zu documents in %.3f seconds (%.0f docs/sec)\n", + TOTAL_DOCS, insert_time, TOTAL_DOCS / insert_time); + + // Flush and optimize + zvec_collection_flush(collection); + zvec_collection_optimize(collection); + + // 
Test query performance + float query_vec[128]; + for (int i = 0; i < 128; i++) { + query_vec[i] = (float)rand() / RAND_MAX; + } + + ZVecVectorQuery query = {0}; + query.field_name = (ZVecString){.data = "vec", .length = 3}; + query.query_vector = (ZVecByteArray){.data = (uint8_t *)query_vec, + .length = sizeof(query_vec)}; + query.topk = 10; + query.include_vector = false; + query.include_doc_id = true; + + const int QUERY_COUNT = 100; +#ifdef _POSIX_C_SOURCE + struct timeval query_start_time, query_end_time; + gettimeofday(&query_start_time, NULL); +#else + clock_t query_start_clock = clock(); +#endif + + for (int q = 0; q < QUERY_COUNT; q++) { + ZVecDoc **results = NULL; + size_t result_count = 0; + + err = + zvec_collection_query(collection, &query, &results, &result_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(result_count <= 10); + + zvec_docs_free(results, result_count); + } + +#ifdef _POSIX_C_SOURCE + gettimeofday(&query_end_time, NULL); + double query_time = + (query_end_time.tv_sec - query_start_time.tv_sec) + + (query_end_time.tv_usec - query_start_time.tv_usec) / 1000000.0; +#else + clock_t query_end_clock = clock(); + double query_time = + ((double)(query_end_clock - query_start_clock)) / CLOCKS_PER_SEC; +#endif + double avg_query_time = + (query_time * 1000) / QUERY_COUNT; // ms per query + printf(" Average query time: %.2f ms\n", avg_query_time); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +// ============================================================================= +// Additional tests for uncovered API functions +// ============================================================================= + +void test_zvec_shutdown(void) { + TEST_START(); + + // Test shutdown + ZVecErrorCode err = zvec_shutdown(); + TEST_ASSERT(err == ZVEC_OK); + + // Re-initialize for other tests + 
ZVecConfigData *config = zvec_config_data_create(); + TEST_ASSERT(config != NULL); + err = zvec_initialize(config); + TEST_ASSERT(err == ZVEC_OK); + zvec_config_data_destroy(config); + + TEST_END(); +} + +void test_index_params_creation_functions(void) { + TEST_START(); + + // Test HNSW parameters using macro + // clang-format off + ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + TEST_ASSERT(hnsw_params.index_type == ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params.metric_type == ZVEC_METRIC_TYPE_COSINE); + TEST_ASSERT(hnsw_params.hnsw.m == 16); + TEST_ASSERT(hnsw_params.hnsw.ef_construction == 100); + TEST_ASSERT(hnsw_params.hnsw.ef_search == 50); + + // Test IVF parameters using macro + // clang-format off + ZVecIndexParams ivf_params = ZVEC_IVF_PARAMS(ZVEC_METRIC_TYPE_L2, 100, 10, true, 5, ZVEC_QUANTIZE_TYPE_INT8); + // clang-format on + TEST_ASSERT(ivf_params.index_type == ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params.metric_type == ZVEC_METRIC_TYPE_L2); + TEST_ASSERT(ivf_params.ivf.n_list == 100); + TEST_ASSERT(ivf_params.ivf.n_iters == 10); + TEST_ASSERT(ivf_params.ivf.use_soar == true); + TEST_ASSERT(ivf_params.ivf.n_probe == 5); + + // Test Flat parameters using macro + // clang-format off + // clang-format off + ZVecIndexParams flat_params = + ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + // clang-format on + TEST_ASSERT(flat_params.index_type == ZVEC_INDEX_TYPE_FLAT); + TEST_ASSERT(flat_params.metric_type == ZVEC_METRIC_TYPE_IP); + + // Test Invert parameters using macro + // clang-format off + ZVecIndexParams invert_params = ZVEC_INVERT_PARAMS(true, false); + // clang-format on + TEST_ASSERT(invert_params.index_type == ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(invert_params.invert.enable_range_optimization == true); + TEST_ASSERT(invert_params.invert.enable_extended_wildcard == false); + + TEST_END(); +} + +void 
test_collection_advanced_index_functions(void) { + TEST_START(); + + const char *temp_dir = "/tmp/zvec_test_advanced_index"; + zvec_test_delete_dir(temp_dir); + + // Create schema + ZVecCollectionSchema *schema = + zvec_collection_schema_create("test_collection"); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Add fields + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + ZVecFieldSchema *vec_field = + zvec_field_schema_create("vec", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + zvec_collection_schema_add_field(schema, id_field); + zvec_collection_schema_add_field(schema, vec_field); + + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + options.max_doc_count_per_segment = 1000; + ZVecCollection *collection = NULL; + + ZVecErrorCode err = zvec_collection_create_and_open(temp_dir, schema, + &options, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + // Test zvec_collection_create_flat_index + // clang-format off + ZVecIndexParams flat_params = + ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + err = zvec_collection_create_flat_index(collection, "vec", &flat_params); + TEST_ASSERT(err == ZVEC_OK); + + // Test zvec_collection_create_ivf_index + // clang-format off + ZVecIndexParams ivf_params = ZVEC_IVF_PARAMS(ZVEC_METRIC_TYPE_L2, 100, 10, true, 5, ZVEC_QUANTIZE_TYPE_INT8); + // clang-format on + err = zvec_collection_drop_index(collection, + "vec"); // Drop previous index first + TEST_ASSERT(err == ZVEC_OK); + err = zvec_collection_create_ivf_index(collection, "vec", &ivf_params); + TEST_ASSERT(err == ZVEC_OK); + + // Test zvec_collection_create_hnsw_index + // clang-format off + ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, 100, 50, ZVEC_QUANTIZE_TYPE_FP16); + // clang-format on + err = zvec_collection_drop_index(collection, + "vec"); // Drop previous index first + TEST_ASSERT(err == ZVEC_OK); + err = 
zvec_collection_create_hnsw_index(collection, "vec", &hnsw_params); + TEST_ASSERT(err == ZVEC_OK); + + // Test zvec_field_schema_set_ivf_index + ZVecFieldSchema *new_vec_field = zvec_field_schema_create( + "vec2", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + TEST_ASSERT(new_vec_field != NULL); + // clang-format off + ZVecIndexParams ivf_params2 = ZVEC_IVF_PARAMS(ZVEC_METRIC_TYPE_IP, 50, 5, false, 3, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + zvec_field_schema_set_ivf_index(new_vec_field, &ivf_params2); + TEST_ASSERT(new_vec_field->has_index == true); + zvec_field_schema_destroy(new_vec_field); + + zvec_collection_destroy(collection); + } + zvec_collection_schema_destroy(schema); + } + + zvec_test_delete_dir(temp_dir); + TEST_END(); +} + +void test_collection_query_functions(void) { + TEST_START(); + + const char *temp_dir = "/tmp/zvec_test_query_funcs"; + zvec_test_delete_dir(temp_dir); + + // Create schema and collection + ZVecCollectionSchema *schema = zvec_collection_schema_create("query_test"); + // clang-format off + ZVecIndexParams hnsw_params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + // clang-format on + + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + ZVecFieldSchema *vec_field = + zvec_field_schema_create("vec", ZVEC_DATA_TYPE_VECTOR_FP32, false, 4); + zvec_field_schema_set_hnsw_index(vec_field, &hnsw_params); + + zvec_collection_schema_add_field(schema, name_field); + zvec_collection_schema_add_field(schema, vec_field); + + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + // Insert test documents + ZVecDoc *doc1 = zvec_doc_create(); + zvec_doc_set_pk(doc1, "doc1"); + float vec1[4] = {1.0f, 0.0f, 0.0f, 0.0f}; + zvec_doc_add_field_by_value(doc1, "vec", ZVEC_DATA_TYPE_VECTOR_FP32, vec1, + sizeof(vec1)); + 
zvec_doc_add_field_by_value(doc1, "name", ZVEC_DATA_TYPE_STRING, + "document1", 9); + + ZVecDoc *doc2 = zvec_doc_create(); + zvec_doc_set_pk(doc2, "doc2"); + float vec2[4] = {0.0f, 1.0f, 0.0f, 0.0f}; + zvec_doc_add_field_by_value(doc2, "vec", ZVEC_DATA_TYPE_VECTOR_FP32, vec2, + sizeof(vec2)); + zvec_doc_add_field_by_value(doc2, "name", ZVEC_DATA_TYPE_STRING, + "document2", 9); + + ZVecDoc *docs[] = {doc1, doc2}; + size_t success_count, error_count; + err = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2, + &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_flush(collection); + zvec_collection_optimize(collection); + + // Test zvec_collection_fetch + const char *pks[] = {"doc1", "doc2"}; + ZVecDoc **results = NULL; + size_t found_count = 0; + err = zvec_collection_fetch(collection, pks, 2, &results, &found_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(found_count == 2); + zvec_docs_free(results, found_count); + + // Test zvec_collection_query_by_group + ZVecGroupByVectorQuery group_query = {0}; + group_query.field_name = ZVEC_STRING("vec"); + float query_vec[4] = {0.5f, 0.5f, 0.0f, 0.0f}; + group_query.query_vector.data = (uint8_t *)query_vec; + group_query.query_vector.length = sizeof(query_vec); + group_query.group_by_field_name = ZVEC_STRING("name"); + group_query.group_count = 2; + group_query.group_topk = 1; + group_query.include_vector = false; + + ZVecStringArray output_fields = {0}; + output_fields.count = 1; + output_fields.strings = + (ZVecString *)malloc(sizeof(ZVecString) * output_fields.count); + output_fields.strings[0] = ZVEC_STRING("name"); + group_query.output_fields = output_fields; + + ZVecDoc **group_results = NULL; + ZVecString **group_values = NULL; + size_t group_result_count = 0; + err = + zvec_collection_query_by_group(collection, &group_query, &group_results, + &group_values, &group_result_count); + TEST_ASSERT(err == ZVEC_OK); + if (group_results) { + zvec_docs_free(group_results, 
group_result_count); + } + if (group_values) { + for (size_t i = 0; i < group_result_count; i++) { + zvec_free_string(group_values[i]); + } + free(group_values); + } + + free(output_fields.strings); + + // Test zvec_collection_get_options + ZVecCollectionOptions *options = NULL; + err = zvec_collection_get_options(collection, &options); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(options != NULL); + free(options); + + zvec_collection_destroy(collection); + zvec_doc_destroy(doc1); + zvec_doc_destroy(doc2); + } + + zvec_collection_schema_destroy(schema); + zvec_test_delete_dir(temp_dir); + + TEST_END(); +} + +void test_doc_advanced_functions(void) { + TEST_START(); + + // Test zvec_doc_clear + ZVecDoc *doc = zvec_doc_create(); + zvec_doc_set_pk(doc, "test_pk"); + zvec_doc_add_field_by_value(doc, "field1", ZVEC_DATA_TYPE_INT32, + &(int32_t){100}, sizeof(int32_t)); + TEST_ASSERT(zvec_doc_get_field_count(doc) > 0); + zvec_doc_clear(doc); + TEST_ASSERT(zvec_doc_get_field_count(doc) == 0); + + // Test zvec_doc_get_pk_copy + zvec_doc_set_pk(doc, "test_pk_copy"); + const char *pk_copy = zvec_doc_get_pk_copy(doc); + TEST_ASSERT(pk_copy != NULL); + TEST_ASSERT(strcmp(pk_copy, "test_pk_copy") == 0); + free((void *)pk_copy); + + // Test zvec_doc_is_empty + ZVecDoc *empty_doc = zvec_doc_create(); + TEST_ASSERT(zvec_doc_is_empty(empty_doc) == true); + zvec_doc_add_field_by_value(empty_doc, "test", ZVEC_DATA_TYPE_INT32, + &(int32_t){1}, sizeof(int32_t)); + TEST_ASSERT(zvec_doc_is_empty(empty_doc) == false); + zvec_doc_destroy(empty_doc); + + // Test zvec_doc_memory_usage + ZVecDoc *mem_doc = zvec_doc_create(); + zvec_doc_set_pk(mem_doc, "memory_test"); + char large_data[1024]; + memset(large_data, 'A', sizeof(large_data)); + zvec_doc_add_field_by_value(mem_doc, "large_field", ZVEC_DATA_TYPE_STRING, + large_data, sizeof(large_data)); + size_t mem_usage = zvec_doc_memory_usage(mem_doc); + TEST_ASSERT(mem_usage > 0); + zvec_doc_destroy(mem_doc); + + // Test zvec_doc_merge + ZVecDoc 
*doc1 = zvec_doc_create(); + zvec_doc_set_pk(doc1, "merge_test"); + zvec_doc_add_field_by_value(doc1, "field1", ZVEC_DATA_TYPE_INT32, + &(int32_t){100}, sizeof(int32_t)); + + ZVecDoc *doc2 = zvec_doc_create(); + zvec_doc_add_field_by_value(doc2, "field2", ZVEC_DATA_TYPE_STRING, "merged", + 6); + + zvec_doc_merge(doc1, doc2); + TEST_ASSERT(zvec_doc_has_field(doc1, "field1") == true); + TEST_ASSERT(zvec_doc_has_field(doc1, "field2") == true); + + zvec_doc_destroy(doc1); + zvec_doc_destroy(doc2); + + // Test zvec_doc_validate + ZVecCollectionSchema *schema = zvec_collection_schema_create("validate_test"); + ZVecFieldSchema *val_field = + zvec_field_schema_create("test_field", ZVEC_DATA_TYPE_INT32, false, 0); + zvec_collection_schema_add_field(schema, val_field); + + ZVecDoc *val_doc = zvec_doc_create(); + zvec_doc_set_pk(val_doc, "test_pk"); + zvec_doc_add_field_by_value(val_doc, "test_field", ZVEC_DATA_TYPE_INT32, + &(int32_t){42}, sizeof(int32_t)); + + char *error_msg = NULL; + ZVecErrorCode err = zvec_doc_validate(val_doc, schema, false, &error_msg); + TEST_ASSERT(err == ZVEC_OK); + if (error_msg) { + free(error_msg); + } + + zvec_doc_destroy(val_doc); + zvec_collection_schema_destroy(schema); + zvec_doc_destroy(doc); + + // Test zvec_doc_to_detail_string + ZVecDoc *detail_doc = zvec_doc_create(); + zvec_doc_set_pk(detail_doc, "detail_test"); + zvec_doc_add_field_by_value(detail_doc, "int_field", ZVEC_DATA_TYPE_INT32, + &(int32_t){12345}, sizeof(int32_t)); + zvec_doc_add_field_by_value(detail_doc, "str_field", ZVEC_DATA_TYPE_STRING, + "hello", 5); + + char *detail_str = NULL; + err = zvec_doc_to_detail_string(detail_doc, &detail_str); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(detail_str != NULL); + printf(" Document detail: %s\n", detail_str); + free(detail_str); + + zvec_doc_destroy(detail_doc); + + TEST_END(); +} + +void test_array_memory_functions(void) { + TEST_START(); + + // Test ZVecStringArray + ZVecStringArray *str_array = zvec_string_array_create(3); + 
TEST_ASSERT(str_array != NULL); + if (str_array) { + TEST_ASSERT(str_array->count == 3); + TEST_ASSERT(str_array->strings != NULL); + + // Add strings at specific indices + zvec_string_array_add(str_array, 0, "string1"); + zvec_string_array_add(str_array, 1, "string2"); + zvec_string_array_add(str_array, 2, "string3"); + + // Verify strings were added + TEST_ASSERT(strcmp(str_array->strings[0].data, "string1") == 0); + TEST_ASSERT(strcmp(str_array->strings[1].data, "string2") == 0); + TEST_ASSERT(strcmp(str_array->strings[2].data, "string3") == 0); + zvec_string_array_destroy(str_array); + } + + // Test ZVecMutableByteArray + ZVecMutableByteArray *byte_array = zvec_byte_array_create(1024); + TEST_ASSERT(byte_array != NULL); + if (byte_array) { + TEST_ASSERT(byte_array->capacity == 1024); + TEST_ASSERT(byte_array->length == 0); + TEST_ASSERT(byte_array->data != NULL); + + // Write some data + byte_array->data[0] = 0x01; + byte_array->data[1] = 0x02; + byte_array->data[2] = 0x03; + byte_array->length = 3; + + TEST_ASSERT(byte_array->length == 3); + TEST_ASSERT(byte_array->data[0] == 0x01); + TEST_ASSERT(byte_array->data[1] == 0x02); + TEST_ASSERT(byte_array->data[2] == 0x03); + + zvec_byte_array_destroy(byte_array); + } + + // Test ZVecFloatArray + ZVecFloatArray *float_array = zvec_float_array_create(10); + TEST_ASSERT(float_array != NULL); + if (float_array) { + TEST_ASSERT(float_array->length == 10); + TEST_ASSERT(float_array->data != NULL); + + // Note: Data is initialized to 0 by zvec_float_array_create + // The const qualifier indicates this is typically used for read-only access + // For testing, we verify the allocation succeeded and length is correct + TEST_ASSERT(float_array->data[0] == 0.0f); + TEST_ASSERT(float_array->data[9] == 0.0f); + + zvec_float_array_destroy(float_array); + } + + // Test ZVecInt64Array + ZVecInt64Array *int64_array = zvec_int64_array_create(5); + TEST_ASSERT(int64_array != NULL); + if (int64_array) { + 
TEST_ASSERT(int64_array->length == 5); + TEST_ASSERT(int64_array->data != NULL); + + // Note: Data is initialized to 0 by zvec_int64_array_create + // The const qualifier indicates this is typically used for read-only access + TEST_ASSERT(int64_array->data[0] == 0); + TEST_ASSERT(int64_array->data[4] == 0); + + zvec_int64_array_destroy(int64_array); + } + + // Test edge case: create with zero size + ZVecMutableByteArray *zero_array = zvec_byte_array_create(0); + TEST_ASSERT(zero_array != NULL); + if (zero_array) { + zvec_byte_array_destroy(zero_array); + } + + TEST_END(); +} + +// ============================================================================= +// Main function +// ============================================================================= + +int main(void) { + printf("Starting comprehensive C API tests...\n\n"); + + // Clean up previous test directories + printf("Cleaning up previous test directories...\n"); + system("rm -rf /tmp/zvec_test_*"); + printf("Cleanup completed.\n\n"); + + test_version_functions(); + test_error_handling_functions(); + test_zvec_config(); + test_zvec_initialize(); + test_zvec_string_functions(); + + // Schema-related tests + test_schema_basic_operations(); + test_schema_edge_cases(); + test_schema_field_operations(); + test_normal_schema_creation(); + test_schema_with_indexes(); + test_schema_max_doc_count(); + + // Field-related tests + test_field_schema_functions(); + test_field_helper_functions(); + test_field_ddl_operations(); + + // Collection-related tests + test_collection_basic_operations(); + test_collection_edge_cases(); + test_collection_delete_by_filter(); + test_collection_stats(); + test_collection_stats_functions(); + test_collection_dml_functions(); + test_collection_nullable_roundtrip(); + test_collection_ddl_operations(); + + // Doc-related tests + test_doc_creation(); + test_doc_primary_key(); + test_doc_basic_operations(); + test_doc_null_field_api(); + test_doc_get_field_value_basic(); + 
test_doc_get_field_value_copy(); + test_doc_get_field_value_pointer(); + test_doc_field_operations(); + test_doc_error_conditions(); + test_doc_serialization(); + test_doc_add_field_by_value(); + test_doc_add_field_by_struct(); + + // Index tests + test_index_params(); + test_index_params_functions(); + test_index_params_api_functions(); + test_index_creation_and_management(); + + // Query tests + test_query_params_functions(); + test_actual_vector_queries(); + + // Performance tests + // test_performance_benchmarks(); + + // Utility function tests + test_utility_functions(); + + // Memory management tests + test_memory_management_functions(); + + // Additional API coverage tests + test_zvec_shutdown(); + test_index_params_creation_functions(); + test_collection_advanced_index_functions(); + test_collection_query_functions(); + test_doc_advanced_functions(); + test_array_memory_functions(); + + printf("\n=== Comprehensive Test Summary ===\n"); + printf("Total tests: %d\n", test_count); + printf("Passed: %d\n", passed_count); + printf("Failed: %d\n", test_count - passed_count); + + return test_count == passed_count ? 0 : 1; +} diff --git a/tests/c_api/utils.c b/tests/c_api/utils.c new file mode 100644 index 00000000..7d287761 --- /dev/null +++ b/tests/c_api/utils.c @@ -0,0 +1,922 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "utils.h" +#include +#include +#include +#include + +// ============================================================================= +// Internal Helper Functions +// ============================================================================= + +static char *strdup_safe(const char *str) { + if (!str) return NULL; + size_t len = strlen(str) + 1; + char *copy = (char *)malloc(len); + if (copy) { + memcpy(copy, str, len); + } + return copy; +} + +// ============================================================================= +// Schema Creation Helper Functions Implementation +// ============================================================================= + +ZVecCollectionSchema *zvec_test_create_temp_schema(void) { + // Create collection schema using C API + ZVecCollectionSchema *schema = zvec_collection_schema_create("demo"); + schema->max_doc_count_per_segment = 1000; + + // Create index parameters using C API (using new flat structure with macros) + ZVecIndexParams invert_params = ZVEC_INVERT_PARAMS(true, true); + ZVecIndexParams dense_hnsw_params = ZVEC_HNSW_PARAMS( + ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + ZVecIndexParams sparse_hnsw_params = ZVEC_HNSW_PARAMS( + ZVEC_METRIC_TYPE_IP, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + + + // Create and add fields + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + zvec_field_schema_set_invert_index(id_field, &invert_params); + zvec_collection_schema_add_field(schema, id_field); + + // Create name field (inverted index without optimization) + ZVecIndexParams name_invert_params = ZVEC_INVERT_PARAMS(false, false); + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + zvec_field_schema_set_invert_index(name_field, &name_invert_params); + zvec_collection_schema_add_field(schema, name_field); + + // Create weight field (no index) + ZVecFieldSchema *weight_field = + 
zvec_field_schema_create("weight", ZVEC_DATA_TYPE_FLOAT, true, 0); + zvec_collection_schema_add_field(schema, weight_field); + + // Create dense field (HNSW index) + ZVecFieldSchema *dense_field = + zvec_field_schema_create("dense", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + zvec_field_schema_set_hnsw_index(dense_field, &dense_hnsw_params); + zvec_collection_schema_add_field(schema, dense_field); + + // Create sparse field (HNSW index) + ZVecFieldSchema *sparse_field = zvec_field_schema_create( + "sparse", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, false, 0); + zvec_field_schema_set_hnsw_index(sparse_field, &sparse_hnsw_params); + zvec_collection_schema_add_field(schema, sparse_field); + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_scalar_schema(void) { + // Create collection schema using C API + ZVecCollectionSchema *schema = zvec_collection_schema_create("demo"); + + // Create fields + ZVecFieldSchema *int32_field = + zvec_field_schema_create("int32", ZVEC_DATA_TYPE_INT32, false, 0); + zvec_collection_schema_add_field(schema, int32_field); + + ZVecFieldSchema *string_field = + zvec_field_schema_create("string", ZVEC_DATA_TYPE_STRING, false, 0); + zvec_collection_schema_add_field(schema, string_field); + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_normal_schema( + bool nullable, const char *name, const ZVecIndexParams *scalar_index_params, + const ZVecIndexParams *vector_index_params, uint64_t max_doc_count) { + // Create collection schema using C API + ZVecCollectionSchema *schema = + zvec_collection_schema_create(name ? 
name : "demo"); + schema->max_doc_count_per_segment = max_doc_count; + + // Create scalar fields (8) + const char *scalar_names[] = {"int32", "string", "uint32", "bool", + "float", "double", "int64", "uint64"}; + ZVecDataType scalar_types[] = {ZVEC_DATA_TYPE_INT32, ZVEC_DATA_TYPE_STRING, + ZVEC_DATA_TYPE_UINT32, ZVEC_DATA_TYPE_BOOL, + ZVEC_DATA_TYPE_FLOAT, ZVEC_DATA_TYPE_DOUBLE, + ZVEC_DATA_TYPE_INT64, ZVEC_DATA_TYPE_UINT64}; + + for (int i = 0; i < 8; i++) { + ZVecFieldSchema *field = + zvec_field_schema_create(scalar_names[i], scalar_types[i], nullable, 0); + if (scalar_index_params) { + zvec_field_schema_set_invert_index(field, scalar_index_params); + } + zvec_collection_schema_add_field(schema, field); + } + + // Create array fields (8) + const char *array_names[] = {"array_int32", "array_string", "array_uint32", + "array_bool", "array_float", "array_double", + "array_int64", "array_uint64"}; + ZVecDataType array_types[] = { + ZVEC_DATA_TYPE_ARRAY_INT32, ZVEC_DATA_TYPE_ARRAY_STRING, + ZVEC_DATA_TYPE_ARRAY_UINT32, ZVEC_DATA_TYPE_ARRAY_BOOL, + ZVEC_DATA_TYPE_ARRAY_FLOAT, ZVEC_DATA_TYPE_ARRAY_DOUBLE, + ZVEC_DATA_TYPE_ARRAY_INT64, ZVEC_DATA_TYPE_ARRAY_UINT64}; + + for (int i = 0; i < 8; i++) { + ZVecFieldSchema *field = + zvec_field_schema_create(array_names[i], array_types[i], nullable, 0); + if (scalar_index_params) { + zvec_field_schema_set_invert_index(field, scalar_index_params); + } + zvec_collection_schema_add_field(schema, field); + } + + // Create vector fields (5) + // dense vectors + ZVecFieldSchema *dense_fp32 = zvec_field_schema_create( + "dense_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + if (vector_index_params) { + zvec_field_schema_set_hnsw_index(dense_fp32, vector_index_params); + } + zvec_collection_schema_add_field(schema, dense_fp32); + + ZVecFieldSchema *dense_fp16 = zvec_field_schema_create( + "dense_fp16", ZVEC_DATA_TYPE_VECTOR_FP16, false, 128); + ZVecIndexParams flat_params1 = + ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_L2, 
ZVEC_QUANTIZE_TYPE_UNDEFINED); + zvec_field_schema_set_flat_index(dense_fp16, &flat_params1); + zvec_collection_schema_add_field(schema, dense_fp16); + + ZVecFieldSchema *dense_int8 = zvec_field_schema_create( + "dense_int8", ZVEC_DATA_TYPE_VECTOR_INT8, false, 128); + ZVecIndexParams flat_params2 = + ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + zvec_field_schema_set_flat_index(dense_int8, &flat_params2); + zvec_collection_schema_add_field(schema, dense_int8); + + // sparse vectors + ZVecFieldSchema *sparse_fp32 = zvec_field_schema_create( + "sparse_fp32", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, false, 0); + if (vector_index_params) { + zvec_field_schema_set_hnsw_index(sparse_fp32, vector_index_params); + } + zvec_collection_schema_add_field(schema, sparse_fp32); + + ZVecFieldSchema *sparse_fp16 = zvec_field_schema_create( + "sparse_fp16", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, false, 0); + ZVecIndexParams flat_params3 = + ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + zvec_field_schema_set_flat_index(sparse_fp16, &flat_params3); + zvec_collection_schema_add_field(schema, sparse_fp16); + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_schema_with_scalar_index( + bool nullable, bool enable_optimize, const char *name) { + ZVecIndexParams *invert_params = + zvec_test_create_default_invert_params(enable_optimize); + ZVecCollectionSchema *schema = + zvec_test_create_normal_schema(nullable, name, invert_params, NULL, 1000); + free(invert_params); + return schema; +} + +ZVecCollectionSchema *zvec_test_create_schema_with_vector_index( + bool nullable, const char *name, + const ZVecIndexParams *vector_index_params) { + ZVecIndexParams *default_params = NULL; + if (!vector_index_params) { + default_params = zvec_test_create_default_hnsw_params(); + } + + ZVecCollectionSchema *schema = zvec_test_create_normal_schema( + nullable, name, NULL, + vector_index_params ? 
vector_index_params : default_params, 1000);
+
+  if (default_params) {
+    free(default_params);
+  }
+
+  return schema;
+}
+
+// Convenience wrapper: builds the non-nullable "demo" schema without explicit
+// index parameters, passing doc_count as the max_doc_count argument.
+ZVecCollectionSchema *zvec_test_create_schema_with_max_doc_count(
+    uint64_t doc_count) {
+  return zvec_test_create_normal_schema(false, "demo", NULL, NULL, doc_count);
+}
+
+// =============================================================================
+// Document Creation Helper Functions Implementation
+// =============================================================================
+
+// Returns a malloc'ed "pk_<doc_id>" string, or NULL if the allocation fails.
+// The caller releases it with free().
+char *zvec_test_make_pk(uint64_t doc_id) {
+  // 32 bytes is ample: "pk_" + at most 20 decimal digits + terminating NUL
+  char *pk = (char *)malloc(32);
+  if (pk) {
+    snprintf(pk, 32, "pk_%llu", (unsigned long long)doc_id);
+  }
+  return pk;
+}
+
+// Counterpart of zvec_test_make_pk: parses the decimal number that follows the
+// first three characters of pk. Returns 0 when pk is NULL or shorter than four
+// characters. The "pk_" prefix itself is not verified.
+uint64_t zvec_test_extract_doc_id(const char *pk) {
+  if (!pk || strlen(pk) < 4) return 0;
+  return strtoull(pk + 3, NULL, 10);
+}
+
+// Builds a document in which every schema field with a matching case below is
+// filled with deterministic test data derived from doc_id. Returns NULL when
+// schema is NULL or the document cannot be created. When pk is NULL the key is
+// generated with zvec_test_make_pk(doc_id).
+ZVecDoc *zvec_test_create_doc(uint64_t doc_id,
+                              const ZVecCollectionSchema *schema,
+                              const char *pk) {
+  if (!schema) return NULL;
+  ZVecDoc *doc = zvec_doc_create();
+  if (!doc) return NULL;
+
+  // Set primary key (a private copy is made so it can always be free()d)
+  char *primary_key = pk ? strdup_safe(pk) : zvec_test_make_pk(doc_id);
+  if (primary_key) {
+    zvec_doc_set_pk(doc, primary_key);
+    free(primary_key);
+  }
+
+  // Create test data for each field
+  for (size_t i = 0; i < schema->field_count; i++) {
+    // schema->fields[i] is used directly as the field-schema pointer
+    const ZVecFieldSchema *field = schema->fields[i];
+    // The results of zvec_doc_add_field_by_value are not checked in this
+    // function: a field that cannot be added does not stop the remaining ones.
+
+    switch (field->data_type) {
+      case ZVEC_DATA_TYPE_BINARY: {
+        char binary_str[32];
+        snprintf(binary_str, sizeof(binary_str), "binary_%llu",
+                 (unsigned long long)doc_id);
+        zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                    binary_str, strlen(binary_str));
+        break;
+      }
+      case ZVEC_DATA_TYPE_BOOL: {
+        // true for every tenth document
+        zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                    &(bool){doc_id % 10 == 0}, sizeof(bool));
+        break;
+      }
+      case ZVEC_DATA_TYPE_INT32: {
+        zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                    &(int32_t){(int32_t)doc_id},
+                                    sizeof(int32_t));
+        break;
+      }
+      case ZVEC_DATA_TYPE_INT64: {
+        zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                    &(int64_t){(int64_t)doc_id},
+                                    sizeof(int64_t));
+        break;
+      }
+      case ZVEC_DATA_TYPE_UINT32: {
+        zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                    &(uint32_t){(uint32_t)doc_id},
+                                    sizeof(uint32_t));
+        break;
+      }
+      case ZVEC_DATA_TYPE_UINT64: {
+        zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                    &(uint64_t){(uint64_t)doc_id},
+                                    sizeof(uint64_t));
+        break;
+      }
+      case ZVEC_DATA_TYPE_FLOAT: {
+        zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                    &(float){(float)doc_id}, sizeof(float));
+        break;
+      }
+      case ZVEC_DATA_TYPE_DOUBLE: {
+        zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                    &(double){(double)doc_id}, sizeof(double));
+        break;
+      }
+      case ZVEC_DATA_TYPE_STRING: {
+        char string_val[64];
+        snprintf(string_val, sizeof(string_val), "value_%llu",
+                 (unsigned long long)doc_id);
+        zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                    string_val, strlen(string_val));
+        break;
+      }
+      // All array types below carry exactly 10 elements.
+      case ZVEC_DATA_TYPE_ARRAY_BOOL: {
+        bool bool_array[10];
+        for (int j = 0; j < 10; j++) {
+          bool_array[j] = (doc_id + j) % 2 == 0;
+        }
+        zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                    bool_array, sizeof(bool_array));
+        break;
+      }
+      case ZVEC_DATA_TYPE_ARRAY_INT32: {
+        int32_t int32_array[10];
+        for (int j = 0; j < 10; j++) {
+          int32_array[j] = (int32_t)doc_id;
+        }
+        zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                    int32_array, sizeof(int32_array));
+        break;
+      }
+      case ZVEC_DATA_TYPE_ARRAY_INT64: {
+        int64_t int64_array[10];
+        for (int j = 0; j < 10; j++) {
+          int64_array[j] = (int64_t)doc_id;
+        }
+        zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                    int64_array, sizeof(int64_array));
+        break;
+      }
+      case ZVEC_DATA_TYPE_ARRAY_UINT32: {
+        uint32_t uint32_array[10];
+        for (int j = 0; j < 10; j++) {
+          uint32_array[j] = (uint32_t)doc_id;
+        }
+        zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                    uint32_array, sizeof(uint32_array));
+        break;
+      }
+      case ZVEC_DATA_TYPE_ARRAY_UINT64: {
+        uint64_t uint64_array[10];
+        for (int j = 0; j < 10; j++) {
+          uint64_array[j] = (uint64_t)doc_id;
+        }
+        zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                    uint64_array, sizeof(uint64_array));
+        break;
+      }
+      case ZVEC_DATA_TYPE_ARRAY_FLOAT: {
+        float float_array[10];
+        for (int j = 0; j < 10; j++) {
+          float_array[j] = (float)doc_id;
+        }
+        zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                    float_array, sizeof(float_array));
+        break;
+      }
+      case ZVEC_DATA_TYPE_ARRAY_DOUBLE: {
+        double double_array[10];
+        for (int j = 0; j < 10; j++) {
+          double_array[j] = (double)doc_id;
+        }
+        zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                    double_array, sizeof(double_array));
+        break;
+      }
+      case ZVEC_DATA_TYPE_ARRAY_STRING: {
+        // The strings are packed back to back, each followed by its NUL, and
+        // the total packed length is passed as the value size. An entry that
+        // would not fit into the 256-byte buffer is skipped.
+        char string_data[256];
+        size_t offset = 0;
+        for (int j = 0; j < 10; j++) {
+          char temp_str[32];
+          snprintf(temp_str, sizeof(temp_str), "value_%llu_%d",
+                   (unsigned long long)doc_id, j);
+          size_t len = strlen(temp_str);
+          if (offset + len + 1 < sizeof(string_data)) {
+            strcpy(string_data + offset, temp_str);
+            offset += len + 1;
+          }
+        }
+        zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                    string_data, offset);
+        break;
+      }
+      // Dense vectors: field->dimension elements in a temporary heap buffer.
+      // If the allocation fails the field is silently left unset.
+      case ZVEC_DATA_TYPE_VECTOR_BINARY32: {
+        uint32_t *vector_data =
+            (uint32_t *)malloc(field->dimension * sizeof(uint32_t));
+        if (vector_data) {
+          for (uint32_t j = 0; j < field->dimension; j++) {
+            vector_data[j] = (uint32_t)(doc_id + j);
+          }
+          zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                      vector_data,
+                                      field->dimension * sizeof(uint32_t));
+          free(vector_data);
+        }
+        break;
+      }
+      case ZVEC_DATA_TYPE_VECTOR_BINARY64: {
+        uint64_t *vector_data =
+            (uint64_t *)malloc(field->dimension * sizeof(uint64_t));
+        if (vector_data) {
+          for (uint32_t j = 0; j < field->dimension; j++) {
+            vector_data[j] = (uint64_t)(doc_id + j);
+          }
+          zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                      vector_data,
+                                      field->dimension * sizeof(uint64_t));
+          free(vector_data);
+        }
+        break;
+      }
+      case ZVEC_DATA_TYPE_VECTOR_FP32: {
+        float *vector_data = (float *)malloc(field->dimension * sizeof(float));
+        if (vector_data) {
+          for (uint32_t j = 0; j < field->dimension; j++) {
+            vector_data[j] = (float)(doc_id + j * 0.1);
+          }
+          zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                      vector_data,
+                                      field->dimension * sizeof(float));
+          free(vector_data);
+        }
+        break;
+      }
+      case ZVEC_DATA_TYPE_VECTOR_FP64: {
+        double *vector_data =
+            (double *)malloc(field->dimension * sizeof(double));
+        if (vector_data) {
+          for (uint32_t j = 0; j < field->dimension; j++) {
+            vector_data[j] = (double)(doc_id + j * 0.1);
+          }
+          zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                      vector_data,
+                                      field->dimension * sizeof(double));
+          free(vector_data);
+        }
+        break;
+      }
+      case ZVEC_DATA_TYPE_VECTOR_FP16: {
+        // FP16 needs special handling, simplified to FP32 here: the buffer
+        // holds 32-bit floats and its size is dimension * sizeof(float).
+        // NOTE(review): confirm the API accepts float-sized data for FP16.
+        float *vector_data = (float *)malloc(field->dimension * sizeof(float));
+        if (vector_data) {
+          for (uint32_t j = 0; j < field->dimension; j++) {
+            vector_data[j] = (float)(doc_id + j * 0.1);
+          }
+          zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                      vector_data,
+                                      field->dimension * sizeof(float));
+          free(vector_data);
+        }
+        break;
+      }
+      case ZVEC_DATA_TYPE_VECTOR_INT8: {
+        int8_t *vector_data =
+            (int8_t *)malloc(field->dimension * sizeof(int8_t));
+        if (vector_data) {
+          for (uint32_t j = 0; j < field->dimension; j++) {
+            vector_data[j] = (int8_t)((doc_id + j) % 256);
+          }
+          zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                      vector_data,
+                                      field->dimension * sizeof(int8_t));
+          free(vector_data);
+        }
+        break;
+      }
+      case ZVEC_DATA_TYPE_VECTOR_INT16: {
+        int16_t *vector_data =
+            (int16_t *)malloc(field->dimension * sizeof(int16_t));
+        if (vector_data) {
+          for (uint32_t j = 0; j < field->dimension; j++) {
+            vector_data[j] = (int16_t)((doc_id + j) % 65536);
+          }
+          zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                      vector_data,
+                                      field->dimension * sizeof(int16_t));
+          free(vector_data);
+        }
+        break;
+      }
+      case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: {
+        // Sparse buffer built here: a uint32_t count, then all indices, then
+        // all float values
+        uint32_t nnz = field->dimension > 0
+                           ?
field->dimension / 10
+                           : 10;  // Number of non-zero elements
+        size_t sparse_size =
+            sizeof(uint32_t) + nnz * (sizeof(uint32_t) + sizeof(float));
+        void *sparse_data = malloc(sparse_size);
+        if (sparse_data) {
+          uint32_t *data_ptr = (uint32_t *)sparse_data;
+          *data_ptr = nnz;  // Set number of non-zero elements
+          uint32_t *indices = data_ptr + 1;
+          float *values = (float *)(indices + nnz);
+          for (uint32_t j = 0; j < nnz; j++) {
+            indices[j] = j * 10;                    // Index
+            values[j] = (float)(doc_id + j * 0.1);  // Value
+          }
+          zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                      sparse_data, sparse_size);
+          free(sparse_data);
+        }
+        break;
+      }
+      case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: {
+        // Sparse FP16 vectors, simplified handling
+        uint32_t nnz = field->dimension > 0 ? field->dimension / 10 : 10;
+        size_t sparse_size =
+            sizeof(uint32_t) +
+            nnz * (sizeof(uint32_t) +
+                   sizeof(float));  // Still use float for storage
+        void *sparse_data = malloc(sparse_size);
+        if (sparse_data) {
+          uint32_t *data_ptr = (uint32_t *)sparse_data;
+          *data_ptr = nnz;
+          uint32_t *indices = data_ptr + 1;
+          float *values = (float *)(indices + nnz);
+          for (uint32_t j = 0; j < nnz; j++) {
+            indices[j] = j * 10;
+            values[j] = (float)(doc_id + j * 0.1);
+          }
+          zvec_doc_add_field_by_value(doc, field->name->data, field->data_type,
+                                      sparse_data, sparse_size);
+          free(sparse_data);
+        }
+        break;
+      }
+
+      default:
+        // Unsupported data type
+        break;
+    }
+
+    // Remove reference to removed variable err
+    /*
+    if (err != ZVEC_OK) {
+      // Error handling: continue processing other fields
+    }
+    */
+  }
+
+  return doc;
+}
+
+// Builds a document in which only selected vector fields carry data; every
+// other schema field is left unset. Only VECTOR_FP32, VECTOR_FP16,
+// VECTOR_INT8, SPARSE_VECTOR_FP32 and SPARSE_VECTOR_FP16 fields are filled.
+//
+// Returns NULL when schema is NULL, when the document cannot be created, or
+// when adding any field fails (the partially built document is destroyed).
+// When pk is NULL the key is generated with zvec_test_make_pk(doc_id).
+ZVecDoc *zvec_test_create_doc_null(uint64_t doc_id,
+                                   const ZVecCollectionSchema *schema,
+                                   const char *pk) {
+  // The field loop dereferences schema, so reject NULL before anything is
+  // allocated (zvec_test_create_doc applies the same check).
+  if (!schema) return NULL;
+
+  ZVecDoc *doc = zvec_doc_create();
+  if (!doc) return NULL;
+
+  // Set primary key
+  char *primary_key = pk ? strdup_safe(pk) : zvec_test_make_pk(doc_id);
+  if (primary_key) {
+    zvec_doc_set_pk(doc, primary_key);
+    free(primary_key);
+  }
+
+  // Only create data for vector fields
+  for (size_t i = 0; i < schema->field_count; i++) {
+    const ZVecFieldSchema *field = schema->fields[i];
+
+    // Only process specific vector type fields.
+    // NOTE(review): VECTOR_FP64 and VECTOR_INT16 are not in this list, so
+    // their cases in the switch below can never run. Either add them here or
+    // drop the cases; behaviour is kept unchanged until that is decided.
+    if (field->data_type != ZVEC_DATA_TYPE_VECTOR_FP32 &&
+        field->data_type != ZVEC_DATA_TYPE_VECTOR_FP16 &&
+        field->data_type != ZVEC_DATA_TYPE_VECTOR_INT8 &&
+        field->data_type != ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 &&
+        field->data_type != ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16) {
+      continue;
+    }
+
+    // Stays ZVEC_OK when a buffer allocation fails, i.e. the field is skipped
+    ZVecErrorCode err = ZVEC_OK;
+
+    switch (field->data_type) {
+      case ZVEC_DATA_TYPE_VECTOR_FP32: {
+        float *vector_data = (float *)malloc(field->dimension * sizeof(float));
+        if (vector_data) {
+          for (uint32_t j = 0; j < field->dimension; j++) {
+            vector_data[j] = (float)(doc_id + j * 0.1);
+          }
+          err = zvec_doc_add_field_by_value(doc, field->name->data,
+                                            field->data_type, vector_data,
+                                            field->dimension * sizeof(float));
+          free(vector_data);
+        }
+        break;
+      }
+      case ZVEC_DATA_TYPE_VECTOR_FP64: {
+        double *vector_data =
+            (double *)malloc(field->dimension * sizeof(double));
+        if (vector_data) {
+          for (uint32_t j = 0; j < field->dimension; j++) {
+            vector_data[j] = (double)(doc_id + j * 0.1);
+          }
+          err = zvec_doc_add_field_by_value(doc, field->name->data,
+                                            field->data_type, vector_data,
+                                            field->dimension * sizeof(double));
+          free(vector_data);
+        }
+        break;
+      }
+      case ZVEC_DATA_TYPE_VECTOR_FP16: {
+        // Stored as 32-bit floats, as in zvec_test_create_doc
+        float *vector_data = (float *)malloc(field->dimension * sizeof(float));
+        if (vector_data) {
+          for (uint32_t j = 0; j < field->dimension; j++) {
+            vector_data[j] = (float)(doc_id + j * 0.1);
+          }
+          err = zvec_doc_add_field_by_value(doc, field->name->data,
+                                            field->data_type, vector_data,
+                                            field->dimension * sizeof(float));
+          free(vector_data);
+        }
+        break;
+      }
+      case ZVEC_DATA_TYPE_VECTOR_INT8: {
+        int8_t *vector_data =
+            (int8_t *)malloc(field->dimension * sizeof(int8_t));
+        if (vector_data) {
+          for (uint32_t j = 0; j < field->dimension; j++) {
+            vector_data[j] = (int8_t)(doc_id % 128);
+          }
+          err = zvec_doc_add_field_by_value(doc, field->name->data,
+                                            field->data_type, vector_data,
+                                            field->dimension * sizeof(int8_t));
+          free(vector_data);
+        }
+        break;
+      }
+      case ZVEC_DATA_TYPE_VECTOR_INT16: {
+        int16_t *vector_data =
+            (int16_t *)malloc(field->dimension * sizeof(int16_t));
+        if (vector_data) {
+          for (uint32_t j = 0; j < field->dimension; j++) {
+            vector_data[j] = (int16_t)(doc_id % 32768);
+          }
+          err = zvec_doc_add_field_by_value(doc, field->name->data,
+                                            field->data_type, vector_data,
+                                            field->dimension * sizeof(int16_t));
+          free(vector_data);
+        }
+        break;
+      }
+      case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16:
+      case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: {
+        // Buffer built here: a size_t count followed by 100 interleaved
+        // (uint32_t index, float value) pairs.
+        // NOTE(review): zvec_test_create_doc builds sparse data differently
+        // (uint32_t count, all indices, then all values) - confirm which
+        // layout zvec_doc_add_field_by_value expects.
+        const size_t nnz = 100;
+        size_t sparse_size =
+            sizeof(size_t) + nnz * (sizeof(uint32_t) + sizeof(float));
+        char *sparse_data = (char *)malloc(sparse_size);
+        if (sparse_data) {
+          char *ptr = sparse_data;
+          *((size_t *)ptr) = nnz;
+          ptr += sizeof(size_t);
+
+          for (size_t j = 0; j < nnz; j++) {
+            *((uint32_t *)ptr) = (uint32_t)j;
+            ptr += sizeof(uint32_t);
+            *((float *)ptr) = (float)(doc_id + j * 0.1);
+            ptr += sizeof(float);
+          }
+          err = zvec_doc_add_field_by_value(doc, field->name->data,
+                                            field->data_type, sparse_data,
+                                            sparse_size);
+          free(sparse_data);
+        }
+        break;
+      }
+      default:
+        break;
+    }
+
+    // Unlike zvec_test_create_doc, a failed field aborts the whole document
+    if (err != ZVEC_OK) {
+      zvec_doc_destroy(doc);
+      return NULL;
+    }
+  }
+
+  return doc;
+}
+
+ZVecDoc *zvec_test_create_doc_with_fields(uint64_t doc_id,
+                                          const char **field_names,
+                                          const ZVecDataType *field_types,
+                                          size_t field_count, const char *pk) {
+  ZVecDoc *doc = zvec_doc_create();
+  if (!doc) return NULL;
+
+  // Set primary key
+  char *primary_key = pk ?
strdup_safe(pk) : zvec_test_make_pk(doc_id);
+  if (primary_key) {
+    zvec_doc_set_pk(doc, primary_key);
+    free(primary_key);
+  }
+
+  // A non-empty request needs both arrays: the loop below indexes them
+  // unconditionally, so a NULL array would be dereferenced.
+  if (field_count > 0 && (!field_names || !field_types)) {
+    zvec_doc_destroy(doc);
+    return NULL;
+  }
+
+  // Create data for specified fields. Only INT32, STRING, FLOAT and
+  // VECTOR_FP32 are filled; any other type is skipped without error.
+  for (size_t i = 0; i < field_count; i++) {
+    ZVecErrorCode err = ZVEC_OK;
+
+    switch (field_types[i]) {
+      case ZVEC_DATA_TYPE_INT32:
+        err = zvec_doc_add_field_by_value(doc, field_names[i], field_types[i],
+                                          &(int32_t){(int32_t)doc_id},
+                                          sizeof(int32_t));
+        break;
+      case ZVEC_DATA_TYPE_STRING: {
+        char string_val[64];
+        snprintf(string_val, sizeof(string_val), "value_%llu",
+                 (unsigned long long)doc_id);
+        err = zvec_doc_add_field_by_value(doc, field_names[i], field_types[i],
+                                          string_val, strlen(string_val));
+        break;
+      }
+      case ZVEC_DATA_TYPE_FLOAT:
+        err =
+            zvec_doc_add_field_by_value(doc, field_names[i], field_types[i],
+                                        &(float){(float)doc_id}, sizeof(float));
+        break;
+      case ZVEC_DATA_TYPE_VECTOR_FP32: {
+        // No schema is available here, so the vector length is fixed at 128
+        float vector_data[128];
+        for (int j = 0; j < 128; j++) {
+          vector_data[j] = (float)(doc_id + j * 0.1);
+        }
+        err = zvec_doc_add_field_by_value(doc, field_names[i], field_types[i],
+                                          vector_data, sizeof(vector_data));
+        break;
+      }
+      default:
+        // Other types can be added here
+        break;
+    }
+
+    // The first field that cannot be added aborts the whole document
+    if (err != ZVEC_OK) {
+      zvec_doc_destroy(doc);
+      return NULL;
+    }
+  }
+
+  return doc;
+}
+
+// =============================================================================
+// Index Parameter Creation Helper Functions Implementation
+// =============================================================================
+
+// Returns malloc'ed HNSW parameters built with
+// ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_IP, 16, 100, 50,
+// ZVEC_QUANTIZE_TYPE_UNDEFINED), or NULL if the allocation fails.
+// The caller releases the result with free().
+ZVecIndexParams *zvec_test_create_default_hnsw_params(void) {
+  ZVecIndexParams *params = (ZVecIndexParams *)malloc(sizeof(ZVecIndexParams));
+  if (!params) return NULL;
+
+  *params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_IP, 16, 100, 50,
+                             ZVEC_QUANTIZE_TYPE_UNDEFINED);
+
+  return params;
+}
+
+ZVecIndexParams *zvec_test_create_default_flat_params(void) {
+  ZVecIndexParams *params = (ZVecIndexParams *)malloc(sizeof(ZVecIndexParams));
+  if (!params) return NULL;
+
+  *params =
ZVEC_FLAT_PARAMS(ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED);
+
+  return params;
+}
+
+// Returns malloc'ed inverted-index parameters, or NULL if the allocation
+// fails. The caller releases the result with free().
+// NOTE(review): enable_optimize is passed for both ZVEC_INVERT_PARAMS
+// arguments - confirm the second one is meant to follow the same flag.
+ZVecIndexParams *zvec_test_create_default_invert_params(bool enable_optimize) {
+  ZVecIndexParams *params = (ZVecIndexParams *)malloc(sizeof(ZVecIndexParams));
+  if (!params) return NULL;
+
+  *params = ZVEC_INVERT_PARAMS(enable_optimize, enable_optimize);
+
+  return params;
+}
+
+// =============================================================================
+// Field Schema Creation Helper Functions Implementation
+// =============================================================================
+
+// Shared constructor behind the three public field-schema creators.
+// Allocates the field and its name, copies name (a NULL name yields NULL data
+// with zero length and capacity) and copies *index_params when it is given.
+// Returns NULL if any allocation fails; nothing is leaked in that case.
+static ZVecFieldSchema *zvec_test_new_field_schema(
+    const char *name, ZVecDataType data_type, uint32_t dimension,
+    bool nullable, const ZVecIndexParams *index_params) {
+  // calloc instead of malloc: index_params, and any member not assigned
+  // below, reads as zeros rather than indeterminate bytes when no index
+  // parameters are supplied.
+  ZVecFieldSchema *field = (ZVecFieldSchema *)calloc(1, sizeof(*field));
+  if (!field) return NULL;
+
+  field->name = (ZVecString *)calloc(1, sizeof(*field->name));
+  if (!field->name) {
+    free(field);
+    return NULL;
+  }
+
+  if (name) {
+    const size_t name_len = strlen(name);
+    field->name->data = strdup(name);
+    if (!field->name->data) {
+      // Never hand back a non-zero length next to a NULL data pointer
+      free(field->name);
+      free(field);
+      return NULL;
+    }
+    field->name->length = name_len;
+    field->name->capacity = name_len + 1;
+  }
+
+  field->data_type = data_type;
+  field->nullable = nullable;
+  field->dimension = dimension;
+  field->has_index = (index_params != NULL);
+  if (index_params) {
+    field->index_params = *index_params;
+  }
+
+  return field;
+}
+
+// Scalar field: dimension is always 0; invert_params may be NULL (no index).
+ZVecFieldSchema *zvec_test_create_scalar_field(
+    const char *name, ZVecDataType data_type, bool nullable,
+    const ZVecIndexParams *invert_params) {
+  return zvec_test_new_field_schema(name, data_type, 0, nullable,
+                                    invert_params);
+}
+
+// Dense vector field with the given dimension; vector_index_params may be
+// NULL (no index).
+ZVecFieldSchema *zvec_test_create_vector_field(
+    const char *name, ZVecDataType data_type, uint32_t dimension, bool nullable,
+    const ZVecIndexParams *vector_index_params) {
+  return zvec_test_new_field_schema(name, data_type, dimension, nullable,
+                                    vector_index_params);
+}
+
+// Sparse vector field: dimension is always 0; vector_index_params may be NULL
+// (no index).
+ZVecFieldSchema *zvec_test_create_sparse_vector_field(
+    const char *name, ZVecDataType data_type, bool nullable,
+    const ZVecIndexParams *vector_index_params) {
+  return zvec_test_new_field_schema(name, data_type, 0, nullable,
+                                    vector_index_params);
+}
+
+// =============================================================================
+// Memory Management Helper Functions Implementation
+// =============================================================================
+
+// Frees an array of `count` field structs (not an array of pointers): each
+// element's name string and name object, then the array itself. A single field
+// returned by the creators above is released with count == 1.
+void zvec_test_free_field_schemas(ZVecFieldSchema *fields, size_t count) {
+  if (!fields) return;
+
+  for (size_t i = 0; i < count; i++) {
+    if (fields[i].name) {
+      free(fields[i].name->data);  // free(NULL) is a no-op
+      free(fields[i].name);
+    }
+    // Note: index_params is an embedded value, not a pointer, so it needs
+    // no separate free
+  }
+  free(fields);
+}
+
+// Frees `count` strings and then the array that holds them.
+void zvec_test_free_strings(char **strings, size_t count) {
+  if (!strings) return;
+
+  for (size_t i = 0; i < count; i++) {
+    free(strings[i]);  // free(NULL) is a no-op
+  }
+
+  free(strings);
+}
+
+// =============================================================================
+// File System Helper Functions
Implementation
+// =============================================================================
+
+/**
+ * @brief Delete directory and all its contents (wrapper function)
+ *
+ * Runs `rd /s /q` (Windows) or `rm -rf` (elsewhere) through system(). Because
+ * the path is placed inside a double-quoted shell argument, paths containing
+ * characters that could end the quoting or trigger expansion are rejected
+ * instead of being passed to the shell.
+ *
+ * @param dir_path Directory path
+ * @return int 0 for success, -1 for failure (NULL, empty, unsafe or over-long
+ * path, or a non-zero status from the command)
+ */
+int zvec_test_delete_dir(const char *dir_path) {
+  if (!dir_path || dir_path[0] == '\0') {
+    return -1;
+  }
+
+#ifdef _WIN32
+  // cmd.exe: '"' closes the quoted argument, '%' starts variable expansion
+  const char *unsafe_chars = "\"%";
+#else
+  // sh: these four characters keep their special meaning inside double quotes
+  const char *unsafe_chars = "\"$`\\";
+#endif
+  if (strpbrk(dir_path, unsafe_chars)) {
+    return -1;
+  }
+
+  char cmd[1024];
+#ifdef _WIN32
+  // Windows platform implementation
+  int written =
+      snprintf(cmd, sizeof(cmd), "rd /s /q \"%s\" >nul 2>&1", dir_path);
+#else
+  // Unix/Linux/macOS platform implementation; "--" keeps a path that starts
+  // with '-' from being parsed as an option
+  int written =
+      snprintf(cmd, sizeof(cmd), "rm -rf -- \"%s\" 2>/dev/null", dir_path);
+#endif
+  // Never run a truncated command line
+  if (written < 0 || (size_t)written >= sizeof(cmd)) {
+    return -1;
+  }
+
+  int result = system(cmd);
+  return (result == 0) ? 0 : -1;
+}
diff --git a/tests/c_api/utils.h b/tests/c_api/utils.h
new file mode 100644
index 00000000..0e9b42b7
--- /dev/null
+++ b/tests/c_api/utils.h
@@ -0,0 +1,255 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ZVEC_TESTS_C_API_UTILS_H
+#define ZVEC_TESTS_C_API_UTILS_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include "zvec/c_api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// =============================================================================
+// Schema Creation Helper Functions
+// =============================================================================
+
+/**
+ * @brief Create temporary test schema
+ * Contains basic scalar fields and vector fields
+ *
+ * @return ZVecCollectionSchema* Created schema pointer, needs to be released by
+ * calling zvec_collection_schema_cleanup
+ */
+ZVecCollectionSchema *zvec_test_create_temp_schema(void);
+
+/**
+ * @brief Create pure scalar schema
+ * Contains only scalar fields (int32, string)
+ *
+ * @return ZVecCollectionSchema* Created schema pointer
+ */
+ZVecCollectionSchema *zvec_test_create_scalar_schema(void);
+
+/**
+ * @brief Create full-featured schema
+ * Contains all supported data type fields
+ *
+ * @param nullable Whether to allow null values
+ * @param name Schema name
+ * @param scalar_index_params Scalar index parameters (can be NULL)
+ * @param vector_index_params Vector index parameters (can be NULL)
+ * @param max_doc_count Maximum documents per segment
+ * @return ZVecCollectionSchema* Created schema pointer
+ */
+ZVecCollectionSchema *zvec_test_create_normal_schema(
+    bool nullable, const char *name, const ZVecIndexParams *scalar_index_params,
+    const ZVecIndexParams *vector_index_params, uint64_t max_doc_count);
+
+/**
+ * @brief Create schema with scalar index
+ *
+ * @param nullable Whether to allow null values
+ * @param enable_optimize Whether to enable optimization
+ * @param name Schema name
+ * @return ZVecCollectionSchema* Created schema pointer
+ */
+ZVecCollectionSchema *zvec_test_create_schema_with_scalar_index(
+    bool nullable, bool enable_optimize, const char *name);
+
+/**
+ * @brief Create schema with vector index
+ *
+ * @param nullable Whether to allow null values
+ *
@param name Schema name
+ * @param vector_index_params Vector index parameters (can be NULL, uses default
+ * HNSW parameters)
+ * @return ZVecCollectionSchema* Created schema pointer
+ */
+ZVecCollectionSchema *zvec_test_create_schema_with_vector_index(
+    bool nullable, const char *name,
+    const ZVecIndexParams *vector_index_params);
+
+/**
+ * @brief Create schema with specified maximum document count
+ *
+ * @param doc_count Maximum documents per segment
+ * @return ZVecCollectionSchema* Created schema pointer
+ */
+ZVecCollectionSchema *zvec_test_create_schema_with_max_doc_count(
+    uint64_t doc_count);
+
+// =============================================================================
+// Document Creation Helper Functions
+// =============================================================================
+
+/**
+ * @brief Generate primary key "pk_<doc_id>" based on document ID
+ *
+ * @param doc_id Document ID
+ * @return char* Generated primary key string, needs to be released by calling
+ * free(); NULL if the allocation fails
+ */
+char *zvec_test_make_pk(uint64_t doc_id);
+
+/**
+ * @brief Create complete document
+ * Create corresponding test data for each field according to schema
+ *
+ * @param doc_id Document ID
+ * @param schema Schema pointer (NULL makes the call return NULL)
+ * @param pk Primary key (can be NULL, auto-generated)
+ * @return ZVecDoc* Created document pointer, needs to be released by calling
+ * zvec_doc_destroy
+ */
+ZVecDoc *zvec_test_create_doc(uint64_t doc_id,
+                              const ZVecCollectionSchema *schema,
+                              const char *pk);
+
+/**
+ * @brief Create partial null document
+ * Sets only VECTOR_FP32/FP16/INT8 and SPARSE_VECTOR_FP32/FP16; rest unset
+ *
+ * @param doc_id Document ID
+ * @param schema Schema pointer
+ * @param pk Primary key (can be NULL, auto-generated)
+ * @return ZVecDoc* Created document (zvec_doc_destroy to free), NULL on error
+ */
+ZVecDoc *zvec_test_create_doc_null(uint64_t doc_id,
+                                   const ZVecCollectionSchema *schema,
+                                   const char *pk);
+
+/**
+ * @brief Create document with specified fields
+ * Fills INT32, STRING, FLOAT and 128-element VECTOR_FP32 fields only
+ *
+ * @param doc_id Document ID
+ * @param field_names Field name array
+ * @param field_types Field type array
+ * @param field_count Number of fields
+ * @param pk Primary key (can be NULL, auto-generated)
+ * @return ZVecDoc* Created document (zvec_doc_destroy to free), NULL on error
+ */
+ZVecDoc *zvec_test_create_doc_with_fields(uint64_t doc_id,
+                                          const char **field_names,
+                                          const ZVecDataType *field_types,
+                                          size_t field_count, const char *pk);
+
+// =============================================================================
+// Index Parameter Creation Helper Functions
+// =============================================================================
+
+/**
+ * @brief Create default HNSW index parameters
+ *
+ * @return ZVecIndexParams* malloc'ed parameters (release with free()) or NULL
+ */
+ZVecIndexParams *zvec_test_create_default_hnsw_params(void);
+
+/**
+ * @brief Create default Flat index parameters
+ *
+ * @return ZVecIndexParams* malloc'ed parameters (release with free()) or NULL
+ */
+ZVecIndexParams *zvec_test_create_default_flat_params(void);
+
+/**
+ * @brief Create default scalar (inverted) index parameters
+ *
+ * @param enable_optimize Passed as both arguments of ZVEC_INVERT_PARAMS
+ * @return ZVecIndexParams* malloc'ed parameters (release with free()) or NULL
+ */
+ZVecIndexParams *zvec_test_create_default_invert_params(bool enable_optimize);
+
+// =============================================================================
+// Field Schema Creation Helper Functions
+// =============================================================================
+
+/**
+ * @brief Create scalar field schema
+ *
+ * @param name Field name
+ * @param data_type Data type
+ * @param nullable Whether to allow null values
+ * @param invert_params Scalar index parameters (can be NULL)
+ * @return ZVecFieldSchema* Created field schema pointer; release it with
+ * zvec_test_free_field_schemas(field, 1) (free() alone leaks the name)
+ */
+ZVecFieldSchema *zvec_test_create_scalar_field(
+    const char *name, ZVecDataType data_type, bool nullable,
+    const ZVecIndexParams *invert_params);
+
+/**
+ * @brief Create vector field schema
+ *
+ * @param name Field name
+ * @param data_type Data type
+ * @param dimension Vector dimension
+ * @param nullable Whether to allow null values
+ * @param vector_index_params Vector index parameters (can be NULL)
+ * @return ZVecFieldSchema* Created field schema pointer, NULL on OOM
+ */
+ZVecFieldSchema *zvec_test_create_vector_field(
+    const char *name, ZVecDataType data_type, uint32_t dimension, bool nullable,
+    const ZVecIndexParams *vector_index_params);
+
+/**
+ * @brief Create sparse vector field schema (dimension is set to 0)
+ *
+ * @param name Field name
+ * @param data_type Data type
+ * @param nullable Whether to allow null values
+ * @param vector_index_params Vector index parameters (can be NULL)
+ * @return ZVecFieldSchema* Created field schema pointer, NULL on OOM
+ */
+ZVecFieldSchema *zvec_test_create_sparse_vector_field(
+    const char *name, ZVecDataType data_type, bool nullable,
+    const ZVecIndexParams *vector_index_params);
+
+// =============================================================================
+// Memory Management Helper Functions
+// =============================================================================
+
+/**
+ * @brief Free field schema array (each name, then the array itself)
+ *
+ * @param fields Array of field structs (not of pointers); may be NULL
+ * @param count Number of fields
+ */
+void zvec_test_free_field_schemas(ZVecFieldSchema *fields, size_t count);
+
+/**
+ * @brief Free string array (each string, then the array itself)
+ *
+ * @param strings String array pointer; may be NULL
+ * @param count Number of strings
+ */
+void zvec_test_free_strings(char **strings, size_t count);
+
+/**
+ * @brief Delete directory and all its contents via a shell command
+ *
+ * @param dir_path Directory path
+ * @return int 0 for success, -1 for failure
+ */
+int zvec_test_delete_dir(const char *dir_path);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // ZVEC_TESTS_C_API_UTILS_H
\ No newline at end of file
diff --git a/tests/core/algorithm/ivf/ivf_searcher_test.cc b/tests/core/algorithm/ivf/ivf_searcher_test.cc
index 9911e0e2..75d5df1c 100644
--- a/tests/core/algorithm/ivf/ivf_searcher_test.cc
+++
b/tests/core/algorithm/ivf/ivf_searcher_test.cc @@ -392,7 +392,7 @@ TEST_F(IVFSearcherTest, TestSimpleCosine) { { size_t topk = 33; context->set_topk(topk); - + std::string new_vec; IndexQueryMeta new_meta; ASSERT_EQ(0, reformer->convert(query.data(), qmeta, &new_vec, &new_meta));