From 1ae8da2898b07b29b251f6157ca00f43ac8b30a4 Mon Sep 17 00:00:00 2001 From: Dsssyc Date: Wed, 4 Mar 2026 06:59:13 +0000 Subject: [PATCH] feat: Improve file reading logic for large database files and update version to 0.1.12 --- README.md | 1 + fastcarto/fastdb/src/FastVectorDb.cpp | 30 +++++++++++++++++++++++++-- pyproject.toml | 2 +- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5a2e530..a4e792a 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ A C++ local database library with cross language bindings. Aiming to be a fast, lightweight, and easy-to-use data communication solution for RPC and coupled modeling in scientific computing. ## What's new +- **2026-03-04 (Release 0.1.12)**: Fixed a critical issue where loading large database files (> 2GB) on Linux/Unix systems would fail to read the complete file, leading to missing tables or data corruption. The file reading logic has been improved to correctly handle partial reads for large files. (PR #23) - **2026-03-04 (Memory Overflow Improvement)**: Enhanced the `MemoryStream` implementation to handle large data sizes exceeding 4GB without causing size overflow in `chunk_data_t.size` (u32). This improvement allows for more robust handling of large datasets in memory. (PR #22) - **2026-02-28 (Release Improvement)**: Fix bugs related to build process in Windows. (PR #20) - **2025-12-31(Bug Fix)**: Fixed an issue where shared memory segments were not being properly unregistered from the resource tracker upon closing, which could lead to resource leaks. (PR #17) diff --git a/fastcarto/fastdb/src/FastVectorDb.cpp b/fastcarto/fastdb/src/FastVectorDb.cpp index faf3efb..fc890c6 100644 --- a/fastcarto/fastdb/src/FastVectorDb.cpp +++ b/fastcarto/fastdb/src/FastVectorDb.cpp @@ -146,7 +146,20 @@ printf("loading [%s] ...",filename); } size_t size = fileStat.st_size; void* pdata = malloc(sizeof(u8)*size+64); - _read(fd, pdata, size); + + size_t total_read = 0; + while(total_read < size) { + unsigned int chunk = (size - total_read > 0x40000000) ? 0x40000000 : (unsigned int)(size - total_read); + int r = _read(fd, (char*)pdata + total_read, chunk); + if (r == -1) { + printf("Error reading file: %s\n", strerror(errno)); + _close(fd); + free(pdata); + return NULL; + } + if (r == 0) break; + total_read += r; + } _close(fd); #else int fd = open(filename, O_RDONLY); @@ -165,7 +178,20 @@ printf("loading [%s] ...",filename); } size_t size = fileStat.st_size; void* pdata = malloc(sizeof(u8)*size+64); - read(fd,pdata,size); + + size_t total_read = 0; + while(total_read < size) { + ssize_t r = read(fd, (char*)pdata + total_read, size - total_read); + if (r == -1) { + if (errno == EINTR) continue; + printf("Error reading file: %s\n", strerror(errno)); + close(fd); + free(pdata); + return NULL; + } + if (r == 0) break; // EOF + total_read += r; + } close(fd); #endif auto db = load(pdata,size,free_data_buffer,0); diff --git a/pyproject.toml b/pyproject.toml index fddba41..66eab47 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "fastdb4py" -version = "0.1.11" +version = "0.1.12" description = "FastCarto database bindings" readme = "README.md" requires-python = ">=3.10"