From dd820e1dc65ccd1f2014402d2808abd48fd83e02 Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 21:28:16 +0200 Subject: [PATCH 01/36] adds DB documentation and an initial migrator --- migrator/Makefile | 94 ++++++ migrator/README.md | 152 +++++++++ migrator/db-formats.md | 135 ++++++++ migrator/migrator.c | 742 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1123 insertions(+) create mode 100644 migrator/Makefile create mode 100644 migrator/README.md create mode 100644 migrator/db-formats.md create mode 100644 migrator/migrator.c diff --git a/migrator/Makefile b/migrator/Makefile new file mode 100644 index 0000000..b9bbdef --- /dev/null +++ b/migrator/Makefile @@ -0,0 +1,94 @@ +CC = gcc +CFLAGS = -O2 -Wall -Wextra -std=c11 +LDFLAGS = + +# --------------------------------------------------------------------------- +# Auto-detect available backends via pkg-config (or direct lib checks). +# Each enabled backend appends -DHAVE_ plus its compile/link flags. +# --------------------------------------------------------------------------- + +# --- Tokyo Cabinet ---------------------------------------------------------- +TC_LIBS := $(shell pkg-config --libs tokyocabinet 2>/dev/null) +TC_CFLAGS := $(shell pkg-config --cflags tokyocabinet 2>/dev/null) +ifneq ($(TC_LIBS),) + CFLAGS += -DHAVE_TOKYOCABINET $(TC_CFLAGS) + LDFLAGS += $(TC_LIBS) + $(info [+] Tokyo Cabinet detected) +else + $(info [-] Tokyo Cabinet not found (install: libtokyocabinet-dev)) +endif + +# --- Kyoto Cabinet ---------------------------------------------------------- +KC_LIBS := $(shell pkg-config --libs kyotocabinet 2>/dev/null) +KC_CFLAGS := $(shell pkg-config --cflags kyotocabinet 2>/dev/null) +ifneq ($(KC_LIBS),) + CFLAGS += -DHAVE_KYOTOCABINET $(KC_CFLAGS) + LDFLAGS += $(KC_LIBS) + $(info [+] Kyoto Cabinet detected) +else + $(info [-] Kyoto Cabinet not found (install: libkyotocabinet-dev)) +endif + +# --- LevelDB ---------------------------------------------------------------- 
+LDB_LIBS := $(shell pkg-config --libs leveldb 2>/dev/null) +LDB_CFLAGS := $(shell pkg-config --cflags leveldb 2>/dev/null) +ifneq ($(LDB_LIBS),) + CFLAGS += -DHAVE_LEVELDB $(LDB_CFLAGS) + LDFLAGS += $(LDB_LIBS) + $(info [+] LevelDB detected) +else + # Fallback: try direct link (common on Debian/Ubuntu without .pc file) + LDB_DIRECT := $(shell echo 'int main(){}' | $(CC) -x c - -lleveldb -o /dev/null 2>/dev/null && echo yes) + ifeq ($(LDB_DIRECT),yes) + CFLAGS += -DHAVE_LEVELDB + LDFLAGS += -lleveldb + $(info [+] LevelDB detected (direct link)) + else + $(info [-] LevelDB not found (install: libleveldb-dev)) + endif +endif + +# --- SQLite3 ---------------------------------------------------------------- +SQ_LIBS := $(shell pkg-config --libs sqlite3 2>/dev/null) +SQ_CFLAGS := $(shell pkg-config --cflags sqlite3 2>/dev/null) +ifneq ($(SQ_LIBS),) + CFLAGS += -DHAVE_SQLITE3 $(SQ_CFLAGS) + LDFLAGS += $(SQ_LIBS) + $(info [+] SQLite3 detected) +else + $(info [-] SQLite3 not found (install: libsqlite3-dev)) +endif + +# --- LMDB ------------------------------------------------------------------- +MDB_DIRECT := $(shell echo 'int main(){}' | $(CC) -x c - -llmdb -o /dev/null 2>/dev/null && echo yes) +ifeq ($(MDB_DIRECT),yes) + CFLAGS += -DHAVE_LMDB + LDFLAGS += -llmdb + $(info [+] LMDB detected) +else + $(info [-] LMDB not found (install: liblmdb-dev)) +endif + +# --- Tkrzw ------------------------------------------------------------------ +TKRZW_DIRECT := $(shell echo 'int main(){}' | $(CC) -x c - -ltkrzw -o /dev/null 2>/dev/null && echo yes) +ifeq ($(TKRZW_DIRECT),yes) + CFLAGS += -DHAVE_TKRZW + LDFLAGS += -ltkrzw + $(info [+] Tkrzw detected) +else + $(info [-] Tkrzw not found (install: libtkrzw-dev)) +endif + +# --------------------------------------------------------------------------- + +TARGET = migrator + +.PHONY: all clean + +all: $(TARGET) + +$(TARGET): migrator.c + $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) + +clean: + rm -f $(TARGET) diff --git a/migrator/README.md 
b/migrator/README.md new file mode 100644 index 0000000..b9e47bd --- /dev/null +++ b/migrator/README.md @@ -0,0 +1,152 @@ +# duc Database Migrator + +A standalone command-line tool that converts a duc index database from any +supported backend format to any other, without losing data. + +For a detailed description of each backend's on-disk format, internal +structure, and quirks see **[db-formats.md](db-formats.md)**. + +--- + +## Overview + +duc stores its index as a simple key-value database. The backend is chosen +at compile time; all backends share the same logical schema but differ in +file format, compression, and performance characteristics. + +`migrator` links every available backend into a single binary and performs a +raw KV copy between them — all duc-internal keys (`duc_db_version`, +`duc_index_reports`, path records, …) are transferred verbatim. + +Typical use cases: + +- Upgrading from the 1.4.6 default (`tokyocabinet`) to the 1.5.0 default (`tkrzw`) +- Converting a `leveldb` directory-based database to a single-file format +- Switching to `sqlite3` for inspection with standard SQL tooling + +--- + +## Supported Backends + +| Backend | Format | Compression | Default in | +|-----------------|---------|-------------------|-------------| +| `tokyocabinet` | File | Optional (deflate)| 1.4.6 | +| `kyotocabinet` | File | Always (kct) | — | +| `leveldb` | **Dir** | Always (Snappy) | — | +| `sqlite3` | File | None | — | +| `lmdb` | File | None | — | +| `tkrzw` | File | Optional (ZSTD) | 1.5.0-rc2 | + +All backends listed above are compiled into one binary if the corresponding +library is present at build time. The Makefile reports which ones were +detected. + +> **Note on LevelDB:** the `path` for a LevelDB database is a **directory**, +> not a file. Pass the directory path to `--from` or `--to` accordingly. + +--- + +## Building + +Dependencies are auto-detected via `pkg-config` (and direct linker probes for +LMDB and Tkrzw, which often lack `.pc` files). 
+ +```sh +cd migrator +make +``` + +Example output showing which backends were found: + +``` +[+] Tokyo Cabinet detected +[-] Kyoto Cabinet not found (install: libkyotocabinet-dev) +[+] LevelDB detected (direct link) +[+] SQLite3 detected +[+] LMDB detected +[+] Tkrzw detected +``` + +At least two backends must be compiled in to perform a migration. + +### Manual flags + +If auto-detection fails you can pass flags directly: + +```sh +make CFLAGS="-DHAVE_TOKYOCABINET -DHAVE_TKRZW" \ + LDFLAGS="-ltokyocabinet -ltkrzw" +``` + +--- + +## Usage + +``` +./migrator --from : --to : +``` + +`format` is one of the backend names in the table above; `path` is the +filesystem path to the database file (or directory for LevelDB). + +### Examples + +**Tokyo Cabinet → Tkrzw** (the common 1.4.6 → 1.5.0 upgrade path): + +```sh +./migrator \ + --from tokyocabinet:~/.cache/duc/duc.db \ + --to tkrzw:~/.cache/duc/duc.tkrzw.db +``` + +**Tokyo Cabinet → SQLite3** (for ad-hoc SQL inspection): + +```sh +./migrator \ + --from tokyocabinet:/var/cache/duc/duc.db \ + --to sqlite3:/tmp/duc-inspect.sqlite +# Then: sqlite3 /tmp/duc-inspect.sqlite "select key from blobs" +``` + +**LevelDB directory → LMDB single file:** + +```sh +./migrator \ + --from leveldb:/var/cache/duc/duc-leveldb/ \ + --to lmdb:/var/cache/duc/duc.lmdb +``` + +--- + +## How It Works + +1. The source database is opened **read-only**. +2. A full cursor scan iterates every key-value record in storage order. +3. Each record is written verbatim to the destination database. +4. Both databases are flushed and closed cleanly on completion. + +Progress is printed every 10 000 records; the final line reports the total +count and any write errors. + +Because the copy is raw (below the duc abstraction layer), the destination +database is immediately usable by duc without re-indexing. + +--- + +## Caveats + +- **`duc_db_version`** is copied as-is. 
Backends that do not normally store + this key (LevelDB, SQLite3, LMDB) will have it present after migration, + which is harmless. Backends that validate it on open (Tokyo Cabinet, Kyoto + Cabinet, Tkrzw) will accept it as long as the version string matches the + compiled duc version. + +- **LevelDB** stores its data in a directory; make sure the destination + directory either does not exist or is empty before migrating into it. + +- **LMDB** pre-allocates a large virtual address range (1 GB on 32-bit, 256 GB + on 64-bit). Actual disk usage is much smaller; the reservation is virtual + memory only. + +- The migrator does **not** validate the integrity of the source database + before copying. Run `duc info` on the source first if in doubt. diff --git a/migrator/db-formats.md b/migrator/db-formats.md new file mode 100644 index 0000000..a620aba --- /dev/null +++ b/migrator/db-formats.md @@ -0,0 +1,135 @@ +# duc DB Backend Formats + +Reference for all database backends supported across duc versions, derived from +the source implementations in `src/libduc/db-*.c` and `configure.ac`. + +--- + +## Tokyo Cabinet (`tokyocabinet`) + +- **Introduced:** ≤ 1.4.6 +- **Default in:** 1.4.6 +- **Storage layout:** Single file on disk +- **Magic header (first bytes):** `ToKyO CaBiNeT` +- **Internal type:** `TCBDB` — B+ Tree Database +- **Compression:** Optional deflate (`BDBTDEFLATE`), enabled via `--compress` flag +- **Tuning:** `tcbdbtune(hdb, 256, 512, 131072, 9, 11, BDBTLARGE [| BDBTDEFLATE])` +- **Version check:** Stores and validates `duc_db_version` key on open +- **Notes:** + - The `BDBTLARGE` flag is always set, allowing the file to exceed 2 GB. + - `DUC_OPEN_FORCE` triggers `BDBOTRUNC`, which truncates and recreates the file. 
+ +--- + +## Kyoto Cabinet (`kyotocabinet`) + +- **Introduced:** ≤ 1.4.6 +- **Default in:** — +- **Storage layout:** Single file on disk +- **Magic header (first bytes):** `Kyoto CaBiNeT` +- **Internal type:** KCT (Tree Cabinet), opened with `#type=kct#opts=c` +- **Compression:** Enabled unconditionally via `opts=c` in the open string +- **Version check:** Stores and validates `duc_db_version` key on open +- **Notes:** + - Error mapping is incomplete; all backend errors map to `DUC_E_UNKNOWN`. + - The `DUC_OPEN_COMPRESS` flag is accepted but has no additional effect since + compression is always on via the open string. + +--- + +## LevelDB (`leveldb`) + +- **Introduced:** ≤ 1.4.6 +- **Default in:** — +- **Storage layout:** **Directory** (not a single file); LevelDB stores multiple + SSTable (`.ldb`/`.sst`) and manifest files inside a directory. +- **Magic header:** N/A — detected as a directory by `duc_db_type_check()` +- **Compression:** Snappy compression is always enabled + (`leveldb_snappy_compression`); the `DUC_OPEN_COMPRESS` flag has no effect. +- **Version check:** None — does not store or check `duc_db_version` +- **Notes:** + - Because the path is a directory, it behaves differently from all other + backends when specifying `--database`. + - `leveldb_options_set_create_if_missing` is always set; the DB is created + automatically if it does not exist. + +--- + +## SQLite3 (`sqlite3`) + +- **Introduced:** ≤ 1.4.6 +- **Default in:** — +- **Storage layout:** Single file on disk +- **Magic header (first bytes):** `SQLite format 3` +- **Internal schema:** Single table `blobs(key UNIQUE PRIMARY KEY, value)` with + an additional index `keys` on the `key` column. +- **Compression:** None — no compression support +- **Version check:** None — does not store or check `duc_db_version` +- **Notes:** + - All writes are batched inside a single `BEGIN`/`COMMIT` transaction that + spans the lifetime of the open database (committed on `db_close`). 
+ - On open, a deliberate bogus query (`select bogus from bogus`) is run to + detect corrupt files that `sqlite3_open()` would otherwise accept silently. + - `insert or replace` semantics are used, so re-indexing a path overwrites the + previous entry cleanly. + +--- + +## LMDB (`lmdb`) + +- **Introduced:** ≤ 1.4.6 +- **Default in:** — +- **Storage layout:** Single file on disk (opened with `MDB_NOSUBDIR`) +- **Magic header:** Standard LMDB file header (not checked by duc's type + detector; falls through to `unknown`) +- **Compression:** None — no compression support +- **Version check:** None — does not store or check `duc_db_version` +- **Memory map size:** + - 32-bit platforms: 1 GB (`1024 * 1024 * 1024`) + - 64-bit platforms: 256 GB (`1024 * 1024 * 1024 * 256`) +- **Notes:** + - Uses a single write transaction (`MDB_txn`) for all puts, committed on + `db_close`. A write error in `db_put` calls `exit(1)` immediately. + - The large pre-allocated map size is a virtual address reservation only; + actual disk usage grows on demand. + +--- + +## Tkrzw (`tkrzw`) + +- **Introduced:** 1.5.0-rc2 +- **Default in:** 1.5.0-rc2 +- **Storage layout:** Single file on disk +- **Magic header:** Tkrzw-specific header (not yet checked by duc's type + detector) +- **Internal type:** `HashDBM` with `StdFile` file driver +- **Base open options:** `dbm=HashDBM,file=StdFile,offset_width=5` +- **Compression:** Optional ZSTD record compression (`record_comp_mode=RECORD_COMP_ZSTD`), + enabled at compile time via `--with-tkrzw-zstd` and at runtime via the + `DUC_OPEN_COMPRESS` flag. Falls back to `NONE` if not compiled in. 
+- **Version check:** Stores and validates `duc_db_version` key on open +- **Filesystem size hints:** The `num_buckets` tuning parameter is scaled via + new `DUC_FS_*` flags: + | Flag | `num_buckets` | + |-------------------|---------------| + | `DUC_FS_BIG` | 100,000,000 | + | `DUC_FS_BIGGER` | 1,000,000,000 | + | `DUC_FS_BIGGEST` | 10,000,000,000| +- **Notes:** + - `DUC_OPEN_FORCE` appends `,truncate=true` to the options string, recreating + the file. + - Tkrzw is a successor/spiritual replacement for both Tokyo Cabinet and Kyoto + Cabinet, providing a modern hash-based store with better compression options. + +--- + +## Summary Table + +| Backend | File/Dir | Single file | Compression | Version key | Default in | +|----------------|----------|-------------|-------------------|-------------|-------------| +| Tokyo Cabinet | File | Yes | Optional (deflate)| Yes | 1.4.6 | +| Kyoto Cabinet | File | Yes | Always (kct opts) | Yes | — | +| LevelDB | Dir | **No** | Always (Snappy) | No | — | +| SQLite3 | File | Yes | None | No | — | +| LMDB | File | Yes | None | No | — | +| Tkrzw | File | Yes | Optional (ZSTD) | Yes | 1.5.0-rc2 | diff --git a/migrator/migrator.c b/migrator/migrator.c new file mode 100644 index 0000000..db1c6b3 --- /dev/null +++ b/migrator/migrator.c @@ -0,0 +1,742 @@ +/* + * migrator.c - duc database backend converter + * + * Copies every raw key-value record from a duc database stored in one backend + * format into a new database using a different backend. All six backends are + * compiled into a single binary (guarded by HAVE_* macros), so any source / + * destination pairing is possible without multiple build variants. 
+ * + * Usage: + * ./migrator --from : --to : + * + * Supported formats (enabled at compile time via HAVE_* flags): + * tokyocabinet, kyotocabinet, leveldb, sqlite3, lmdb, tkrzw + * + * The migration is a raw KV copy (below the duc abstraction layer), so every + * key is transferred verbatim, including duc_db_version and duc_index_reports. + */ + +#include +#include +#include + + +/* ============================================================ + * Generic backend interface + * ============================================================ */ + +typedef struct { + const char *name; + + /* Open the database at path. readonly=1 for source, 0 for destination. + * Returns an opaque handle on success, NULL on failure. */ + void *(*open)(const char *path, int readonly); + + /* Flush and close. */ + void (*close)(void *handle); + + /* Write one record. Returns 0 on success, -1 on error. */ + int (*put)(void *handle, + const void *key, size_t klen, + const void *val, size_t vlen); + + /* Iteration --------------------------------------------------- + * iter_new() – create an iterator positioned before the first record. + * iter_next() – advance and fill *key/*val with malloc'd buffers; + * caller must free() both. Returns 1, or 0 when done. + * iter_free() – destroy the iterator. + */ + void *(*iter_new)(void *handle); + int (*iter_next)(void *iter, + void **key, size_t *klen, + void **val, size_t *vlen); + void (*iter_free)(void *iter); +} backend_ops_t; + + +/* ============================================================ + * Tokyo Cabinet (TCBDB – B+ tree) + * ============================================================ */ +#ifdef HAVE_TOKYOCABINET +#include +#include + +typedef struct { TCBDB *hdb; BDBCUR *cur; } tc_iter_t; + +static void *tc_open(const char *path, int readonly) +{ + TCBDB *hdb = tcbdbnew(); + tcbdbtune(hdb, 256, 512, 131072, 9, 11, BDBTLARGE); + uint32_t mode = readonly + ? 
(HDBONOLCK | HDBOREADER) + : (HDBOWRITER | HDBOCREAT); + if (!tcbdbopen(hdb, path, mode)) { + fprintf(stderr, "tokyocabinet: cannot open '%s': %s\n", + path, tcbdberrmsg(tcbdbecode(hdb))); + tcbdbdel(hdb); + return NULL; + } + return hdb; +} + +static void tc_close(void *h) +{ + tcbdbclose((TCBDB *)h); + tcbdbdel((TCBDB *)h); +} + +static int tc_put(void *h, const void *k, size_t kl, const void *v, size_t vl) +{ + return tcbdbput((TCBDB *)h, k, (int)kl, v, (int)vl) ? 0 : -1; +} + +static void *tc_iter_new(void *h) +{ + tc_iter_t *it = malloc(sizeof *it); + it->hdb = (TCBDB *)h; + it->cur = tcbdbcurnew(it->hdb); + tcbdbcurfirst(it->cur); + return it; +} + +static int tc_iter_next(void *iter, + void **key, size_t *klen, + void **val, size_t *vlen) +{ + tc_iter_t *it = iter; + int ks, vs; + /* tcbdbcurkey / tcbdbcurval each return a malloc'd buffer */ + *key = tcbdbcurkey(it->cur, &ks); + if (!*key) return 0; + *klen = (size_t)ks; + *val = tcbdbcurval(it->cur, &vs); + *vlen = (size_t)vs; + tcbdbcurnext(it->cur); + return 1; +} + +static void tc_iter_free(void *iter) +{ + tc_iter_t *it = iter; + tcbdbcurdel(it->cur); + free(it); +} + +static const backend_ops_t tc_ops = { + "tokyocabinet", + tc_open, tc_close, tc_put, + tc_iter_new, tc_iter_next, tc_iter_free +}; +#endif /* HAVE_TOKYOCABINET */ + + +/* ============================================================ + * Kyoto Cabinet (KCT – tree cabinet with compression) + * ============================================================ */ +#ifdef HAVE_KYOTOCABINET +#include + +typedef struct { KCCUR *cur; } kc_iter_t; + +static void *kc_open(const char *path, int readonly) +{ + KCDB *kdb = kcdbnew(); + char fname[4096]; + snprintf(fname, sizeof fname, "%s#type=kct#opts=c", path); + uint32_t mode = readonly ? 
KCOREADER : (KCOWRITER | KCOCREATE); + if (!kcdbopen(kdb, fname, mode)) { + fprintf(stderr, "kyotocabinet: cannot open '%s'\n", path); + kcdbdel(kdb); + return NULL; + } + return kdb; +} + +static void kc_close(void *h) +{ + kcdbclose((KCDB *)h); + kcdbdel((KCDB *)h); +} + +static int kc_put(void *h, const void *k, size_t kl, const void *v, size_t vl) +{ + return kcdbset((KCDB *)h, k, kl, v, vl) ? 0 : -1; +} + +static void *kc_iter_new(void *h) +{ + kc_iter_t *it = malloc(sizeof *it); + it->cur = kcdbcursor((KCDB *)h); + kccurfirst(it->cur); + return it; +} + +static int kc_iter_next(void *iter, + void **key, size_t *klen, + void **val, size_t *vlen) +{ + kc_iter_t *it = iter; + size_t ks, vs; + const char *vp; + /* + * kccurget: returns malloc'd key; *vbp is a separately malloc'd value + * buffer (both must be freed individually with free/kcfree). + * step=1 advances the cursor after the read. + */ + char *k = kccurget(it->cur, &ks, &vp, &vs, 1); + if (!k) return 0; + *key = k; + *klen = ks; + /* Copy value into a fresh buffer so caller can always call free() on it */ + *val = malloc(vs); + memcpy(*val, vp, vs); + kcfree((void *)vp); + *vlen = vs; + return 1; +} + +static void kc_iter_free(void *iter) +{ + kc_iter_t *it = iter; + kccurdel(it->cur); + free(it); +} + +static const backend_ops_t kc_ops = { + "kyotocabinet", + kc_open, kc_close, kc_put, + kc_iter_new, kc_iter_next, kc_iter_free +}; +#endif /* HAVE_KYOTOCABINET */ + + +/* ============================================================ + * LevelDB (SSTable directory, Snappy compression) + * ============================================================ */ +#ifdef HAVE_LEVELDB +#include + +typedef struct { + leveldb_t *db; + leveldb_options_t *options; + leveldb_readoptions_t *roptions; + leveldb_writeoptions_t *woptions; +} ldb_handle_t; + +typedef struct { + leveldb_iterator_t *it; + leveldb_readoptions_t *roptions; +} ldb_iter_t; + +static void *ldb_open(const char *path, int readonly) +{ + ldb_handle_t *h 
= malloc(sizeof *h); + char *err = NULL; + h->options = leveldb_options_create(); + h->roptions = leveldb_readoptions_create(); + h->woptions = leveldb_writeoptions_create(); + leveldb_options_set_create_if_missing(h->options, !readonly); + leveldb_options_set_compression(h->options, leveldb_snappy_compression); + h->db = leveldb_open(h->options, path, &err); + if (err) { + fprintf(stderr, "leveldb: cannot open '%s': %s\n", path, err); + leveldb_free(err); + leveldb_options_destroy(h->options); + leveldb_readoptions_destroy(h->roptions); + leveldb_writeoptions_destroy(h->woptions); + free(h); + return NULL; + } + return h; +} + +static void ldb_close(void *handle) +{ + ldb_handle_t *h = handle; + leveldb_close(h->db); + leveldb_options_destroy(h->options); + leveldb_readoptions_destroy(h->roptions); + leveldb_writeoptions_destroy(h->woptions); + free(h); +} + +static int ldb_put(void *handle, + const void *k, size_t kl, + const void *v, size_t vl) +{ + ldb_handle_t *h = handle; + char *err = NULL; + leveldb_put(h->db, h->woptions, k, kl, v, vl, &err); + if (err) { leveldb_free(err); return -1; } + return 0; +} + +static void *ldb_iter_new(void *handle) +{ + ldb_handle_t *h = handle; + ldb_iter_t *it = malloc(sizeof *it); + it->roptions = leveldb_readoptions_create(); + it->it = leveldb_create_iterator(h->db, it->roptions); + leveldb_iter_seek_to_first(it->it); + return it; +} + +static int ldb_iter_next(void *iter, + void **key, size_t *klen, + void **val, size_t *vlen) +{ + ldb_iter_t *it = iter; + if (!leveldb_iter_valid(it->it)) return 0; + /* LevelDB returns pointers into its internal buffer; must copy */ + const char *k = leveldb_iter_key(it->it, klen); + const char *v = leveldb_iter_value(it->it, vlen); + *key = malloc(*klen); memcpy(*key, k, *klen); + *val = malloc(*vlen); memcpy(*val, v, *vlen); + leveldb_iter_next(it->it); + return 1; +} + +static void ldb_iter_free(void *iter) +{ + ldb_iter_t *it = iter; + leveldb_iter_destroy(it->it); + 
leveldb_readoptions_destroy(it->roptions); + free(it); +} + +static const backend_ops_t ldb_ops = { + "leveldb", + ldb_open, ldb_close, ldb_put, + ldb_iter_new, ldb_iter_next, ldb_iter_free +}; +#endif /* HAVE_LEVELDB */ + + +/* ============================================================ + * SQLite3 (single-file, table: blobs(key, value)) + * ============================================================ */ +#ifdef HAVE_SQLITE3 +#include + +typedef struct { sqlite3 *s; } sq_handle_t; +typedef struct { sqlite3_stmt *stmt; } sq_iter_t; + +static void *sq_open(const char *path, int readonly) +{ + sq_handle_t *h = malloc(sizeof *h); + int flags = readonly + ? SQLITE_OPEN_READONLY + : (SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE); + if (sqlite3_open_v2(path, &h->s, flags, NULL) != SQLITE_OK) { + fprintf(stderr, "sqlite3: cannot open '%s': %s\n", + path, sqlite3_errmsg(h->s)); + sqlite3_close(h->s); + free(h); + return NULL; + } + if (!readonly) { + sqlite3_exec(h->s, + "create table if not exists blobs" + "(key unique primary key, value)", 0, 0, 0); + sqlite3_exec(h->s, + "create index if not exists keys on blobs(key)", 0, 0, 0); + sqlite3_exec(h->s, "begin", 0, 0, 0); + } + return h; +} + +static void sq_close(void *handle) +{ + sq_handle_t *h = handle; + sqlite3_exec(h->s, "commit", 0, 0, 0); + sqlite3_close(h->s); + free(h); +} + +static int sq_put(void *handle, + const void *k, size_t kl, + const void *v, size_t vl) +{ + sq_handle_t *h = handle; + sqlite3_stmt *stmt; + sqlite3_prepare(h->s, + "insert or replace into blobs(key,value) values(?,?)", + -1, &stmt, 0); + sqlite3_bind_blob(stmt, 1, k, (int)kl, SQLITE_STATIC); + sqlite3_bind_blob(stmt, 2, v, (int)vl, SQLITE_STATIC); + sqlite3_step(stmt); + sqlite3_finalize(stmt); + return 0; +} + +static void *sq_iter_new(void *handle) +{ + sq_handle_t *h = handle; + sq_iter_t *it = malloc(sizeof *it); + sqlite3_prepare_v2(h->s, + "select key, value from blobs", + -1, &it->stmt, 0); + return it; +} + +static int 
sq_iter_next(void *iter, + void **key, size_t *klen, + void **val, size_t *vlen) +{ + sq_iter_t *it = iter; + if (sqlite3_step(it->stmt) != SQLITE_ROW) return 0; + /* Pointers are only valid until the next sqlite3_step(); copy them */ + *klen = (size_t)sqlite3_column_bytes(it->stmt, 0); + *vlen = (size_t)sqlite3_column_bytes(it->stmt, 1); + *key = malloc(*klen); + memcpy(*key, sqlite3_column_blob(it->stmt, 0), *klen); + *val = malloc(*vlen); + memcpy(*val, sqlite3_column_blob(it->stmt, 1), *vlen); + return 1; +} + +static void sq_iter_free(void *iter) +{ + sq_iter_t *it = iter; + sqlite3_finalize(it->stmt); + free(it); +} + +static const backend_ops_t sq_ops = { + "sqlite3", + sq_open, sq_close, sq_put, + sq_iter_new, sq_iter_next, sq_iter_free +}; +#endif /* HAVE_SQLITE3 */ + + +/* ============================================================ + * LMDB (memory-mapped single file, MDB_NOSUBDIR) + * ============================================================ */ +#ifdef HAVE_LMDB +#include + +typedef struct { MDB_env *env; MDB_dbi dbi; MDB_txn *txn; } mdb_handle_t; +typedef struct { MDB_cursor *cur; int started; } mdb_iter_t; + +static void *mdb_open(const char *path, int readonly) +{ + mdb_handle_t *h = malloc(sizeof *h); + unsigned int env_flags = MDB_NOSUBDIR; + unsigned int open_flags = 0; + unsigned int txn_flags = 0; + + /* Virtual map: 1 GB on 32-bit, 256 GB on 64-bit */ + size_t map_size = 1024u * 1024u * 1024u; + if (sizeof(size_t) == 8) map_size *= 256u; + + if (readonly) { + env_flags |= MDB_RDONLY; + txn_flags |= MDB_RDONLY; + } else { + open_flags |= MDB_CREATE; + } + + int rc; + if ((rc = mdb_env_create(&h->env)) != MDB_SUCCESS) goto err; + if ((rc = mdb_env_set_mapsize(h->env, map_size)) != MDB_SUCCESS) goto err; + if ((rc = mdb_env_open(h->env, path, env_flags, 0664)) != MDB_SUCCESS) goto err; + if ((rc = mdb_txn_begin(h->env, NULL, txn_flags, &h->txn)) != MDB_SUCCESS) goto err; + if ((rc = mdb_open(h->txn, NULL, open_flags, &h->dbi)) != MDB_SUCCESS) 
goto err; + return h; +err: + fprintf(stderr, "lmdb: cannot open '%s': %s\n", path, mdb_strerror(rc)); + mdb_env_close(h->env); + free(h); + return NULL; +} + +static void mdb_be_close(void *handle) +{ + mdb_handle_t *h = handle; + mdb_txn_commit(h->txn); + mdb_dbi_close(h->env, h->dbi); + mdb_env_close(h->env); + free(h); +} + +static int mdb_be_put(void *handle, + const void *k, size_t kl, + const void *v, size_t vl) +{ + mdb_handle_t *h = handle; + MDB_val mk = { kl, (void *)k }; + MDB_val mv = { vl, (void *)v }; + return mdb_put(h->txn, h->dbi, &mk, &mv, 0) == MDB_SUCCESS ? 0 : -1; +} + +static void *mdb_iter_new(void *handle) +{ + mdb_handle_t *h = handle; + mdb_iter_t *it = malloc(sizeof *it); + mdb_cursor_open(h->txn, h->dbi, &it->cur); + it->started = 0; + return it; +} + +static int mdb_iter_next(void *iter, + void **key, size_t *klen, + void **val, size_t *vlen) +{ + mdb_iter_t *it = iter; + MDB_val mk, mv; + MDB_cursor_op op = it->started ? MDB_NEXT : MDB_FIRST; + it->started = 1; + if (mdb_cursor_get(it->cur, &mk, &mv, op) != MDB_SUCCESS) return 0; + /* LMDB data lives in the memory-mapped file; copy before txn ends */ + *klen = mk.mv_size; *key = malloc(mk.mv_size); memcpy(*key, mk.mv_data, mk.mv_size); + *vlen = mv.mv_size; *val = malloc(mv.mv_size); memcpy(*val, mv.mv_data, mv.mv_size); + return 1; +} + +static void mdb_iter_free(void *iter) +{ + mdb_iter_t *it = iter; + mdb_cursor_close(it->cur); + free(it); +} + +static const backend_ops_t mdb_ops = { + "lmdb", + mdb_open, mdb_be_close, mdb_be_put, + mdb_iter_new, mdb_iter_next, mdb_iter_free +}; +#endif /* HAVE_LMDB */ + + +/* ============================================================ + * Tkrzw (HashDBM, StdFile; new default in 1.5.0-rc2) + * ============================================================ */ +#ifdef HAVE_TKRZW +#include + +static void *tkrzw_be_open(const char *path, int readonly) +{ + TkrzwDBM *hdb = tkrzw_dbm_open( + path, !readonly, + "dbm=HashDBM,file=StdFile,offset_width=5"); 
+ if (!hdb) { + TkrzwStatus s = tkrzw_get_last_status(); + fprintf(stderr, "tkrzw: cannot open '%s': %s\n", path, s.message); + return NULL; + } + return hdb; +} + +static void tkrzw_be_close(void *h) +{ + tkrzw_dbm_close((TkrzwDBM *)h); +} + +static int tkrzw_be_put(void *h, + const void *k, size_t kl, + const void *v, size_t vl) +{ + return tkrzw_dbm_set((TkrzwDBM *)h, + k, (int32_t)kl, + v, (int32_t)vl, + 1 /* overwrite */) ? 0 : -1; +} + +typedef struct { TkrzwDBMIter *it; } tkrzw_iter_t; + +static void *tkrzw_iter_new(void *h) +{ + tkrzw_iter_t *it = malloc(sizeof *it); + it->it = tkrzw_dbm_make_iterator((TkrzwDBM *)h); + tkrzw_dbm_iter_first(it->it); + return it; +} + +static int tkrzw_iter_next(void *iter, + void **key, size_t *klen, + void **val, size_t *vlen) +{ + tkrzw_iter_t *it = iter; + int32_t ks, vs; + char *k, *v; + /* tkrzw_dbm_iter_get returns malloc'd key and value; step separately */ + if (!tkrzw_dbm_iter_get(it->it, &k, &ks, &v, &vs)) return 0; + *key = k; *klen = (size_t)ks; + *val = v; *vlen = (size_t)vs; + tkrzw_dbm_iter_next(it->it); + return 1; +} + +static void tkrzw_iter_free(void *iter) +{ + tkrzw_iter_t *it = iter; + tkrzw_dbm_iter_free(it->it); + free(it); +} + +static const backend_ops_t tkrzw_ops = { + "tkrzw", + tkrzw_be_open, tkrzw_be_close, tkrzw_be_put, + tkrzw_iter_new, tkrzw_iter_next, tkrzw_iter_free +}; +#endif /* HAVE_TKRZW */ + + +/* ============================================================ + * Backend registry + * ============================================================ */ + +static const backend_ops_t * const backends[] = { +#ifdef HAVE_TOKYOCABINET + &tc_ops, +#endif +#ifdef HAVE_KYOTOCABINET + &kc_ops, +#endif +#ifdef HAVE_LEVELDB + &ldb_ops, +#endif +#ifdef HAVE_SQLITE3 + &sq_ops, +#endif +#ifdef HAVE_LMDB + &mdb_ops, +#endif +#ifdef HAVE_TKRZW + &tkrzw_ops, +#endif + NULL +}; + +static const backend_ops_t *find_backend(const char *name) +{ + for (int i = 0; backends[i]; i++) + if (strcmp(backends[i]->name, 
name) == 0) + return backends[i]; + return NULL; +} + +static void list_backends(FILE *out) +{ + fprintf(out, "Compiled-in backends:"); + for (int i = 0; backends[i]; i++) + fprintf(out, " %s", backends[i]->name); + fprintf(out, "\n"); +} + + +/* ============================================================ + * Argument parsing helpers + * ============================================================ */ + +static void usage(const char *argv0) +{ + fprintf(stderr, + "Usage: %s --from : --to :\n\n" + "Copies every key-value record from a duc database in one backend\n" + "format to a new database using a different backend. The migration\n" + "is a raw KV copy (below the duc abstraction layer), so all internal\n" + "duc keys (duc_db_version, duc_index_reports, …) are transferred too.\n\n", + argv0); + list_backends(stderr); +} + +/* Split "format:path" on the first colon. + * Writes the format name into fmt (NUL-terminated) and sets *path. + * Returns 0 on success, -1 if no colon is present. */ +static int split_spec(const char *arg, + char *fmt, size_t fmtsz, + const char **path) +{ + const char *colon = strchr(arg, ':'); + if (!colon) return -1; + size_t flen = (size_t)(colon - arg); + if (flen >= fmtsz) flen = fmtsz - 1; + memcpy(fmt, arg, flen); + fmt[flen] = '\0'; + *path = colon + 1; + return 0; +} + + +/* ============================================================ + * main + * ============================================================ */ + +int main(int argc, char **argv) +{ + const char *from_arg = NULL; + const char *to_arg = NULL; + + for (int i = 1; i < argc; i++) { + if (!strcmp(argv[i], "--from") && i + 1 < argc) from_arg = argv[++i]; + else if (!strcmp(argv[i], "--to") && i + 1 < argc) to_arg = argv[++i]; + else { usage(argv[0]); return 1; } + } + + if (!from_arg || !to_arg) { usage(argv[0]); return 1; } + + char from_fmt[64], to_fmt[64]; + const char *from_path, *to_path; + + if (split_spec(from_arg, from_fmt, sizeof from_fmt, &from_path) < 0) { + 
fprintf(stderr, "error: --from argument must be <format>:<path>\n");
+		return 1;
+	}
+	if (split_spec(to_arg, to_fmt, sizeof to_fmt, &to_path) < 0) {
+		fprintf(stderr, "error: --to argument must be <format>:<path>\n");
+		return 1;
+	}
+
+	const backend_ops_t *src_ops = find_backend(from_fmt);
+	const backend_ops_t *dst_ops = find_backend(to_fmt);
+
+	if (!src_ops) {
+		fprintf(stderr, "error: unknown source backend '%s'\n", from_fmt);
+		list_backends(stderr);
+		return 1;
+	}
+	if (!dst_ops) {
+		fprintf(stderr, "error: unknown destination backend '%s'\n", to_fmt);
+		list_backends(stderr);
+		return 1;
+	}
+
+	fprintf(stderr, "Migrating: %s:%s -> %s:%s\n",
+		from_fmt, from_path, to_fmt, to_path);
+
+	void *src = src_ops->open(from_path, 1 /* read-only */);
+	if (!src) return 1;
+
+	void *dst = dst_ops->open(to_path, 0 /* read-write */);
+	if (!dst) { src_ops->close(src); return 1; }
+
+	void *iter = src_ops->iter_new(src);
+	void *key, *val;
+	size_t klen, vlen;
+	unsigned long count = 0, errors = 0;
+
+	while (src_ops->iter_next(iter, &key, &klen, &val, &vlen)) {
+		if (dst_ops->put(dst, key, klen, val, vlen) != 0) {
+			fprintf(stderr, "warning: failed to write record %lu\n", count);
+			errors++;
+		}
+		free(key);
+		free(val);
+		count++;
+		if (count % 10000 == 0)
+			fprintf(stderr, "\r %lu records copied...", count);
+	}
+
+	src_ops->iter_free(iter);
+	src_ops->close(src);
+	dst_ops->close(dst);
+
+	fprintf(stderr, "\nDone: %lu records copied", count);
+	if (errors)
+		fprintf(stderr, " (%lu write errors)", errors);
+	fprintf(stderr, ".\n");
+
+	return errors ? 1 : 0;
+}
From da87055db2b4e0e780003ef5d5f6af356503585f Mon Sep 17 00:00:00 2001
From: CaCO3
Date: Fri, 3 Apr 2026 21:35:32 +0200
Subject: [PATCH 02/36] add tool to build DUC for any of the available backends.

Example script output:
bash build-all-backends.sh 2>&1
==> Building duc-tkrzw ...
 -> /home/me/duc/testing/duc-tkrzw OK
==> Building duc-tokyocabinet ...
 -> /home/me/duc/testing/duc-tokyocabinet OK
==> Building duc-sqlite3 ...
-> /home/me/duc/testing/duc-sqlite3 OK ==> Building duc-lmdb ... -> /home/me/duc/testing/duc-lmdb OK ==> Building duc-leveldb ... -> /home/me/duc/testing/duc-leveldb OK ==> Building duc-kyotocabinet ... -> /home/me/duc/testing/duc-kyotocabinet OK === Build summary === OK tkrzw (duc version: 1.5.0-rc2) OK tokyocabinet (duc version: 1.5.0-rc2) OK sqlite3 (duc version: 1.5.0-rc2) OK lmdb (duc version: 1.5.0-rc2) OK leveldb (duc version: 1.5.0-rc2) OK kyotocabinet (duc version: 1.5.0-rc2) --- testing/build-all-backends.sh | 64 +++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100755 testing/build-all-backends.sh diff --git a/testing/build-all-backends.sh b/testing/build-all-backends.sh new file mode 100755 index 0000000..dd63e38 --- /dev/null +++ b/testing/build-all-backends.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" +JOBS="${JOBS:-$(nproc)}" + +# Ensure the script is executed from within the testing/ directory +if [[ "$(pwd)" != "$SCRIPT_DIR" ]]; then + echo "error: must be run from the testing/ directory" >&2 + echo " cd $(basename "$SCRIPT_DIR") && bash $(basename "$0")" >&2 + exit 1 +fi + +BACKENDS=(tkrzw tokyocabinet sqlite3 lmdb leveldb kyotocabinet) + +cd "$ROOT_DIR" + +# Regenerate build system if configure is missing or older than configure.ac +if [[ ! -f configure || configure.ac -nt configure ]]; then + echo "==> Running autoreconf -i ..." + autoreconf -i +fi + +failed=() + +for backend in "${BACKENDS[@]}"; do + echo "" + echo "==> Building duc-$backend ..." + + if ! ./configure --with-db-backend="$backend" > "$SCRIPT_DIR/build-$backend.log" 2>&1; then + echo " configure FAILED (see testing/build-$backend.log)" + failed+=("$backend") + continue + fi + + if ! 
make -j"$JOBS" >> "$SCRIPT_DIR/build-$backend.log" 2>&1; then + echo " make FAILED (see testing/build-$backend.log)" + failed+=("$backend") + continue + fi + + cp duc "$SCRIPT_DIR/duc-$backend" + echo " -> $SCRIPT_DIR/duc-$backend OK" +done + +echo "" +echo "=== Build summary ===" +for backend in "${BACKENDS[@]}"; do + bin="$SCRIPT_DIR/duc-$backend" + if [[ " ${failed[*]:-} " == *" $backend "* ]]; then + echo " FAIL $backend" + elif [[ -x "$bin" ]]; then + echo " OK $backend ($("$bin" --version 2>&1 | head -1))" + else + echo " MISS $backend" + fi +done + +if [[ ${#failed[@]} -gt 0 ]]; then + echo "" + echo "Some backends failed: ${failed[*]}" + exit 1 +fi From 39d9d2901d5a85480fc78f41202e48e828f3d20a Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 22:03:39 +0200 Subject: [PATCH 03/36] added script to test indexing with the different backends Example: ./test-compare-backends.sh Indexing path: /usr/share/doc DB dir: /home/gruinelli/temp/duc/testing/dbs [tkrzw] indexing ... opening tkzrw DB with compression: RECORD_COMP_ZSTD done. dumping json ... done. (7140333 bytes) [tokyocabinet] indexing ... done. dumping json ... done. (7140333 bytes) [sqlite3] indexing ... done. dumping json ... done. (7140333 bytes) [lmdb] indexing ... done. dumping json ... done. (7140333 bytes) [leveldb] indexing ... done. dumping json ... done. (7140333 bytes) [kyotocabinet] indexing ... done. dumping json ... done. 
(7140333 bytes) === Pairwise JSON comparison === tkrzw == tokyocabinet [identical] tkrzw == sqlite3 [identical] tkrzw == lmdb [identical] tkrzw == leveldb [identical] tkrzw == kyotocabinet [identical] tokyocabinet == sqlite3 [identical] tokyocabinet == lmdb [identical] tokyocabinet == leveldb [identical] tokyocabinet == kyotocabinet [identical] sqlite3 == lmdb [identical] sqlite3 == leveldb [identical] sqlite3 == kyotocabinet [identical] lmdb == leveldb [identical] lmdb == kyotocabinet [identical] leveldb == kyotocabinet [identical] Result: all backends produce identical JSON output. --- testing/README.md | 71 ++++++++++++++++++++ testing/build-all-backends.sh | 14 ++++ testing/test-compare-backends.sh | 107 +++++++++++++++++++++++++++++++ 3 files changed, 192 insertions(+) create mode 100644 testing/README.md create mode 100755 testing/test-compare-backends.sh diff --git a/testing/README.md b/testing/README.md new file mode 100644 index 0000000..b275383 --- /dev/null +++ b/testing/README.md @@ -0,0 +1,71 @@ +# duc — multi-backend testing + +This directory contains scripts and pre-built binaries for building and +cross-testing `duc` across all supported database backends. + +## Backends + +| Backend | Binary | DB file / directory | +|----------------|----------------------|----------------------------| +| tkrzw | `duc-tkrzw` | `*.db` | +| tokyocabinet | `duc-tokyocabinet` | `*.db` | +| sqlite3 | `duc-sqlite3` | `*.db` | +| lmdb | `duc-lmdb` | `*.db` | +| leveldb | `duc-leveldb` | `*.dir/` (directory) | +| kyotocabinet | `duc-kyotocabinet` | `*.db` | + +## Scripts + +### `build-all-backends.sh` + +Builds a separate `duc-` binary for every supported database backend. + +**Must be run from the `testing/` directory.** + +```bash +cd testing +bash build-all-backends.sh +``` + +- Runs `autoreconf -i` if `configure` is missing or older than `configure.ac`. +- For each backend: runs `./configure --with-db-backend=`, then `make`. 
+- Copies the resulting binary to `testing/duc-<backend>`.
+- Saves full build output to `testing/build-<backend>.log`.
+- Exits with a non-zero status if any backend fails to build.
+
+The number of parallel make jobs can be controlled via the `JOBS` environment
+variable (defaults to `nproc`):
+
+```bash
+JOBS=4 bash build-all-backends.sh
+```
+
+### `test-compare-backends.sh`
+
+Indexes a filesystem path with every available `duc-<backend>` binary, dumps
+the result as JSON, and performs a pairwise comparison to verify that all
+backends produce identical output.
+
+```bash
+bash test-compare-backends.sh [PATH]
+```
+
+- `PATH` defaults to `/usr/share/doc` if not specified.
+- Skips any backend whose binary is not present in `testing/`.
+- Temporary databases and JSON files are written to a `mktemp` directory and
+  cleaned up automatically on exit.
+- Exits with a non-zero status if any pair of backends produces different JSON.
+
+## Dependencies
+
+The following development libraries must be installed before building:
+
+```bash
+sudo apt-get install \
+    libtokyocabinet-dev \
+    libkyotocabinet-dev \
+    libleveldb-dev \
+    liblmdb-dev \
+    libsqlite3-dev \
+    libtkrzw-dev
+```
diff --git a/testing/build-all-backends.sh b/testing/build-all-backends.sh
index dd63e38..4331866 100755
--- a/testing/build-all-backends.sh
+++ b/testing/build-all-backends.sh
@@ -1,4 +1,18 @@
 #!/usr/bin/env bash
+#
+# build-all-backends.sh — Build duc for every supported database backend.
+#
+# For each backend (tkrzw, tokyocabinet, sqlite3, lmdb, leveldb, kyotocabinet)
+# this script runs ./configure --with-db-backend=<backend>, compiles duc, and
+# copies the resulting binary as testing/duc-<backend>. Build output for each
+# backend is saved to testing/build-<backend>.log.
+# +# Usage: +# cd testing && bash build-all-backends.sh +# +# Environment: +# JOBS — number of parallel make jobs (default: nproc) +# set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" diff --git a/testing/test-compare-backends.sh b/testing/test-compare-backends.sh new file mode 100755 index 0000000..262afc7 --- /dev/null +++ b/testing/test-compare-backends.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +# +# test-compare-backends.sh — Index a path with every duc backend and compare JSON output. +# +# For each duc- binary found in the same directory, this script: +# 1. Indexes the given path into a persistent database in testing/dbs/. +# 2. Dumps the database content as JSON. +# 3. Performs a pairwise diff of all JSON outputs and reports any differences. +# +# Database files are kept in testing/dbs/ after the run for further inspection. +# JSON files are kept in testing/dbs/ after the run alongside the databases. +# Lock files left behind by backends (e.g. lmdb .db-lock) are removed after indexing. +# +# Usage: +# bash test-compare-backends.sh [PATH] +# +# Arguments: +# PATH — filesystem path to index (default: /usr/share/doc) +# +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +INDEX_PATH="${1:-/usr/share/doc}" + +BACKENDS=(tkrzw tokyocabinet sqlite3 lmdb leveldb kyotocabinet) +DBDIR="$SCRIPT_DIR/dbs" + +mkdir -p "$DBDIR" + +echo "Indexing path: $INDEX_PATH" +echo "DB dir: $DBDIR" +echo "" + +# Index and dump JSON for each backend +for backend in "${BACKENDS[@]}"; do + bin="$SCRIPT_DIR/duc-$backend" + if [[ ! -x "$bin" ]]; then + echo "[$backend] SKIP — binary not found: $bin" + continue + fi + + # leveldb uses a directory as DB path + if [[ "$backend" == "leveldb" ]]; then + db="$DBDIR/$backend.dir" + else + db="$DBDIR/$backend.db" + fi + + json_file="$DBDIR/$backend.json" + + rm -rf "$db" + + echo -n "[$backend] indexing ... " + if "$bin" index -q -d "$db" "$INDEX_PATH" 2>&1; then + echo -n "done. dumping json ... 
" + "$bin" json -d "$db" "$INDEX_PATH" > "$json_file" 2>&1 + echo "done. ($(wc -c < "$json_file") bytes)" + else + echo "FAILED" + continue + fi +done + +echo "" +echo "=== Pairwise JSON comparison ===" +echo "" + +# Collect successfully produced JSON files +successful=() +for backend in "${BACKENDS[@]}"; do + f="$DBDIR/$backend.json" + [[ -s "$f" ]] && successful+=("$backend") +done + +if [[ ${#successful[@]} -lt 2 ]]; then + echo "Need at least 2 successful backends to compare." + exit 1 +fi + +all_match=true +for ((i = 0; i < ${#successful[@]}; i++)); do + for ((j = i + 1; j < ${#successful[@]}; j++)); do + a="${successful[$i]}" + b="${successful[$j]}" + fa="$DBDIR/$a.json" + fb="$DBDIR/$b.json" + if diff -q "$fa" "$fb" > /dev/null 2>&1; then + echo " $a == $b [identical]" + else + echo " $a != $b [DIFFER]" + all_match=false + diff --unified=3 "$fa" "$fb" | head -40 || true + echo " ..." + fi + done +done + +# Remove lock files left behind by backends (e.g. lmdb creates a .db-lock) +rm -f "$DBDIR"/*.lock "$DBDIR"/*.db-lock + +echo "" +if $all_match; then + echo "Result: all backends produce identical JSON output." +else + echo "Result: differences found between backends (see above)." + exit 1 +fi From d08836e4c69194bc24075e7067c4e6d90724ae3f Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 22:07:14 +0200 Subject: [PATCH 04/36] added readme --- testing/README.md | 7 +++---- testing/test-compare-backends.sh | 4 +--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/testing/README.md b/testing/README.md index b275383..6ec79fb 100644 --- a/testing/README.md +++ b/testing/README.md @@ -1,7 +1,6 @@ # duc — multi-backend testing -This directory contains scripts and pre-built binaries for building and -cross-testing `duc` across all supported database backends. +This directory contains scripts for building and cross-testing `duc` across all supported database backends. 
## Backends @@ -52,8 +51,8 @@ bash test-compare-backends.sh [PATH] - `PATH` defaults to `/usr/share/doc` if not specified. - Skips any backend whose binary is not present in `testing/`. -- Temporary databases and JSON files are written to a `mktemp` directory and - cleaned up automatically on exit. +- Database files are written to `testing/dbs/` and **kept after the run** for + further inspection. - Exits with a non-zero status if any pair of backends produces different JSON. ## Dependencies diff --git a/testing/test-compare-backends.sh b/testing/test-compare-backends.sh index 262afc7..63bcf5b 100755 --- a/testing/test-compare-backends.sh +++ b/testing/test-compare-backends.sh @@ -7,9 +7,7 @@ # 2. Dumps the database content as JSON. # 3. Performs a pairwise diff of all JSON outputs and reports any differences. # -# Database files are kept in testing/dbs/ after the run for further inspection. -# JSON files are kept in testing/dbs/ after the run alongside the databases. -# Lock files left behind by backends (e.g. lmdb .db-lock) are removed after indexing. +# Database files and the JSON outputs are kept in testing/dbs/ after the run for further inspection. 
# # Usage: # bash test-compare-backends.sh [PATH] From d7b6a9044f0014c28607ba06eb293384b9e9ae0a Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 22:13:34 +0200 Subject: [PATCH 05/36] =?UTF-8?q?fix=20migrator=20build=20errors:=20kccurf?= =?UTF-8?q?irst=E2=86=92kccurjump,=20mdb=5Fopen=20macro=20conflict,=20comm?= =?UTF-8?q?ent=20warning?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- migrator/migrator.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/migrator/migrator.c b/migrator/migrator.c index db1c6b3..37bfde5 100644 --- a/migrator/migrator.c +++ b/migrator/migrator.c @@ -42,7 +42,7 @@ typedef struct { /* Iteration --------------------------------------------------- * iter_new() – create an iterator positioned before the first record. - * iter_next() – advance and fill *key/*val with malloc'd buffers; + * iter_next() – advance and fill *key, *val with malloc'd buffers; * caller must free() both. Returns 1, or 0 when done. * iter_free() – destroy the iterator. 
*/ @@ -167,7 +167,7 @@ static void *kc_iter_new(void *h) { kc_iter_t *it = malloc(sizeof *it); it->cur = kcdbcursor((KCDB *)h); - kccurfirst(it->cur); + kccurjump(it->cur); return it; } @@ -419,7 +419,7 @@ static const backend_ops_t sq_ops = { typedef struct { MDB_env *env; MDB_dbi dbi; MDB_txn *txn; } mdb_handle_t; typedef struct { MDB_cursor *cur; int started; } mdb_iter_t; -static void *mdb_open(const char *path, int readonly) +static void *mdb_be_open(const char *path, int readonly) { mdb_handle_t *h = malloc(sizeof *h); unsigned int env_flags = MDB_NOSUBDIR; @@ -503,7 +503,7 @@ static void mdb_iter_free(void *iter) static const backend_ops_t mdb_ops = { "lmdb", - mdb_open, mdb_be_close, mdb_be_put, + mdb_be_open, mdb_be_close, mdb_be_put, mdb_iter_new, mdb_iter_next, mdb_iter_free }; #endif /* HAVE_LMDB */ From 9922c4b46d967edeaaa2f3086f8eabeda91e406e Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 22:29:42 +0200 Subject: [PATCH 06/36] migrator: open tkrzw with RECORD_COMP_ZSTD to match duc's default compression --- migrator/migrator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migrator/migrator.c b/migrator/migrator.c index 37bfde5..d5fc072 100644 --- a/migrator/migrator.c +++ b/migrator/migrator.c @@ -519,7 +519,7 @@ static void *tkrzw_be_open(const char *path, int readonly) { TkrzwDBM *hdb = tkrzw_dbm_open( path, !readonly, - "dbm=HashDBM,file=StdFile,offset_width=5"); + "dbm=HashDBM,file=StdFile,offset_width=5,record_comp_mode=RECORD_COMP_ZSTD"); if (!hdb) { TkrzwStatus s = tkrzw_get_last_status(); fprintf(stderr, "tkrzw: cannot open '%s': %s\n", path, s.message); From 2a1e56db75ed4dd699d5f7028bca0c80e8039f47 Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 22:34:25 +0200 Subject: [PATCH 07/36] added migration testscript --- testing/.gitignore | 3 + testing/test-compare-backends.sh | 4 + testing/test-migrate-db-any-to-any.sh | 106 ++++++++++++++++++++++++++ 3 files changed, 113 insertions(+) create mode 100644 
testing/.gitignore create mode 100755 testing/test-migrate-db-any-to-any.sh diff --git a/testing/.gitignore b/testing/.gitignore new file mode 100644 index 0000000..cdd7f79 --- /dev/null +++ b/testing/.gitignore @@ -0,0 +1,3 @@ +*.log +duc-* +dbs \ No newline at end of file diff --git a/testing/test-compare-backends.sh b/testing/test-compare-backends.sh index 63bcf5b..94d740c 100755 --- a/testing/test-compare-backends.sh +++ b/testing/test-compare-backends.sh @@ -9,6 +9,10 @@ # # Database files and the JSON outputs are kept in testing/dbs/ after the run for further inspection. # +# Requirements: +# Run build-all-backends.sh first to compile the duc- binaries that +# this script expects to find in the same directory. +# # Usage: # bash test-compare-backends.sh [PATH] # diff --git a/testing/test-migrate-db-any-to-any.sh b/testing/test-migrate-db-any-to-any.sh new file mode 100755 index 0000000..e7606ce --- /dev/null +++ b/testing/test-migrate-db-any-to-any.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash +# +# test_migrate-db-any-to-any.sh — Migrate every duc database in dbs/ to every other backend format. +# +# For each source database found in testing/dbs/ the script invokes the migrator +# binary for every other backend, producing a converted database in +# testing/dbs/migrated/. Output files are named -to-. +# (or -to-.dir for LevelDB). Per-migration logs are written to +# testing/dbs/migrated/logs/. +# +# Any existing output file/directory for a given pair is removed before +# migrating so the run is always clean and reproducible. 
+# +# Usage: +# bash test_migrate-db-any-to-any.sh +# +# Environment: +# TIMEOUT — seconds allowed per migration before it is killed (default: 120) +# +# Requirements: +# - ../migrator/migrator must be built (cd ../migrator && make) +# - Source databases must exist in dbs/ (run test-compare-backends.sh first) +# +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +DBDIR="$SCRIPT_DIR/dbs" +OUTDIR="$DBDIR/migrated" +MIGRATOR="$SCRIPT_DIR/../migrator/migrator" +LOGDIR="$OUTDIR/logs" +TIMEOUT="${TIMEOUT:-120}" + +if [[ ! -x "$MIGRATOR" ]]; then + echo "error: migrator binary not found: $MIGRATOR" >&2 + echo " cd ../migrator && make" >&2 + exit 1 +fi + +mkdir -p "$OUTDIR" "$LOGDIR" + +# Map each backend to its source path and file extension +declare -A DB_PATH +declare -A DB_EXT +DB_PATH[tkrzw]="$DBDIR/tkrzw.db" +DB_EXT[tkrzw]="db" +DB_PATH[tokyocabinet]="$DBDIR/tokyocabinet.db" +DB_EXT[tokyocabinet]="db" +DB_PATH[sqlite3]="$DBDIR/sqlite3.db" +DB_EXT[sqlite3]="db" +DB_PATH[lmdb]="$DBDIR/lmdb.db" +DB_EXT[lmdb]="db" +DB_PATH[leveldb]="$DBDIR/leveldb.dir" +DB_EXT[leveldb]="dir" +DB_PATH[kyotocabinet]="$DBDIR/kyotocabinet.db" +DB_EXT[kyotocabinet]="db" + +BACKENDS=(tkrzw tokyocabinet sqlite3 lmdb leveldb kyotocabinet) + +failed=() +skipped=() + +for src in "${BACKENDS[@]}"; do + src_path="${DB_PATH[$src]}" + if [[ ! -e "$src_path" ]]; then + echo "[$src] SKIP — source DB not found: $src_path" + skipped+=("$src:*") + continue + fi + + for dst in "${BACKENDS[@]}"; do + [[ "$src" == "$dst" ]] && continue + + dst_ext="${DB_EXT[$dst]}" + out_path="$OUTDIR/${src}-to-${dst}.${dst_ext}" + + rm -rf "$out_path" + + log="$LOGDIR/${src}-to-${dst}.log" + printf " %-14s -> %-14s ... " "$src" "$dst" + if timeout "$TIMEOUT" "$MIGRATOR" --from "${src}:${src_path}" --to "${dst}:${out_path}" > "$log" 2>&1; then + echo "ok" + else + rc=$? 
+ if [[ $rc -eq 124 ]]; then + echo "TIMEOUT (>${TIMEOUT}s)" + else + echo "FAILED (rc=$rc)" + fi + failed+=("${src}-to-${dst}") + fi + done +done + +echo "" +echo "=== Migration summary ===" +total=$(( ${#BACKENDS[@]} * (${#BACKENDS[@]} - 1) )) +echo " Attempted : $total" +echo " Failed : ${#failed[@]}" +echo " Skipped : ${#skipped[@]}" + +if [[ ${#failed[@]} -gt 0 ]]; then + echo "" + echo "Failed migrations:" + for f in "${failed[@]}"; do echo " $f"; done + exit 1 +fi From 6dc59d8f1f14e36126f9a019bc31489b6b2232e9 Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 22:35:13 +0200 Subject: [PATCH 08/36] added readme --- testing/README.md | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/testing/README.md b/testing/README.md index 6ec79fb..386304c 100644 --- a/testing/README.md +++ b/testing/README.md @@ -1,6 +1,6 @@ # duc — multi-backend testing -This directory contains scripts for building and cross-testing `duc` across all supported database backends. +This directory contains scripts for building, cross-testing, and migrating `duc` databases across all supported backends. ## Backends @@ -55,6 +55,28 @@ bash test-compare-backends.sh [PATH] further inspection. - Exits with a non-zero status if any pair of backends produces different JSON. +### `test_migrate-db-any-to-any.sh` + +Migrates every database in `testing/dbs/` to every other backend format using +the `migrator` binary, producing 30 output databases in `testing/dbs/migrated/`. + +```bash +bash test_migrate-db-any-to-any.sh +``` + +- Requires `../migrator/migrator` to be built (`cd ../migrator && make`). +- Requires source databases in `dbs/` (run `test-compare-backends.sh` first). +- Output files are named `-to-.` (e.g. `tkrzw-to-sqlite3.db`). +- LevelDB outputs use a `.dir` directory instead of a file. 
+- Each migration is time-limited; set `TIMEOUT` to override (default: 120 s): + +```bash +TIMEOUT=60 bash test_migrate-db-any-to-any.sh +``` + +- Per-migration stdout/stderr is saved to `dbs/migrated/logs/-to-.log`. +- Exits with a non-zero status if any migration fails or times out. + ## Dependencies The following development libraries must be installed before building: From f2caf81efea1afd504ef5fa28c712c5e36d90ac8 Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 22:40:39 +0200 Subject: [PATCH 09/36] migrator: fix tkrzw offset_width mismatch and kyotocabinet double-free in iterator --- migrator/migrator.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/migrator/migrator.c b/migrator/migrator.c index d5fc072..20f132f 100644 --- a/migrator/migrator.c +++ b/migrator/migrator.c @@ -178,19 +178,17 @@ static int kc_iter_next(void *iter, kc_iter_t *it = iter; size_t ks, vs; const char *vp; - /* - * kccurget: returns malloc'd key; *vbp is a separately malloc'd value - * buffer (both must be freed individually with free/kcfree). - * step=1 advances the cursor after the read. - */ + /* kccurget packs key+value in one allocation; step=1 advances the cursor. */ char *k = kccurget(it->cur, &ks, &vp, &vs, 1); if (!k) return 0; *key = k; *klen = ks; /* Copy value into a fresh buffer so caller can always call free() on it */ + /* vp points into the same allocation as k (kccurget packs key+value in + * one buffer); copy the value but do NOT kcfree(vp) — freeing k via + * the caller's free(*key) releases the whole block. 
*/ *val = malloc(vs); memcpy(*val, vp, vs); - kcfree((void *)vp); *vlen = vs; return 1; } @@ -519,7 +517,7 @@ static void *tkrzw_be_open(const char *path, int readonly) { TkrzwDBM *hdb = tkrzw_dbm_open( path, !readonly, - "dbm=HashDBM,file=StdFile,offset_width=5,record_comp_mode=RECORD_COMP_ZSTD"); + "dbm=HashDBM,file=StdFile,record_comp_mode=RECORD_COMP_ZSTD"); if (!hdb) { TkrzwStatus s = tkrzw_get_last_status(); fprintf(stderr, "tkrzw: cannot open '%s': %s\n", path, s.message); From b9e73b69798d9ab193ad5ed51d292f403f48e441 Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 22:45:21 +0200 Subject: [PATCH 10/36] renamed file --- testing/{test-migrate-db-any-to-any.sh => test-migrator.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename testing/{test-migrate-db-any-to-any.sh => test-migrator.sh} (100%) diff --git a/testing/test-migrate-db-any-to-any.sh b/testing/test-migrator.sh similarity index 100% rename from testing/test-migrate-db-any-to-any.sh rename to testing/test-migrator.sh From 2a1bd322977a6969af3e350bf3e91f9acd3fc9e2 Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 22:52:02 +0200 Subject: [PATCH 11/36] updated script --- testing/test-migrator.sh | 123 +++++++++++++++++++++++++++++++++------ 1 file changed, 105 insertions(+), 18 deletions(-) diff --git a/testing/test-migrator.sh b/testing/test-migrator.sh index e7606ce..e298b83 100755 --- a/testing/test-migrator.sh +++ b/testing/test-migrator.sh @@ -12,18 +12,24 @@ # migrating so the run is always clean and reproducible. # # Usage: -# bash test_migrate-db-any-to-any.sh +# bash test-migrator.sh [PATH] +# +# Arguments: +# PATH — filesystem path that was indexed (default: /usr/share/doc) +# Must match the path used when running test-compare-backends.sh. 
# # Environment: # TIMEOUT — seconds allowed per migration before it is killed (default: 120) # # Requirements: # - ../migrator/migrator must be built (cd ../migrator && make) -# - Source databases must exist in dbs/ (run test-compare-backends.sh first) +# - Source databases and JSON files must exist in dbs/ +# (run test-compare-backends.sh first) # set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +INDEX_PATH="${1:-/usr/share/doc}" DBDIR="$SCRIPT_DIR/dbs" OUTDIR="$DBDIR/migrated" MIGRATOR="$SCRIPT_DIR/../migrator/migrator" @@ -56,14 +62,25 @@ DB_EXT[kyotocabinet]="db" BACKENDS=(tkrzw tokyocabinet sqlite3 lmdb leveldb kyotocabinet) -failed=() +migrate_failed=() +migrate_ok=() skipped=() +json_failed=() +json_ok=() +diff_fail=() +diff_ok=() + +# ============================================================ +# Phase 1 — Migrate all databases +# ============================================================ +echo "=== Phase 1: Migrate ===" +echo "" for src in "${BACKENDS[@]}"; do src_path="${DB_PATH[$src]}" if [[ ! -e "$src_path" ]]; then - echo "[$src] SKIP — source DB not found: $src_path" - skipped+=("$src:*") + echo " [$src] SKIP — source DB not found: $src_path" + skipped+=("$src") continue fi @@ -72,35 +89,105 @@ for src in "${BACKENDS[@]}"; do dst_ext="${DB_EXT[$dst]}" out_path="$OUTDIR/${src}-to-${dst}.${dst_ext}" + log="$LOGDIR/${src}-to-${dst}.log" rm -rf "$out_path" - log="$LOGDIR/${src}-to-${dst}.log" printf " %-14s -> %-14s ... " "$src" "$dst" if timeout "$TIMEOUT" "$MIGRATOR" --from "${src}:${src_path}" --to "${dst}:${out_path}" > "$log" 2>&1; then echo "ok" + migrate_ok+=("${src}-to-${dst}") else rc=$? 
- if [[ $rc -eq 124 ]]; then - echo "TIMEOUT (>${TIMEOUT}s)" - else - echo "FAILED (rc=$rc)" - fi - failed+=("${src}-to-${dst}") + [[ $rc -eq 124 ]] && echo "TIMEOUT (>${TIMEOUT}s)" || echo "FAILED (rc=$rc)" + migrate_failed+=("${src}-to-${dst}") fi done done +# ============================================================ +# Phase 2 — Export each migrated database to JSON +# ============================================================ +echo "" +echo "=== Phase 2: Export JSON ===" +echo "" + +for pair in "${migrate_ok[@]}"; do + src="${pair%%-to-*}" + dst="${pair##*-to-}" + dst_ext="${DB_EXT[$dst]}" + out_path="$OUTDIR/${pair}.${dst_ext}" + migrated_json="$OUTDIR/${pair}.json" + dst_bin="$SCRIPT_DIR/duc-$dst" + + printf " %-30s ... " "$pair" + if [[ ! -x "$dst_bin" ]]; then + echo "SKIP (duc-$dst not found)" + continue + fi + if "$dst_bin" json -d "$out_path" "$INDEX_PATH" > "$migrated_json" 2>&1; then + echo "ok ($(wc -c < "$migrated_json") bytes)" + json_ok+=("$pair") + else + echo "FAILED" + json_failed+=("$pair") + fi +done + +# ============================================================ +# Phase 3 — Compare each migrated JSON against source JSON +# ============================================================ +echo "" +echo "=== Phase 3: Compare JSON ===" echo "" -echo "=== Migration summary ===" + +for pair in "${json_ok[@]}"; do + src="${pair%%-to-*}" + src_json="$DBDIR/${src}.json" + migrated_json="$OUTDIR/${pair}.json" + + printf " %-30s ... " "$pair" + if [[ ! 
-s "$src_json" ]]; then + echo "SKIP (no source JSON for $src)" + continue + fi + if diff -q "$src_json" "$migrated_json" > /dev/null 2>&1; then + echo "match" + diff_ok+=("$pair") + else + echo "DIFFER" + diff_fail+=("$pair") + fi +done + +# ============================================================ +# Summary +# ============================================================ +echo "" +echo "=== Summary ===" total=$(( ${#BACKENDS[@]} * (${#BACKENDS[@]} - 1) )) -echo " Attempted : $total" -echo " Failed : ${#failed[@]}" -echo " Skipped : ${#skipped[@]}" +echo " Migrations attempted : $total" +echo " Migration failed : ${#migrate_failed[@]}" +echo " JSON export failed : ${#json_failed[@]}" +echo " JSON match : ${#diff_ok[@]}" +echo " JSON differ : ${#diff_fail[@]}" -if [[ ${#failed[@]} -gt 0 ]]; then +if [[ ${#diff_fail[@]} -gt 0 ]]; then + echo "" + echo "Migrations with JSON differences:" + for f in "${diff_fail[@]}"; do + echo " --- $f ---" + diff --unified=3 "$DBDIR/${f%%-to-*}.json" "$OUTDIR/$f.json" | head -20 || true + echo "" + done +fi + +if [[ ${#migrate_failed[@]} -gt 0 ]]; then echo "" echo "Failed migrations:" - for f in "${failed[@]}"; do echo " $f"; done + for f in "${migrate_failed[@]}"; do echo " $f"; done +fi + +if [[ ${#migrate_failed[@]} -gt 0 || ${#diff_fail[@]} -gt 0 || ${#json_failed[@]} -gt 0 ]]; then exit 1 fi From 0864a62d5d144ed96774aa219c5385c8a965bbe8 Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 22:54:06 +0200 Subject: [PATCH 12/36] =?UTF-8?q?migrator:=20fix=20sqlite3=20key=20binding?= =?UTF-8?q?=20=E2=80=94=20use=20bind=5Ftext=20to=20match=20duc's=20db-sqli?= =?UTF-8?q?te3.c?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- migrator/migrator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migrator/migrator.c b/migrator/migrator.c index 20f132f..5eab3e3 100644 --- a/migrator/migrator.c +++ b/migrator/migrator.c @@ -360,7 +360,7 @@ static int sq_put(void 
*handle, sqlite3_prepare(h->s, "insert or replace into blobs(key,value) values(?,?)", -1, &stmt, 0); - sqlite3_bind_blob(stmt, 1, k, (int)kl, SQLITE_STATIC); + sqlite3_bind_text(stmt, 1, k, (int)kl, SQLITE_STATIC); sqlite3_bind_blob(stmt, 2, v, (int)vl, SQLITE_STATIC); sqlite3_step(stmt); sqlite3_finalize(stmt); From 84331a912e6bb3982ce83c9d835d4ddb6345db6d Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 22:57:38 +0200 Subject: [PATCH 13/36] . --- testing/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testing/README.md b/testing/README.md index 386304c..fc8417d 100644 --- a/testing/README.md +++ b/testing/README.md @@ -55,13 +55,13 @@ bash test-compare-backends.sh [PATH] further inspection. - Exits with a non-zero status if any pair of backends produces different JSON. -### `test_migrate-db-any-to-any.sh` +### `test-migrator.sh` Migrates every database in `testing/dbs/` to every other backend format using the `migrator` binary, producing 30 output databases in `testing/dbs/migrated/`. ```bash -bash test_migrate-db-any-to-any.sh +bash test-migrator.sh ``` - Requires `../migrator/migrator` to be built (`cd ../migrator && make`). @@ -71,7 +71,7 @@ bash test_migrate-db-any-to-any.sh - Each migration is time-limited; set `TIMEOUT` to override (default: 120 s): ```bash -TIMEOUT=60 bash test_migrate-db-any-to-any.sh +bash test-migrator.sh ``` - Per-migration stdout/stderr is saved to `dbs/migrated/logs/-to-.log`. 
From 8d232154ebffdf3d1d24ca4634f3483e155e66ff Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 23:01:02 +0200 Subject: [PATCH 14/36] updated timeout --- testing/test-migrator.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/test-migrator.sh b/testing/test-migrator.sh index e298b83..19cb997 100755 --- a/testing/test-migrator.sh +++ b/testing/test-migrator.sh @@ -34,7 +34,7 @@ DBDIR="$SCRIPT_DIR/dbs" OUTDIR="$DBDIR/migrated" MIGRATOR="$SCRIPT_DIR/../migrator/migrator" LOGDIR="$OUTDIR/logs" -TIMEOUT="${TIMEOUT:-120}" +TIMEOUT="${TIMEOUT:-10}" if [[ ! -x "$MIGRATOR" ]]; then echo "error: migrator binary not found: $MIGRATOR" >&2 @@ -60,7 +60,7 @@ DB_EXT[leveldb]="dir" DB_PATH[kyotocabinet]="$DBDIR/kyotocabinet.db" DB_EXT[kyotocabinet]="db" -BACKENDS=(tkrzw tokyocabinet sqlite3 lmdb leveldb kyotocabinet) +BACKENDS=(tokyocabinet kyotocabinet sqlite3 lmdb leveldb tkrzw) migrate_failed=() migrate_ok=() From c6e3be07deaf12ed948590b92b2877b08c3cdc72 Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 23:11:37 +0200 Subject: [PATCH 15/36] tkrzw: cap num_buckets=131072 to avoid 100M-bucket default (476 MB sparse file) --- migrator/migrator.c | 2 +- src/libduc/db-tkrzw.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/migrator/migrator.c b/migrator/migrator.c index 5eab3e3..3e12db9 100644 --- a/migrator/migrator.c +++ b/migrator/migrator.c @@ -517,7 +517,7 @@ static void *tkrzw_be_open(const char *path, int readonly) { TkrzwDBM *hdb = tkrzw_dbm_open( path, !readonly, - "dbm=HashDBM,file=StdFile,record_comp_mode=RECORD_COMP_ZSTD"); + "dbm=HashDBM,file=StdFile,num_buckets=131072,record_comp_mode=RECORD_COMP_ZSTD"); if (!hdb) { TkrzwStatus s = tkrzw_get_last_status(); fprintf(stderr, "tkrzw: cannot open '%s': %s\n", path, s.message); diff --git a/src/libduc/db-tkrzw.c b/src/libduc/db-tkrzw.c index 90840d3..d09da2c 100644 --- a/src/libduc/db-tkrzw.c +++ b/src/libduc/db-tkrzw.c @@ -55,7 +55,7 @@ struct db 
*db_open(const char *path_db, int flags, duc_errno *e) struct db *db; int compress = 0; int writeable = 0; - char options[256] = "dbm=HashDBM,file=StdFile,offset_width=5"; + char options[256] = "dbm=HashDBM,file=StdFile,offset_width=5,num_buckets=131072"; if (flags & DUC_OPEN_FORCE) { char trunc[] = ",truncate=true"; From 3d4285e2a8160697c23664b33d26fb5721d0a27b Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 23:14:33 +0200 Subject: [PATCH 16/36] =?UTF-8?q?revert:=20db-tkrzw.c=20num=5Fbuckets=20ch?= =?UTF-8?q?ange=20=E2=80=94=20do=20not=20touch=20duc=20core?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/libduc/db-tkrzw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libduc/db-tkrzw.c b/src/libduc/db-tkrzw.c index d09da2c..90840d3 100644 --- a/src/libduc/db-tkrzw.c +++ b/src/libduc/db-tkrzw.c @@ -55,7 +55,7 @@ struct db *db_open(const char *path_db, int flags, duc_errno *e) struct db *db; int compress = 0; int writeable = 0; - char options[256] = "dbm=HashDBM,file=StdFile,offset_width=5,num_buckets=131072"; + char options[256] = "dbm=HashDBM,file=StdFile,offset_width=5"; if (flags & DUC_OPEN_FORCE) { char trunc[] = ",truncate=true"; From cb52d3dd2911793b24aaac4f715b0c07f8ede67f Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 23:15:47 +0200 Subject: [PATCH 17/36] update test script --- testing/test-migrator.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/test-migrator.sh b/testing/test-migrator.sh index 19cb997..c52711f 100755 --- a/testing/test-migrator.sh +++ b/testing/test-migrator.sh @@ -34,7 +34,7 @@ DBDIR="$SCRIPT_DIR/dbs" OUTDIR="$DBDIR/migrated" MIGRATOR="$SCRIPT_DIR/../migrator/migrator" LOGDIR="$OUTDIR/logs" -TIMEOUT="${TIMEOUT:-10}" +TIMEOUT="${TIMEOUT:-300}" if [[ ! 
-x "$MIGRATOR" ]]; then echo "error: migrator binary not found: $MIGRATOR" >&2 From bdcb20f781524e40c5fbe81878146e23533f37dc Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 23:18:23 +0200 Subject: [PATCH 18/36] =?UTF-8?q?migrator:=20add=20progress=20output=20?= =?UTF-8?q?=E2=80=94=20scan/copy=20status=20lines=20with=20flush?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- migrator/migrator.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/migrator/migrator.c b/migrator/migrator.c index 3e12db9..072d673 100644 --- a/migrator/migrator.c +++ b/migrator/migrator.c @@ -710,7 +710,12 @@ int main(int argc, char **argv) void *dst = dst_ops->open(to_path, 0 /* read-write */); if (!dst) { src_ops->close(src); return 1; } + fprintf(stderr, "Scanning source index...\n"); + fflush(stderr); void *iter = src_ops->iter_new(src); + fprintf(stderr, "Copying records...\n"); + fflush(stderr); + void *key, *val; size_t klen, vlen; unsigned long count = 0, errors = 0; @@ -723,8 +728,10 @@ int main(int argc, char **argv) free(key); free(val); count++; - if (count % 10000 == 0) - fprintf(stderr, "\r %lu records copied...", count); + if (count % 1000 == 0) { + fprintf(stderr, "\r %lu records...", count); + fflush(stderr); + } } src_ops->iter_free(iter); From b20f3ed80737b414c2e9f1a56f22122dfb6fe774 Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 23:20:21 +0200 Subject: [PATCH 19/36] . --- testing/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/README.md b/testing/README.md index fc8417d..c7a1f1f 100644 --- a/testing/README.md +++ b/testing/README.md @@ -68,7 +68,7 @@ bash test-migrator.sh - Requires source databases in `dbs/` (run `test-compare-backends.sh` first). - Output files are named `-to-.` (e.g. `tkrzw-to-sqlite3.db`). - LevelDB outputs use a `.dir` directory instead of a file. 
-- Each migration is time-limited; set `TIMEOUT` to override (default: 120 s): +- Each migration is time-limited; set `TIMEOUT` to override (default: 300 s, the migration from `tkrzw` to any other format is really slow!): ```bash bash test-migrator.sh From 5a9af1ac7a86c497564075a28dd09e634f0764dd Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 23:26:43 +0200 Subject: [PATCH 20/36] migrator: add count() per backend; replace verbose output with ASCII progress bar --- migrator/migrator.c | 91 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 73 insertions(+), 18 deletions(-) diff --git a/migrator/migrator.c b/migrator/migrator.c index 072d673..4875941 100644 --- a/migrator/migrator.c +++ b/migrator/migrator.c @@ -51,6 +51,9 @@ typedef struct { void **key, size_t *klen, void **val, size_t *vlen); void (*iter_free)(void *iter); + + /* Return total number of records, or 0 if not cheaply available. */ + size_t (*count)(void *handle); } backend_ops_t; @@ -122,10 +125,13 @@ static void tc_iter_free(void *iter) free(it); } +static size_t tc_count(void *h) { return (size_t)tcbdbrnum((TCBDB *)h); } + static const backend_ops_t tc_ops = { "tokyocabinet", tc_open, tc_close, tc_put, - tc_iter_new, tc_iter_next, tc_iter_free + tc_iter_new, tc_iter_next, tc_iter_free, + tc_count }; #endif /* HAVE_TOKYOCABINET */ @@ -200,10 +206,13 @@ static void kc_iter_free(void *iter) free(it); } +static size_t kc_count(void *h) { return (size_t)kcdbcount((KCDB *)h); } + static const backend_ops_t kc_ops = { "kyotocabinet", kc_open, kc_close, kc_put, - kc_iter_new, kc_iter_next, kc_iter_free + kc_iter_new, kc_iter_next, kc_iter_free, + kc_count }; #endif /* HAVE_KYOTOCABINET */ @@ -302,10 +311,13 @@ static void ldb_iter_free(void *iter) free(it); } +static size_t ldb_count(void *h) { (void)h; return 0; } + static const backend_ops_t ldb_ops = { "leveldb", ldb_open, ldb_close, ldb_put, - ldb_iter_new, ldb_iter_next, ldb_iter_free + ldb_iter_new, ldb_iter_next, ldb_iter_free, 
+ ldb_count }; #endif /* HAVE_LEVELDB */ @@ -400,10 +412,23 @@ static void sq_iter_free(void *iter) free(it); } +static size_t sq_count(void *h) +{ + sq_handle_t *sh = h; + sqlite3_stmt *stmt; + size_t n = 0; + if (sqlite3_prepare_v2(sh->s, "select count(*) from blobs", -1, &stmt, 0) == SQLITE_OK) { + if (sqlite3_step(stmt) == SQLITE_ROW) n = (size_t)sqlite3_column_int64(stmt, 0); + sqlite3_finalize(stmt); + } + return n; +} + static const backend_ops_t sq_ops = { "sqlite3", sq_open, sq_close, sq_put, - sq_iter_new, sq_iter_next, sq_iter_free + sq_iter_new, sq_iter_next, sq_iter_free, + sq_count }; #endif /* HAVE_SQLITE3 */ @@ -499,10 +524,19 @@ static void mdb_iter_free(void *iter) free(it); } +static size_t mdb_count(void *h) +{ + mdb_handle_t *mh = h; + MDB_stat st; + if (mdb_stat(mh->txn, mh->dbi, &st) == MDB_SUCCESS) return (size_t)st.ms_entries; + return 0; +} + static const backend_ops_t mdb_ops = { "lmdb", mdb_be_open, mdb_be_close, mdb_be_put, - mdb_iter_new, mdb_iter_next, mdb_iter_free + mdb_iter_new, mdb_iter_next, mdb_iter_free, + mdb_count }; #endif /* HAVE_LMDB */ @@ -573,10 +607,13 @@ static void tkrzw_iter_free(void *iter) free(it); } +static size_t tkrzw_count(void *h) { return (size_t)tkrzw_dbm_count((TkrzwDBM *)h); } + static const backend_ops_t tkrzw_ops = { "tkrzw", tkrzw_be_open, tkrzw_be_close, tkrzw_be_put, - tkrzw_iter_new, tkrzw_iter_next, tkrzw_iter_free + tkrzw_iter_new, tkrzw_iter_next, tkrzw_iter_free, + tkrzw_count }; #endif /* HAVE_TKRZW */ @@ -710,38 +747,56 @@ int main(int argc, char **argv) void *dst = dst_ops->open(to_path, 0 /* read-write */); if (!dst) { src_ops->close(src); return 1; } - fprintf(stderr, "Scanning source index...\n"); + size_t total = src_ops->count(src); + + fprintf(stderr, "Scanning..."); fflush(stderr); void *iter = src_ops->iter_new(src); - fprintf(stderr, "Copying records...\n"); + fprintf(stderr, "\r"); fflush(stderr); void *key, *val; size_t klen, vlen; - unsigned long count = 0, errors = 0; + unsigned 
long done = 0, errors = 0; + const int BAR = 40; while (src_ops->iter_next(iter, &key, &klen, &val, &vlen)) { - if (dst_ops->put(dst, key, klen, val, vlen) != 0) { - fprintf(stderr, "warning: failed to write record %lu\n", count); + if (dst_ops->put(dst, key, klen, val, vlen) != 0) errors++; - } free(key); free(val); - count++; - if (count % 1000 == 0) { - fprintf(stderr, "\r %lu records...", count); + done++; + if (done % 100 == 0 || done == 1) { + if (total > 0) { + int filled = (int)((double)done / total * BAR); + fprintf(stderr, "\r ["); + for (int i = 0; i < BAR; i++) + fputc(i < filled ? '=' : (i == filled ? '>' : ' '), stderr); + fprintf(stderr, "] %lu/%zu (%d%%)", + done, total, (int)((double)done / total * 100)); + } else { + fprintf(stderr, "\r %lu records", done); + } fflush(stderr); } } + /* Final completed bar */ + if (total > 0) { + fprintf(stderr, "\r ["); + for (int i = 0; i < BAR; i++) fputc('=', stderr); + fprintf(stderr, "] %lu/%lu (100%%)\n", done, done); + } else { + fprintf(stderr, "\r %lu records\n", done); + } + src_ops->iter_free(iter); src_ops->close(src); dst_ops->close(dst); - fprintf(stderr, "\nDone: %lu records copied", count); if (errors) - fprintf(stderr, " (%lu write errors)", errors); - fprintf(stderr, ".\n"); + fprintf(stderr, " %lu write error(s)\n", errors); + fprintf(stderr, "Done: %lu records copied.\n", done); return errors ? 
1 : 0; } From ade47d2292df3f24663c0657c3d40606c804bd45 Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 23:31:53 +0200 Subject: [PATCH 21/36] migrator: warn (not error) if fix-crashes-on-indexing not in ancestry; raise TIMEOUT to 300s --- migrator/Makefile | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/migrator/Makefile b/migrator/Makefile index b9bbdef..ae589a6 100644 --- a/migrator/Makefile +++ b/migrator/Makefile @@ -2,6 +2,17 @@ CC = gcc CFLAGS = -O2 -Wall -Wextra -std=c11 LDFLAGS = +# --------------------------------------------------------------------------- +# Warn if fix-crashes-on-indexing is not an ancestor of HEAD. +# That branch contains essential fixes (buffer.c grow logic) without which +# duc databases can be silently corrupted. Merge it before deploying. +# --------------------------------------------------------------------------- +_BRANCH_OK := $(shell git merge-base --is-ancestor fix-crashes-on-indexing HEAD 2>/dev/null && echo yes || echo no) +ifneq ($(_BRANCH_OK),yes) + $(warning *** fix-crashes-on-indexing is not in HEAD ancestry.) + $(warning *** Merge that branch before deploying — it contains critical database fixes.) +endif + # --------------------------------------------------------------------------- # Auto-detect available backends via pkg-config (or direct lib checks). # Each enabled backend appends -DHAVE_ plus its compile/link flags. 
From e68e211f41e0c5921259f7da76e8810321e0a3ac Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 23:32:15 +0200 Subject: [PATCH 22/36] migrator: remove branch ancestry check from Makefile --- migrator/Makefile | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/migrator/Makefile b/migrator/Makefile index ae589a6..b9bbdef 100644 --- a/migrator/Makefile +++ b/migrator/Makefile @@ -2,17 +2,6 @@ CC = gcc CFLAGS = -O2 -Wall -Wextra -std=c11 LDFLAGS = -# --------------------------------------------------------------------------- -# Warn if fix-crashes-on-indexing is not an ancestor of HEAD. -# That branch contains essential fixes (buffer.c grow logic) without which -# duc databases can be silently corrupted. Merge it before deploying. -# --------------------------------------------------------------------------- -_BRANCH_OK := $(shell git merge-base --is-ancestor fix-crashes-on-indexing HEAD 2>/dev/null && echo yes || echo no) -ifneq ($(_BRANCH_OK),yes) - $(warning *** fix-crashes-on-indexing is not in HEAD ancestry.) - $(warning *** Merge that branch before deploying — it contains critical database fixes.) -endif - # --------------------------------------------------------------------------- # Auto-detect available backends via pkg-config (or direct lib checks). # Each enabled backend appends -DHAVE_ plus its compile/link flags. 
From d4679be6f4c0c0e9accce2d2c98727d7d12dd889 Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 3 Apr 2026 23:58:09 +0200 Subject: [PATCH 23/36] fix migrator --- migrator/migrator.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/migrator/migrator.c b/migrator/migrator.c index 4875941..cbfefa4 100644 --- a/migrator/migrator.c +++ b/migrator/migrator.c @@ -449,10 +449,6 @@ static void *mdb_be_open(const char *path, int readonly) unsigned int open_flags = 0; unsigned int txn_flags = 0; - /* Virtual map: 1 GB on 32-bit, 256 GB on 64-bit */ - size_t map_size = 1024u * 1024u * 1024u; - if (sizeof(size_t) == 8) map_size *= 256u; - if (readonly) { env_flags |= MDB_RDONLY; txn_flags |= MDB_RDONLY; @@ -462,7 +458,13 @@ static void *mdb_be_open(const char *path, int readonly) int rc; if ((rc = mdb_env_create(&h->env)) != MDB_SUCCESS) goto err; - if ((rc = mdb_env_set_mapsize(h->env, map_size)) != MDB_SUCCESS) goto err; + if (!readonly) { + /* For write: give a large virtual address space so large DBs fit. */ + size_t map_size = 1024u * 1024u * 1024u; + if (sizeof(size_t) == 8) map_size *= 256u; + if ((rc = mdb_env_set_mapsize(h->env, map_size)) != MDB_SUCCESS) goto err; + } + /* For readonly: use size=0 — LMDB adopts the mapsize from the file header. 
*/ if ((rc = mdb_env_open(h->env, path, env_flags, 0664)) != MDB_SUCCESS) goto err; if ((rc = mdb_txn_begin(h->env, NULL, txn_flags, &h->txn)) != MDB_SUCCESS) goto err; if ((rc = mdb_open(h->txn, NULL, open_flags, &h->dbi)) != MDB_SUCCESS) goto err; From 48a83199b0cde2479e2a91bd2c9b3a3a25d36f1e Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Sat, 4 Apr 2026 00:00:49 +0200 Subject: [PATCH 24/36] show warning --- testing/test-migrator.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/testing/test-migrator.sh b/testing/test-migrator.sh index c52711f..30b1621 100755 --- a/testing/test-migrator.sh +++ b/testing/test-migrator.sh @@ -84,6 +84,10 @@ for src in "${BACKENDS[@]}"; do continue fi + if [[ "$src" == "tkrzw" ]]; then + echo " [tkrzw] WARNING: tkrzw source iteration is very slow — this may take several minutes per destination" + fi + for dst in "${BACKENDS[@]}"; do [[ "$src" == "$dst" ]] && continue From 4224709a8c62189a940e7d51728af3ae16ae993a Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Sat, 4 Apr 2026 00:05:29 +0200 Subject: [PATCH 25/36] . --- testing/README.md | 19 +++++++++++-------- testing/test-migrator.sh | 32 ++++++++++++++++++++++++++++---- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/testing/README.md b/testing/README.md index c7a1f1f..4293e6c 100644 --- a/testing/README.md +++ b/testing/README.md @@ -58,25 +58,28 @@ bash test-compare-backends.sh [PATH] ### `test-migrator.sh` Migrates every database in `testing/dbs/` to every other backend format using -the `migrator` binary, producing 30 output databases in `testing/dbs/migrated/`. +the `migrator` binary, producing output databases in `testing/dbs/migrated/`. ```bash -bash test-migrator.sh +bash test-migrator.sh [--include-tkrzw-as-source] [PATH] ``` - Requires `../migrator/migrator` to be built (`cd ../migrator && make`). - Requires source databases in `dbs/` (run `test-compare-backends.sh` first). -- Output files are named `-to-.` (e.g. `tkrzw-to-sqlite3.db`). 
+- Output files are named `-to-.` (e.g. `sqlite3-to-lmdb.db`). - LevelDB outputs use a `.dir` directory instead of a file. -- Each migration is time-limited; set `TIMEOUT` to override (default: 300 s, the migration from `tkrzw` to any other format is really slow!): +- Per-migration stdout/stderr is saved to `dbs/migrated/logs/-to-.log`. +- Exits with a non-zero status if any migration fails or times out. +- Each migration is time-limited; set `TIMEOUT` to override (default: 300 s). + +**tkrzw as source is disabled by default** because iterating over a tkrzw +database is extremely slow (several minutes per destination backend). tkrzw is +always available as a *destination*. To also migrate from tkrzw: ```bash -bash test-migrator.sh +bash test-migrator.sh --include-tkrzw-as-source ``` -- Per-migration stdout/stderr is saved to `dbs/migrated/logs/-to-.log`. -- Exits with a non-zero status if any migration fails or times out. - ## Dependencies The following development libraries must be installed before building: diff --git a/testing/test-migrator.sh b/testing/test-migrator.sh index 30b1621..262dd91 100755 --- a/testing/test-migrator.sh +++ b/testing/test-migrator.sh @@ -12,14 +12,20 @@ # migrating so the run is always clean and reproducible. # # Usage: -# bash test-migrator.sh [PATH] +# bash test-migrator.sh [--include-tkrzw-as-source] [PATH] # # Arguments: # PATH — filesystem path that was indexed (default: /usr/share/doc) # Must match the path used when running test-compare-backends.sh. # +# Options: +# --include-tkrzw-as-source +# Also migrate FROM the tkrzw database. Disabled by default because +# tkrzw source iteration is extremely slow (several minutes per +# destination). tkrzw is always available as a migration destination. 
+# # Environment: -# TIMEOUT — seconds allowed per migration before it is killed (default: 120) +# TIMEOUT — seconds allowed per migration before it is killed (default: 300) # # Requirements: # - ../migrator/migrator must be built (cd ../migrator && make) @@ -29,7 +35,16 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -INDEX_PATH="${1:-/usr/share/doc}" + +INCLUDE_TKRZW_SOURCE=0 +POSITIONAL=() +for _arg in "$@"; do + case "$_arg" in + --include-tkrzw-as-source) INCLUDE_TKRZW_SOURCE=1 ;; + *) POSITIONAL+=("$_arg") ;; + esac +done +INDEX_PATH="${POSITIONAL[0]:-/usr/share/doc}" DBDIR="$SCRIPT_DIR/dbs" OUTDIR="$DBDIR/migrated" MIGRATOR="$SCRIPT_DIR/../migrator/migrator" @@ -85,6 +100,11 @@ for src in "${BACKENDS[@]}"; do fi if [[ "$src" == "tkrzw" ]]; then + if [[ "$INCLUDE_TKRZW_SOURCE" != "1" ]]; then + echo " [tkrzw] SKIP as source (pass --include-tkrzw-as-source to enable; iteration is very slow)" + skipped+=("tkrzw-as-source") + continue + fi echo " [tkrzw] WARNING: tkrzw source iteration is very slow — this may take several minutes per destination" fi @@ -170,7 +190,11 @@ done echo "" echo "=== Summary ===" total=$(( ${#BACKENDS[@]} * (${#BACKENDS[@]} - 1) )) -echo " Migrations attempted : $total" +skipped_pairs=$(( ${#skipped[@]} * (${#BACKENDS[@]} - 1) )) +attempted=$(( total - skipped_pairs )) +echo " Migrations possible : $total" +echo " Migrations skipped : $skipped_pairs" +echo " Migrations attempted : $attempted" echo " Migration failed : ${#migrate_failed[@]}" echo " JSON export failed : ${#json_failed[@]}" echo " JSON match : ${#diff_ok[@]}" From 774bd947adc59424acc737c84cd9476655912065 Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Sat, 4 Apr 2026 00:14:06 +0200 Subject: [PATCH 26/36] =?UTF-8?q?revert:=20restore=20all=20duc=20source=20?= =?UTF-8?q?files=20to=20master=20state=20=E2=80=94=20add-migrator-tool=20m?= =?UTF-8?q?ust=20not=20touch=20duc=20core?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit --- ChangeLog | 16 ---------------- INSTALL | 16 ++++++---------- configure.ac | 10 ++-------- src/duc/cmd-json.c | 25 +++++++++++-------------- src/duc/cmd-ui.c | 4 ++-- src/duc/ducrc.c | 41 ++++++++++++++--------------------------- src/duc/main.c | 6 +----- src/libduc/buffer.c | 6 +++--- src/libduc/db-tkrzw.c | 11 +---------- src/libduc/db.c | 17 +++++------------ src/libduc/duc.c | 31 +++++++------------------------ 11 files changed, 52 insertions(+), 131 deletions(-) diff --git a/ChangeLog b/ChangeLog index a5da9ad..9a2f8a9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,19 +1,3 @@ -1.5.0-rc2 (2025-06-02) - - fix: small update to fix potential CVE in buffer.c - - fix : improve detecting DB types when opening and giving out better - errors. - - fix: Issue 340 - JSON escaping fixes. - - fix: Issue 324 - Support compiling on Alpine Linux 3.20, needs - testing - - Basic compile testing on the following linux distros: - - Rocky Linux 8 - - Rocky Linux 9 - - Debian 12 - - Debian 11 - - Ubuntu 22.04 LTS - - Ubuntu 24.04 LTS - - - 1.5.0-rc1 (2024-09-03) - new: added support for tkrzw backend DB and made it the default - this DB is newer and under active support compared to diff --git a/INSTALL b/INSTALL index 6b5bcbe..5888b3e 100644 --- a/INSTALL +++ b/INSTALL @@ -31,12 +31,10 @@ To get the required dependencies on Debian or Ubuntu, run: build-essential libtkrzw-dev tkrzw-doc tkrzw-utils - On older RHEL or CentOS systems, you need to use tokyocabinet since - we haven't tested tkrzw there yet, and in any case it would have to - be built by hand: + On older RHEL or CentOS systems, you need to do: + + $ sudo yum install pango-devel cairo-devel tokyocabinet-devel - $ sudo yum install pango-devel cairo-devel tokyocabinet-devel - $ ./configure --with-db-backend=tokyocabinet RHEL 8 & 9 / Rockly Linux 8 & 9 / Alma Linux 8 & 9 @@ -47,8 +45,7 @@ To get the required dependencies on Debian or Ubuntu, run: Install tkrzw and other packages: - $ sudo yum install tkrzw tkrzw-devel 
tkrzw-doc tkrzw-libs pango-devel cairo-devel \ - libarchive-devel libzstd-devel + $ sudo yum install tkrzw tkrzw-devel tkrzw-doc tkrzw-libs pango-devel cairo-devel tokyocabinet-devel Configuration Options @@ -95,12 +92,12 @@ Database backends ----------------- Duc supports various key-value database backends: - - Tkrzw: tkrzw (default as of v1.5.0) - Tokyo Cabinet: tokyocabinet - LevelDB: leveldb - Sqlite3: sqlite3 - Lightning Memory-Mapped Database: lmdb - Kyoto Cabinet: kyotocabinet + - Tkrzw: tkrzw (default as of v1.5.0) Duc now uses Tkrzw by default: the performance is acceptable and it handles extremely large databases of volumes with terabytes of storage @@ -116,8 +113,7 @@ with different architectures. Notably, Tokyo Cabinet is built with non-standard options which break compatibility with other linux distributions, even on the same architecture [1]. If you are planning to share databases between different platforms (index machine A, -display on machine B) we recommend using another DB backend. Reports -of success would be appreciated. +display on machine B) we recommend using the sqlite3 backend. 
Note, Tokyo Cabiner, Kyoto Cabinet, LevelDB and LMDB are all being deprecated from future versions because the lack of development and diff --git a/configure.ac b/configure.ac index ad84a3b..d35c843 100644 --- a/configure.ac +++ b/configure.ac @@ -7,7 +7,7 @@ AC_PREREQ([2.13]) -AC_INIT([duc], [1.5.0-rc2], [duc@zevv.nl]) +AC_INIT([duc], [1.5.0-rc1], [duc@zevv.nl]) LIB_CURRENT=1 LIB_REVISION=0 @@ -83,7 +83,7 @@ case "${with_db_backend}" in AC_DEFINE([ENABLE_TKRZW], [1], [Enable tkrzw db backend]) ], [ AC_MSG_ERROR(Unable to find tkrzw) ]) AC_SUBST([TKRZW_LIBS]) - AC_SUBST([TKRZW_CFLAGS]) +p AC_SUBST([TKRZW_CFLAGS]) ;; leveldb) AC_CHECK_LIB([leveldb], [leveldb_open]) @@ -113,11 +113,6 @@ esac AC_DEFINE_UNQUOTED(DB_BACKEND, ["${with_db_backend}"], [Database backend]) -PKG_CHECK_MODULES([ZSTD],[libarchive]) -AC_DEFINE([DUC_TKRZW_COMP_ZSTD], ["RECORD_COMP_ZSTD"], ["Enable tkrzw db zstd comppression"]) -AC_DEFINE_UNQUOTED(TKRZW_ZSTD, ["${with_tkrzw_zstd}"], [tkrzw zstd compression support]) -AC_DEFINE([ENABLE_TKRZW_ZSTD], [1], [tkrzw with zstd]) - if test "${enable_cairo}" = "yes"; then PKG_CHECK_MODULES([CAIRO], [cairo],, [AC_MSG_ERROR([ @@ -209,7 +204,6 @@ AC_MSG_RESULT([ - Package version: $PACKAGE $VERSION - Prefix: ${prefix} - Database backend: ${with_db_backend} - - tkrzw ZSTD compression: ${with_tkrzw_zstd} - X11 support: ${enable_x11} - OpenGL support: ${enable_opengl} - UI (ncurses) support: ${enable_ui} diff --git a/src/duc/cmd-json.c b/src/duc/cmd-json.c index c279516..be26851 100644 --- a/src/duc/cmd-json.c +++ b/src/duc/cmd-json.c @@ -28,22 +28,19 @@ static void indent(int n) } } + static void print_escaped(const char *s) { - const char *p = s; - - while(*p) { - if(*p == '"' ) printf("\\\""); - else if(*p == '\\') printf("\\\\"); - else if(*p == '\b') printf("\\b"); - else if(*p == '\f') printf("\\f"); - else if(*p == '\n') printf("\\n"); - else if(*p == '\r') printf("\\r"); - else if(*p == '\t') printf("\\t"); - else if(*p < 0x20) printf("\\u%04x", *p); - 
else putchar(*p); - p++; - } + while(*s) { + switch(*s) { + case '"': printf("\""); break; + case '\t': putchar('\t'); break; + case '\n': putchar('\n'); break; + case '\r': putchar('\r'); break; + default: putchar(*s); break; + } + s++; + } } diff --git a/src/duc/cmd-ui.c b/src/duc/cmd-ui.c index ce3d36f..ce28046 100644 --- a/src/duc/cmd-ui.c +++ b/src/duc/cmd-ui.c @@ -185,14 +185,14 @@ static duc_dir *do_dir(duc *duc, duc_dir *dir, int depth) off_t size = duc_get_size(&e->size, st);; - int max_size_len = opt_bytes ? 12 : 7; + size_t max_size_len = opt_bytes ? 12 : 7; char class = duc_file_type_char(e->type); char siz[32]; duc_human_size(&e->size, st, opt_bytes, siz, sizeof siz); if(cur != i) attrset(attr_size); - printw("%*lu", max_size_len, (size_t) siz); + printw("%*s", max_size_len, siz); printw(" "); char *p = e->name; diff --git a/src/duc/ducrc.c b/src/duc/ducrc.c index 4603493..76ad69b 100644 --- a/src/duc/ducrc.c +++ b/src/duc/ducrc.c @@ -68,7 +68,7 @@ static char *trim(char *s) } -int handle_opt(struct ducrc *ducrc, char shortopt, const char *longopt, const char *val) +static void handle_opt(struct ducrc *ducrc, char shortopt, const char *longopt, const char *val) { struct ducrc_option **os = ducrc->option_list; struct ducrc_option *o = NULL; @@ -80,23 +80,21 @@ int handle_opt(struct ducrc *ducrc, char shortopt, const char *longopt, const ch void (*fn)(const char *val); /* Find option */ - + for(i=0; inoptions; i++) { - o = *os; - if(shortopt && shortopt == o->shortopt) goto found; - if(longopt && strcmp(longopt, o->longopt) == 0) goto found; - os++; + o = *os; + if(shortopt && shortopt == o->shortopt) goto found; + if(longopt && strcmp(longopt, o->longopt) == 0) goto found; + os++; } if(shortopt) { - fprintf(stderr, "Unknown short option '%c' in \n", shortopt); - return(-1); + fprintf(stderr, "Unknown option '%c'\n", shortopt); } else { - fprintf(stderr, "Unknown long option '%s' in \n", longopt); - return(-1); + fprintf(stderr, "Unknown option '%s'\n", 
longopt); } - return(0); + return; found: @@ -128,10 +126,7 @@ int handle_opt(struct ducrc *ducrc, char shortopt, const char *longopt, const ch fn = o->ptr; fn(val); break; - default: - return(-1); } - return(0); } @@ -141,13 +136,12 @@ int ducrc_read(struct ducrc *ducrc, const char *path) FILE *f = fopen(path, "r"); if(f == NULL) { - //duc_log(NULL, DUC_LOG_DBG, "Not reading configuration from '%s': %s", path, strerror(errno)); + //duc_log(NULL, DUC_LOG_DBG, "Not reading configuration from '%s': %s", path, strerror(errno)); return -1; } - //duc_log(NULL, DUC_LOG_DBG, "Reading configuration from '%s'",path,0); + char section[256] = ""; char buf[256]; - int res; while(fgets(buf, sizeof buf, f) != NULL) { @@ -183,10 +177,7 @@ int ducrc_read(struct ducrc *ducrc, const char *path) *p = '\0'; char *longopt = trim(l); char *val = trim(p + 1); - res = handle_opt(ducrc, 0, longopt, val); - if (res) { - printf(" Error parsing option %s in section %s of %s\n",longopt,section,path); - } + handle_opt(ducrc, 0, longopt, val); continue; } @@ -194,10 +185,7 @@ int ducrc_read(struct ducrc *ducrc, const char *path) char *longopt = trim(l); if(strlen(longopt) > 0) { - res = handle_opt(ducrc, 0, longopt, NULL); - if (res) { - printf(" Error longopt in section %s of %s\n",section,path); - } + handle_opt(ducrc, 0, longopt, NULL); } } } @@ -252,13 +240,12 @@ int ducrc_getopt(struct ducrc *ducrc, int *argc, char **argv[]) int c; int idx; - int res; if(*argc > 1) optind = 2; while( ( c = getopt_long(*argc, *argv, optstr, longopts, &idx)) != -1) { if(c == '?') return -1; - res = handle_opt(ducrc, c, c ? 0 : longopts[idx].name, optarg); + handle_opt(ducrc, c, c ? 
0 : longopts[idx].name, optarg); } *argc -= optind; diff --git a/src/duc/main.c b/src/duc/main.c index 6aaea96..287abfe 100644 --- a/src/duc/main.c +++ b/src/duc/main.c @@ -422,11 +422,7 @@ static void show_version(void) #ifdef ENABLE_UI printf("ui "); #endif - printf(DB_BACKEND); -#ifdef ENABLE_TKRZW_ZSTD - printf(" (zstd)"); -#endif - printf("\n"); + printf(DB_BACKEND "\n"); exit(EXIT_SUCCESS); } diff --git a/src/libduc/buffer.c b/src/libduc/buffer.c index dcce82d..55683da 100644 --- a/src/libduc/buffer.c +++ b/src/libduc/buffer.c @@ -47,7 +47,7 @@ void buffer_free(struct buffer *b) // Add item to buffer, but grow by doubling if needed static int buffer_put(struct buffer *b, const void *data, size_t len) { - if(b->ptr + len <= b->len) { + if(b->ptr + len > b->max) { while(b->len + len > b->max) { b->max *= 2; } @@ -63,7 +63,7 @@ static int buffer_put(struct buffer *b, const void *data, size_t len) static int buffer_get(struct buffer *b, void *data, size_t len) { - if(b->ptr + len <= b->len) { + if(b->ptr <= b->len - len) { memcpy(data, b->data + b->ptr, len); b->ptr += len; return len; @@ -81,7 +81,7 @@ static int buffer_put_varint(struct buffer *b, uint64_t v) return l; } -// See varint.c for the algorithm for encoding integers into 1-9 bytes. 
+ static int buffer_get_varint(struct buffer *b, uint64_t *v) { uint8_t buf[9]; diff --git a/src/libduc/db-tkrzw.c b/src/libduc/db-tkrzw.c index 90840d3..537e3ab 100644 --- a/src/libduc/db-tkrzw.c +++ b/src/libduc/db-tkrzw.c @@ -16,13 +16,6 @@ #include "private.h" #include "db.h" -// Enable compression using ZSTD if available -#ifdef DUC_TKRZW_COMP_ZSTD - #define DUC_TKRZW_REC_COMP "RECORD_COMP_ZSTD" -#else - #define DUC_TKRZW_REC_COMP "NONE" -#endif - struct db { TkrzwDBM* hdb; }; @@ -81,9 +74,7 @@ struct db *db_open(const char *path_db, int flags, duc_errno *e) if (flags & DUC_OPEN_RW) writeable = 1; if (flags & DUC_OPEN_COMPRESS) { /* Do no compression for now, need to update configure tests first */ - char comp[64]; - sprintf(comp,",record_comp_mode=%s",DUC_TKRZW_REC_COMP); - printf("opening tkzrw DB with compression: %s\n",DUC_TKRZW_REC_COMP); + char comp[] = ",record_comp_mode=RECORD_COMP_LZ4"; strcat(options,comp); } diff --git a/src/libduc/db.c b/src/libduc/db.c index c6abbdb..c18425b 100644 --- a/src/libduc/db.c +++ b/src/libduc/db.c @@ -117,27 +117,20 @@ char *duc_db_type_check(const char *path_db) /* read first MAGIC_LEN bytes of file then look for the strings, etc for each type of DB we support. 
*/ size_t len = fread(buf, 1, sizeof(buf),f); - char kyotocabinet[] = { 0x4b,0x43,0x0a,0x0,0x10,0x0e,0x06,0xb4,0x31,0x08,0x0a,0x04,0x00,0x00,0x00,0x00 }; - if (memcmp(buf,kyotocabinet,16) == 0) { - return("kyotocabinet"); + if (strncmp(buf,"Kyoto CaBiNeT",13) == 0) { + return("Kyoto Cabinet"); } if (strncmp(buf,"ToKyO CaBiNeT",13) == 0) { - return("tokyocabinet"); + return("Tokyo Cabinet"); } if (strncmp(buf,"TkrzwHDB",8) == 0) { - return("tkrzw"); + return("Tkrzw HashDBM"); } if (strncmp(buf,"SQLite format 3",15) == 0) { - return("sqlite3"); - } - - char lmdb[] = { 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x08,0x0,0x0,0x0,0x0,0x0, - 0xde,0xc0,0xef,0xbe,0x01,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0 }; - if (memcmp(buf,lmdb,32) == 0) { - return("lmdb"); + return("SQLite3"); } } diff --git a/src/libduc/duc.c b/src/libduc/duc.c index aa232a7..193305d 100644 --- a/src/libduc/duc.c +++ b/src/libduc/duc.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -60,7 +59,6 @@ void duc_set_log_callback(duc *duc, duc_log_callback cb) int duc_open(duc *duc, const char *path_db, duc_open_flags flags) { char tmp[DUC_PATH_MAX]; - int res = 0; /* An empty path means check the ENV path instead */ if(path_db == NULL) { @@ -96,11 +94,7 @@ int duc_open(duc *duc, const char *path_db, duc_open_flags flags) /* Append parent folder */ snprintf(tmp, sizeof tmp, "%s/duc", home); /* Create if needed */ - res = mkdir(tmp, 0700); - if (res != 0) { - duc_log(duc, DUC_LOG_FTL, "Error! Cannot create mkdir \"%s\", %s", tmp, strerror(errno)); - exit(1); - } + mkdir(tmp, 0700); /* Append file to folder*/ snprintf(tmp, sizeof tmp, "%s/duc/duc.db", home); path_db = tmp; @@ -113,11 +107,7 @@ int duc_open(duc *duc, const char *path_db, duc_open_flags flags) /* Append parent folder */ snprintf(tmp, sizeof tmp, "%s/.cache/duc", home); /* Create if needed */ - res = mkdir(tmp, 0700); - if (res != 0) { - duc_log(duc, DUC_LOG_FTL, "Error! 
Cannot create mkdir \"%s\", %s", tmp, strerror(errno)); - exit(1); - } + mkdir(tmp, 0700); /* Append file to folder*/ snprintf(tmp, sizeof tmp, "%s/.cache/duc/duc.db", home); path_db = tmp; @@ -130,18 +120,6 @@ int duc_open(duc *duc, const char *path_db, duc_open_flags flags) return -1; } - // Check that we can handle this Database is what we're - // compiled to support, but only if it exists... - struct stat sb; - int r = stat(path_db,&sb); - if (r == 0) { - char *db_type = duc_db_type_check(path_db); - if (db_type && (strcmp(db_type,DB_BACKEND) != 0)) { - duc_log(duc, DUC_LOG_FTL, "Error opening: %s - unsupported DB type _%s_, duc compiled for %s", path_db, db_type, DB_BACKEND); - return -1; - } - } - duc_log(duc, DUC_LOG_INF, "%s database \"%s\"", (flags & DUC_OPEN_RO) ? "Reading from" : "Writing to", path_db); @@ -156,6 +134,11 @@ int duc_open(duc *duc, const char *path_db, duc_open_flags flags) /* Now we can maybe do some quick checks to see if we * tried to open a non-supported DB type. */ + char *db_type = duc_db_type_check(path_db); + if (db_type && (strcmp(db_type,"unknown") == 0)) { + duc_log(duc, DUC_LOG_FTL, "Error opening: %s - unsupported DB type _%s_, duc compiled for %s", path_db, db_type, DB_BACKEND); + return -1; + } } return 0; } From 47b73e125c900346eeb13f9094c6181e2250d70d Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Sat, 4 Apr 2026 00:22:45 +0200 Subject: [PATCH 27/36] added copyright to scripts --- testing/build-all-backends.sh | 2 ++ testing/test-compare-backends.sh | 2 ++ testing/test-migrator.sh | 2 ++ 3 files changed, 6 insertions(+) diff --git a/testing/build-all-backends.sh b/testing/build-all-backends.sh index 4331866..ce094e5 100755 --- a/testing/build-all-backends.sh +++ b/testing/build-all-backends.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash # +# Copyright (c) 2026 George Ruinelli +# # build-all-backends.sh — Build duc for every supported database backend. 
# # For each backend (tkrzw, tokyocabinet, sqlite3, lmdb, leveldb, kyotocabinet) diff --git a/testing/test-compare-backends.sh b/testing/test-compare-backends.sh index 94d740c..c09a49e 100755 --- a/testing/test-compare-backends.sh +++ b/testing/test-compare-backends.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash # +# Copyright (c) 2026 George Ruinelli +# # test-compare-backends.sh — Index a path with every duc backend and compare JSON output. # # For each duc- binary found in the same directory, this script: diff --git a/testing/test-migrator.sh b/testing/test-migrator.sh index 262dd91..121f2ba 100755 --- a/testing/test-migrator.sh +++ b/testing/test-migrator.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash # +# Copyright (c) 2026 George Ruinelli +# # test_migrate-db-any-to-any.sh — Migrate every duc database in dbs/ to every other backend format. # # For each source database found in testing/dbs/ the script invokes the migrator From 20622ae748bdf375dccff4da56053ea901b4503a Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Sat, 4 Apr 2026 00:33:38 +0200 Subject: [PATCH 28/36] update readme --- migrator/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/migrator/README.md b/migrator/README.md index b9e47bd..6e8a269 100644 --- a/migrator/README.md +++ b/migrator/README.md @@ -116,6 +116,10 @@ filesystem path to the database file (or directory for LevelDB). --to lmdb:/var/cache/duc/duc.lmdb ``` +### Testing + +See the [testing/README.md](../testing/README.md) resp. [testing/test-migrator.sh](../testing/test-migrator.sh) which exercises the migrator against all backend combinations. 
+ --- ## How It Works From 6f5741dd12af23bcbdeeb3d5327ae424d085955d Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Sat, 4 Apr 2026 00:36:55 +0200 Subject: [PATCH 29/36] docs: move db-formats.md to repo root; update references in migrator/README.md and root README.md --- README.md | 5 ++++- migrator/db-formats.md => db-formats.md | 0 migrator/README.md | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) rename migrator/db-formats.md => db-formats.md (100%) diff --git a/README.md b/README.md index 26561e4..074bd9e 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,9 @@ graphs showing you where your bytes are. Check the [Duc homepage](http://duc.zevv.nl) for more information, documentation and news. -![duc gui](/img/palette-rainbow.png) +![duc gui](/img/palette-rainbow.png) + +For a reference of all supported database backend formats (on-disk layout, +compression, tuning, quirks) see [db-formats.md](db-formats.md). diff --git a/migrator/db-formats.md b/db-formats.md similarity index 100% rename from migrator/db-formats.md rename to db-formats.md diff --git a/migrator/README.md b/migrator/README.md index 6e8a269..bb27f64 100644 --- a/migrator/README.md +++ b/migrator/README.md @@ -4,7 +4,7 @@ A standalone command-line tool that converts a duc index database from any supported backend format to any other, without losing data. For a detailed description of each backend's on-disk format, internal -structure, and quirks see **[db-formats.md](db-formats.md)**. +structure, and quirks see **[db-formats.md](../db-formats.md)**. 
--- From fb705e1684bd82c2a534a5cb5ca9a52e50577ccd Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Sat, 4 Apr 2026 00:39:03 +0200 Subject: [PATCH 30/36] docs: rename db-formats.md to DB-FORMATS.md; update all references --- db-formats.md => DB-FORMATS.md | 0 README.md | 2 +- migrator/README.md | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename db-formats.md => DB-FORMATS.md (100%) diff --git a/db-formats.md b/DB-FORMATS.md similarity index 100% rename from db-formats.md rename to DB-FORMATS.md diff --git a/README.md b/README.md index 074bd9e..559a630 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,6 @@ Check the [Duc homepage](http://duc.zevv.nl) for more information, documentation ![duc gui](/img/palette-rainbow.png) For a reference of all supported database backend formats (on-disk layout, -compression, tuning, quirks) see [db-formats.md](db-formats.md). +compression, tuning, quirks) see [DB-FORMATS.md](DB-FORMATS.md). diff --git a/migrator/README.md b/migrator/README.md index bb27f64..3aa9eee 100644 --- a/migrator/README.md +++ b/migrator/README.md @@ -4,7 +4,7 @@ A standalone command-line tool that converts a duc index database from any supported backend format to any other, without losing data. For a detailed description of each backend's on-disk format, internal -structure, and quirks see **[db-formats.md](../db-formats.md)**. +structure, and quirks see **[DB-FORMATS.md](../DB-FORMATS.md)**. 
--- From 3b3486a479d97af39df139ec209b93df5a54d275 Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Sat, 4 Apr 2026 00:39:19 +0200 Subject: [PATCH 31/36] docs: remove DB-FORMATS.md reference from root README --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index 559a630..ad5f158 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,4 @@ Check the [Duc homepage](http://duc.zevv.nl) for more information, documentation ![duc gui](/img/palette-rainbow.png) -For a reference of all supported database backend formats (on-disk layout, -compression, tuning, quirks) see [DB-FORMATS.md](DB-FORMATS.md). - From cf2d51f220b523cead02b2d53a1399018c331d7c Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Sat, 4 Apr 2026 00:40:55 +0200 Subject: [PATCH 32/36] . --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ad5f158..26561e4 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,6 @@ graphs showing you where your bytes are. Check the [Duc homepage](http://duc.zevv.nl) for more information, documentation and news. -![duc gui](/img/palette-rainbow.png) +![duc gui](/img/palette-rainbow.png) From 67c1bc3fdc60eec68e95cd1faa39d6c9d1ef8c86 Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Sat, 4 Apr 2026 00:42:22 +0200 Subject: [PATCH 33/36] update doc --- DB-FORMATS.md | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/DB-FORMATS.md b/DB-FORMATS.md index a620aba..f59abc6 100644 --- a/DB-FORMATS.md +++ b/DB-FORMATS.md @@ -3,6 +3,18 @@ Reference for all database backends supported across duc versions, derived from the source implementations in `src/libduc/db-*.c` and `configure.ac`. 
+ +## Summary Table + +| Backend | File/Dir | Single file | Compression | Version key | Default in | +|----------------|----------|-------------|-------------------|-------------|-------------| +| Tokyo Cabinet | File | Yes | Optional (deflate)| Yes | 1.4.6 | +| Kyoto Cabinet | File | Yes | Always (kct opts) | Yes | — | +| LevelDB | Dir | **No** | Always (Snappy) | No | — | +| SQLite3 | File | Yes | None | No | — | +| LMDB | File | Yes | None | No | — | +| Tkrzw | File | Yes | Optional (ZSTD) | Yes | 1.5.0-rc2 | + --- ## Tokyo Cabinet (`tokyocabinet`) @@ -120,16 +132,3 @@ the source implementations in `src/libduc/db-*.c` and `configure.ac`. the file. - Tkrzw is a successor/spiritual replacement for both Tokyo Cabinet and Kyoto Cabinet, providing a modern hash-based store with better compression options. - ---- - -## Summary Table - -| Backend | File/Dir | Single file | Compression | Version key | Default in | -|----------------|----------|-------------|-------------------|-------------|-------------| -| Tokyo Cabinet | File | Yes | Optional (deflate)| Yes | 1.4.6 | -| Kyoto Cabinet | File | Yes | Always (kct opts) | Yes | — | -| LevelDB | Dir | **No** | Always (Snappy) | No | — | -| SQLite3 | File | Yes | None | No | — | -| LMDB | File | Yes | None | No | — | -| Tkrzw | File | Yes | Optional (ZSTD) | Yes | 1.5.0-rc2 | From 0b0256364e0cc292ad8eae4ebd69e9f1525f24a1 Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Sat, 4 Apr 2026 00:44:33 +0200 Subject: [PATCH 34/36] update doc --- testing/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/README.md b/testing/README.md index 4293e6c..b70cac2 100644 --- a/testing/README.md +++ b/testing/README.md @@ -1,6 +1,6 @@ # duc — multi-backend testing -This directory contains scripts for building, cross-testing, and migrating `duc` databases across all supported backends. +This directory contains scripts for building, cross-testing, and test migration of `duc` databases across all supported backends. 
## Backends From bf88d0c2c8e19e68c5713b4e2aa3805cbfbc5eb9 Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Sat, 4 Apr 2026 00:45:57 +0200 Subject: [PATCH 35/36] update doc --- testing/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/README.md b/testing/README.md index b70cac2..ed13ac9 100644 --- a/testing/README.md +++ b/testing/README.md @@ -1,6 +1,6 @@ # duc — multi-backend testing -This directory contains scripts for building, cross-testing, and test migration of `duc` databases across all supported backends. +This directory contains scripts for building, cross-testing, and testing migration of `duc` databases across all supported backends. ## Backends From 0b9b0d332ff55a771c3887170d1c741edb0c72fd Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Sat, 4 Apr 2026 09:05:56 +0200 Subject: [PATCH 36/36] =?UTF-8?q?revert:=20restore=20duc=20core=20files=20?= =?UTF-8?q?to=20v1.5.0-rc2=20state=20=E2=80=94=20add-migrator-tool=20must?= =?UTF-8?q?=20not=20touch=20duc=20core?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog | 16 ++++++++++++++++ INSTALL | 16 ++++++++++------ configure.ac | 10 ++++++++-- src/duc/cmd-json.c | 25 ++++++++++++++----------- src/duc/cmd-ui.c | 4 ++-- src/duc/ducrc.c | 41 +++++++++++++++++++++++++++-------------- src/duc/main.c | 6 +++++- src/libduc/buffer.c | 6 +++--- src/libduc/db-tkrzw.c | 11 ++++++++++- src/libduc/db.c | 17 ++++++++++++----- src/libduc/duc.c | 31 ++++++++++++++++++++++++------- 11 files changed, 131 insertions(+), 52 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9a2f8a9..a5da9ad 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +1.5.0-rc2 (2025-06-02) + - fix: small update to fix potential CVE in buffer.c + - fix : improve detecting DB types when opening and giving out better + errors. + - fix: Issue 340 - JSON escaping fixes. 
+ - fix: Issue 324 - Support compiling on Alpine Linux 3.20, needs + testing + - Basic compile testing on the following linux distros: + - Rocky Linux 8 + - Rocky Linux 9 + - Debian 12 + - Debian 11 + - Ubuntu 22.04 LTS + - Ubuntu 24.04 LTS + - + 1.5.0-rc1 (2024-09-03) - new: added support for tkrzw backend DB and made it the default - this DB is newer and under active support compared to diff --git a/INSTALL b/INSTALL index 5888b3e..6b5bcbe 100644 --- a/INSTALL +++ b/INSTALL @@ -31,10 +31,12 @@ To get the required dependencies on Debian or Ubuntu, run: build-essential libtkrzw-dev tkrzw-doc tkrzw-utils - On older RHEL or CentOS systems, you need to do: - - $ sudo yum install pango-devel cairo-devel tokyocabinet-devel + On older RHEL or CentOS systems, you need to use tokyocabinet since + we haven't tested tkrzw there yet, and in any case it would have to + be built by hand: + $ sudo yum install pango-devel cairo-devel tokyocabinet-devel + $ ./configure --with-db-backend=tokyocabinet RHEL 8 & 9 / Rockly Linux 8 & 9 / Alma Linux 8 & 9 @@ -45,7 +47,8 @@ To get the required dependencies on Debian or Ubuntu, run: Install tkrzw and other packages: - $ sudo yum install tkrzw tkrzw-devel tkrzw-doc tkrzw-libs pango-devel cairo-devel tokyocabinet-devel + $ sudo yum install tkrzw tkrzw-devel tkrzw-doc tkrzw-libs pango-devel cairo-devel \ + libarchive-devel libzstd-devel Configuration Options @@ -92,12 +95,12 @@ Database backends ----------------- Duc supports various key-value database backends: + - Tkrzw: tkrzw (default as of v1.5.0) - Tokyo Cabinet: tokyocabinet - LevelDB: leveldb - Sqlite3: sqlite3 - Lightning Memory-Mapped Database: lmdb - Kyoto Cabinet: kyotocabinet - - Tkrzw: tkrzw (default as of v1.5.0) Duc now uses Tkrzw by default: the performance is acceptable and it handles extremely large databases of volumes with terabytes of storage @@ -113,7 +116,8 @@ with different architectures. 
Notably, Tokyo Cabinet is built with non-standard options which break compatibility with other linux distributions, even on the same architecture [1]. If you are planning to share databases between different platforms (index machine A, -display on machine B) we recommend using the sqlite3 backend. +display on machine B) we recommend using another DB backend. Reports +of success would be appreciated. Note, Tokyo Cabiner, Kyoto Cabinet, LevelDB and LMDB are all being deprecated from future versions because the lack of development and diff --git a/configure.ac b/configure.ac index d35c843..ad84a3b 100644 --- a/configure.ac +++ b/configure.ac @@ -7,7 +7,7 @@ AC_PREREQ([2.13]) -AC_INIT([duc], [1.5.0-rc1], [duc@zevv.nl]) +AC_INIT([duc], [1.5.0-rc2], [duc@zevv.nl]) LIB_CURRENT=1 LIB_REVISION=0 @@ -83,7 +83,7 @@ case "${with_db_backend}" in AC_DEFINE([ENABLE_TKRZW], [1], [Enable tkrzw db backend]) ], [ AC_MSG_ERROR(Unable to find tkrzw) ]) AC_SUBST([TKRZW_LIBS]) -p AC_SUBST([TKRZW_CFLAGS]) + AC_SUBST([TKRZW_CFLAGS]) ;; leveldb) AC_CHECK_LIB([leveldb], [leveldb_open]) @@ -113,6 +113,11 @@ esac AC_DEFINE_UNQUOTED(DB_BACKEND, ["${with_db_backend}"], [Database backend]) +PKG_CHECK_MODULES([ZSTD],[libarchive]) +AC_DEFINE([DUC_TKRZW_COMP_ZSTD], ["RECORD_COMP_ZSTD"], ["Enable tkrzw db zstd comppression"]) +AC_DEFINE_UNQUOTED(TKRZW_ZSTD, ["${with_tkrzw_zstd}"], [tkrzw zstd compression support]) +AC_DEFINE([ENABLE_TKRZW_ZSTD], [1], [tkrzw with zstd]) + if test "${enable_cairo}" = "yes"; then PKG_CHECK_MODULES([CAIRO], [cairo],, [AC_MSG_ERROR([ @@ -204,6 +209,7 @@ AC_MSG_RESULT([ - Package version: $PACKAGE $VERSION - Prefix: ${prefix} - Database backend: ${with_db_backend} + - tkrzw ZSTD compression: ${with_tkrzw_zstd} - X11 support: ${enable_x11} - OpenGL support: ${enable_opengl} - UI (ncurses) support: ${enable_ui} diff --git a/src/duc/cmd-json.c b/src/duc/cmd-json.c index be26851..c279516 100644 --- a/src/duc/cmd-json.c +++ b/src/duc/cmd-json.c @@ -28,19 +28,22 @@ static void 
indent(int n) } } - static void print_escaped(const char *s) { - while(*s) { - switch(*s) { - case '"': printf("\""); break; - case '\t': putchar('\t'); break; - case '\n': putchar('\n'); break; - case '\r': putchar('\r'); break; - default: putchar(*s); break; - } - s++; - } + const char *p = s; + + while(*p) { + if(*p == '"' ) printf("\\\""); + else if(*p == '\\') printf("\\\\"); + else if(*p == '\b') printf("\\b"); + else if(*p == '\f') printf("\\f"); + else if(*p == '\n') printf("\\n"); + else if(*p == '\r') printf("\\r"); + else if(*p == '\t') printf("\\t"); + else if(*p < 0x20) printf("\\u%04x", *p); + else putchar(*p); + p++; + } } diff --git a/src/duc/cmd-ui.c b/src/duc/cmd-ui.c index ce28046..ce3d36f 100644 --- a/src/duc/cmd-ui.c +++ b/src/duc/cmd-ui.c @@ -185,14 +185,14 @@ static duc_dir *do_dir(duc *duc, duc_dir *dir, int depth) off_t size = duc_get_size(&e->size, st);; - size_t max_size_len = opt_bytes ? 12 : 7; + int max_size_len = opt_bytes ? 12 : 7; char class = duc_file_type_char(e->type); char siz[32]; duc_human_size(&e->size, st, opt_bytes, siz, sizeof siz); if(cur != i) attrset(attr_size); - printw("%*s", max_size_len, siz); + printw("%*lu", max_size_len, (size_t) siz); printw(" "); char *p = e->name; diff --git a/src/duc/ducrc.c b/src/duc/ducrc.c index 76ad69b..4603493 100644 --- a/src/duc/ducrc.c +++ b/src/duc/ducrc.c @@ -68,7 +68,7 @@ static char *trim(char *s) } -static void handle_opt(struct ducrc *ducrc, char shortopt, const char *longopt, const char *val) +int handle_opt(struct ducrc *ducrc, char shortopt, const char *longopt, const char *val) { struct ducrc_option **os = ducrc->option_list; struct ducrc_option *o = NULL; @@ -80,21 +80,23 @@ static void handle_opt(struct ducrc *ducrc, char shortopt, const char *longopt, void (*fn)(const char *val); /* Find option */ - + for(i=0; inoptions; i++) { - o = *os; - if(shortopt && shortopt == o->shortopt) goto found; - if(longopt && strcmp(longopt, o->longopt) == 0) goto found; - os++; + o = *os; 
+ if(shortopt && shortopt == o->shortopt) goto found; + if(longopt && strcmp(longopt, o->longopt) == 0) goto found; + os++; } if(shortopt) { - fprintf(stderr, "Unknown option '%c'\n", shortopt); + fprintf(stderr, "Unknown short option '%c' in \n", shortopt); + return(-1); } else { - fprintf(stderr, "Unknown option '%s'\n", longopt); + fprintf(stderr, "Unknown long option '%s' in \n", longopt); + return(-1); } - return; + return(0); found: @@ -126,7 +128,10 @@ static void handle_opt(struct ducrc *ducrc, char shortopt, const char *longopt, fn = o->ptr; fn(val); break; + default: + return(-1); } + return(0); } @@ -136,12 +141,13 @@ int ducrc_read(struct ducrc *ducrc, const char *path) FILE *f = fopen(path, "r"); if(f == NULL) { - //duc_log(NULL, DUC_LOG_DBG, "Not reading configuration from '%s': %s", path, strerror(errno)); + //duc_log(NULL, DUC_LOG_DBG, "Not reading configuration from '%s': %s", path, strerror(errno)); return -1; } - + //duc_log(NULL, DUC_LOG_DBG, "Reading configuration from '%s'",path,0); char section[256] = ""; char buf[256]; + int res; while(fgets(buf, sizeof buf, f) != NULL) { @@ -177,7 +183,10 @@ int ducrc_read(struct ducrc *ducrc, const char *path) *p = '\0'; char *longopt = trim(l); char *val = trim(p + 1); - handle_opt(ducrc, 0, longopt, val); + res = handle_opt(ducrc, 0, longopt, val); + if (res) { + printf(" Error parsing option %s in section %s of %s\n",longopt,section,path); + } continue; } @@ -185,7 +194,10 @@ int ducrc_read(struct ducrc *ducrc, const char *path) char *longopt = trim(l); if(strlen(longopt) > 0) { - handle_opt(ducrc, 0, longopt, NULL); + res = handle_opt(ducrc, 0, longopt, NULL); + if (res) { + printf(" Error longopt in section %s of %s\n",section,path); + } } } } @@ -240,12 +252,13 @@ int ducrc_getopt(struct ducrc *ducrc, int *argc, char **argv[]) int c; int idx; + int res; if(*argc > 1) optind = 2; while( ( c = getopt_long(*argc, *argv, optstr, longopts, &idx)) != -1) { if(c == '?') return -1; - handle_opt(ducrc, c, c ? 
0 : longopts[idx].name, optarg); + res = handle_opt(ducrc, c, c ? 0 : longopts[idx].name, optarg); } *argc -= optind; diff --git a/src/duc/main.c b/src/duc/main.c index 287abfe..6aaea96 100644 --- a/src/duc/main.c +++ b/src/duc/main.c @@ -422,7 +422,11 @@ static void show_version(void) #ifdef ENABLE_UI printf("ui "); #endif - printf(DB_BACKEND "\n"); + printf(DB_BACKEND); +#ifdef ENABLE_TKRZW_ZSTD + printf(" (zstd)"); +#endif + printf("\n"); exit(EXIT_SUCCESS); } diff --git a/src/libduc/buffer.c b/src/libduc/buffer.c index 55683da..dcce82d 100644 --- a/src/libduc/buffer.c +++ b/src/libduc/buffer.c @@ -47,7 +47,7 @@ void buffer_free(struct buffer *b) // Add item to buffer, but grow by doubling if needed static int buffer_put(struct buffer *b, const void *data, size_t len) { - if(b->ptr + len > b->max) { + if(b->ptr + len <= b->len) { while(b->len + len > b->max) { b->max *= 2; } @@ -63,7 +63,7 @@ static int buffer_put(struct buffer *b, const void *data, size_t len) static int buffer_get(struct buffer *b, void *data, size_t len) { - if(b->ptr <= b->len - len) { + if(b->ptr + len <= b->len) { memcpy(data, b->data + b->ptr, len); b->ptr += len; return len; @@ -81,7 +81,7 @@ static int buffer_put_varint(struct buffer *b, uint64_t v) return l; } - +// See varint.c for the algorithm for encoding integers into 1-9 bytes. 
static int buffer_get_varint(struct buffer *b, uint64_t *v) { uint8_t buf[9]; diff --git a/src/libduc/db-tkrzw.c b/src/libduc/db-tkrzw.c index 537e3ab..90840d3 100644 --- a/src/libduc/db-tkrzw.c +++ b/src/libduc/db-tkrzw.c @@ -16,6 +16,13 @@ #include "private.h" #include "db.h" +// Enable compression using ZSTD if available +#ifdef DUC_TKRZW_COMP_ZSTD + #define DUC_TKRZW_REC_COMP "RECORD_COMP_ZSTD" +#else + #define DUC_TKRZW_REC_COMP "NONE" +#endif + struct db { TkrzwDBM* hdb; }; @@ -74,7 +81,9 @@ struct db *db_open(const char *path_db, int flags, duc_errno *e) if (flags & DUC_OPEN_RW) writeable = 1; if (flags & DUC_OPEN_COMPRESS) { /* Do no compression for now, need to update configure tests first */ - char comp[] = ",record_comp_mode=RECORD_COMP_LZ4"; + char comp[64]; + sprintf(comp,",record_comp_mode=%s",DUC_TKRZW_REC_COMP); + printf("opening tkzrw DB with compression: %s\n",DUC_TKRZW_REC_COMP); strcat(options,comp); } diff --git a/src/libduc/db.c b/src/libduc/db.c index c18425b..c6abbdb 100644 --- a/src/libduc/db.c +++ b/src/libduc/db.c @@ -117,20 +117,27 @@ char *duc_db_type_check(const char *path_db) /* read first MAGIC_LEN bytes of file then look for the strings, etc for each type of DB we support. 
*/ size_t len = fread(buf, 1, sizeof(buf),f); - if (strncmp(buf,"Kyoto CaBiNeT",13) == 0) { - return("Kyoto Cabinet"); + char kyotocabinet[] = { 0x4b,0x43,0x0a,0x0,0x10,0x0e,0x06,0xb4,0x31,0x08,0x0a,0x04,0x00,0x00,0x00,0x00 }; + if (memcmp(buf,kyotocabinet,16) == 0) { + return("kyotocabinet"); } if (strncmp(buf,"ToKyO CaBiNeT",13) == 0) { - return("Tokyo Cabinet"); + return("tokyocabinet"); } if (strncmp(buf,"TkrzwHDB",8) == 0) { - return("Tkrzw HashDBM"); + return("tkrzw"); } if (strncmp(buf,"SQLite format 3",15) == 0) { - return("SQLite3"); + return("sqlite3"); + } + + char lmdb[] = { 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x08,0x0,0x0,0x0,0x0,0x0, + 0xde,0xc0,0xef,0xbe,0x01,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0 }; + if (memcmp(buf,lmdb,32) == 0) { + return("lmdb"); } } diff --git a/src/libduc/duc.c b/src/libduc/duc.c index 193305d..aa232a7 100644 --- a/src/libduc/duc.c +++ b/src/libduc/duc.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -59,6 +60,7 @@ void duc_set_log_callback(duc *duc, duc_log_callback cb) int duc_open(duc *duc, const char *path_db, duc_open_flags flags) { char tmp[DUC_PATH_MAX]; + int res = 0; /* An empty path means check the ENV path instead */ if(path_db == NULL) { @@ -94,7 +96,11 @@ int duc_open(duc *duc, const char *path_db, duc_open_flags flags) /* Append parent folder */ snprintf(tmp, sizeof tmp, "%s/duc", home); /* Create if needed */ - mkdir(tmp, 0700); + res = mkdir(tmp, 0700); + if (res != 0) { + duc_log(duc, DUC_LOG_FTL, "Error! Cannot create mkdir \"%s\", %s", tmp, strerror(errno)); + exit(1); + } /* Append file to folder*/ snprintf(tmp, sizeof tmp, "%s/duc/duc.db", home); path_db = tmp; @@ -107,7 +113,11 @@ int duc_open(duc *duc, const char *path_db, duc_open_flags flags) /* Append parent folder */ snprintf(tmp, sizeof tmp, "%s/.cache/duc", home); /* Create if needed */ - mkdir(tmp, 0700); + res = mkdir(tmp, 0700); + if (res != 0) { + duc_log(duc, DUC_LOG_FTL, "Error! 
Cannot create mkdir \"%s\", %s", tmp, strerror(errno)); + exit(1); + } /* Append file to folder*/ snprintf(tmp, sizeof tmp, "%s/.cache/duc/duc.db", home); path_db = tmp; @@ -120,6 +130,18 @@ int duc_open(duc *duc, const char *path_db, duc_open_flags flags) return -1; } + // Check that we can handle this Database is what we're + // compiled to support, but only if it exists... + struct stat sb; + int r = stat(path_db,&sb); + if (r == 0) { + char *db_type = duc_db_type_check(path_db); + if (db_type && (strcmp(db_type,DB_BACKEND) != 0)) { + duc_log(duc, DUC_LOG_FTL, "Error opening: %s - unsupported DB type _%s_, duc compiled for %s", path_db, db_type, DB_BACKEND); + return -1; + } + } + duc_log(duc, DUC_LOG_INF, "%s database \"%s\"", (flags & DUC_OPEN_RO) ? "Reading from" : "Writing to", path_db); @@ -134,11 +156,6 @@ int duc_open(duc *duc, const char *path_db, duc_open_flags flags) /* Now we can maybe do some quick checks to see if we * tried to open a non-supported DB type. */ - char *db_type = duc_db_type_check(path_db); - if (db_type && (strcmp(db_type,"unknown") == 0)) { - duc_log(duc, DUC_LOG_FTL, "Error opening: %s - unsupported DB type _%s_, duc compiled for %s", path_db, db_type, DB_BACKEND); - return -1; - } } return 0; }