diff --git a/Makefile.cbm b/Makefile.cbm
index 82821b8..a0515de 100644
--- a/Makefile.cbm
+++ b/Makefile.cbm
@@ -148,6 +148,7 @@ MCP_SRCS = src/mcp/mcp.c
 # Discover module (new)
 DISCOVER_SRCS = \
 	src/discover/language.c \
+	src/discover/userconfig.c \
 	src/discover/gitignore.c \
 	src/discover/discover.c
 
@@ -259,6 +260,7 @@ TEST_MCP_SRCS = \
 
 TEST_DISCOVER_SRCS = \
 	tests/test_language.c \
+	tests/test_userconfig.c \
 	tests/test_gitignore.c \
 	tests/test_discover.c
 
diff --git a/src/discover/language.c b/src/discover/language.c
index b7eb7e4..ea07e9c 100644
--- a/src/discover/language.c
+++ b/src/discover/language.c
@@ -3,8 +3,11 @@
  *
  * Maps file extensions and special filenames to CBMLanguage enum values.
  * Handles .m disambiguation (Objective-C vs Magma vs MATLAB).
+ * Consults the process-global user config (set via cbm_set_user_lang_config)
+ * before the built-in lookup table.
  */
 #include "discover/discover.h"
+#include "discover/userconfig.h"
 #include "cbm.h" // CBMLanguage, CBM_LANG_*
 
 #include <string.h>
@@ -354,6 +357,15 @@ CBMLanguage cbm_language_for_extension(const char *ext) {
         return CBM_LANG_COUNT;
     }
 
+    /* Check user-defined overrides first */
+    const cbm_userconfig_t *ucfg = cbm_get_user_lang_config();
+    if (ucfg) {
+        CBMLanguage ulang = cbm_userconfig_lookup(ucfg, ext);
+        if (ulang != CBM_LANG_COUNT) {
+            return ulang;
+        }
+    }
+
     for (size_t i = 0; i < EXT_TABLE_SIZE; i++) {
         if (strcmp(EXT_TABLE[i].ext, ext) == 0) {
             return EXT_TABLE[i].language;
@@ -374,13 +386,30 @@ CBMLanguage cbm_language_for_filename(const char *filename) {
         }
     }
 
-    /* Fall back to extension-based lookup */
-    const char *dot = strrchr(filename, '.');
-    if (dot) {
-        return cbm_language_for_extension(dot);
+    /* Fall back to extension-based lookup.
+     * For compound extensions (e.g. ".blade.php") defined in the user config,
+     * scan from the first dot in the basename toward the last, checking user
+     * config at each position. Built-in extensions use the last dot only. */
+    const char *last_dot = strrchr(filename, '.');
+    if (!last_dot) {
+        return CBM_LANG_COUNT;
     }
 
-    return CBM_LANG_COUNT;
+    /* Probe user config for compound extensions (e.g. ".blade.php"). */
+    const cbm_userconfig_t *ucfg = cbm_get_user_lang_config();
+    if (ucfg) {
+        const char *p = strchr(filename, '.');
+        while (p && p < last_dot) {
+            CBMLanguage lang = cbm_userconfig_lookup(ucfg, p);
+            if (lang != CBM_LANG_COUNT) {
+                return lang;
+            }
+            p = strchr(p + 1, '.');
+        }
+    }
+
+    /* Standard single-extension lookup (built-ins + user overrides). */
+    return cbm_language_for_extension(last_dot);
 }
 
 const char *cbm_language_name(CBMLanguage lang) {
diff --git a/src/discover/userconfig.c b/src/discover/userconfig.c
new file mode 100644
index 0000000..d9dd749
--- /dev/null
+++ b/src/discover/userconfig.c
@@ -0,0 +1,392 @@
+/*
+ * userconfig.c — User-defined extension→language mappings.
+ *
+ * Reads extra_extensions from:
+ *   Global:  $XDG_CONFIG_HOME/codebase-memory-mcp/config.json
+ *            (falls back to ~/.config/codebase-memory-mcp/config.json)
+ *   Project: {repo_root}/.codebase-memory.json
+ *
+ * Project config wins over global. Unknown language values warn and are
+ * skipped (fail-open). Missing files are silently ignored.
+ */
+#include "discover/userconfig.h"
+#include "foundation/log.h"
+
+#include <yyjson/yyjson.h>
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* ── Process-global user config pointer ──────────────────────────── */
+
+static const cbm_userconfig_t *g_userconfig = NULL;
+
+void cbm_set_user_lang_config(const cbm_userconfig_t *cfg) {
+    g_userconfig = cfg;
+}
+
+const cbm_userconfig_t *cbm_get_user_lang_config(void) {
+    return g_userconfig;
+}
+
+/* ── Language name → enum table ──────────────────────────────────── */
+
+/*
+ * Reverse-mapping from lowercase language name strings to CBMLanguage.
+ * Covers all names exposed by cbm_language_name() plus common aliases.
+ */
+typedef struct {
+    const char *name; /* lowercase */
+    CBMLanguage lang;
+} lang_name_entry_t;
+
+static const lang_name_entry_t LANG_NAME_TABLE[] = {
+    {"go", CBM_LANG_GO},
+    {"python", CBM_LANG_PYTHON},
+    {"javascript", CBM_LANG_JAVASCRIPT},
+    {"typescript", CBM_LANG_TYPESCRIPT},
+    {"tsx", CBM_LANG_TSX},
+    {"rust", CBM_LANG_RUST},
+    {"java", CBM_LANG_JAVA},
+    {"c++", CBM_LANG_CPP},
+    {"cpp", CBM_LANG_CPP},
+    {"c#", CBM_LANG_CSHARP},
+    {"csharp", CBM_LANG_CSHARP},
+    {"php", CBM_LANG_PHP},
+    {"lua", CBM_LANG_LUA},
+    {"scala", CBM_LANG_SCALA},
+    {"kotlin", CBM_LANG_KOTLIN},
+    {"ruby", CBM_LANG_RUBY},
+    {"c", CBM_LANG_C},
+    {"bash", CBM_LANG_BASH},
+    {"sh", CBM_LANG_BASH},
+    {"zig", CBM_LANG_ZIG},
+    {"elixir", CBM_LANG_ELIXIR},
+    {"haskell", CBM_LANG_HASKELL},
+    {"ocaml", CBM_LANG_OCAML},
+    {"objective-c", CBM_LANG_OBJC},
+    {"objc", CBM_LANG_OBJC},
+    {"swift", CBM_LANG_SWIFT},
+    {"dart", CBM_LANG_DART},
+    {"perl", CBM_LANG_PERL},
+    {"groovy", CBM_LANG_GROOVY},
+    {"erlang", CBM_LANG_ERLANG},
+    {"r", CBM_LANG_R},
+    {"html", CBM_LANG_HTML},
+    {"css", CBM_LANG_CSS},
+    {"scss", CBM_LANG_SCSS},
+    {"yaml", CBM_LANG_YAML},
+    {"toml", CBM_LANG_TOML},
+    {"hcl", CBM_LANG_HCL},
+    {"terraform", CBM_LANG_HCL},
+    {"sql", CBM_LANG_SQL},
+    {"dockerfile", CBM_LANG_DOCKERFILE},
+    {"clojure", CBM_LANG_CLOJURE},
+    {"f#", CBM_LANG_FSHARP},
+    {"fsharp", CBM_LANG_FSHARP},
+    {"julia", CBM_LANG_JULIA},
+    {"vimscript", CBM_LANG_VIMSCRIPT},
+    {"nix", CBM_LANG_NIX},
+    {"common lisp", CBM_LANG_COMMONLISP},
+    {"commonlisp", CBM_LANG_COMMONLISP},
+    {"lisp", CBM_LANG_COMMONLISP},
+    {"elm", CBM_LANG_ELM},
+    {"fortran", CBM_LANG_FORTRAN},
+    {"cuda", CBM_LANG_CUDA},
+    {"cobol", CBM_LANG_COBOL},
+    {"verilog", CBM_LANG_VERILOG},
+    {"emacs lisp", CBM_LANG_EMACSLISP},
+    {"emacslisp", CBM_LANG_EMACSLISP},
+    {"json", CBM_LANG_JSON},
+    {"xml", CBM_LANG_XML},
+    {"markdown", CBM_LANG_MARKDOWN},
+    {"makefile", CBM_LANG_MAKEFILE},
+    {"cmake", CBM_LANG_CMAKE},
+    {"protobuf", CBM_LANG_PROTOBUF},
+    {"graphql", CBM_LANG_GRAPHQL},
+    {"vue", CBM_LANG_VUE},
+    {"svelte", CBM_LANG_SVELTE},
+    {"meson", CBM_LANG_MESON},
+    {"glsl", CBM_LANG_GLSL},
+    {"ini", CBM_LANG_INI},
+    {"matlab", CBM_LANG_MATLAB},
+    {"lean", CBM_LANG_LEAN},
+    {"form", CBM_LANG_FORM},
+    {"magma", CBM_LANG_MAGMA},
+    {"wolfram", CBM_LANG_WOLFRAM},
+};
+
+#define LANG_NAME_TABLE_SIZE (sizeof(LANG_NAME_TABLE) / sizeof(LANG_NAME_TABLE[0]))
+
+/*
+ * Parse a language string (case-insensitive) to a CBMLanguage enum.
+ * Returns CBM_LANG_COUNT if the string is not recognized.
+ */
+static CBMLanguage lang_from_string(const char *s) {
+    if (!s || !s[0]) {
+        return CBM_LANG_COUNT;
+    }
+
+    /* Build a lowercase copy for comparison */
+    char lower[64];
+    size_t i;
+    for (i = 0; i < sizeof(lower) - 1 && s[i]; i++) {
+        lower[i] = (char)tolower((unsigned char)s[i]);
+    }
+    lower[i] = '\0';
+
+    for (size_t j = 0; j < LANG_NAME_TABLE_SIZE; j++) {
+        if (strcmp(LANG_NAME_TABLE[j].name, lower) == 0) {
+            return LANG_NAME_TABLE[j].lang;
+        }
+    }
+    return CBM_LANG_COUNT;
+}
+
+/* ── Config directory helper ─────────────────────────────────────── */
+
+/*
+ * Get the XDG config dir for codebase-memory-mcp.
+ * Writes "<config-home>/codebase-memory-mcp" into buf (up to bufsz bytes).
+ * Uses $XDG_CONFIG_HOME if set, else ~/.config.
+ */
+static void cbm_app_config_dir(char *buf, size_t bufsz) {
+    // NOLINTNEXTLINE(concurrency-mt-unsafe) — called before worker threads
+    const char *xdg = getenv("XDG_CONFIG_HOME");
+    if (xdg && xdg[0]) {
+        snprintf(buf, bufsz, "%s/codebase-memory-mcp", xdg);
+    } else {
+        const char *home = getenv("HOME"); // NOLINT(concurrency-mt-unsafe)
+        if (!home || !home[0]) {
+            home = "/tmp";
+        }
+        snprintf(buf, bufsz, "%s/.config/codebase-memory-mcp", home);
+    }
+}
+
+/* ── JSON parsing ────────────────────────────────────────────────── */
+
+/*
+ * Parse extra_extensions from a yyjson object root.
+ * Appends valid entries to *entries / *count (growing via realloc).
+ * Callers pass the global file first and the project file second, so project
+ * entries land after global ones and a later dedup pass can prefer them.
+ *
+ * Returns 0 on success, -1 on alloc failure.
+ */
+static int parse_extra_extensions(yyjson_val *root, cbm_userext_t **entries, int *count,
+                                  const char *source_label) {
+    if (!yyjson_is_obj(root)) {
+        cbm_log_warn("userconfig.bad_root", "file", source_label);
+        return 0;
+    }
+
+    yyjson_val *extra = yyjson_obj_get(root, "extra_extensions");
+    if (!extra) {
+        return 0; /* key absent — fine */
+    }
+    if (!yyjson_is_obj(extra)) {
+        cbm_log_warn("userconfig.bad_extra_extensions", "file", source_label);
+        return 0;
+    }
+
+    yyjson_obj_iter iter;
+    yyjson_obj_iter_init(extra, &iter);
+    yyjson_val *key;
+    while ((key = yyjson_obj_iter_next(&iter)) != NULL) {
+        yyjson_val *val = yyjson_obj_iter_get_val(key);
+
+        const char *ext_str = yyjson_get_str(key);
+        const char *lang_str = yyjson_get_str(val);
+
+        if (!ext_str || !lang_str) {
+            cbm_log_warn("userconfig.skip_non_string", "file", source_label);
+            continue;
+        }
+
+        /* Extension must start with '.' */
+        if (ext_str[0] != '.') {
+            cbm_log_warn("userconfig.skip_bad_ext", "file", source_label, "ext", ext_str);
+            continue;
+        }
+
+        CBMLanguage lang = lang_from_string(lang_str);
+        if (lang == CBM_LANG_COUNT) {
+            cbm_log_warn("userconfig.unknown_lang", "file", source_label, "lang", lang_str);
+            continue; /* fail-open: skip unknown languages */
+        }
+
+        /* Grow the array by one slot; *entries stays valid if realloc fails */
+        cbm_userext_t *tmp =
+            realloc(*entries, (size_t)(*count + 1) * sizeof(cbm_userext_t));
+        if (!tmp) {
+            return -1;
+        }
+        *entries = tmp;
+
+        char *ext_copy = strdup(ext_str);
+        if (!ext_copy) {
+            return -1;
+        }
+
+        (*entries)[*count].ext = ext_copy;
+        (*entries)[*count].lang = lang;
+        (*count)++;
+    }
+    return 0;
+}
+
+/*
+ * Read a JSON file and parse extra_extensions from it.
+ * Silently ignores missing files. Logs warnings for corrupt JSON.
+ * Returns 0 on success (or absent file), -1 on alloc failure.
+ */
+static int load_config_file(const char *path, cbm_userext_t **entries, int *count) {
+    FILE *f = fopen(path, "rb");
+    if (!f) {
+        return 0; /* file absent — silently ignore */
+    }
+
+    fseek(f, 0, SEEK_END);
+    long len = ftell(f);
+    fseek(f, 0, SEEK_SET);
+
+    if (len <= 0 || len > 65536) {
+        fclose(f);
+        if (len > 65536) {
+            cbm_log_warn("userconfig.file_too_large", "path", path);
+        }
+        return 0;
+    }
+
+    char *buf = malloc((size_t)len + 1);
+    if (!buf) {
+        fclose(f);
+        return -1;
+    }
+
+    size_t nread = fread(buf, 1, (size_t)len, f);
+    fclose(f);
+    buf[nread] = '\0';
+
+    yyjson_doc *doc = yyjson_read(buf, nread, 0);
+    free(buf);
+
+    if (!doc) {
+        cbm_log_warn("userconfig.corrupt_json", "path", path);
+        return 0; /* corrupt JSON — warned above, then ignored (fail-open) */
+    }
+
+    yyjson_val *root = yyjson_doc_get_root(doc);
+    int rc = parse_extra_extensions(root, entries, count, path);
+    yyjson_doc_free(doc);
+    return rc;
+}
+
+/* ── Public API ──────────────────────────────────────────────────── */
+
+cbm_userconfig_t *cbm_userconfig_load(const char *repo_path) {
+    cbm_userconfig_t *cfg = calloc(1, sizeof(cbm_userconfig_t));
+    if (!cfg) {
+        return NULL;
+    }
+
+    cbm_userext_t *entries = NULL;
+    int count = 0;
+
+    /* ── Step 1: Load global config ── */
+    char global_dir[1024];
+    cbm_app_config_dir(global_dir, sizeof(global_dir));
+
+    char global_path[1280];
+    snprintf(global_path, sizeof(global_path), "%s/config.json", global_dir);
+
+    if (load_config_file(global_path, &entries, &count) != 0) {
+        for (int i = 0; i < count; i++) {
+            free(entries[i].ext);
+        }
+        free(entries);
+        free(cfg);
+        return NULL;
+    }
+
+    int global_count = count; /* entries[0..global_count) are from global */
+
+    /* ── Step 2: Load project config ── */
+    if (repo_path && repo_path[0]) {
+        char project_path[1280];
+        snprintf(project_path, sizeof(project_path), "%s/.codebase-memory.json", repo_path);
+
+        if (load_config_file(project_path, &entries, &count) != 0) {
+            /* Free already-allocated entries */
+            for (int i = 0; i < count; i++) {
+                free(entries[i].ext);
+            }
+            free(entries);
+            free(cfg);
+            return NULL;
+        }
+    }
+
+    /*
+     * ── Step 3: Dedup — project entries win over global ──
+     *
+     * For any extension that appears in both global (indices 0..global_count)
+     * and project (indices global_count..count), remove the global entry by
+     * replacing it with the last global entry (order-insensitive dedup).
+     */
+    for (int p = global_count; p < count; p++) {
+        for (int g = 0; g < global_count; g++) {
+            if (entries[g].ext && strcmp(entries[g].ext, entries[p].ext) == 0) {
+                /* Remove global entry: overwrite with last global entry */
+                free(entries[g].ext);
+                entries[g] = entries[global_count - 1];
+                entries[global_count - 1].ext = NULL; /* mark as consumed */
+                global_count--;
+                /* No break: global may repeat this ext (duplicate JSON keys). */
+                g--; /* re-check this index: it now holds a different entry */
+            }
+        }
+    }
+
+    /*
+     * Compact: remove any NULL-ext slots left by the dedup step.
+     * (Those are the consumed "last global" entries.)
+     */
+    int write_idx = 0;
+    for (int i = 0; i < count; i++) {
+        if (entries[i].ext != NULL) {
+            entries[write_idx++] = entries[i];
+        }
+    }
+    count = write_idx;
+
+    cfg->entries = entries;
+    cfg->count = count;
+    return cfg;
+}
+
+CBMLanguage cbm_userconfig_lookup(const cbm_userconfig_t *cfg, const char *ext) {
+    if (!cfg || !ext || !ext[0]) {
+        return CBM_LANG_COUNT;
+    }
+    for (int i = 0; i < cfg->count; i++) {
+        if (cfg->entries[i].ext && strcmp(cfg->entries[i].ext, ext) == 0) {
+            return cfg->entries[i].lang;
+        }
+    }
+    return CBM_LANG_COUNT;
+}
+
+void cbm_userconfig_free(cbm_userconfig_t *cfg) {
+    if (!cfg) {
+        return;
+    }
+    for (int i = 0; i < cfg->count; i++) {
+        free(cfg->entries[i].ext);
+    }
+    free(cfg->entries);
+    free(cfg);
+}
diff --git a/src/discover/userconfig.h b/src/discover/userconfig.h
new file mode 100644
index 0000000..c45777f
--- /dev/null
+++ b/src/discover/userconfig.h
@@ -0,0 +1,72 @@
+/*
+ * userconfig.h — User-defined file extension → language mappings.
+ *
+ * Reads extra_extensions from two optional JSON config files:
+ *   Global:  $XDG_CONFIG_HOME/codebase-memory-mcp/config.json
+ *            (falls back to ~/.config/codebase-memory-mcp/config.json)
+ *   Project: {repo_root}/.codebase-memory.json
+ *
+ * Project config wins over global. Unknown language values warn and are
+ * skipped (fail-open). Missing files are silently ignored.
+ *
+ * Format:
+ *   {"extra_extensions": {".blade.php": "php", ".mjs": "javascript"}}
+ *
+ * The language string matching is case-insensitive.
+ */
+#ifndef CBM_USERCONFIG_H
+#define CBM_USERCONFIG_H
+
+#include "cbm.h" /* CBMLanguage */
+
+/* ── Types ──────────────────────────────────────────────────────── */
+
+typedef struct {
+    char *ext;        /* file extension including dot, e.g. ".blade.php" */
+    CBMLanguage lang; /* resolved language enum */
+} cbm_userext_t;
+
+typedef struct {
+    cbm_userext_t *entries; /* heap-allocated array */
+    int count;              /* number of entries */
+} cbm_userconfig_t;
+
+/* ── API ────────────────────────────────────────────────────────── */
+
+/*
+ * Load user config from global + project files, merge (project wins).
+ * repo_path: absolute path to the repository root (for project config).
+ * Returns a heap-allocated cbm_userconfig_t (caller must free via
+ * cbm_userconfig_free). Returns NULL only on allocation failure.
+ * Missing config files are silently ignored.
+ */
+cbm_userconfig_t *cbm_userconfig_load(const char *repo_path);
+
+/*
+ * Look up a file extension in the user config.
+ * ext: extension including dot, e.g. ".blade.php"
+ * Returns the mapped CBMLanguage, or CBM_LANG_COUNT if not found.
+ */
+CBMLanguage cbm_userconfig_lookup(const cbm_userconfig_t *cfg, const char *ext);
+
+/* Free a cbm_userconfig_t returned by cbm_userconfig_load. NULL-safe. */
+void cbm_userconfig_free(cbm_userconfig_t *cfg);
+
+/* ── Integration hook ───────────────────────────────────────────── */
+
+/*
+ * Set the process-global user config that cbm_language_for_extension()
+ * will consult before the built-in table.
+ * cfg may be NULL to clear the override.
+ * Not thread-safe — call before spawning worker threads.
+ */
+void cbm_set_user_lang_config(const cbm_userconfig_t *cfg);
+
+/*
+ * Get the currently active process-global user config.
+ * Returns NULL if none has been set.
+ * Called internally by cbm_language_for_extension().
+ */
+const cbm_userconfig_t *cbm_get_user_lang_config(void);
+
+#endif /* CBM_USERCONFIG_H */
diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c
index f5b7510..b553171 100644
--- a/src/pipeline/pipeline.c
+++ b/src/pipeline/pipeline.c
@@ -17,6 +17,7 @@
 #include "graph_buffer/graph_buffer.h"
 #include "store/store.h"
 #include "discover/discover.h"
+#include "discover/userconfig.h"
 #include "foundation/platform.h"
 #include "foundation/compat_fs.h"
 #include "foundation/log.h"
@@ -44,6 +45,9 @@ struct cbm_pipeline {
     /* Indexing state (set during run) */
     cbm_gbuf_t *gbuf;
     cbm_registry_t *registry;
+
+    /* User-defined extension overrides (loaded once per run) */
+    cbm_userconfig_t *userconfig;
 };
 
 /* ── Timing helper ──────────────────────────────────────────────── */
@@ -97,6 +101,15 @@ void cbm_pipeline_free(cbm_pipeline_t *p) {
     free(p->db_path);
     free(p->project_name);
     /* gbuf, store, registry freed during/after run */
+    /* Release a userconfig that run() did not already free (e.g. an early return). */
+    if (p->userconfig) {
+        /* Only clear the global hook if it still points at this pipeline's config. */
+        if (cbm_get_user_lang_config() == p->userconfig) {
+            cbm_set_user_lang_config(NULL);
+        }
+        cbm_userconfig_free(p->userconfig);
+        p->userconfig = NULL;
+    }
     free(p);
 }
 
@@ -303,6 +316,10 @@ int cbm_pipeline_run(cbm_pipeline_t *p) {
     struct timespec t0;
     cbm_clock_gettime(CLOCK_MONOTONIC, &t0);
 
+    /* Load user-defined extension overrides (fail-open: NULL on error) */
+    p->userconfig = cbm_userconfig_load(p->repo_path);
+    cbm_set_user_lang_config(p->userconfig);
+
     /* Phase 1: Discover files */
     cbm_discover_opts_t opts = {
         .mode = p->mode,
@@ -314,6 +331,10 @@ int cbm_pipeline_run(cbm_pipeline_t *p) {
     int rc = cbm_discover(p->repo_path, &opts, &files, &file_count);
     if (rc != 0) {
         cbm_log_error("pipeline.err", "phase", "discover", "rc", itoa_buf(rc));
+        cbm_discover_free(files, file_count);
+        cbm_set_user_lang_config(NULL);
+        cbm_userconfig_free(p->userconfig);
+        p->userconfig = NULL;
         return -1;
     }
     cbm_log_info("pipeline.discover", "files", itoa_buf(file_count), "elapsed_ms",
@@ -321,6 +342,9 @@ int cbm_pipeline_run(cbm_pipeline_t *p) {
 
     if (check_cancel(p)) {
         cbm_discover_free(files, file_count);
+        cbm_set_user_lang_config(NULL);
+        cbm_userconfig_free(p->userconfig);
+        p->userconfig = NULL;
         return -1;
     }
 
@@ -759,5 +783,9 @@ int cbm_pipeline_run(cbm_pipeline_t *p) {
     p->gbuf = NULL;
     cbm_registry_free(p->registry);
     p->registry = NULL;
+    /* Clear and free user extension config */
+    cbm_set_user_lang_config(NULL);
+    cbm_userconfig_free(p->userconfig);
+    p->userconfig = NULL;
     return rc;
 }
diff --git a/tests/test_main.c b/tests/test_main.c
index 0fec7f4..070a170 100644
--- a/tests/test_main.c
+++ b/tests/test_main.c
@@ -26,6 +26,7 @@ extern void suite_store_search(void);
 extern void suite_cypher(void);
 extern void suite_mcp(void);
 extern void suite_language(void);
+extern void suite_userconfig(void);
 extern void suite_gitignore(void);
 extern void suite_discover(void);
 extern void suite_graph_buffer(void);
@@ -80,6 +81,7 @@ int main(void) {
 
     /* Discover (M2) */
     RUN_SUITE(language);
+    RUN_SUITE(userconfig);
     RUN_SUITE(gitignore);
     RUN_SUITE(discover);
 
diff --git a/tests/test_userconfig.c b/tests/test_userconfig.c
new file mode 100644
index 0000000..efaf61d
--- /dev/null
+++ b/tests/test_userconfig.c
@@ -0,0 +1,218 @@
+/*
+ * test_userconfig.c — Tests for user-defined extension→language mappings.
+ *
+ * Tests cbm_userconfig_load(), cbm_userconfig_lookup(), and the
+ * cbm_set_user_lang_config() / cbm_language_for_extension() integration.
+ */
+#include "../src/foundation/compat.h"
+#include "../src/foundation/compat_fs.h"
+#include "test_framework.h"
+#include "discover/discover.h"
+#include "discover/userconfig.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* ── Helpers ─────────────────────────────────────────────────────── */
+
+/* Write a JSON file to path. Returns 0 on success, -1 on any I/O failure. */
+static int write_json(const char *path, const char *json) {
+    FILE *f = fopen(path, "w");
+    if (!f) {
+        return -1;
+    }
+    int put_rc = fputs(json, f);
+    int close_rc = fclose(f); /* always close, even if the write failed */
+    return (put_rc >= 0 && close_rc == 0) ? 0 : -1;
+}
+
+/* ── Tests: project config ───────────────────────────────────────── */
+
+TEST(userconfig_project_basic) {
+    /* Write a .codebase-memory.json in a temp dir */
+    char dir[256];
+    snprintf(dir, sizeof(dir), "%s/uctest_proj_basic", cbm_tmpdir());
+    cbm_mkdir_p(dir, 0755); /* from compat_fs.h via compat.h */
+
+    char proj[512];
+    snprintf(proj, sizeof(proj), "%s/.codebase-memory.json", dir);
+    ASSERT_EQ(
+        write_json(proj, "{\"extra_extensions\":{\".blade.php\":\"php\",\".mjs\":\"javascript\"}}"),
+        0);
+
+    cbm_userconfig_t *cfg = cbm_userconfig_load(dir);
+    ASSERT_NOT_NULL(cfg);
+
+    ASSERT_EQ(cbm_userconfig_lookup(cfg, ".blade.php"), CBM_LANG_PHP);
+    ASSERT_EQ(cbm_userconfig_lookup(cfg, ".mjs"), CBM_LANG_JAVASCRIPT);
+    ASSERT_EQ(cbm_userconfig_lookup(cfg, ".go"), CBM_LANG_COUNT); /* not in user config */
+
+    cbm_userconfig_free(cfg);
+    remove(proj);
+    PASS();
+}
+
+/* ── Tests: global config ────────────────────────────────────────── */
+
+TEST(userconfig_global_via_env) {
+    /* Point XDG_CONFIG_HOME to a temp dir */
+    char xdg_dir[256];
+    snprintf(xdg_dir, sizeof(xdg_dir), "%s/uctest_global_xdg", cbm_tmpdir());
+
+    char app_dir[512];
+    snprintf(app_dir, sizeof(app_dir), "%s/codebase-memory-mcp", xdg_dir);
+    cbm_mkdir_p(app_dir, 0755);
+
+    char global_path[768];
+    snprintf(global_path, sizeof(global_path), "%s/config.json", app_dir);
+    ASSERT_EQ(
+        write_json(global_path, "{\"extra_extensions\":{\".twig\":\"html\"}}"),
+        0);
+
+    /* Set env var, load, then unset it (a pre-existing value is NOT restored) */
+    setenv("XDG_CONFIG_HOME", xdg_dir, 1);
+    cbm_userconfig_t *cfg = cbm_userconfig_load(NULL); /* no project dir */
+    unsetenv("XDG_CONFIG_HOME");
+
+    ASSERT_NOT_NULL(cfg);
+    ASSERT_EQ(cbm_userconfig_lookup(cfg, ".twig"), CBM_LANG_HTML);
+
+    cbm_userconfig_free(cfg);
+    remove(global_path);
+    PASS();
+}
+
+/* ── Tests: project wins over global ────────────────────────────── */
+
+TEST(userconfig_project_wins_over_global) {
+    /* Global says .xyz → python; project says .xyz → rust */
+    char xdg_dir[256];
+    snprintf(xdg_dir, sizeof(xdg_dir), "%s/uctest_priority_xdg", cbm_tmpdir());
+
+    char app_dir[512];
+    snprintf(app_dir, sizeof(app_dir), "%s/codebase-memory-mcp", xdg_dir);
+    cbm_mkdir_p(app_dir, 0755);
+
+    char global_path[768];
+    snprintf(global_path, sizeof(global_path), "%s/config.json", app_dir);
+    ASSERT_EQ(
+        write_json(global_path, "{\"extra_extensions\":{\".xyz\":\"python\"}}"),
+        0);
+
+    char proj_dir[256];
+    snprintf(proj_dir, sizeof(proj_dir), "%s/uctest_priority_proj", cbm_tmpdir());
+    cbm_mkdir_p(proj_dir, 0755);
+
+    char proj_path[512];
+    snprintf(proj_path, sizeof(proj_path), "%s/.codebase-memory.json", proj_dir);
+    ASSERT_EQ(
+        write_json(proj_path, "{\"extra_extensions\":{\".xyz\":\"rust\"}}"),
+        0);
+
+    setenv("XDG_CONFIG_HOME", xdg_dir, 1);
+    cbm_userconfig_t *cfg = cbm_userconfig_load(proj_dir);
+    unsetenv("XDG_CONFIG_HOME");
+
+    ASSERT_NOT_NULL(cfg);
+    /* Project definition (rust) must win */
+    ASSERT_EQ(cbm_userconfig_lookup(cfg, ".xyz"), CBM_LANG_RUST);
+
+    cbm_userconfig_free(cfg);
+    remove(global_path);
+    remove(proj_path);
+    PASS();
+}
+
+/* ── Tests: unknown language values are skipped ──────────────────── */
+
+TEST(userconfig_unknown_lang_skipped) {
+    char dir[256];
+    snprintf(dir, sizeof(dir), "%s/uctest_unknown_lang", cbm_tmpdir());
+    cbm_mkdir_p(dir, 0755);
+
+    char proj[512];
+    snprintf(proj, sizeof(proj), "%s/.codebase-memory.json", dir);
+    /* "klingon" is not a valid language; ".wasm" is skipped (loader logs a warning) */
+    ASSERT_EQ(
+        write_json(proj,
+                   "{\"extra_extensions\":{\".wasm\":\"klingon\",\".mjs\":\"javascript\"}}"),
+        0);
+
+    cbm_userconfig_t *cfg = cbm_userconfig_load(dir);
+    ASSERT_NOT_NULL(cfg);
+
+    /* .wasm with unknown lang → not in config */
+    ASSERT_EQ(cbm_userconfig_lookup(cfg, ".wasm"), CBM_LANG_COUNT);
+    /* .mjs with valid lang → present */
+    ASSERT_EQ(cbm_userconfig_lookup(cfg, ".mjs"), CBM_LANG_JAVASCRIPT);
+
+    cbm_userconfig_free(cfg);
+    remove(proj);
+    PASS();
+}
+
+/* ── Tests: missing files are silently ignored ───────────────────── */
+
+TEST(userconfig_missing_files_ok) {
+    /* Non-existent repo dir. NOTE(review): count==0 assumes no real global config. */
+    cbm_userconfig_t *cfg = cbm_userconfig_load("/tmp/__nonexistent_repo_12345__");
+    ASSERT_NOT_NULL(cfg); /* must not return NULL — just empty */
+    ASSERT_EQ(cfg->count, 0);
+    cbm_userconfig_free(cfg);
+    PASS();
+}
+
+/* ── Tests: integration with cbm_language_for_extension ─────────── */
+
+TEST(userconfig_integration_override) {
+    /* Verify that setting the global config makes cbm_language_for_extension
+     * respect the override. We map ".blade.php" → PHP, which is not in the
+     * built-in table. */
+    char dir[256];
+    snprintf(dir, sizeof(dir), "%s/uctest_integ", cbm_tmpdir());
+    cbm_mkdir_p(dir, 0755);
+
+    char proj[512];
+    snprintf(proj, sizeof(proj), "%s/.codebase-memory.json", dir);
+    ASSERT_EQ(
+        write_json(proj, "{\"extra_extensions\":{\".blade.php\":\"php\"}}"),
+        0);
+
+    cbm_userconfig_t *cfg = cbm_userconfig_load(dir);
+    ASSERT_NOT_NULL(cfg);
+
+    /* Before setting, .blade.php is unknown */
+    ASSERT_EQ(cbm_language_for_extension(".blade.php"), CBM_LANG_COUNT);
+
+    cbm_set_user_lang_config(cfg);
+    /* After setting, .blade.php → PHP */
+    ASSERT_EQ(cbm_language_for_extension(".blade.php"), CBM_LANG_PHP);
+    /* Built-in extensions still work */
+    ASSERT_EQ(cbm_language_for_extension(".go"), CBM_LANG_GO);
+
+    /* Clean up global state */
+    cbm_set_user_lang_config(NULL);
+    cbm_userconfig_free(cfg);
+    remove(proj);
+    PASS();
+}
+
+/* ── Tests: free is NULL-safe ────────────────────────────────────── */
+
+TEST(userconfig_free_null) {
+    cbm_userconfig_free(NULL); /* must not crash */
+    PASS();
+}
+
+/* ── Suite ──────────────────────────────────────────────────────── */
+
+SUITE(userconfig) {
+    RUN_TEST(userconfig_project_basic);
+    RUN_TEST(userconfig_global_via_env);
+    RUN_TEST(userconfig_project_wins_over_global);
+    RUN_TEST(userconfig_unknown_lang_skipped);
+    RUN_TEST(userconfig_missing_files_ok);
+    RUN_TEST(userconfig_integration_override);
+    RUN_TEST(userconfig_free_null);
+}