CoBrALab · gdevenyi · Jun 11, 2026 · Jun 11, 2026 · coderabbitai · Jun 11, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -12,7 +12,7 @@ for neuroimaging data. The design follows a pipeline pattern: functions accept a
 TSV string and return a processed TSV string.
 
 **Key characteristics:**
-- 807-line Bash library, functional/pipeline style
+- ~810-line Bash library, functional/pipeline style
 - AWK-based data processing for TSV filtering and column operations
 - Extensible custom entity support via JSON configurations
 - Zero-dependency core (`jq` optional, for JSON features and custom entities)
@@ -28,17 +28,17 @@ or run it directly.
 Directory tree → filename parsing → TSV table → filtering / extraction / iteration
                      ↓                   ↓
               glob patterns         AWK processing
-              (31 entities)        (column/row ops)
+              (35 entities)        (column/row ops)
 ```
 
 ### Core Parsing Flow
 
-1. **Pattern matching**: Bash extended-glob patterns match 31 standard BIDS
+1. **Pattern matching**: Bash extended-glob patterns match 35 standard BIDS
    entities (`sub`, `ses`, `task`, `run`, ...) plus suffixes and extensions.
 2. **Filename parsing**: regex-based entity extraction into associative arrays.
 3. **JSON sidecar matching**: exact filename matching only (no inheritance
    resolution).
-4. **Output**: TSV table with columns `derivatives`, `data_type`, one column per
+4. **Output**: TSV table with columns `derivatives`, `datatype`, one column per
    BIDS entity, `suffix`, `extension`, `path`.
 
 ### Pipeline (typical order)
@@ -52,29 +52,38 @@ Directory tree → filename parsing → TSV table → filtering / extraction / i
 
 ## Column Naming Convention (CRITICAL)
 
-Table columns use **FULL BIDS entity display names**, not the short keys used in
-filenames:
+BIDS nomenclature (per `schema.json`, `objects.entities`) distinguishes three things
+per entity — get these right:
 
-| filename key | column name      |
-|--------------|------------------|
-| `sub`        | `subject`        |
-| `ses`        | `session`        |
-| `acq`        | `acquisition`    |
-| `rec`        | `reconstruction` |
-| `dir`        | `direction`      |
-| `task`       | `task` (same)    |
-| `run`        | `run` (same)     |
+| concept              | schema source     | example   | used as            |
+|----------------------|-------------------|-----------|--------------------|
+| entity **key**       | `.name`           | `sub`     | filename token     |
+| entity **name**      | object key        | `subject` | **table column**   |
+| entity display name  | `.display_name`   | `Subject` | not used here      |
+
+Table columns use the entity **name** (the long form), not the entity **key** (the
+short token used in filenames):
+
+| entity key | column name (entity name) |
+|------------|---------------------------|
+| `sub`      | `subject`                 |
+| `ses`      | `session`                 |
+| `acq`      | `acquisition`             |
+| `rec`      | `reconstruction`          |
+| `dir`      | `direction`               |
+| `task`     | `task` (same)             |
+| `run`      | `run` (same)              |
 
 When passing column names to `--columns`, `--row-filter`, `--drop-na`, sort keys,
-or `libBIDSsh_table_column_to_array`, use the **column name** (e.g. `subject`).
-Passing a short key like `sub` will silently fail to match (it is neither a known
+or `libBIDSsh_table_column_to_array`, use the entity **name** (e.g. `subject`).
+Passing an entity key like `sub` will silently fail to match (it is neither a known
 column name nor a numeric index). Numeric column indices are also accepted.
 
 ### Core table structure
 
 Each row is one file. Columns:
 - `derivatives` — pipeline name if under a `derivatives/` folder, else `NA`
-- `data_type` — BIDS data type (`anat`, `func`, `dwi`, ...)
+- `datatype` — BIDS datatype (`anat`, `func`, `dwi`, ...)
 - BIDS entities — `subject`, `session`, `task`, `acquisition`, `run`, ...
 - `suffix` — file suffix (`bold`, `T1w`, `dwi`, ...)
 - `extension` — file extension
@@ -218,11 +227,11 @@ matching rows instead of keeping them.
 # Enable extended globbing
 shopt -s extglob nullglob globstar
 
-# Build BIDS entity patterns (31 standard entities, defined inline in the parser)
+# Build BIDS entity patterns (35 standard entities, defined inline in the parser)
 local entities=(
   "*(_sub-+([a-zA-Z0-9]))"
-  "*(_ses-+([a-zA-Z0-9]))"
-  # ... 29 more entities
+  "*(_tpl-+([a-zA-Z0-9]))"
+  # ... 33 more entities
 )
 
 # Find files
@@ -267,7 +276,7 @@ fi
 
 ### libBIDS.sh
 
-**Purpose**: Main library containing all functionality (807 lines).
+**Purpose**: Main library containing all functionality (~810 lines).
 
 **Approximate section map** (verify with `grep -n '^libBIDSsh_\|^_libBIDSsh_' libBIDS.sh`):
 - Version check + strict mode — top of file
@@ -301,8 +310,8 @@ to activate (the parser loads every `custom/*.json`).
 {
   "entities": [
     {
-      "name": "bp",
-      "display_name": "bodypart",
+      "key": "bp",
+      "name": "bodypart",
       "pattern": "*(_bp-+([a-zA-Z0-9]))"
     }
   ]
@@ -401,8 +410,8 @@ libBIDSsh_json_to_associative_array "file.json" metadata
 ### Adding custom entities
 
 1. Copy `custom/custom_entities.json.tpl` to `custom/custom_entities.json`.
-2. Define each entity's `name`, `display_name` (the column header), and `pattern`
-   (Bash extended-glob).
+2. Define each entity's `key` (short filename token), `name` (the column header),
+   and `pattern` (Bash extended-glob).
 3. Source the library and call `libBIDSsh_parse_bids_to_table`; custom entities are
    appended after the standard ones.
 
@@ -432,7 +441,7 @@ libBIDSsh_json_to_associative_array "file.json" metadata
 1. **Understand BIDS** — see the [BIDS specification](https://bids-specification.readthedocs.io/).
 2. **Read README.md** — usage overview and full API reference.
 3. **Read libBIDS.sh** — inline docstrings document every function and its args.
-4. **Mind the column-naming convention** — full display names, not short keys.
+4. **Mind the column-naming convention** — entity names (e.g. `subject`), not entity keys (e.g. `sub`).
 5. **Test manually** — run against `bids-examples/` datasets to verify changes.
 6. **Check custom entities** — review `custom/custom_entities.json.tpl`.
 </content>

diff --git a/README.md b/README.md
@@ -52,16 +52,19 @@ table_data=$(libBIDSsh_parse_bids_to_table "bids-examples/ds001")
 
 **Output columns:**
 
-The TSV columns use the full BIDS entity names (display names), not the short keys found in filenames.
+The TSV columns use the full BIDS entity **names** (e.g. `subject`), not the entity
+**keys** (the short tokens like `sub` found in filenames). In BIDS schema terms, the
+key is the entity's `.name` field (`sub`) and the column header is the entity's
+object key under `objects.entities` (`subject`).
 
 - `derivatives`: Pipeline name if in derivatives folder
-- `data_type`: BIDS data type (anat, func, dwi, etc.)
-- BIDS entities: `subject` (not sub), `session` (not ses), `task`, `acquisition` (not acq), `run`, etc.
+- `datatype`: BIDS datatype (anat, func, dwi, etc.)
+- BIDS entities: `subject` (key `sub`), `session` (key `ses`), `task`, `acquisition` (key `acq`), `run`, etc.
 - `suffix`: File suffix (bold, T1w, dwi, etc.)
 - `extension`: File extension
 - `path`: Full file path
 
-**Note:** When filtering or accessing columns, always use these full names (e.g., `subject`, `session`, `acquisition`).
+**Note:** When filtering or accessing columns, always use these entity names (e.g., `subject`, `session`, `acquisition`).
 
 ## Filtering and Subsetting
 
@@ -233,10 +236,10 @@ _libBIDSsh_parse_filename "sub-01_task-rest_bold.nii.gz" file_info
 
 **Populated fields:**
 
-- Individual BIDS entities using short keys (`sub`, `ses`, `task`, `acq`, etc.)
+- Individual BIDS entities using entity keys (`sub`, `ses`, `task`, `acq`, etc.)
 - `suffix`: File suffix
 - `extension`: File extension
-- `data_type`: Inferred data type
+- `datatype`: Inferred datatype
 - `derivatives`: Pipeline name if applicable
 - `path`: Full path
 - `_key_order`: Order of keys for iteration
@@ -276,7 +279,7 @@ table_data=$(libBIDSsh_parse_bids_to_table "$bids_path")
 
 # Filter for functional BOLD data
 func_table=$(libBIDSsh_table_filter "$table_data" \
-  -r "data_type:func" \
+  -r "datatype:func" \
   -r "suffix:bold")
 
 # Add JSON paths
@@ -300,19 +303,19 @@ done
 
 ### Adding non-BIDS entities 
 
-If your dataset uses an entity that is not part of the official BIDS specification, you can include them in the parsing logic via JSON file(s) in the `custom` directory:
+If your dataset uses an entity that is not part of the official BIDS specification, you can include it in the parsing logic via JSON file(s) in the `custom` directory. Each entity object uses `key` (the short filename token), `name` (the long column header), and `pattern` (a Bash extended-glob pattern):
 
 ```json
 {
   "entities": [
     {
-      "name": "foo",
-      "display_name": "fooval",
+      "key": "foo",
+      "name": "fooval",
       "pattern": "*(_foo-+([a-zA-Z0-9]))"
     },
     {
-      "name": "bar",
-      "display_name": "baridx",
+      "key": "bar",
+      "name": "baridx",
       "pattern": "*(_bar-+([0-9]))"
     }
     ...

diff --git a/custom/custom_entities.json.tpl b/custom/custom_entities.json.tpl
@@ -1,9 +1,9 @@
 {
   "entities": [
     {
-      "name": "bp",
-      "display_name": "bodypart",
+      "key": "bp",
+      "name": "bodypart",
       "pattern": "*(_bp-+([a-zA-Z0-9]))"
     }
   ]
-}
+}
diff --git a/generate_entity_patterns.sh b/generate_entity_patterns.sh
@@ -1,25 +1,80 @@
 #!/usr/bin/env bash
 
+# Regenerate every schema-derived block used by libBIDS.sh from schema.json.
+# schema.json is the authoritative BIDS specification source; treat it as the
+# single source of truth and paste the blocks below into libBIDS.sh verbatim.
+#
+# BIDS nomenclature (see schema objects.entities):
+#   entity key  = schema .name        (e.g. "sub")  -> filename token
+#   entity name = schema object key   (e.g. "subject") -> table column header
+# rules.entities provides the canonical filename ordering of entities.
+#
+# Requirements: jq on PATH and a readable schema.json in the current directory.
+# Output: labelled blocks on stdout; diagnostics on stderr, exit 1 on error.
+
 set -euo pipefail
 
-entities_order=($(jq -r .rules.entities.[] schema.json))
-entities_names_ordered=()
+# Fail early with a clear message: a jq failure inside the process substitution
+# below does not trip `set -e`, so it would not stop the script at that point.
+if ! command -v jq >/dev/null; then
+  echo "jq is required but was not found on PATH" 1>&2
+  exit 1
+fi
+if [[ ! -r schema.json ]]; then
+  echo "schema.json not found or not readable in ${PWD}" 1>&2
+  exit 1
+fi
+
+# Entities, in canonical BIDS filename order (rules.entities).
+mapfile -t entity_names < <(jq -r '.rules.entities[]' schema.json)
+if ((${#entity_names[@]} == 0)); then
+  echo "No entities read from .rules.entities in schema.json" 1>&2
+  exit 1
+fi
+entity_keys=()
 
-for entity in ${entities_order[@]}; do
-  entity_name=$(jq -r .objects.entities.${entity}.name schema.json)
-  entities_names_ordered+=(${entity_name})
-  entity_format=$(jq -r .objects.entities.${entity}.format schema.json)
+echo "### entities=( ) glob patterns ###"
+for entity in "${entity_names[@]}"; do
+  # Pass the entity name as a jq variable ($e) instead of splicing it into the filter.
+  entity_key=$(jq -r --arg e "${entity}" '.objects.entities[$e].name' schema.json)
+  entity_keys+=("${entity_key}")
+  entity_format=$(jq -r --arg e "${entity}" '.objects.entities[$e].format' schema.json)
   if [[ ${entity_format} == "label" ]]; then
-    echo \""*(_${entity_name}-+([a-zA-Z0-9]))"\"
+    echo "    \"*(_${entity_key}-+([a-zA-Z0-9]))\""
   elif [[ ${entity_format} == "index" ]]; then
-    echo \""*(_${entity_name}-+([0-9]))"\"
+    echo "    \"*(_${entity_key}-+([0-9]))\""
   else
     echo "Unrecognized entity_format ${entity_format}" 1>&2
     exit 1
   fi
 done
 
-printf "%s," ${entities_order[@]}
 echo
-printf "%s " ${entities_names_ordered[@]}
+echo "### entities_order (entity keys, space separated) ###"
+printf "%s " "${entity_keys[@]}"
+echo
+
+echo
+echo "### entities_name_order (entity names / column headers, tab separated) ###"
+# Emits a literal backslash-t between names (an escape sequence to paste into
+# libBIDS.sh), not an actual tab character.
+printf '%s' "${entity_names[0]}"
+printf '\\t%s' "${entity_names[@]:1}"
+echo
+
+echo
+echo "### suffixes alternation ###"
+# `paste` is given `-` explicitly: POSIX requires a file operand, and not every
+# implementation (e.g. BSD) falls back to stdin when none is given.
+printf '_@(%s)\n' "$(jq -r '.objects.suffixes[].value' schema.json | paste -sd'|' -)"
+
+echo
+echo "### extensions alternation (drops '.*' and '/', strips trailing '/') ###"
+printf '@(%s)\n' "$(jq -r '.objects.extensions[].value' schema.json \
+  | grep -vxF -e '.*' -e '/' -e '' \
+  | sed 's:/$::' \
+  | paste -sd'|' -)"
+
 echo
+echo "### datatype regex (for _libBIDSsh_parse_filename) ###"
+printf '(%s)\n' "$(jq -r '.objects.datatypes | keys[]' schema.json | paste -sd'|' -)"