Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
8528f9c
core package requirements
tmsincomb Apr 2, 2024
29bb910
dummy fastpi service
tmsincomb Apr 2, 2024
ccb189f
mysql reformat to black
tmsincomb Apr 2, 2024
ed54046
native pyproject
tmsincomb Apr 3, 2024
80e5af5
syncing
May 7, 2024
11c9264
move fastapi to router.py
May 7, 2024
76b8072
flask dep
May 9, 2024
d09bbff
changes for server
May 21, 2024
2987252
pre-commit pass
tmsincomb May 21, 2024
d2df004
pydantic not needed
Jun 25, 2024
0d3b2c3
synced router with master
Jun 25, 2024
b01bae3
-X theirs with pre-commit ignore update to ingest.py
tmsincomb Apr 1, 2025
a6c144d
pre-commit try
tmsincomb Apr 28, 2025
dc0776f
accepted all incoming changes for hangnail merge
tmsincomb Apr 28, 2025
81487b7
added fastapi postgresql folder
tmsincomb Apr 28, 2025
542f773
remove ingest from blue ignore and found internal bug on why api was …
tmsincomb Apr 28, 2025
db38a8e
init for test_db
tmsincomb May 12, 2025
0e126e5
attempt at clone top 10 rows, failing
tmsincomb May 12, 2025
f2444fb
test postgres beta pass
tmsincomb Jul 2, 2025
afe15f4
test postgres alpha pass
tmsincomb Jul 7, 2025
8baa87c
f006 tests
tmsincomb Jul 7, 2025
f71e28a
f006 refactor
tmsincomb Jul 8, 2025
c5fc839
patch for root models
tmsincomb Jul 14, 2025
02b12e7
patch for leaf models
tmsincomb Jul 14, 2025
e956512
full tests passing
tmsincomb Jul 14, 2025
bb98b01
used local f006 data, ingested to test db and converted to CSVs
tmsincomb Jul 14, 2025
8497beb
fibers ingestion
tmsincomb Jul 18, 2025
c6564e3
tables.sql add site instance type
tgbugs Jul 24, 2025
0a2385d
tables.sql add record-index address type
tgbugs Jul 24, 2025
7828c87
tables.sql object_is_not_dataset distinguish object does not exist
tgbugs Jul 24, 2025
be62ba0
tables.sql values_{cat,quant} add unique constraint
tgbugs Jul 24, 2025
9fcf4db
tables.sql values_quant varchar -> text
tgbugs Jul 24, 2025
4dbd67b
api docs update support matrix for limit and count
tgbugs Jul 25, 2025
779cf2c
api update internal queries to always use capital AS
tgbugs Jul 25, 2025
c04bc75
api first pass at implementing limit and count
tgbugs Jul 25, 2025
d2517dd
api main_query remove left outer joins
tgbugs Jul 25, 2025
e57ab5d
api add desc/* endpoints to default union-cat-quant true
tgbugs Jul 25, 2025
48ec51e
api.org update union-cat-quant default values docs
tgbugs Jul 25, 2025
ff47c1d
ingest major updates for f006
tgbugs Jul 25, 2025
5117575
add inserts.sql to hold values needed for ingestion flows
tgbugs Jul 25, 2025
9e1889f
queries update orgstrap block with helper functions
tgbugs Jul 25, 2025
9be79e6
ingest remove debug breakpoint
tgbugs Jul 27, 2025
e746381
tables.sql check values desc_inst against desc_{cat,quant} domain
tgbugs Jul 27, 2025
199b8f2
inserts.sql placeholder aspects, test.sql volume/cross-section hierarchy
tgbugs Jul 27, 2025
bf4c0f9
test.sql update reva ft anat index domain for consistency
tgbugs Jul 27, 2025
cf4b456
ingest update fasc fib with more fasc values, fix demo ingest
tgbugs Jul 27, 2025
986e548
ingest allow path_from_blob to work when no sparcur paths exist
tgbugs Jul 28, 2025
4df4dbb
ingest extract demo fix souce path
tgbugs Jul 28, 2025
c73a982
ingest demo reorder ops to ensure cache is populated
tgbugs Jul 28, 2025
196d4b0
sync with ingest-f006 upstream branch
tmsincomb Jul 28, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -173,4 +173,10 @@ cython_debug/
# PyPI configuration file
.pypirc


csv_cache/
ingest_2025*
f006_csv_debug*
*.md
README.pdf
.DS_Store
14 changes: 14 additions & 0 deletions README.org
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,17 @@ A database schema for storing arbitrary quantitative measurements.

See the [[file:./docs/api.org][REST API documentation]] for details on
the external query interface.

* Installation
Add the passwords to =~/.pgpass= as:
#+begin_example
host:port:*:quantdb-admin:password
host:port:*:quantdb-user:password
host:port:*:quantdb-test-admin:password
host:port:*:quantdb-test-user:password
#+end_example

* Notes
macOS will need the config.yaml in =Library/Application Support/quantdb/config.yaml=

=brew install emacs=
48 changes: 24 additions & 24 deletions bin/dbsetup
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
set -e
SOURCE="${BASH_SOURCE[0]}"
while [ -h "$SOURCE" ]; do # resolve all symlinks
DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
SOURCE="$(readlink "$SOURCE")"
[[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" # resolve relative symlinks
DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"
SOURCE="$(readlink "$SOURCE")"
[[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" # resolve relative symlinks
done
ABS_PATH="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
ABS_PATH="$(cd -P "$(dirname "$SOURCE")" && pwd)"

SQL="${ABS_PATH}/../sql"
RESOURCES="${ABS_PATH}/../resources"
Expand Down Expand Up @@ -37,33 +37,33 @@ TESTING=0

if test ${SETUP} -eq 0; then

# postgres setup (these need to fail loudly)
psql -U postgres -h ${HOST} -p $PORT -d postgres -f "${SQL}/postgres.sql" -v ON_ERROR_STOP=on -v test_database=${TEST_DATABASE} -v database=$DATABASE
# postgres setup (these need to fail loudly)
psql -U postgres -h ${HOST} -p $PORT -d postgres -f "${SQL}/postgres.sql" -v ON_ERROR_STOP=on -v test_database=${TEST_DATABASE} -v database=$DATABASE

if test ${PROD} -eq 0; then
psql -U postgres -h ${HOST} -p $PORT -d postgres -f "${SQL}/database.sql" -v ON_ERROR_STOP=on -v database=$DATABASE
else
psql -U postgres -h ${HOST} -p $PORT -d postgres -f "${SQL}/test_database.sql" -v ON_ERROR_STOP=on -v test_database=${TEST_DATABASE}
fi
if test ${PROD} -eq 0; then
psql -U postgres -h ${HOST} -p $PORT -d postgres -f "${SQL}/database.sql" -v ON_ERROR_STOP=on -v database=$DATABASE
else
psql -U postgres -h ${HOST} -p $PORT -d postgres -f "${SQL}/test_database.sql" -v ON_ERROR_STOP=on -v test_database=${TEST_DATABASE}
fi

#psql -U postgres -h ${HOST} -p $PORT -d $DATABASE -f "${SQL}/extensions.sql" -v ON_ERROR_STOP=on
#psql -U postgres -h ${HOST} -p $PORT -d $DATABASE -f "${SQL}/extensions.sql" -v ON_ERROR_STOP=on

# quantdb-admin setup
if test ${PROD} -eq 0; then
admin_user=quantdb-admin
qdb_user=quantdb-user
else
admin_user=quantdb-test-admin
qdb_user=quantdb-test-user
fi
# quantdb-admin setup
if test ${PROD} -eq 0; then
admin_user=quantdb-admin
qdb_user=quantdb-user
else
admin_user=quantdb-test-admin
qdb_user=quantdb-test-user
fi

psql -U ${admin_user} -h ${HOST} -p $PORT -d $DATABASE -f "${SQL}/schemas.sql" -v ON_ERROR_STOP=on
psql -U ${admin_user} -h ${HOST} -p $PORT -d $DATABASE -f "${SQL}/tables.sql" -v ON_ERROR_STOP=on
psql -U ${admin_user} -h ${HOST} -p $PORT -d $DATABASE -f "${SQL}/permissions.sql" -v ON_ERROR_STOP=on -v database=$DATABASE -v perm_user=${qdb_user}
psql -U ${admin_user} -h ${HOST} -p $PORT -d $DATABASE -f "${SQL}/schemas.sql" -v ON_ERROR_STOP=on
psql -U ${admin_user} -h ${HOST} -p $PORT -d $DATABASE -f "${SQL}/tables.sql" -v ON_ERROR_STOP=on
psql -U ${admin_user} -h ${HOST} -p $PORT -d $DATABASE -f "${SQL}/permissions.sql" -v ON_ERROR_STOP=on -v database=$DATABASE -v perm_user=${qdb_user}

fi

# testing
if test ${TESTING} -eq 0; then
psql -U quantdb-test-user -h ${HOST} -p $PORT -d $DATABASE -f "${SQL}/test.sql" -v ON_ERROR_STOP=on
psql -U quantdb-test-user -h ${HOST} -p $PORT -d $DATABASE -f "${SQL}/test.sql" -v ON_ERROR_STOP=on
fi
55 changes: 55 additions & 0 deletions bin/prepare_test_db.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/usr/bin/env bash
# quantdb-prepare-test-db.sh
# Prepare a test PostgreSQL database with the same schema as the main DB and
# populate it with the first 10 rows of each table.  There is no ORDER BY, so
# "first 10" is whatever the server returns -- arbitrary, but good enough for
# a smoke-test fixture.
# Usage: ./bin/prepare_test_db.sh [MAIN_DB] [TEST_DB] [PGUSER] [PGHOST] [PGPORT]

set -e
eval "$(conda shell.bash hook)"
conda activate quantdb

# Pull the remote MAIN_DB connection URI out of quantdb.client via Python.
REMOTE_DB_URI=$(python -c "from quantdb.client import get_session; s = get_session(echo=False); print(str(s.get_bind().url))")
echo "Established Connection"

MAIN_DB=${1:-quantdb}
TEST_DB=${2:-quantdb_test}
PGUSER=${3:-quantdb-test-admin}
PGHOST=${4:-localhost}
PGPORT=${5:-5432}

# Parse the remote DB URI (scheme://user:pass@host:port/dbname) for pg_dump.
REMOTE_PGUSER=$(echo "$REMOTE_DB_URI" | sed -E 's|.*://([^:]+):.*|\1|')
REMOTE_PGPASSWORD=$(python -c "from quantdb.config import auth; print(auth.get('db-password') or '')")
REMOTE_PGHOST=$(echo "$REMOTE_DB_URI" | sed -E 's|.*@([^:/]+).*|\1|')
REMOTE_PGPORT=$(echo "$REMOTE_DB_URI" | sed -E 's|.*:([0-9]+)/.*|\1|')
REMOTE_DBNAME=$(echo "$REMOTE_DB_URI" | sed -E 's|.*/([^?]+).*|\1|')

# Export password for pg_dump/psql if available.
if [ -n "$REMOTE_PGPASSWORD" ]; then
    export PGPASSWORD="$REMOTE_PGPASSWORD"
fi

# Ensure the PGUSER has a same-named database (psql connects there by default).
psql -U "$PGUSER" -h "$PGHOST" -p "$PGPORT" -d postgres -tc "SELECT 1 FROM pg_database WHERE datname = '$PGUSER'" | grep -q 1 || createdb -U "$PGUSER" -h "$PGHOST" -p "$PGPORT" "$PGUSER"

# Drop and recreate the test database.
psql -U "$PGUSER" -h "$PGHOST" -p "$PGPORT" -c "DROP DATABASE IF EXISTS $TEST_DB;"
psql -U "$PGUSER" -h "$PGHOST" -p "$PGPORT" -c "CREATE DATABASE $TEST_DB;"

# Dump schema only from the remote MAIN_DB and restore it into the local test DB.
pg_dump -U "$REMOTE_PGUSER" -h "$REMOTE_PGHOST" -p "$REMOTE_PGPORT" -s "$REMOTE_DBNAME" | psql -U "$PGUSER" -h "$PGHOST" -p "$PGPORT" "$TEST_DB"

# All public tables in the remote MAIN_DB.
TABLES=$(psql -U "$REMOTE_PGUSER" -h "$REMOTE_PGHOST" -p "$REMOTE_PGPORT" -d "$REMOTE_DBNAME" -Atc "SELECT tablename FROM pg_tables WHERE schemaname='public';")

echo "Tables in remote DB: $TABLES"

# Copy the first 10 rows of each table from the remote MAIN_DB via dblink.
#
# BUG FIX 1: the old query built the column list with '"' inside a
# double-quoted shell string, so the shell stripped the double quotes and the
# SQL collapsed to string_agg(' || column_name || ', ',') -- a literal string,
# not a column list.  Using quote_ident() server-side avoids shell quoting
# entirely.
# BUG FIX 2: dblink's AS t(...) alias requires a column definition list WITH
# types, so a separate COLDEFS list is built via format_type().  Both failures
# were previously hidden by `2>/dev/null || true`.
for TABLE in $TABLES; do
    COLS=$(psql -U "$REMOTE_PGUSER" -h "$REMOTE_PGHOST" -p "$REMOTE_PGPORT" -d "$REMOTE_DBNAME" -Atc "SELECT string_agg(quote_ident(attname), ',' ORDER BY attnum) FROM pg_attribute WHERE attrelid = 'public.\"$TABLE\"'::regclass AND attnum > 0 AND NOT attisdropped;")
    COLDEFS=$(psql -U "$REMOTE_PGUSER" -h "$REMOTE_PGHOST" -p "$REMOTE_PGPORT" -d "$REMOTE_DBNAME" -Atc "SELECT string_agg(quote_ident(attname) || ' ' || format_type(atttypid, atttypmod), ',' ORDER BY attnum) FROM pg_attribute WHERE attrelid = 'public.\"$TABLE\"'::regclass AND attnum > 0 AND NOT attisdropped;")
    # Inserts may still fail on foreign-key ordering between tables; warn
    # instead of silently discarding the error.
    psql -U "$PGUSER" -h "$PGHOST" -p "$PGPORT" -d "$TEST_DB" -c "INSERT INTO \"$TABLE\" ($COLS) SELECT * FROM dblink('host=$REMOTE_PGHOST user=$REMOTE_PGUSER dbname=$REMOTE_DBNAME port=$REMOTE_PGPORT','SELECT $COLS FROM \"$TABLE\" LIMIT 10') AS t($COLDEFS);" || echo "warning: could not copy rows for $TABLE (likely FK ordering)" >&2
done

unset PGPASSWORD

echo "Test database '$TEST_DB' prepared with schema and first 10 rows of each table from remote '$REMOTE_DBNAME'."
17 changes: 10 additions & 7 deletions docs/api.org
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ and =?object= argument, this means that the results set will
always be empty except in cases where a single object provided
both cat and quant values. You can pass =?union-cat-quant=true=
to change the behavior to =UNION=. The default behavior for
this may change per endpoint in which case we will document which
endpoints default to =UNION=.
this depends on the endpoint, see the =arg V endpoint= table
below to see which endpoints default to =UNION=.

All arguments that are provided must satisfy on at least one single
quantitative or categorical value, so e.g. ~?aspect=a&value-quant-min=1~
Expand Down Expand Up @@ -86,14 +86,16 @@ to limit the results of a query to the =terms= endpoint by passing
| value-quant-max | numeric | 1 |
|--------------------+-----------+-------|
| limit | integer | 1 |
| count | bool | 1 |
| union-cat-quant | bool | 1 |
| source-only | bool | 1 |
| include-unused | bool | 1 |
| prov | bool | 1 |

Invalid combinations are marked with an x.
Non-meaningful combinations are marked with a q.
Not-implemented are marked with ni.
Invalid combinations are marked with an =x=.
Non-meaningful combinations are marked with a =q=.
Not-implemented are marked with =ni=.
=true= by default are marked with =t=.

| arg V endpoint > | objects | values/inst | values/cat | values/quant | desc/inst | desc/cat | desc/quant | terms | units | aspects |
|--------------------+---------+-------------+------------+--------------+-----------+----------+------------+-------+-------+---------|
Expand Down Expand Up @@ -122,8 +124,9 @@ Not-implemented are marked with ni.
| value-quant-min | | | x | | | q | | q | | |
| value-quant-max | | | x | | | q | | q | | |
|--------------------+---------+-------------+------------+--------------+-----------+----------+------------+-------+-------+---------|
| limit | ni | ni | ni | ni | ni | ni | ni | ni | ni | ni |
| union-cat-quant | | | | | | | | | | |
| limit | | | | | | | | | | |
| count | ni | | | | ni | ni | ni | ni | ni | ni |
| union-cat-quant | t | t | | | t | t | t | | | |
| source-only | | x | x | x | x | x | x | x | x | x |
| include-unused | x | x | x | x | | | | | | |
| prov | x | | | | x | x | x | x | x | x |
Expand Down
87 changes: 87 additions & 0 deletions docs/f006_implementation_summary.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# F006 Implementation Summary

## Overview

The `f006.py` ingestion script has been updated to properly implement the QuantDB table population guide. It now creates all required tables in the correct order and uses `back_populate_tables` for leaf tables as recommended.

## Implementation Details

### 1. Root Tables (Created First)

All root tables are now properly created:

- **Addresses**: Created with types 'constant' and 'tabular-header'
- **Aspects**: Created with 'volume', 'length', and 'diameter' aspects
- **Units**: Created with 'mm3', 'mm', and 'um' units
- **ControlledTerms**: Created with 'microct' term
- **DescriptorsInst**: Created with 'human', 'nerve-volume', and 'nerve-cross-section' descriptors
- **Objects**: Created for both dataset and package objects

### 2. Intermediate Tables (Created After Root Tables)

All intermediate tables are properly populated:

- **DescriptorsCat**: Created for categorical descriptors (hasDataAboutItModality)
- **DescriptorsQuant**: Created for quantitative descriptors with proper dependencies:
- nerve-volume-mm3 (uses volume aspect and mm3 unit)
- nerve-cross-section-diameter-um (uses diameter aspect and um unit)
- **ValuesInst**: Created for subjects and samples with proper ID patterns
- **ObjDescInst**: Maps packages to instance descriptors
- **ObjDescCat**: Maps packages to categorical descriptors
- **ObjDescQuant**: Maps packages to quantitative descriptors

### 3. Leaf Tables (Created Last with back_populate_tables)

Both leaf tables are created using `back_populate_tables`:

- **ValuesCat**: Stores categorical values (modality = microct)
- **ValuesQuant**: Stores quantitative values (nerve volume measurements)

### Key Functions

1. **create_basic_descriptors()**: Creates all root and intermediate descriptor tables
2. **ingest_objects_table()**: Creates dataset and package objects
3. **ingest_instances_table()**: Creates subject and sample instances
4. **create_obj_desc_mappings()**: Creates ObjDesc* mapping tables
5. **create_leaf_values()**: Creates leaf tables using back_populate_tables

### Proper Use of back_populate_tables

The script correctly uses `back_populate_tables` for leaf tables:

```python
# For ValuesCat
values_cat = ValuesCat(...)
# Set all relationships
values_cat.controlled_terms = components["terms"]["microct"]
values_cat.descriptors_cat = components["modality_desc"]
values_cat.descriptors_inst = components["descriptors"]["nerve-volume"]
values_cat.values_inst = sample_instance
values_cat.obj_desc_cat = mapping
values_cat.obj_desc_inst = mapping
values_cat.objects = package

# Use back_populate_tables
result = back_populate_tables(session, values_cat)
```

### Population Order

The script follows the correct population order:

1. Root tables (Addresses, Aspects, Units, ControlledTerms, DescriptorsInst, Objects)
2. Intermediate tables (DescriptorsCat, DescriptorsQuant, ValuesInst, ObjDesc*)
3. Leaf tables with back_populate_tables (ValuesCat, ValuesQuant)

## Verification

The implementation has been verified by:

1. Static analysis of the code structure
2. Checking for presence of all required tables
3. Confirming use of back_populate_tables for leaf tables
4. Verifying correct population order

## Conclusion

The `f006.py` script now fully complies with the QuantDB table population guide, creating all necessary tables in the correct order and properly using `back_populate_tables` for complex relationship management in leaf tables.
48 changes: 47 additions & 1 deletion docs/queries.org
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- orgstrap-cypher: sha256; orgstrap-norm-func-name: orgstrap-norm-func--dprp-1-0; orgstrap-block-checksum: 41c0973b8709171406d6ddef97a4070279c65a47610715ef044eda4953a8c072; -*-
# -*- orgstrap-cypher: sha256; orgstrap-norm-func-name: orgstrap-norm-func--dprp-1-0; orgstrap-block-checksum: ad0adb718dc286ea0f7ac5b6926eb433497ad7c7da7013081394ddf9bc1c9fc2; -*-
# [[orgstrap][jump to the orgstrap block for this file]]

#+name: orgstrap-shebang
Expand Down Expand Up @@ -57,14 +57,60 @@ select * from get_all_values_example() where agg_type in ('min', 'max') order by
* Bootstrap :noexport:
#+name: orgstrap
#+begin_src elisp :results none :exports none :lexical yes
(defun get-sql-insert-ocdn ()
  "Return ../sql/inserts.sql with ON CONFLICT DO NOTHING spliced in.
Reads the file literally and rewrites the first \";\" on each line to
\" ON CONFLICT DO NOTHING;\" so the insert statements can be replayed
without violating unique constraints."
  (with-temp-buffer
    (insert-file-contents-literally "../sql/inserts.sql")
    ;; Plain Emacs search/replace instead of `evil-ex-substitute' so this
    ;; orgstrap block no longer requires evil to be installed.  Matching
    ;; the default (flagless) ex substitute, only the first \";\" on each
    ;; line is rewritten -- hence the `forward-line' after each match.
    (goto-char (point-min))
    (while (re-search-forward ";" nil t)
      (replace-match " ON CONFLICT DO NOTHING;" t t)
      (forward-line 1))
    (buffer-string)))

(defun qdb-get-config (auth-var)
  "Return the value of AUTH-VAR from the quantdb auth config as a string.
Runs python with the repository root prepended to PYTHONPATH so that
`quantdb.config' is importable without installing the package."
  (with-temp-buffer
    (let ((process-environment
           ;; BUG FIX: shell syntax like ${PYTHONPATH} is NOT expanded
           ;; inside `process-environment' entries, so the previous code
           ;; passed the literal text \"${PYTHONPATH}\" to the child
           ;; process.  Append the existing value via `getenv' instead.
           (cons (format "PYTHONPATH=%s%s"
                         (expand-file-name "..")
                         (let ((existing (getenv "PYTHONPATH")))
                           (if existing (concat ":" existing) "")))
                 process-environment)))
      ;; NOTE(review): AUTH-VAR is spliced into the python source without
      ;; escaping -- callers must pass trusted option names only.
      (call-process
       "python" nil (current-buffer) nil "-c"
       (format "from quantdb.config import auth; print(auth.get('%s'), end='')" auth-var)))
    (buffer-string)))

(defun org-babel-sql-expand-vars (body vars &optional sqlite)
  "Expand the variables held in VARS in BODY.

If SQLITE has been provided, prevent passing a format to
`orgtbl-to-csv'.  This prevents overriding the default format, which if
there were commas in the context of the table broke the table as an
argument mechanism."
  ;; NOTE(review): this redefines Org's own `org-babel-sql-expand-vars'
  ;; so that variables are referenced as :name instead of $name (see the
  ;; XXX comment below) -- confirm this file loads after ob-sql so the
  ;; override wins.
  (mapc
   (lambda (pair)
     ;; Each PAIR is (NAME . VALUE); every occurrence of :NAME in BODY is
     ;; replaced by the rendered VALUE.
     (setq body
           (replace-regexp-in-string
            (format ":%s" (car pair)) ; XXX this is the issue change $ -> :
            (let ((val (cdr pair)))
              (if (listp val)
                  ;; Table values are written out as CSV to a temp file
                  ;; and the file name is substituted into BODY.
                  (let ((data-file (org-babel-temp-file "sql-data-")))
                    (with-temp-file data-file
                      (insert (orgtbl-to-csv
                               val (if sqlite
                                       nil
                                     '(:fmt (lambda (el) (if (stringp el)
                                                             el
                                                           (format "%S" el))))))))
                    data-file)
                ;; Scalars: strings verbatim, everything else via %S.
                (if (stringp val) val (format "%S" val))))
            body t t)))
   vars)
  body)

(defun fix-ocbe-queries () ; FIXME name collisions DO happen -- this is why `defvar-local' exists
  "Let sql blocks and blessed elisp blocks evaluate without prompting.
Buffer-locally installs an `org-confirm-babel-evaluate' predicate that
skips confirmation for any sql source block and for blocks whose body
starts with \"; elisp-safe-block\"."
  (setq-local
   org-confirm-babel-evaluate
   (lambda (lang body)
     ;; Ask for confirmation only when the block is neither sql nor a
     ;; marked-safe elisp block.
     (and (not (member lang '("sql")))
          (not (string-prefix-p "; elisp-safe-block" body))))))

(fix-ocbe-queries)
#+end_src

Expand Down
Loading