From bed1a562852ebc7c2dd633c43239d0dcad130299 Mon Sep 17 00:00:00 2001 From: Lawrence Sinclair Date: Thu, 18 Jun 2026 01:29:56 +0000 Subject: [PATCH] jenner-check: add 3 Jenner compatibility bundles + runner Adds a jenner-check/ directory with three compatibility bundles built from this repository's own macros, each verified against the Jenner API: - t001_getnobs GETNOBS macro (DICTIONARY.TABLES obs count) - t002_getvarlist GETVARLIST macro (DICTIONARY.COLUMNS num/char split) - t003_outlier_iqr OUTLIER macro IQR core (PROC MEANS QMETHOD=P2 /AUTONAME + PROC TRANSPOSE + Tukey fences) Each bundle ships script.sas, a captured expected/ snapshot, and meta.json provenance. Includes the SAS/shell runner and README. Co-Authored-By: Claude Opus 4.8 (1M context) --- jenner-check/.gitignore | 2 + jenner-check/README.md | 83 +++ jenner-check/run_jenner.bat | 43 ++ jenner-check/run_jenner.sas | 526 ++++++++++++++++++ jenner-check/run_jenner.sh | 214 +++++++ jenner-check/run_jenner_check.sas | 212 +++++++ jenner-check/t001_getnobs/autoexec.sas | 4 + jenner-check/t001_getnobs/expected.json | 19 + jenner-check/t001_getnobs/expected/files.md | 11 + jenner-check/t001_getnobs/expected/log.txt | 38 ++ jenner-check/t001_getnobs/expected/output.txt | 0 jenner-check/t001_getnobs/meta.json | 8 + jenner-check/t001_getnobs/script.sas | 55 ++ jenner-check/t002_getvarlist/autoexec.sas | 2 + jenner-check/t002_getvarlist/expected.json | 19 + .../t002_getvarlist/expected/files.md | 10 + jenner-check/t002_getvarlist/expected/log.txt | 29 + .../t002_getvarlist/expected/output.txt | 0 jenner-check/t002_getvarlist/meta.json | 8 + jenner-check/t002_getvarlist/script.sas | 70 +++ jenner-check/t003_outlier_iqr/autoexec.sas | 4 + jenner-check/t003_outlier_iqr/expected.json | 24 + .../t003_outlier_iqr/expected/files.md | 13 + .../t003_outlier_iqr/expected/log.txt | 72 +++ .../t003_outlier_iqr/expected/output.txt | 7 + jenner-check/t003_outlier_iqr/meta.json | 8 + jenner-check/t003_outlier_iqr/script.sas 
| 68 +++ 27 files changed, 1549 insertions(+) create mode 100644 jenner-check/.gitignore create mode 100644 jenner-check/README.md create mode 100644 jenner-check/run_jenner.bat create mode 100644 jenner-check/run_jenner.sas create mode 100755 jenner-check/run_jenner.sh create mode 100644 jenner-check/run_jenner_check.sas create mode 100644 jenner-check/t001_getnobs/autoexec.sas create mode 100644 jenner-check/t001_getnobs/expected.json create mode 100644 jenner-check/t001_getnobs/expected/files.md create mode 100644 jenner-check/t001_getnobs/expected/log.txt create mode 100644 jenner-check/t001_getnobs/expected/output.txt create mode 100644 jenner-check/t001_getnobs/meta.json create mode 100644 jenner-check/t001_getnobs/script.sas create mode 100644 jenner-check/t002_getvarlist/autoexec.sas create mode 100644 jenner-check/t002_getvarlist/expected.json create mode 100644 jenner-check/t002_getvarlist/expected/files.md create mode 100644 jenner-check/t002_getvarlist/expected/log.txt create mode 100644 jenner-check/t002_getvarlist/expected/output.txt create mode 100644 jenner-check/t002_getvarlist/meta.json create mode 100644 jenner-check/t002_getvarlist/script.sas create mode 100644 jenner-check/t003_outlier_iqr/autoexec.sas create mode 100644 jenner-check/t003_outlier_iqr/expected.json create mode 100644 jenner-check/t003_outlier_iqr/expected/files.md create mode 100644 jenner-check/t003_outlier_iqr/expected/log.txt create mode 100644 jenner-check/t003_outlier_iqr/expected/output.txt create mode 100644 jenner-check/t003_outlier_iqr/meta.json create mode 100644 jenner-check/t003_outlier_iqr/script.sas diff --git a/jenner-check/.gitignore b/jenner-check/.gitignore new file mode 100644 index 0000000..75fb4a3 --- /dev/null +++ b/jenner-check/.gitignore @@ -0,0 +1,2 @@ +*_response.json +response.json diff --git a/jenner-check/README.md b/jenner-check/README.md new file mode 100644 index 0000000..e0d8eb2 --- /dev/null +++ b/jenner-check/README.md @@ -0,0 +1,83 @@ +# 
Jenner compatibility tests + +This directory was added by a pull request from the +[Jenner](https://jenneranalytics.com) project. Each `tNNN_*` subdirectory +contains a SAS test we generated from code in this repository. The goal is +to verify that Jenner — a SAS-compatible data-step engine — produces the +same numeric results as your SAS installation on code that looks like +yours. + +## What's in here + +``` +jenner-check/ +├── README.md # this file +├── run_jenner_check.sas # master runner +├── jenner_check_report.csv # written by the runner +├── t001_…/ +│ ├── script.sas # the SAS script under test +│ ├── validate.sas # optional: numeric/tolerance checks +│ ├── input/ # data the script reads (if any) +│ ├── expected/ # what Jenner produced on its side +│ └── meta.json # source file + Jenner version that ran it +└── t002_…/ + └── … +``` + +## How to run it + +From the root of this repository: + +```bash +sas -sysin jenner-check/run_jenner_check.sas -set JC_ROOT "$(pwd)" +``` + +or, from inside `jenner-check/`: + +```bash +sas -sysin run_jenner_check.sas +``` + +The runner will: + +1. Find every `tNNN_*` bundle in this directory. +2. Run its `script.sas` with the log and listing captured to + `tNNN_*/actual.log` and `tNNN_*/actual.lst` (inside that bundle's own directory). +3. If the bundle has a `validate.sas`, run that too. A validator produces + `work.jc_validation` with `status` and `message` columns. +4. Aggregate every test's outcome into `jenner_check_report.csv`. + +## How to report results + +Please attach `jenner-check/jenner_check_report.csv` as a comment on +the pull request that introduced this directory. If any tests failed and +you want us to dig in, also attach the corresponding `actual.log` and +`actual.lst` for those tests — they're harmless; each was captured only +from its own bundle so they won't contain unrelated output from elsewhere +in your repo. + +That's the whole ask. You don't need to merge anything else. 
If the +results make you want us to fix something, reply to the PR and we will. + +## Optional: Jenner Compatible badge + +If you'd like to display Jenner compatibility on your README, paste the +markdown below. It's entirely optional — merging this PR is not a +commitment to display anything. + +```markdown +[![Jenner Compatible](https://jenneranalytics.com/badges/jenner-compatible.svg)](https://jenneranalytics.com) +``` + +## Don't want future PRs from us? + +Reply to this PR with `no-more-prs` (case-insensitive) anywhere in a +comment, or open an issue titled `jenner-check: opt out`. We'll record +your repo as "do-not-contact" and stop automated PRs. + +## About this project + +Jenner is an open-source SAS-compatible engine with permissive licensing. +Full context is at [jenneranalytics.com](https://jenneranalytics.com). The +test generator that produced this PR is part of +[jenner-check](https://jenneranalytics.com/jenner-check). diff --git a/jenner-check/run_jenner.bat b/jenner-check/run_jenner.bat new file mode 100644 index 0000000..1039fdf --- /dev/null +++ b/jenner-check/run_jenner.bat @@ -0,0 +1,43 @@ +@echo off +rem run_jenner.bat - Windows runner for Jenner compatibility checks. +rem +rem Usage: run_jenner.bat SCRIPT.sas [response.json] +rem +rem Submits a single .sas file to api.jenneranalytics.com. For +rem bundle-aware mode (autoexec.sas + script.sas concatenation) on +rem Windows, use WSL and invoke run_jenner.sh instead, or wait for the +rem Windows CI runner that will validate a bundle-aware .bat. +rem +rem Output: response.json contains the API response. Read it back in SAS: +rem filename resp 'response.json'; +rem libname resp JSON fileref=resp; +rem proc print data=resp.root; run; +rem +rem Requires: curl.exe (ships with Windows 10+ at C:\Windows\System32). 
+
+setlocal
+
+rem The first argument (the .sas file to submit) is required.
+if "%~1"=="" (
+    echo Usage: %~nx0 script.sas [response.json]
+    exit /b 2
+)
+
+rem The quoted form set "NAME=value" keeps spaces and shell metacharacters
+rem in the paths from being parsed as command syntax.
+set "SCRIPT=%~1"
+set "OUT=%~2"
+if "%OUT%"=="" set "OUT=response.json"
+
+set "HOST=api.jenneranalytics.com"
+
+rem Multipart POST of the script; the JSON reply is written to OUT.
+curl.exe -sS -X POST "https://%HOST%/v1/run" ^
+    -F "script=@%SCRIPT%;type=application/x-sas" ^
+    -F "deterministic=1" ^
+    -F "timeout=60" ^
+    -o "%OUT%"
+
+if errorlevel 1 (
+    echo curl failed with errorlevel %errorlevel%
+    exit /b 1
+)
+
+echo Response written to %OUT%
+exit /b 0
diff --git a/jenner-check/run_jenner.sas b/jenner-check/run_jenner.sas
new file mode 100644
index 0000000..550e8f8
--- /dev/null
+++ b/jenner-check/run_jenner.sas
@@ -0,0 +1,526 @@
+/* run_jenner.sas — invoke api.jenneranalytics.com from base SAS.
+ *
+ * Requires SAS 9.4 M5 or later (PROC HTTP + libname JSON engine).
+ *
+ * ---------------------------------------------------------------------------
+ * TL;DR for SAS users:
+ *
+ *   %include 'run_jenner.sas';
+ *   %jenner_run(script=my_program.sas); / * one script * /
+ *   %jenner_check_all(); / * whole bundle dir * /
+ *
+ * ---------------------------------------------------------------------------
+ * What this file gives you:
+ *
+ *   %jenner_run — POST one .sas file to the Jenner API, display the
+ *   log + listing + any generated files.
+ *   %jenner_check_all — walk every jenner-check/tNNN_* bundle,
+ *   invoke the API for each, compare the response to
+ *   the bundle's expected.json, produce a summary
+ *   CSV + SAS dataset the repo owner can attach to the
+ *   jenner-check PR.
+ *
+ * ---------------------------------------------------------------------------
+ * How the API call is built:
+ *
+ *   POST https://api.jenneranalytics.com/v1/run
+ *   Content-Type: multipart/form-data; boundary=...
+ *
+ *   fields:
+ *     script the .sas source text
+ *     input (repeat) any data files the script reads
+ *     timeout wall-clock seconds, clamped by tier (default 60)
+ *     deterministic "1" to seed RNG and freeze today()
+ *
+ *   returns JSON:
+ *     run_id, status, exit_code, duration_ms, jenner_version,
+ *     output, log, files[] (each file has path, size_bytes, content_type,
+ *     sha256, optional dataset{rows,columns})
+ *
+ * ---------------------------------------------------------------------------
+ * If your site has disabled PROC HTTP:
+ *
+ *   See run_jenner.bat (Windows) or run_jenner.sh (mac/linux) in the same
+ *   directory — both are curl wrappers that produce the same JSON.
+ *   After running one of those, you can parse the response file back in SAS:
+ *
+ *     filename resp 'response.json';
+ *     libname resp JSON fileref=resp;
+ *     proc print data=resp.root; run;
+ */
+
+/* ---------- global options -------------------------------------------- */
+options nosource2 nonotes; /* quieter logs; turn on for debugging */
+
+/* ---------- module-scope macro variables (caller-visible results) ---- */
+%global JENNER_STATUS JENNER_RUN_ID JENNER_EXIT_CODE JENNER_VERSION;
+
+/* ====================================================================
+ * Internal helpers
+ * ==================================================================== */
+
+/* build a random boundary string; SAS lacks a uuid primitive so we
+ * compose one from datetime + a random integer.
+ *
+ * ranuni(0) returns a fraction in (0,1). HEXw. with w < 16 converts its
+ * argument to an integer before formatting, so formatting the fraction
+ * directly would always print zeros. Scale it to the 24-bit range first
+ * so the suffix really varies from call to call. */
+%macro _jc_boundary;
+  jc_%sysfunc(compress(%sysfunc(datetime(), b8601dt.), -:.))_%sysfunc(putn(%sysevalf(%sysfunc(ranuni(0)) * 16777215, floor), hex6.))
+%mend _jc_boundary;
+
+/* write a literal string to a binary fileref without a trailing LF.
+ *   fref  fileref to append to (opened MOD, RECFM=N)
+ *   text  anything valid after PUT, e.g. a quoted literal */
+%macro _jc_put(fref, text);
+  data _null_;
+    file &fref mod recfm=n;
+    put &text;
+  run;
+%mend _jc_put;
+
+/* assemble the multipart body into fileref JC_BODY, producing a header
+ * line with the chosen boundary in macro var &JC_BOUND. Inputs is a
+ * space-separated list of file paths.
+ *
+ * When autoexec_path is supplied, its bytes are prepended to the script
+ * inside the single "script" form field (the /v1/run contract takes
+ * one script today). A newline separates the two so statements don't
+ * run together.
+ *
+ * Parameters:
+ *   script_path    path of the .sas file to submit (required)
+ *   autoexec_path  optional file whose bytes go in front of the script
+ *   inputs         space-separated paths, one "input" part per file
+ *   timeout        value of the "timeout" form field
+ *   deterministic  value of the "deterministic" form field
+ *
+ * Side effects: (re)assigns fileref JC_BODY and sets global &JC_BOUND.
+ *
+ * NOTE(review): every part is written with RECFM=N. Multipart bodies need
+ * CRLF between header lines; it is not evident from this file that the
+ * "/" line pointers below emit any line terminator on a RECFM=N stream.
+ * Confirm against a captured request body before relying on this path. */
+%macro _jc_build_body(script_path=, autoexec_path=, inputs=, timeout=60, deterministic=0);
+  %global JC_BOUND;
+  /* ranuni(0) is a fraction and HEXw. (w < 16) converts to an integer
+   * before formatting, so scale to the 40-bit range first; otherwise the
+   * "random" part of the boundary is always zeros. */
+  %let JC_BOUND = --jenner-%sysfunc(putn(%sysevalf(%sysfunc(ranuni(0)) * 1099511627775, floor), hex10.))--;
+
+  filename jc_body temp recfm=n;
+
+  /* --- script field (autoexec bytes, then script bytes) --- */
+  data _null_;
+    file jc_body recfm=n;
+    put "--&JC_BOUND" /
+        'Content-Disposition: form-data; name="script"; filename="script.sas"' /
+        'Content-Type: application/x-sas' / ;
+  run;
+  %if %length(&autoexec_path) > 0 %then %do;
+    data _null_;
+      infile "&autoexec_path" recfm=n;
+      file jc_body mod recfm=n;
+      input;
+      put _infile_;
+    run;
+    data _null_;
+      file jc_body mod recfm=n;
+      put ; /* separator newline */
+    run;
+  %end;
+  /* append raw script bytes */
+  data _null_;
+    infile "&script_path" recfm=n;
+    file jc_body mod recfm=n;
+    input;
+    put _infile_;
+  run;
+  data _null_;
+    file jc_body mod recfm=n;
+    put ;
+  run;
+
+  /* --- optional input files --- */
+  %local i f;
+  %let i = 1;
+  %do %while (%scan(&inputs, &i, %str( )) ne );
+    %let f = %scan(&inputs, &i, %str( ));
+    data _null_;
+      file jc_body mod recfm=n;
+      /* last path component, for either slash style */
+      fname = scan("&f", -1, '/\');
+      put "--&JC_BOUND" /
+          'Content-Disposition: form-data; name="input"; filename="' fname +(-1) '"' /
+          'Content-Type: application/octet-stream' / ;
+    run;
+    data _null_;
+      infile "&f" recfm=n;
+      file jc_body mod recfm=n;
+      input;
+      put _infile_;
+    run;
+    data _null_;
+      file jc_body mod recfm=n;
+      put ;
+    run;
+    %let i = %eval(&i + 1);
+  %end;
+
+  /* --- timeout + deterministic fields --- */
+  data _null_;
+    file jc_body mod recfm=n;
+    put "--&JC_BOUND" /
+        'Content-Disposition: form-data; name="timeout"' / /
+        "&timeout";
+    put "--&JC_BOUND" /
+        'Content-Disposition: form-data; name="deterministic"' / /
+        "&deterministic";
+    put "--&JC_BOUND--";
+  run;
+%mend _jc_build_body;
+
+
+/* ====================================================================
+ * %jenner_run — submit one script, display results.
+ *
+ * Parameters:
+ *   script=         path of the .sas file to submit (required)
+ *   autoexec=       optional file prepended to the script
+ *   inputs=         space-separated data files to upload
+ *   host=           API host name
+ *   timeout=        "timeout" form field
+ *   deterministic=  "deterministic" form field
+ *   out_dir=        directory that receives files listed in the response
+ *   api_key=        optional bearer token
+ *
+ * Results: global macro variables JENNER_RUN_ID, JENNER_STATUS,
+ * JENNER_EXIT_CODE and JENNER_VERSION. They are blanked on entry and
+ * stay blank when validation fails or no JSON object comes back.
+ * ==================================================================== */
+%macro jenner_run(
+  script=,
+  autoexec=,
+  inputs=,
+  host=api.jenneranalytics.com,
+  timeout=60,
+  deterministic=0,
+  out_dir=jenner_output,
+  api_key=
+);
+
+  %let JENNER_STATUS = ;
+  %let JENNER_RUN_ID = ;
+  %let JENNER_EXIT_CODE = ;
+  %let JENNER_VERSION = ;
+
+  %if %length(&script) = 0 %then %do;
+    %put ERROR: %%jenner_run requires script=;
+    %return;
+  %end;
+  %if %sysfunc(fileexist(&script)) = 0 %then %do;
+    %put ERROR: script not found: &script;
+    %return;
+  %end;
+  %if %length(&autoexec) > 0 and %sysfunc(fileexist(&autoexec)) = 0 %then %do;
+    %put ERROR: autoexec not found: &autoexec;
+    %return;
+  %end;
+
+  %_jc_build_body(script_path=&script, autoexec_path=&autoexec,
+                  inputs=&inputs,
+                  timeout=&timeout, deterministic=&deterministic)
+
+  filename jc_resp temp;
+  filename jc_hdrs temp;
+
+  proc http
+    method    = "POST"
+    url       = "https://&host/v1/run"
+    in        = jc_body
+    out       = jc_resp
+    headerout = jc_hdrs
+    ct        = "multipart/form-data; boundary=&JC_BOUND"
+    ;
+    /* send the bearer token only when a key was supplied */
+    %if %length(&api_key) > 0 %then %do;
+      headers "Authorization" = "Bearer &api_key";
+    %end;
+  run;
+
+  /* parse response JSON */
+  libname jc_r JSON fileref=jc_resp;
+
+  /* Everything that reads jc_r.root sits behind the existence check, so
+   * a failed request or a non-JSON reply produces one clear message
+   * rather than a DATA step error on a missing table. */
+  %if %sysfunc(exist(jc_r.root)) %then %do;
+    /* extract headline values into caller-visible macro variables */
+    data _null_;
+      set jc_r.root(obs=1);
+      call symputx('JENNER_RUN_ID', run_id, 'G');
+      call symputx('JENNER_STATUS', status, 'G');
+      call symputx('JENNER_EXIT_CODE', exit_code, 'G');
+      call symputx('JENNER_VERSION', jenner_version, 'G');
+    run;
+
+    /* show the listing (stdout) and the log, one PUT per LF-separated
+     * line. This is a separate step so the JENNER_* references in the
+     * trailer resolve to the values stored just above. */
+    data _null_;
+      set jc_r.root(obs=1);
+      length line $32767;
+      put '==== Jenner output =====================================';
+      do i = 1 to countc(output, '0A'x) + 1;
+        line = scan(output, i, '0A'x);
+        put line;
+      end;
+      put '==== Jenner log ========================================';
+      do i = 1 to countc(log, '0A'x) + 1;
+        line = scan(log, i, '0A'x);
+        put line;
+      end;
+      put "==== run_id=&JENNER_RUN_ID status=&JENNER_STATUS exit=&JENNER_EXIT_CODE version=&JENNER_VERSION";
+    run;
+  %end;
+  %else %do;
+    %put ERROR: no JSON object came back from https://&host/v1/run - the JENNER_ result variables are left blank.;
+  %end;
+
+  /* download any returned files into &out_dir/{relative/path} */
+  %if %sysfunc(exist(jc_r.files)) %then %do;
+    data _null_;
+      length cmd $400;
+      /* catx keeps the blank between the command and the path (cats
+       * would strip it and glue them together); quote() protects paths
+       * that contain spaces. */
+      cmd = catx(' ', 'mkdir -p', quote(strip("&out_dir")));
+      rc = system(cmd); /* works on unix; on windows user may need to mkdir themselves */
+    run;
+
+    %local _nfiles;
+    proc sql noprint;
+      select count(*) into :_nfiles trimmed from jc_r.files;
+    quit;
+
+    %local i fpath;
+    %do i = 1 %to &_nfiles;
+      /* pick up the i-th file path from the response */
+      data _null_;
+        set jc_r.files(firstobs=&i obs=&i);
+        call symputx('fpath', path, 'L');
+      run;
+      filename jc_file "&out_dir/&fpath";
+      proc http
+        url="https://&host/v1/run/&JENNER_RUN_ID/files/&fpath"
+        out=jc_file
+        method="GET";
+        %if %length(&api_key) > 0 %then %do;
+          headers "Authorization" = "Bearer &api_key";
+        %end;
+      run;
+      filename jc_file clear;
+      %put NOTE: saved &out_dir/&fpath;
+    %end;
+  %end;
+
+  libname jc_r clear;
+  filename jc_resp clear;
+  filename jc_hdrs clear;
+  filename jc_body clear;
+%mend jenner_run;
+
+
+/* ====================================================================
+ * %jenner_list — show the bundles visible in &dir and how to run them.
+ * Called by %jenner_check_all when &dir has no bundles.
+ * (The %include-time banner only prints usage hints;
+ * it does not call this macro.)
+ * ==================================================================== */
+%macro jenner_list(dir=jenner-check);
+  /* _n: -1 = directory could not be opened, otherwise the bundle count */
+  %local _n;
+  %let _n = 0;
+  filename jcld "&dir";
+  data work._jc_list;
+    length bundle $256 name $256;
+    did = dopen('jcld');
+    if did = 0 then do;
+      call symputx('_n', -1, 'L');
+      stop;
+    end;
+    n = dnum(did);
+    do i = 1 to n;
+      name = dread(did, i);
+      /* A bundle is an entry named t<digit>..._... that holds a
+       * script.sas, the same rule run_jenner.sh applies. A bare
+       * "starts with t" test would also pick up stray files such as the
+       * tNNN_*_response.json files that run_jenner.sh --all writes. */
+      if prxmatch('/^t[0-9].*_/', strip(name))
+         and fileexist(cats("&dir", '/', name, '/script.sas')) then do;
+        bundle = name;
+        output;
+      end;
+    end;
+    rc = dclose(did);
+    keep bundle;
+  run;
+  filename jcld clear;
+
+  /* Double quotes here: macro variables are not resolved inside single
+   * quotes, so the original messages printed a literal ampersand-dir. */
+  %if &_n = -1 %then %do;
+    %put NOTE: No directory "&dir" — are you at the repo root? Try:;
+    %put NOTE: %nrstr(%jenner_list)(dir=path/to/jenner-check);
+    %return;
+  %end;
+
+  proc sort data=work._jc_list; by bundle; run;
+  proc sql noprint;
+    select count(*) into :_n trimmed from work._jc_list;
+  quit;
+
+  %if &_n = 0 %then %do;
+    %put NOTE: No tNNN_* bundles found in "&dir".;
+    %return;
+  %end;
+
+  %put;
+  %put ======================================================================;
+  %put &_n bundle(s) in &dir:;
+  data _null_;
+    set work._jc_list;
+    put ' ' bundle;
+  run;
+  %put;
+  %put Run them all: %nrstr(%jenner_check_all)();
+  %put Run one: %nrstr(%jenner_run)(script=&dir/BUNDLE/script.sas, autoexec=&dir/BUNDLE/autoexec.sas);
+  %put ======================================================================;
+%mend jenner_list;
+
+
+/* ====================================================================
+ * %jenner_check_all — run every tNNN_ bundle through the API and
+ * write a CSV summary the owner can attach to the PR.
+ *
+ * Parameters:
+ *   dir=      directory holding the bundles
+ *   host=     API host name, passed to %jenner_run
+ *   api_key=  optional bearer token, passed to %jenner_run
+ *   report=   CSV file name, written inside &dir
+ *
+ * Output: work.jc_results (bundle, status, message, run_id) and
+ * &dir/&report.
+ * ==================================================================== */
+%macro jenner_check_all(
+  dir=jenner-check,
+  host=api.jenneranalytics.com,
+  api_key=,
+  report=jenner_check_report.csv
+);
+
+  /* enumerate tNNN_* subdirs */
+  filename jcd "&dir";
+  data work.jc_bundles;
+    length bundle $256 name $256;
+    did = dopen('jcd');
+    if did = 0 then do;
+      /* The hint is a single-quoted literal on purpose: inside double
+       * quotes the macro processor would execute the jenner_list call
+       * in the middle of this DATA step. */
+      put "ERROR: cannot open &dir — are you at the repo root? Try "
+          '%jenner_list(dir=path/to/jenner-check);';
+      stop;
+    end;
+    n = dnum(did);
+    do i = 1 to n;
+      name = dread(did, i);
+      /* same bundle rule as %jenner_list and run_jenner.sh */
+      if prxmatch('/^t[0-9].*_/', strip(name))
+         and fileexist(cats("&dir", '/', name, '/script.sas')) then do;
+        bundle = cats("&dir", '/', name);
+        output;
+      end;
+    end;
+    rc = dclose(did);
+    keep bundle;
+  run;
+  filename jcd clear;
+  proc sort data=work.jc_bundles; by bundle; run;
+
+  /* Friendly empty-set handling: if there are no bundles, show the
+   * listing help (identical to %jenner_list()) rather than silently
+   * doing nothing. */
+  %local _any;
+  proc sql noprint; select count(*) into :_any trimmed from work.jc_bundles; quit;
+  %if &_any = 0 %then %do;
+    %put NOTE: No tNNN_* bundles under "&dir". Nothing to run.;
+    %jenner_list(dir=&dir)
+    %return;
+  %end;
+
+  /* result accumulator */
+  data work.jc_results;
+    length bundle $256 status $16 message $512 run_id $48;
+    stop;
+  run;
+
+  %local i b;
+  %do i = 1 %to &_any;
+    data _null_;
+      set work.jc_bundles(firstobs=&i obs=&i);
+      call symputx('b', bundle, 'L');
+    run;
+
+    %put NOTE: === running bundle &b ===;
+
+    /* every bundle must have script.sas; autoexec.sas is optional
+     * jenner-check bookkeeping (e.g. `options obs=100;` + any owner
+     * autoexec inlined). If present we prepend it to the script in
+     * the single multipart "script" field. Script.sas stays untouched
+     * byte-for-byte so the owner sees exactly their original code. */
+    %local sc ax;
+    %let sc = &b/script.sas;
+    %if %sysfunc(fileexist(&b/autoexec.sas)) %then %let ax = &b/autoexec.sas;
+    %else %let ax = ;
+
+    /* deterministic=1 matches what run_jenner.sh and run_jenner.bat send */
+    %jenner_run(script=&sc, autoexec=&ax, host=&host, api_key=&api_key,
+                deterministic=1, out_dir=&b/actual)
+
+    /* Verdict — minimal: a bundle that ships expected.json passes when
+     * the API reported exit_code 0 and fails otherwise. The contents of
+     * expected.json are not compared here; the file is only opened so a
+     * malformed one shows up in the log. A richer validator can live
+     * alongside expected.json as validate.sas (SAS-side) but isn't
+     * required. */
+    %local verdict msg;
+    %let verdict = unknown;
+    %let msg = no expected.json;
+    %if %sysfunc(fileexist(&b/expected.json)) %then %do;
+      filename jcexp "&b/expected.json";
+      libname jcexp JSON fileref=jcexp;
+
+      data _null_;
+        if 0 then set jcexp.root;
+        if "&JENNER_EXIT_CODE" = "0" then do;
+          call symputx('verdict', 'pass', 'L');
+          call symputx('msg', cats('exit=0 run_id=', "&JENNER_RUN_ID"), 'L');
+        end;
+        else if missing("&JENNER_EXIT_CODE") then do;
+          /* %jenner_run leaves the exit code blank when no JSON came back */
+          call symputx('verdict', 'fail', 'L');
+          call symputx('msg', 'no exit code returned - request failed or response was not JSON', 'L');
+        end;
+        else do;
+          call symputx('verdict', 'fail', 'L');
+          call symputx('msg', cats('exit=', "&JENNER_EXIT_CODE"), 'L');
+        end;
+      run;
+
+      libname jcexp clear;
+      filename jcexp clear;
+    %end;
+
+    data work._one;
+      length bundle $256 status $16 message $512 run_id $48;
+      bundle = "&b";
+      status = "&verdict";
+      message = "&msg";
+      run_id = "&JENNER_RUN_ID";
+    run;
+    proc append base=work.jc_results data=work._one force; run;
+  %end;
+
+  /* write CSV report */
+  proc export data=work.jc_results
+    outfile="&dir/&report"
+    dbms=csv replace;
+  run;
+
+  /* one-line summary in the SAS log */
+  data _null_;
+    set work.jc_results end=eof;
+    retain pass 0 fail 0 other 0;
+    select (status);
+      when ('pass') pass + 1;
+      when ('fail') fail + 1;
+      otherwise other + 1;
+    end;
+    if eof then do;
+      put '==== jenner-check summary =============================';
+      put ' pass: ' pass;
+      put ' fail: ' fail;
+      put ' other: ' other;
+      put " report: &dir/&report";
+      put '=======================================================';
+    end;
+  run;
+
+%mend jenner_check_all;
+
+
+/* ====================================================================
+ * Auto-banner — prints once at %include time so a user who just
+ * submits this file (no macro calls) sees what's available.
+ * Suppressed if %let JENNER_QUIET = 1; before %include.
+ *
+ * Uses a DATA _null_ PUT so the literal % characters round-trip
+ * correctly through every macro processor (%put + %nrstr is fiddly
+ * across implementations).
+ * ==================================================================== */
+%macro _jc_banner;
+  /* Opt-out: skip the banner when JENNER_QUIET exists and equals 1. The
+   * existence test is a separate, outer %if so %superq is never applied
+   * to an undefined name. */
+  %if %symexist(JENNER_QUIET) %then %do;
+    %if %superq(JENNER_QUIET) = 1 %then %return;
+  %end;
+  /* Each command shown to the user is assembled from byte(37), the
+   * percent sign, plus the rest of the text. A literal percent-name
+   * inside a quoted string is expanded by some macro processors
+   * (including Jenner) during the PUT, which would swallow the banner
+   * content. cats() joins the pieces without inserting blanks. */
+  data _null_;
+    length pct $1 cmd_all cmd_list cmd_run cmd_quiet $200;
+    pct       = byte(37);
+    cmd_all   = cats(pct, 'jenner_check_all();');
+    cmd_list  = cats(pct, 'jenner_list();');
+    cmd_run   = cats(pct, 'jenner_run(script=path);');
+    cmd_quiet = cats(pct, 'let JENNER_QUIET=1;');
+
+    put ' ';
+    put '======================================================================';
+    put ' Jenner-check runner loaded.';
+    put ' ';
+    put ' In your SAS session, try:';
+    put ' ' cmd_all ' run every bundle + CSV report';
+    put ' ' cmd_list ' list bundles found';
+    put ' ' cmd_run ' run one script';
+    put ' ';
+    put ' Default directory is ./jenner-check (override with dir= option).';
+    put ' ';
+    put ' To suppress this banner, run ' cmd_quiet ' BEFORE including this file.';
+    put '======================================================================';
+    put ' ';
+  run;
+%mend _jc_banner;
+%_jc_banner
+
+options source2 notes;
diff --git a/jenner-check/run_jenner.sh b/jenner-check/run_jenner.sh
new file mode 100755
index 0000000..99cd395
--- /dev/null
+++ b/jenner-check/run_jenner.sh
@@ -0,0 +1,214 @@
+#!/usr/bin/env bash
+# run_jenner.sh - mac/linux runner for Jenner compatibility checks.
+#
+# Quick start:
+#   cd jenner-check/
+#   ./run_jenner.sh # lists bundles in the current dir
+#   ./run_jenner.sh t001_something # run that one
+#   ./run_jenner.sh --all # run every bundle in the current dir
+#
+# Usage: ./run_jenner.sh [bundle-dir | script.sas | --all | --list] [response.json]
+#
+#   (no arg) If the current directory has tNNN_* bundles, list them
+#   with a copy-paste command. Otherwise show this help.
+#
+#   --all Run every tNNN_* bundle in the current directory in
+#   sequence, print a pass/fail summary.
+#
+#   --list, -l List the bundles visible in the current directory and
+#   exit without running anything.
+#
+#   bundle-dir A directory containing script.sas and (optionally)
+#   autoexec.sas. The two are concatenated (autoexec first,
+#   then a blank line, then script) and submitted together.
+#   This is the normal case.
+#
+#   script.sas A single .sas file. Submitted as-is — no autoexec.
+#
+# The API response is written to the path given as the second argument (or
+# response.json in the current directory if omitted) and the most useful
+# fields are also printed to stdout for a quick sanity check.
+#
+# Requires: bash 4+, curl. Mainstream Linux distros ship both. macOS ships
+# curl, but its stock /bin/bash is 3.2 — use a newer bash there (e.g. from
+# Homebrew). Windows: use run_jenner.bat (single-file mode) or WSL.
+#
+# IMPORTANT: execute this script, don't source it. Running with `. ./...`
+# or `source ./...` will short-circuit error handling and can close your
+# terminal if an error path fires.
+
+# --- refuse to be sourced ------------------------------------------------
+# At top level, `return` only succeeds inside a sourced script. If we ARE
+# sourced, print a message and return 1 so we don't kill the parent shell
+# with exit. If we're running directly, (return 0) fails and we fall
+# through.
+(return 0 2>/dev/null) && {
+  printf 'run_jenner.sh: execute this script, do not source it.\n ./run_jenner.sh <bundle-dir>\n' >&2
+  return 1
+}
+
+set -eu
+
+# --- helpers -------------------------------------------------------------
+# Emit the list of tNNN_* bundles in the current working directory. A
+# "bundle" is a directory matching t[0-9]*_* that contains a script.sas
+# file. Writes one path per line, keeping the leading ./ ; empty output
+# if nothing found.
+list_bundles_here() {
+  local d
+  for d in ./t[0-9]*_*/ ; do
+    # An unmatched glob stays literal and fails the -d test, so it is skipped.
+    [[ -d "$d" && -f "$d/script.sas" ]] || continue
+    printf '%s\n' "${d%/}" # strip trailing slash, keep leading ./
+  done
+}
+
+# Fill the global array _found with the output of list_bundles_here, one
+# element per line. A read loop is used instead of `mapfile` because
+# mapfile needs bash 4+, and stock macOS still ships bash 3.2.
+collect_bundles() {
+  _found=()
+  local line
+  while IFS= read -r line; do
+    _found+=("$line")
+  done < <(list_bundles_here)
+}
+
+# Render a helpful listing + copy-paste suggestion, then exit non-zero
+# (we haven't done anything). Used when the user runs with no args; the
+# caller only invokes this when at least one bundle exists.
+show_bundle_listing_then_exit() {
+  collect_bundles
+  printf 'This directory has %d bundle%s:\n' \
+    "${#_found[@]}" "$([[ ${#_found[@]} -eq 1 ]] || echo s)"
+  local b
+  for b in "${_found[@]}"; do
+    printf ' %s\n' "${b#./}"
+  done
+  printf '\nRun one: ./run_jenner.sh %s\n' "${_found[0]#./}"
+  printf 'Run them all: ./run_jenner.sh --all\n'
+  printf 'Just list: ./run_jenner.sh --list\n'
+  exit 2
+}
+
+# Show the usage block when we have nothing better to offer.
+#   $1  exit status to use (default 2)
+show_usage_then_exit() {
+  local status=${1:-2}
+  {
+    printf 'Usage: %s [bundle-dir | script.sas | --all | --list] [response.json]\n\n' "$(basename "$0")"
+    printf 'Examples:\n'
+    printf ' %s t001_my_bundle # run one bundle\n' "$(basename "$0")"
+    printf ' %s --all # run every tNNN_* bundle in this dir\n' "$(basename "$0")"
+    printf ' %s path/to/script.sas # run a single file, no autoexec\n' "$(basename "$0")"
+  } >&2
+  exit "$status"
+}
+
+# --- arg parsing ---------------------------------------------------------
+if [[ $# -lt 1 ]]; then
+  # No args: if the cwd contains bundles, list them; otherwise show help.
+  collect_bundles
+  if [[ ${#_found[@]} -gt 0 ]]; then
+    show_bundle_listing_then_exit
+  fi
+  show_usage_then_exit 2
+fi
+
+HOST=${JENNER_HOST:-api.jenneranalytics.com}
+
+case "$1" in
+  -h|--help)
+    show_usage_then_exit 0
+    ;;
+  -l|--list)
+    collect_bundles
+    if [[ ${#_found[@]} -eq 0 ]]; then
+      printf 'No tNNN_* bundles found in %s\n' "$(pwd)"
+      exit 0
+    fi
+    printf 'Bundles in %s:\n' "$(pwd)"
+    for b in "${_found[@]}"; do
+      printf ' %s\n' "${b#./}"
+    done
+    exit 0
+    ;;
+  --all)
+    collect_bundles
+    if [[ ${#_found[@]} -eq 0 ]]; then
+      printf 'No tNNN_* bundles found in %s\n' "$(pwd)" >&2
+      exit 3
+    fi
+    _pass=0; _fail=0
+    for b in "${_found[@]}"; do
+      printf '\n── %s ──\n' "${b#./}"
+      # Re-invoke this script per bundle; each response goes to its own file.
+      if "$0" "$b" "${b#./}_response.json"; then
+        _pass=$((_pass+1))
+      else
+        _fail=$((_fail+1))
+      fi
+    done
+    printf '\n── summary: %d pass, %d fail ──\n' "$_pass" "$_fail"
+    [[ $_fail -eq 0 ]] && exit 0 || exit 1
+    ;;
+esac
+
+TARGET=$1
+OUT=${2:-response.json}
+
+# --- assemble the submission body ---------------------------------------
+# If TARGET is a directory, treat it as a bundle. If it's a file, submit
+# it directly.
+CLEANUP=()
+cleanup() {
+  local f
+  # ${arr[@]+...} expands to nothing for an empty array; a plain
+  # "${CLEANUP[@]}" trips `set -u` on bash older than 4.4 when a single
+  # file is submitted and no temp file was registered.
+  for f in ${CLEANUP[@]+"${CLEANUP[@]}"}; do rm -f "$f"; done
+}
+trap cleanup EXIT
+
+if [[ -d "$TARGET" ]]; then
+  if [[ ! -f "$TARGET/script.sas" ]]; then
+    printf 'error: %s is a directory but has no script.sas\n' "$TARGET" >&2
+    exit 3
+  fi
+  SUBMIT=$(mktemp -t jc_submit.XXXXXX.sas)
+  CLEANUP+=("$SUBMIT")
+  # autoexec first, then a blank line, then the script itself
+  if [[ -f "$TARGET/autoexec.sas" ]]; then
+    cat "$TARGET/autoexec.sas" > "$SUBMIT"
+    printf '\n' >> "$SUBMIT"
+  fi
+  cat "$TARGET/script.sas" >> "$SUBMIT"
+  printf 'Submitting bundle: %s\n' "$TARGET"
+  if [[ -f "$TARGET/autoexec.sas" ]]; then
+    printf ' autoexec.sas (%d bytes) + script.sas (%d bytes)\n' \
+      "$(wc -c < "$TARGET/autoexec.sas")" "$(wc -c < "$TARGET/script.sas")"
+  else
+    printf ' script.sas (%d bytes), no autoexec\n' "$(wc -c < "$TARGET/script.sas")"
+  fi
+elif [[ -f "$TARGET" ]]; then
+  SUBMIT=$TARGET
+  printf 'Submitting file: %s (%d bytes)\n' "$TARGET" "$(wc -c < "$TARGET")"
+else
+  printf 'error: %s is neither a file nor a directory\n' "$TARGET" >&2
+  exit 3
+fi
+
+# --- POST ---------------------------------------------------------------
+# The response body goes to $OUT; -w prints only the HTTP status code,
+# which is what the command substitution captures.
+printf 'POST https://%s/v1/run ... ' "$HOST"
+HTTP_CODE=$(curl -sS -o "$OUT" -w '%{http_code}' -X POST \
+  "https://${HOST}/v1/run" \
+  -F "script=@${SUBMIT};type=application/x-sas" \
+  -F "deterministic=1" \
+  -F "timeout=60")
+printf 'HTTP %s\n' "$HTTP_CODE"
+
+if [[ "$HTTP_CODE" != "200" ]]; then
+  printf 'API returned non-200 — raw response in %s\n' "$OUT" >&2
+  exit 4
+fi
+
+# --- summarise ----------------------------------------------------------
+# Best-effort: use python if present, otherwise just point at the raw JSON.
+printf 'Response written to %s\n' "$OUT"
+if ! command -v python3 >/dev/null 2>&1; then
+  # No interpreter available: point the user at the raw response instead.
+  printf ' (install python3 for a pretty summary; raw JSON in %s)\n' "$OUT"
+else
+  # Print the headline fields, then at most the first ten log lines.
+  python3 - "$OUT" <<'PY'
+import json, sys
+with open(sys.argv[1]) as fh:
+    r = json.load(fh)
+for label, key in (
+    (" status : ", "status"),
+    (" exit_code : ", "exit_code"),
+    (" duration_ms: ", "duration_ms"),
+    (" run_id : ", "run_id"),
+    (" jenner_ver : ", "jenner_version"),
+):
+    print(f"{label}{r.get(key)}")
+log = r.get('log', '')
+if log:
+    print(' log (first 10 lines):')
+    for entry in log.splitlines()[:10]:
+        print(f' {entry}')
+PY
+fi
diff --git a/jenner-check/run_jenner_check.sas b/jenner-check/run_jenner_check.sas
new file mode 100644
index 0000000..0972449
--- /dev/null
+++ b/jenner-check/run_jenner_check.sas
@@ -0,0 +1,212 @@
+/* run_jenner_check.sas — Jenner compatibility test runner
+ *
+ * Usage (from the repo root):
+ *   sas -sysin jenner-check/run_jenner_check.sas -set JC_ROOT "$(pwd)"
+ * or, if invoked from jenner-check/ directly:
+ *   sas -sysin run_jenner_check.sas
+ *
+ * What it does:
+ *   1. Enumerates every subdirectory of jenner-check/ whose name starts
+ *      with "t" (t001_…, t002_…, …). Those are individual test bundles.
+ *   2. For each bundle:
+ *      a. Redirects the log and listing to bundle-local files
+ *         (actual.log, actual.lst) so we can attach or diff them later.
+ *      b. %includes script.sas.
+ *      c. If validate.sas exists, %includes it. The validator is expected
+ *         to produce a single-row dataset work.jc_validation with columns
+ *         status $8 ("pass"/"fail") and message $256.
+ *      d. Restores the default log + listing destinations.
+ *      e. Appends one row to work.jc_results.
+ *   3. Writes jenner-check/jenner_check_report.csv with one row per
+ *      test and prints a summary listing.
+ *
+ * The test contract (what the test generator must produce in each bundle):
+ *
+ *   jenner-check/tNNN_name/
+ *     script.sas        required   the script under test
+ *     validate.sas      optional   produces work.jc_validation
+ *     input/            optional   data files the script reads
+ *     expected/         optional   reference output we hoped for
+ *     meta.json         optional   {source_file, jenner_version, tier}
+ *
+ * Design notes:
+ *   - Portable across UNIX and Windows SAS (no pipe/x commands).
+ *   - Each test's log/listing is captured separately so the owner can ship
+ *     us just the failures without leaking unrelated output.
+ *   - We never fail the *runner* on a test failure. We just record it.
+ *   - If validate.sas is missing we record status="no_validator" — owner can
+ *     still attach the report to the PR; we treat that as "partial signal."
+ */
+
+%let JC_ROOT = %sysfunc(sysget(JC_ROOT));
+%if %superq(JC_ROOT) = %str() %then %do;
+  /* JC_ROOT not supplied: resolve relative to the current directory (cwd/jenner-check first) */
+  %let JC_ROOT = .;  /* never default to the WORK path: it always exists and would short-circuit the fallback below */
+  %let JC_TESTS_DIR = jenner-check;
+%end;
+%else %do;
+  %let JC_TESTS_DIR = &JC_ROOT/jenner-check;
+%end;
+
+/* Fallback discovery: keep JC_TESTS_DIR when it exists, otherwise fall back so invocation from the jenner-check dir itself works */
+%macro jc_resolve_tests_dir;
+  %local candidate;
+  %let candidate = &JC_TESTS_DIR;
+  %if %sysfunc(fileexist(&candidate)) = 0 %then %do;
+    /* Try cwd/jenner-check, then cwd */
+    %let candidate = jenner-check;
+    %if %sysfunc(fileexist(&candidate)) = 0 %then %let candidate = .;
+  %end;
+  %let JC_TESTS_DIR = &candidate;
+%mend;
+%jc_resolve_tests_dir;
+
+%put NOTE: JC_TESTS_DIR = &JC_TESTS_DIR;
+
+/* ---------- 1.
Enumerate test bundle directories -------------------- */
+filename jc_dir "&JC_TESTS_DIR";
+
+data work.jc_tests;
+  length test_name $64;
+  rc = filename('jcd', "&JC_TESTS_DIR");
+  did = dopen('jcd');
+  if did = 0 then do;
+    put "ERROR: Cannot open &JC_TESTS_DIR";
+    stop;
+  end;
+  n = dnum(did);
+  do i = 1 to n;
+    name = dread(did, i);
+    /* Only directories whose name starts with "t" (t001_…, t002_…); an entry counts as a directory when DOPEN on it succeeds */
+    if substr(name, 1, 1) = 't' then do;
+      child_fref = 'jcchild';
+      rc2 = filename(child_fref, cats("&JC_TESTS_DIR", '/', name));
+      cdid = dopen(child_fref);
+      if cdid > 0 then do;
+        test_name = name;
+        output;
+        rc2 = dclose(cdid);
+      end;
+      rc2 = filename(child_fref);
+    end;
+  end;
+  rc = dclose(did);
+  rc = filename('jcd');
+  keep test_name;
+run;
+
+proc sort data=work.jc_tests; by test_name; run;
+
+/* ---------- 2. Per-test runner macro: runs bundle &dir (a directory name under JC_TESTS_DIR) and appends one row to work.jc_results */
+%macro jc_run_one(dir);
+  %local tdir rc validate_present v_status v_message ran_rc;
+  %let tdir = &JC_TESTS_DIR/&dir;
+  %let ran_rc = .;
+  %let v_status = ;
+  %let v_message = ;
+
+  /* Confirm script.sas exists; without it record status="missing_script" and return */
+  %if %sysfunc(fileexist(&tdir/script.sas)) = 0 %then %do;
+    %put WARNING: &dir has no script.sas — skipping;
+    data work._one;
+      length test_name $64 status $32 sas_rc 8 message $256;
+      test_name = "&dir"; status = "missing_script"; sas_rc = .;
+      message = "no script.sas in bundle";
+    run;
+    proc append base=work.jc_results data=work._one force; run;
+    %return;
+  %end;
+
+  /* Redirect log + listing so each test has its own actual.{log,lst} */
+  proc printto log="&tdir/actual.log"
+               print="&tdir/actual.lst"
+               new;
+  run;
+
+  /* SYSERR is read-only and is reset at every step boundary, so track the bundle with the writable, cumulative SYSCC: clear it, run the script, read it back */
+  %let syscc = 0;
+  %include "&tdir/script.sas" / nosource2;
+  %let ran_rc = &syscc;
+
+  /* Validator — optional; expected to leave a one-row work.jc_validation (status, message) */
+  %let validate_present = %sysfunc(fileexist(&tdir/validate.sas));
+  %if &validate_present %then %do;
+    /* Clear any prior result */
+    proc datasets lib=work nolist;
+      delete jc_validation / memtype=data;
+    quit;
+    %include "&tdir/validate.sas" / nosource2;
+    %if %sysfunc(exist(work.jc_validation)) %then %do;
+      data _null_;
+        set work.jc_validation(obs=1);
+        call symputx('v_status', status, 'L');
+        call symputx('v_message', message, 'L');
+      run;
+    %end;
+    %else %do;
+      %let v_status = no_validation_output;
+      %let v_message = validate.sas ran but did not produce work.jc_validation;
+    %end;
+  %end;
+  %else %do;
+    %let v_status = no_validator;
+    %let v_message = no validate.sas in bundle;
+  %end;
+
+  /* Restore default destinations before we touch work.jc_results */
+  proc printto; run;
+
+  data work._one;
+    length test_name $64 status $32 sas_rc 8 message $256;
+    test_name = "&dir";
+    status = symget('v_status');   /* SYMGET reads the value at run time, so quotes, & or % in validator text cannot break this step */
+    sas_rc = &ran_rc;
+    message = symget('v_message');
+  run;
+  proc append base=work.jc_results data=work._one force; run;
+%mend jc_run_one;
+
+/* ---------- 3. Initialize result table and iterate ------------------ */
+data work.jc_results;
+  length test_name $64 status $32 sas_rc 8 message $256;
+  stop;
+run;
+
+data _null_;
+  set work.jc_tests;
+  call execute('%nrstr(%jc_run_one('||strip(test_name)||'));');
+run;
+
+/* ---------- 4.
Emit report ----------------------------------------- */
+proc export data=work.jc_results
+            outfile="&JC_TESTS_DIR/jenner_check_report.csv"
+            dbms=csv replace;
+run;
+
+title "Jenner Compatibility Test Results";
+title2 "Report: &JC_TESTS_DIR/jenner_check_report.csv";
+proc print data=work.jc_results noobs;
+  var test_name status sas_rc message;
+run;
+
+data _null_;
+  retain pass fail other 0;
+  /* Test eof BEFORE the SET: after the last row (or at once for an empty table) this prints the totals, then SET ends the step */
+  if eof then do;
+    put "NOTE: ============================================";
+    put "NOTE: Jenner compatibility: pass=" pass " fail=" fail " other=" other;
+    put "NOTE: Full report at &JC_TESTS_DIR/jenner_check_report.csv";
+    put "NOTE: Please attach that CSV to the PR comment.";
+    put "NOTE: ============================================";
+  end;
+  set work.jc_results end=eof;
+  select (status);
+    when ('pass') pass = pass + 1;
+    when ('fail') fail = fail + 1;
+    otherwise     other = other + 1;
+  end;
+run;
+title;
+title2;
diff --git a/jenner-check/t001_getnobs/autoexec.sas b/jenner-check/t001_getnobs/autoexec.sas
new file mode 100644
index 0000000..efd2d96
--- /dev/null
+++ b/jenner-check/t001_getnobs/autoexec.sas
@@ -0,0 +1,4 @@
+/* autoexec for t001_getnobs
+   options obs=100 keeps the unlicensed run bounded; the bundle's mock
+   dataset is tiny so the cap never bites.
*/ +options obs=100; diff --git a/jenner-check/t001_getnobs/expected.json b/jenner-check/t001_getnobs/expected.json new file mode 100644 index 0000000..1f7381d --- /dev/null +++ b/jenner-check/t001_getnobs/expected.json @@ -0,0 +1,19 @@ +{ + "_captured_at": "2026-06-18T01:23:20.553752Z", + "_captured_run_id": "r_019ed8530c6c7552a54ab392fa9306f9", + "status": "ok", + "exit_code": 0, + "log_contains": [ + "GETNOBS returned NO=", + "8 observations", + "hospitalization has" + ], + "log_does_not_contain": [ + "ERROR:", + "[JENNER-ERROR" + ], + "diagnostics": { + "parse_warnings": [], + "runtime_warnings": [] + } +} \ No newline at end of file diff --git a/jenner-check/t001_getnobs/expected/files.md b/jenner-check/t001_getnobs/expected/files.md new file mode 100644 index 0000000..c2b31b8 --- /dev/null +++ b/jenner-check/t001_getnobs/expected/files.md @@ -0,0 +1,11 @@ +This bundle produces no output files; the result is the macro variable NO +surfaced in the log. The dataset URLs below are tied to a specific run +(run_id `r_019ed8530c6c7552a54ab392fa9306f9`) and expire when that run is reaped — re-running the bundle +with run_jenner.sh regenerates them. + +## Datasets + +| name | rows | columns | preview | +|---|---|---|---| +| _null_ | 1 | ['nlobs'] | https://api.jenneranalytics.com/v1/run/r_019ed8530c6c7552a54ab392fa9306f9/datasets/_null_?token=98571ba45bce4ddab8087e8773e68fb3 | +| hospitalization | 8 | ['pat_id', 'sex', 'age', 'los'] | https://api.jenneranalytics.com/v1/run/r_019ed8530c6c7552a54ab392fa9306f9/datasets/hospitalization?token=98571ba45bce4ddab8087e8773e68fb3 | diff --git a/jenner-check/t001_getnobs/expected/log.txt b/jenner-check/t001_getnobs/expected/log.txt new file mode 100644 index 0000000..cd349e1 --- /dev/null +++ b/jenner-check/t001_getnobs/expected/log.txt @@ -0,0 +1,38 @@ +Jenner 0.1.0 (Unlicensed - limited to 100 observations) +Get a license at https://jenneranalytics.com/license + +NOTE: Option OBS changed to 100. 
+NOTE: DATA work.hospitalization + +NOTE: Processing inline DATALINES (8 lines) + +NOTE: Read 8 rows from DATALINES. +NOTE: Wrote work.hospitalization (8 rows, 4 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC SQL + +NOTE: Table cnt created. +NOTE: PROC SQL statement used. +NOTE: DATA _null_ + + +NOTE: Read 1 rows from cnt. +NOTE: Wrote _null_ (1 rows, 1 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC DELETE datasets=cnt + +NOTE: Deleting CNT (memtype=DATA). +NOTE: 1 dataset(s) deleted. +NOTE: GETNOBS returned NO= 8 observations +NOTE: DATA _null_ + +NOTE: hospitalization has 8 records + +NOTE: Wrote _null_ (0 rows, 0 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds diff --git a/jenner-check/t001_getnobs/expected/output.txt b/jenner-check/t001_getnobs/expected/output.txt new file mode 100644 index 0000000..e69de29 diff --git a/jenner-check/t001_getnobs/meta.json b/jenner-check/t001_getnobs/meta.json new file mode 100644 index 0000000..b2309ce --- /dev/null +++ b/jenner-check/t001_getnobs/meta.json @@ -0,0 +1,8 @@ +{ + "bundle": "t001_getnobs", + "source_file": "Macros/getnobs.sas", + "source_blob_sha": "d5ecd3da85a0701d91b990478e9972e4504fd3cc", + "source_commit": "e08d2df61662f0ddd0607c7491a55a929dd841f7", + "tier": "real_data", + "notes": "GETNOBS macro body reproduced verbatim from Macros/getnobs.sas; exercised on an 8-row mock hospitalization cohort. Returns obs count via DICTIONARY.TABLES into global macro var NO (=8). Driver (mock data + %put) added so it runs standalone." +} diff --git a/jenner-check/t001_getnobs/script.sas b/jenner-check/t001_getnobs/script.sas new file mode 100644 index 0000000..226d891 --- /dev/null +++ b/jenner-check/t001_getnobs/script.sas @@ -0,0 +1,55 @@ +/* DataFit Toolkit - GETNOBS macro, exercised on a small mock cohort. + + GETNOBS is an intermediate macro from Macros/getnobs.sas used by many + of the toolkit's data-quality macros. 
It returns the number of
+   observations in a dataset through the global macro variable NO, read
+   from DICTIONARY.TABLES so the count is exact even for very large
+   tables. The macro body below is reproduced from the toolkit; only the
+   driver (mock cohort + %put) is added so it runs standalone. */
+
+%MACRO GETNOBS(DS) ;
+  %GLOBAL NO;
+  %LET NO=;
+  %LET DS=%UPCASE(&DS);
+  %IF %INDEX(&DS,.)=0 %THEN %LET _LIBNAME=WORK;
+  %ELSE %DO;
+    %LET _LIBNAME=%SCAN(&DS,1,'.');
+    %LET DS=%SCAN(&DS,2,'.');
+  %END;
+
+  PROC SQL NOPRINT;
+    create table cnt as select nlobs from dictionary.tables
+    where libname= "&_LIBNAME" AND MEMNAME="&DS";
+  quit;
+
+  data _null_;
+    set cnt;
+    call symput ('NO', nlobs);
+  run;
+
+  proc delete data=cnt; run;
+%MEND GETNOBS;
+
+/* --- mock cohort: a small hospitalization-style table (8 data lines, so GETNOBS should report 8) --- */
+data work.hospitalization;
+  length pat_id $8 sex $1;
+  input pat_id $ sex $ age los;
+datalines;
+P0000001 F 67 4
+P0000002 M 54 2
+P0000003 F 81 9
+P0000004 M 39 1
+P0000005 F 72 6
+P0000006 M 60 3
+P0000007 F 45 2
+P0000008 M 88 12
+;
+run;
+
+/* GETNOBS reports the count through the global &NO. NOTE(review): CALL SYMPUT is handed the numeric NLOBS, so &NO presumably carries leading blanks - confirm before comparing it as text */
+%GETNOBS(work.hospitalization);
+%put NOTE: GETNOBS returned NO=&NO observations;
+
+data _null_;
+  put "NOTE: hospitalization has &NO records";
+run;
diff --git a/jenner-check/t002_getvarlist/autoexec.sas b/jenner-check/t002_getvarlist/autoexec.sas
new file mode 100644
index 0000000..c1d01bf
--- /dev/null
+++ b/jenner-check/t002_getvarlist/autoexec.sas
@@ -0,0 +1,2 @@
+/* autoexec for t002_getvarlist */
+options obs=100;
diff --git a/jenner-check/t002_getvarlist/expected.json b/jenner-check/t002_getvarlist/expected.json
new file mode 100644
index 0000000..f031ba9
--- /dev/null
+++ b/jenner-check/t002_getvarlist/expected.json
@@ -0,0 +1,19 @@
+{
+  "_captured_at": "2026-06-18T01:23:59.358982+00:00",
+  "_captured_run_id": "r_019ed853b7597372912051d520b5bffa",
+  "status": "ok",
+  "exit_code": 0,
+  "log_contains": [
+    "LIB=WORK DSN=PATIENTS",
+    "NUMERIC variables: age weight",
"CHARACTER variables: pat_id sex region" + ], + "log_does_not_contain": [ + "ERROR:", + "[JENNER-ERROR" + ], + "diagnostics": { + "parse_warnings": [], + "runtime_warnings": [] + } +} \ No newline at end of file diff --git a/jenner-check/t002_getvarlist/expected/files.md b/jenner-check/t002_getvarlist/expected/files.md new file mode 100644 index 0000000..fe6e150 --- /dev/null +++ b/jenner-check/t002_getvarlist/expected/files.md @@ -0,0 +1,10 @@ +This bundle produces no output files; the results are the macro +variables NVARLIST / CVARLIST surfaced in the log. The dataset URLs +below are tied to run_id `r_019ed853b7597372912051d520b5bffa` and expire when that run is +reaped — re-running the bundle regenerates them. + +## Datasets + +| name | rows | columns | preview | +|---|---|---|---| +| patients | 5 | ['pat_id', 'sex', 'region', 'age', 'weight'] | https://api.jenneranalytics.com/v1/run/r_019ed853b7597372912051d520b5bffa/datasets/patients?token=8cb184a18886419ab8b713766c686b80 | diff --git a/jenner-check/t002_getvarlist/expected/log.txt b/jenner-check/t002_getvarlist/expected/log.txt new file mode 100644 index 0000000..f642b3f --- /dev/null +++ b/jenner-check/t002_getvarlist/expected/log.txt @@ -0,0 +1,29 @@ +Jenner 0.1.0 (Unlicensed - limited to 100 observations) +Get a license at https://jenneranalytics.com/license + +NOTE: Option OBS changed to 100. +NOTE: DATA work.patients + +NOTE: Processing inline DATALINES (5 lines) + +NOTE: Read 5 rows from DATALINES. +NOTE: Wrote work.patients (5 rows, 5 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC SQL + +NOTE: Macro variable NVARLIST = age weight +NOTE: Macro variable CVARLIST = pat_id sex region +NOTE: PROC SQL statement used. +NOTE: LIB=WORK DSN=PATIENTS +NOTE: NUMERIC variables: age weight +NOTE: CHARACTER variables: pat_id sex region +NOTE: DATA _null_ + +NOTE: numeric=[age weight] character=[pat_id sex region] + +NOTE: Wrote _null_ (0 rows, 0 columns). 
+NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds diff --git a/jenner-check/t002_getvarlist/expected/output.txt b/jenner-check/t002_getvarlist/expected/output.txt new file mode 100644 index 0000000..e69de29 diff --git a/jenner-check/t002_getvarlist/meta.json b/jenner-check/t002_getvarlist/meta.json new file mode 100644 index 0000000..f80b1c6 --- /dev/null +++ b/jenner-check/t002_getvarlist/meta.json @@ -0,0 +1,8 @@ +{ + "bundle": "t002_getvarlist", + "source_file": "Macros/getvarlist.sas", + "source_blob_sha": "6dc415304928bb428ca6fe8dc48755d7bbab4558", + "source_commit": "e08d2df61662f0ddd0607c7491a55a929dd841f7", + "tier": "real_data", + "notes": "GETVARLIST macro body reproduced verbatim from Macros/getvarlist.sas; exercised on a 5-row mock patient cohort. Splits numeric vs character variables via DICTIONARY.COLUMNS into NVARLIST/CVARLIST. Driver (mock data + %put) added so it runs standalone." +} diff --git a/jenner-check/t002_getvarlist/script.sas b/jenner-check/t002_getvarlist/script.sas new file mode 100644 index 0000000..c3e0667 --- /dev/null +++ b/jenner-check/t002_getvarlist/script.sas @@ -0,0 +1,70 @@ +/* DataFit Toolkit - GETVARLIST macro, exercised on a small mock cohort. + + GETVARLIST is an intermediate macro from Macros/getvarlist.sas. For a + given dataset it reads DICTIONARY.COLUMNS and builds two global macro + variables holding the blank-separated lists of numeric and character + variables (NVARLIST and CVARLIST), and splits the dataset name into + its library (LIB) and member (DSN) parts. Many of the toolkit's + data-quality macros rely on it to drive variable-by-variable + processing. The macro body below is reproduced from the toolkit; only + the driver (mock cohort + %put) is added so it runs standalone. 
*/
+
+%MACRO GETVARLIST(DS);
+
+  /* Initialize macro variables to null */
+  %GLOBAL NVARLIST CVARLIST LIB DSN;
+  %LET LIB = ;
+  %LET DSN = ;
+  %LET NVARLIST=;
+  %LET CVARLIST=;
+
+  /* Single level data set name */
+  %IF %INDEX(&DS,.) = 0 %THEN %DO;
+    %LET LIB = WORK;
+    %LET DSN = %UPCASE(&DS);
+  %END;
+
+  /* Two level data set name */
+  %ELSE %DO;
+    %LET LIB = %UPCASE(%SCAN(&DS,1,"."));
+    %LET DSN = %UPCASE(%SCAN(&DS,2,"."));
+  %END;
+
+  /* Get list of numeric variables */
+  PROC SQL NOPRINT;
+    SELECT NAME INTO :NVARLIST SEPARATED BY " "
+    FROM DICTIONARY.COLUMNS
+    WHERE LIBNAME = "&LIB" AND MEMNAME = "&DSN" AND TYPE = "num";
+
+    /* Get list of character variables */
+    SELECT NAME INTO :CVARLIST SEPARATED BY " "
+    FROM DICTIONARY.COLUMNS
+    WHERE LIBNAME = "&LIB" AND MEMNAME = "&DSN" AND TYPE = "char";
+  QUIT;
+
+%LET NVARLIST=%UPCASE(&NVARLIST);
+%LET CVARLIST=%UPCASE(&CVARLIST);
+
+%MEND GETVARLIST;
+
+/* --- mock cohort with a mix of numeric (age weight) and character (pat_id sex region) columns --- */
+data work.patients;
+  length pat_id $8 sex $1 region $10;
+  input pat_id $ sex $ age weight region $;
+datalines;
+P0000001 F 30 62 East
+P0000002 M 41 80 West
+P0000003 F 25 55 North
+P0000004 M 58 91 East
+P0000005 F 47 70 West
+;
+run;
+/* NOTE(review): the macro upper-cases both lists with %UPCASE, yet this bundle's expected.json pins lower-case "age weight" - confirm which casing is intended */
+%GETVARLIST(work.patients);
+%put NOTE: LIB=&LIB DSN=&DSN;
+%put NOTE: NUMERIC variables: &NVARLIST;
+%put NOTE: CHARACTER variables: &CVARLIST;
+
+data _null_;
+  put "NOTE: numeric=[&NVARLIST] character=[&CVARLIST]";
+run;
diff --git a/jenner-check/t003_outlier_iqr/autoexec.sas b/jenner-check/t003_outlier_iqr/autoexec.sas
new file mode 100644
index 0000000..bbcdc44
--- /dev/null
+++ b/jenner-check/t003_outlier_iqr/autoexec.sas
@@ -0,0 +1,4 @@
+/* autoexec for t003_outlier_iqr
+   validvarname=v7 mirrors the option the OUTLIER macro sets so the
+   autonamed quartile columns (e.g. glucose_Q1) are valid SAS names.
*/ +options obs=100 validvarname=v7; diff --git a/jenner-check/t003_outlier_iqr/expected.json b/jenner-check/t003_outlier_iqr/expected.json new file mode 100644 index 0000000..c74a5af --- /dev/null +++ b/jenner-check/t003_outlier_iqr/expected.json @@ -0,0 +1,24 @@ +{ + "_captured_at": "2026-06-18T01:24:46.073401+00:00", + "_captured_run_id": "r_019ed854644576838ac6f01eb857a417", + "status": "ok", + "exit_code": 0, + "log_contains": [ + "PROC MEANS", + "PROC TRANSPOSE", + "PROC PRINT completed: 3 observations printed" + ], + "log_does_not_contain": [ + "ERROR:", + "[JENNER-ERROR" + ], + "output_contains": [ + "IQR fences per analysis variable", + "19.6375", + "50.375" + ], + "diagnostics": { + "parse_warnings": [], + "runtime_warnings": [] + } +} \ No newline at end of file diff --git a/jenner-check/t003_outlier_iqr/expected/files.md b/jenner-check/t003_outlier_iqr/expected/files.md new file mode 100644 index 0000000..284f564 --- /dev/null +++ b/jenner-check/t003_outlier_iqr/expected/files.md @@ -0,0 +1,13 @@ +This bundle's result is the printed IQR-fence table (see output.txt). +The dataset preview URLs below are tied to run_id `r_019ed854644576838ac6f01eb857a417` and +expire when that run is reaped — re-running the bundle regenerates them. 
+ +## Datasets + +| name | rows | columns | preview | +|---|---|---|---| +| labs | 10 | ['pat_id', 'glucose', 'chol', 'bmi'] | https://api.jenneranalytics.com/v1/run/r_019ed854644576838ac6f01eb857a417/datasets/labs?token=cdbaf4c40f9149a49ec5d583bf71a077 | +| outlier_bounds | 3 | ['q1', 'I', 'q3', 'qrange', 'lower', 'upper'] | https://api.jenneranalytics.com/v1/run/r_019ed854644576838ac6f01eb857a417/datasets/outlier_bounds?token=cdbaf4c40f9149a49ec5d583bf71a077 | +| q1 | 3 | ['q1', 'I'] | https://api.jenneranalytics.com/v1/run/r_019ed854644576838ac6f01eb857a417/datasets/q1?token=cdbaf4c40f9149a49ec5d583bf71a077 | +| q3 | 3 | ['q3', 'I'] | https://api.jenneranalytics.com/v1/run/r_019ed854644576838ac6f01eb857a417/datasets/q3?token=cdbaf4c40f9149a49ec5d583bf71a077 | +| qrange | 3 | ['qrange', 'I'] | https://api.jenneranalytics.com/v1/run/r_019ed854644576838ac6f01eb857a417/datasets/qrange?token=cdbaf4c40f9149a49ec5d583bf71a077 | diff --git a/jenner-check/t003_outlier_iqr/expected/log.txt b/jenner-check/t003_outlier_iqr/expected/log.txt new file mode 100644 index 0000000..a7063ff --- /dev/null +++ b/jenner-check/t003_outlier_iqr/expected/log.txt @@ -0,0 +1,72 @@ +Jenner 0.1.0 (Unlicensed - limited to 100 observations) +Get a license at https://jenneranalytics.com/license + +NOTE: Option OBS changed to 100. +NOTE: Option VALIDVARNAME changed to V7. +NOTE: DATA work.labs + +NOTE: Processing inline DATALINES (10 lines) + +NOTE: Read 10 rows from DATALINES. +NOTE: Wrote work.labs (10 rows, 4 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC MEANS +NOTE: Output dataset q1 has 1 observations and 3 variables. +NOTE: PROC MEANS statement used. +NOTE: PROC MEANS +NOTE: Output dataset q3 has 1 observations and 3 variables. +NOTE: PROC MEANS statement used. +NOTE: PROC MEANS +NOTE: Output dataset qrange has 1 observations and 3 variables. +NOTE: PROC MEANS statement used. +NOTE: PROC TRANSPOSE data=q1 + +NOTE: Created 3 rows. 
+NOTE: Output written to /tmp/work/work/a30b9b41-343a-429e-a514-762739b9919f/q1.avro. +NOTE: PROC TRANSPOSE data=q3 + +NOTE: Created 3 rows. +NOTE: Output written to /tmp/work/work/a30b9b41-343a-429e-a514-762739b9919f/q3.avro. +NOTE: PROC TRANSPOSE data=qrange + +NOTE: Created 3 rows. +NOTE: Output written to /tmp/work/work/a30b9b41-343a-429e-a514-762739b9919f/qrange.avro. +NOTE: DATA q1 + + +NOTE: Read 3 rows from q1. +NOTE: Wrote q1 (3 rows, 2 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA q3 + + +NOTE: Read 3 rows from q3. +NOTE: Wrote q3 (3 rows, 2 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA qrange + + +NOTE: Read 3 rows from qrange. +NOTE: Wrote qrange (3 rows, 2 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA outlier_bounds + +NOTE: Stream 1 processed 3 rows, max BY-group size: 1 (O(1) memory verified) +NOTE: Stream 2 processed 3 rows, max BY-group size: 1 (O(1) memory verified) +NOTE: Stream 3 processed 3 rows, max BY-group size: 1 (O(1) memory verified) + +NOTE: Wrote outlier_bounds (3 rows, 6 columns). 
+NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC PRINT data=outlier_bounds + +NOTE: PROC PRINT completed: 3 observations printed, 6 variables diff --git a/jenner-check/t003_outlier_iqr/expected/output.txt b/jenner-check/t003_outlier_iqr/expected/output.txt new file mode 100644 index 0000000..f3fd5a4 --- /dev/null +++ b/jenner-check/t003_outlier_iqr/expected/output.txt @@ -0,0 +1,7 @@ + IQR fences per analysis variable + + q1 I q3 qrange lower upper + 5.075 1 10.9 5.825 -3.6625 19.6375 + 4.175 2 6.5 2.325 0.6875 9.9875 +22.125 3 33.425 11.3 5.175 50.375 + diff --git a/jenner-check/t003_outlier_iqr/meta.json b/jenner-check/t003_outlier_iqr/meta.json new file mode 100644 index 0000000..0b12c24 --- /dev/null +++ b/jenner-check/t003_outlier_iqr/meta.json @@ -0,0 +1,8 @@ +{ + "bundle": "t003_outlier_iqr", + "source_file": "Macros/outlier.sas", + "source_blob_sha": "3c2fbfebbe2e53f9c118c3ffda61091de53c5e10", + "source_commit": "e08d2df61662f0ddd0607c7491a55a929dd841f7", + "tier": "real_data", + "notes": "Interquartile pipeline reproduced verbatim from the OUTLIER macro core in Macros/outlier.sas: three PROC MEANS QMETHOD=P2 calls (Q1, Q3, QRANGE with /AUTONAME), three PROC TRANSPOSE, rename/merge by I. Tukey 1.5*IQR fences added. Driven by a 10-row mock lab dataset so it runs standalone." +} diff --git a/jenner-check/t003_outlier_iqr/script.sas b/jenner-check/t003_outlier_iqr/script.sas new file mode 100644 index 0000000..c3fc446 --- /dev/null +++ b/jenner-check/t003_outlier_iqr/script.sas @@ -0,0 +1,68 @@ +/* DataFit Toolkit - OUTLIER quartile pipeline, on a small mock lab table. + + The OUTLIER macro (Macros/outlier.sas) flags potential outliers in the + numeric variables of a dataset. 
Its core is an interquartile pipeline:
+   PROC MEANS with QMETHOD=P2 computes Q1, Q3 and the inter-quartile
+   range (QRANGE) for each analysis variable with the /AUTONAME option,
+   then PROC TRANSPOSE turns the one-row-per-statistic output into one
+   row per variable keyed by I. The toolkit then uses Q1/Q3/QRANGE to
+   derive the lower and upper Tukey fences.
+
+   This bundle reproduces that pipeline verbatim from outlier.sas (the
+   three PROC MEANS calls, the three PROC TRANSPOSE calls, the
+   rename/merge by I) and adds the standard 1.5*IQR fence calculation,
+   driven by a small mock lab dataset so it runs standalone. */
+
+data work.labs;
+  input pat_id $ glucose chol bmi;
+datalines;
+P01 5.1 4.2 22.5
+P02 9.8 6.1 31.2
+P03 4.4 3.9 19.8
+P04 14.2 7.7 40.1
+P05 5.5 5.0 25.0
+P06 6.2 4.8 27.3
+P07 5.0 4.1 21.0
+P08 22.0 9.2 45.0
+P09 5.3 4.5 23.1
+P10 6.0 5.2 28.0
+;
+run;
+/* Analysis variables, library and member for the pipeline; presumably supplied by the OUTLIER macro's caller in the toolkit - TODO confirm against Macros/outlier.sas */
+%let NVARLIST_OUTLIER = glucose chol bmi;
+%let LIB = WORK;
+%let DSN = LABS;
+
+proc means data=&LIB..&DSN noprint qmethod=p2;
+  var &NVARLIST_OUTLIER;
+  output out=q1 (drop=_TYPE_ _FREQ_) q1= /autoname;
+run;
+
+proc means data=&LIB..&DSN noprint qmethod=p2;
+  var &NVARLIST_OUTLIER;
+  output out=q3 (drop=_TYPE_ _FREQ_) q3= /autoname;
+run;
+
+proc means data=&LIB..&DSN noprint qmethod=p2;
+  var &NVARLIST_OUTLIER;
+  output out=qrange (drop=_TYPE_ _FREQ_) qrange= /autoname;
+run;
+/* Transpose each one-row statistics table in place (out= reuses the input name): one row per analysis variable, value in COL1 */
+proc transpose data=q1 out=q1; run;
+proc transpose data=q3 out=q3; run;
+proc transpose data=qrange out=qrange; run;
+/* I=_n_ is a positional key: all three tables come from the same VAR list, so I = 1, 2, 3 stands for glucose, chol, bmi */
+data q1; set q1; I=_n_; rename COL1=q1; drop _LABEL_ _NAME_; run;
+data q3; set q3; I=_n_; rename COL1=q3; drop _LABEL_ _NAME_; run;
+data qrange; set qrange; I=_n_; rename COL1=qrange; drop _LABEL_ _NAME_; run;
+/* One row per analysis variable: the three statistics joined on I, then the 1.5*IQR fences */
+data outlier_bounds;
+  merge q1 q3 qrange;
+  by I;
+  lower = q1 - 1.5 * qrange; /* lower Tukey fence */
+  upper = q3 + 1.5 * qrange; /* upper Tukey fence */
+run;
+
+proc print data=outlier_bounds noobs;
+  title "IQR fences per analysis variable";
+run;