AEADataEditor · larsvilhuber · Apr 3, 2026 · Apr 3, 2026 · Apr 3, 2026 · Apr 3, 2026
diff --git a/bitbucket-pipelines.yml b/bitbucket-pipelines.yml
@@ -39,6 +39,8 @@ pipelines:
           - name: openICPSRID   
           - name: jiraticket
           - name: ZenodoID
+          # ZenodoID accepts: numeric ID, full URL, DOI, or community request URL.
+          # Leave ZenodoID blank if jiraticket is set (orchestrator will query Jira).
           #- name: DataverseID
           #- name: OSFID
           - name: ProcessStata
@@ -87,18 +89,18 @@ pipelines:
             - if [ -z $openICPSRID ]; then openICPSRID=$openicpsr; fi
             - if [ -z $ZenodoID ]; then ZenodoID=$zenodo; fi
             - projectID="${openICPSRID}"
-            - projectID="${projectID:-zenodo-$ZenodoID}"
-            - if [ -z "$jiraticket" ] && [ -n "${openICPSRID:-}" ]; then jiraticket=$(python3 tools/jira_find_task_by_icpsr.py "$openICPSRID" 2>/dev/null || true); else echo "Jira ticket not set"; fi
+            - if [ -z "$jiraticket" ] && [ -n "${openICPSRID:-}" ]; then jiraticket=$(python3.12 tools/jira_find_task_by_icpsr.py "$openICPSRID" 2>/dev/null || true); else echo "Jira ticket not set"; fi
             - echo "Using Jira case $jiraticket"
             - ./tools/update_config.sh
             - ./automations/70_publish_comment.sh 1-populate-from-icpsr started
-            - if [ -d $projectID ]; then \rm -rf $projectID; fi
-            - if [ ! -z $openICPSRID ]; then python3 tools/download_openicpsr-private.py $openICPSRID; fi
-            - if [ ! -z $ZenodoID ]; then python3 tools/download_zenodo_draft.py $ZenodoID; fi
+            - if [ -d "${projectID:-__none__}" ]; then \rm -rf $projectID; fi
+            - if [ ! -z $openICPSRID ]; then python3.12 tools/download_openicpsr-private.py $openICPSRID; fi
+            - if [ ! -z "$ZenodoID" ] || [ ! -z "$jiraticket" ]; then zenodo_dir=$(python3.12 tools/download_zenodo.py ${ZenodoID:+--zenodo-id "$ZenodoID"} ${jiraticket:+--jira-ticket "$jiraticket"} --print-id | tail -1 | grep -E '^zenodo-[0-9]+$' || true); fi  # capture stdout only (stderr stays in the log); keep the value only if it is a well-formed zenodo-NNNN id, else leave zenodo_dir empty
+            - if [ -z "$projectID" ] && [ ! -z "${zenodo_dir:-}" ]; then projectID="$zenodo_dir"; fi
             - ./automations/00_unpack_zip.sh  $projectID
             - mkdir cache
             - if [ ! -z $openICPSRID ]; then mv *.zip cache/; fi
-            - if [ ! -z $ZenodoID ]; then zip -rp cache/${projectID}.zip $projectID/* ; fi
+            - if [ ! -z "${zenodo_dir:-}" ]; then zip -rp cache/${projectID}.zip $projectID/* ; fi
             - ./automations/00_prepare_aux.sh
             - ./automations/01_check_file_sizes.sh $projectID
             - ./automations/02_list_data_files.sh $projectID
@@ -118,6 +120,7 @@ pipelines:
             - if [ -z $openICPSRID ]; then openICPSRID=$openicpsr; fi
             - if [ -z $ZenodoID ]; then ZenodoID=$zenodo; fi
             - projectID="${openICPSRID}"
+            - if [ ! -z "$ZenodoID" ] && echo "$ZenodoID" | grep -q '/'; then ZenodoID=$(echo "$ZenodoID" | python3.12 -c "import sys,re; s=sys.stdin.read().strip().rstrip('/'); m=re.search(r'zenodo\.org/(?:records?|deposit)/(\d+)', s) or re.search(r'zenodo\.(\d+)', s); print(m.group(1) if m else s)"); fi
             - projectID="${projectID:-zenodo-$ZenodoID}"
             - if [ -f cache/$projectID.zip ]; then echo "✅ Found $projectID.zip in cache"; else echo "⚠️  Did not find $projectID.zip in cache. 🛑 You may need to use the BIG pipeline!!! "; exit 2; fi
       - parallel: # we will run these in parallel
@@ -133,6 +136,7 @@ pipelines:
               - if [ -z $openICPSRID ]; then openICPSRID=$openicpsr; fi
               - if [ -z $ZenodoID ]; then ZenodoID=$zenodo; fi
               - projectID="${openICPSRID}"
+              - if [ ! -z "$ZenodoID" ] && echo "$ZenodoID" | grep -q '/'; then ZenodoID=$(echo "$ZenodoID" | python3.12 -c "import sys,re; s=sys.stdin.read().strip().rstrip('/'); m=re.search(r'zenodo\.org/(?:records?|deposit)/(\d+)', s) or re.search(r'zenodo\.(\d+)', s); print(m.group(1) if m else s)"); fi
               - projectID="${projectID:-zenodo-$ZenodoID}"
               - chmod a+rx ./automations/*.sh
               - ./automations/00_preliminaries.sh $ProcessStata $projectID
@@ -152,6 +156,7 @@ pipelines:
               - if [ -z $openICPSRID ]; then openICPSRID=$openicpsr; fi
               - if [ -z $ZenodoID ]; then ZenodoID=$zenodo; fi
               - projectID="${openICPSRID}"
+              - if [ ! -z "$ZenodoID" ] && echo "$ZenodoID" | grep -q '/'; then ZenodoID=$(echo "$ZenodoID" | python3.12 -c "import sys,re; s=sys.stdin.read().strip().rstrip('/'); m=re.search(r'zenodo\.org/(?:records?|deposit)/(\d+)', s) or re.search(r'zenodo\.(\d+)', s); print(m.group(1) if m else s)"); fi
               - projectID="${projectID:-zenodo-$ZenodoID}"
               - chmod a+rx ./automations/*.sh
               - ./automations/00_preliminaries.sh $ProcessPii $projectID
@@ -168,6 +173,7 @@ pipelines:
               - if [ -z $openICPSRID ]; then openICPSRID=$openicpsr; fi
               - if [ -z $ZenodoID ]; then ZenodoID=$zenodo; fi
               - projectID="${openICPSRID}"
+              - if [ ! -z "$ZenodoID" ] && echo "$ZenodoID" | grep -q '/'; then ZenodoID=$(echo "$ZenodoID" | python3.12 -c "import sys,re; s=sys.stdin.read().strip().rstrip('/'); m=re.search(r'zenodo\.org/(?:records?|deposit)/(\d+)', s) or re.search(r'zenodo\.(\d+)', s); print(m.group(1) if m else s)"); fi
               - projectID="${projectID:-zenodo-$ZenodoID}"
               - chmod a+rx ./automations/*.sh
               - ./automations/00_preliminaries.sh $ProcessR $projectID
@@ -183,6 +189,7 @@ pipelines:
               - if [ -z $openICPSRID ]; then openICPSRID=$openicpsr; fi
               - if [ -z $ZenodoID ]; then ZenodoID=$zenodo; fi
               - projectID="${openICPSRID}"
+              - if [ ! -z "$ZenodoID" ] && echo "$ZenodoID" | grep -q '/'; then ZenodoID=$(echo "$ZenodoID" | python3.12 -c "import sys,re; s=sys.stdin.read().strip().rstrip('/'); m=re.search(r'zenodo\.org/(?:records?|deposit)/(\d+)', s) or re.search(r'zenodo\.(\d+)', s); print(m.group(1) if m else s)"); fi
               - projectID="${projectID:-zenodo-$ZenodoID}"
               - chmod a+rx ./automations/*.sh
               - ./automations/00_preliminaries.sh $ProcessPython $projectID
@@ -199,6 +206,7 @@ pipelines:
               - if [ -z $openICPSRID ]; then openICPSRID=$openicpsr; fi
               - if [ -z $ZenodoID ]; then ZenodoID=$zenodo; fi
               - projectID="${openICPSRID}"
+              - if [ ! -z "$ZenodoID" ] && echo "$ZenodoID" | grep -q '/'; then ZenodoID=$(echo "$ZenodoID" | python3.12 -c "import sys,re; s=sys.stdin.read().strip().rstrip('/'); m=re.search(r'zenodo\.org/(?:records?|deposit)/(\d+)', s) or re.search(r'zenodo\.(\d+)', s); print(m.group(1) if m else s)"); fi
               - projectID="${projectID:-zenodo-$ZenodoID}"
               - chmod a+rx ./automations/*.sh
               - ./automations/00_preliminaries.sh $ProcessJulia $projectID
@@ -218,6 +226,7 @@ pipelines:
               - if [ -z $openICPSRID ]; then openICPSRID=$openicpsr; fi
               - if [ -z $ZenodoID ]; then ZenodoID=$zenodo; fi
               - projectID="${openICPSRID}"
+              - if [ ! -z "$ZenodoID" ] && echo "$ZenodoID" | grep -q '/'; then ZenodoID=$(echo "$ZenodoID" | python3.12 -c "import sys,re; s=sys.stdin.read().strip().rstrip('/'); m=re.search(r'zenodo\.org/(?:records?|deposit)/(\d+)', s) or re.search(r'zenodo\.(\d+)', s); print(m.group(1) if m else s)"); fi
               - projectID="${projectID:-zenodo-$ZenodoID}"
               - chmod a+rx ./automations/*.sh
               - ./automations/00_preliminaries.sh yes $projectID
@@ -246,9 +255,10 @@ pipelines:
             - if [ -z $openICPSRID ]; then openICPSRID=$openicpsr; else echo "openICPSRID not set"; fi
             - if [ -z $ZenodoID ]; then ZenodoID=$zenodo; else echo "ZenodoID not set"; fi
             - projectID="${openICPSRID}"
+            - if [ ! -z "$ZenodoID" ] && echo "$ZenodoID" | grep -q '/'; then ZenodoID=$(echo "$ZenodoID" | python3.12 -c "import sys,re; s=sys.stdin.read().strip().rstrip('/'); m=re.search(r'zenodo\.org/(?:records?|deposit)/(\d+)', s) or re.search(r'zenodo\.(\d+)', s); print(m.group(1) if m else s)"); fi
             - projectID="${projectID:-zenodo-$ZenodoID}"
             - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-            - if [ -z "$jiraticket" ] && [ -n "${openICPSRID:-}" ]; then jiraticket=$(python3 tools/jira_find_task_by_icpsr.py "$openICPSRID" 2>/dev/null || true); else echo "Jira ticket not set"; fi
+            - if [ -z "$jiraticket" ] && [ -n "${openICPSRID:-}" ]; then jiraticket=$(python3.12 tools/jira_find_task_by_icpsr.py "$openICPSRID" 2>/dev/null || true); else echo "Jira ticket not set"; fi
             - echo "Using Jira case $jiraticket"
             - ./tools/update_config.sh
             - if [ -d cache ]; then ls -lR cache/*; fi
@@ -426,8 +436,10 @@ pipelines:
     w-big-populate-from-icpsr: #name of this pipeline
       - variables:          #list variable names under here
           # These do not need to have a value, if "config.yml" is filled out.
-          - name: openICPSRID  
-          - name: ZenodoID 
+          - name: openICPSRID
+          - name: ZenodoID
+          # ZenodoID accepts: numeric ID, full URL, DOI, or community request URL.
+          # Leave ZenodoID blank if jiraticket is set (orchestrator will query Jira).
           - name: jiraticket
       - step:
           name: Download and commit
@@ -443,13 +455,13 @@ pipelines:
             - if [ -z $openICPSRID ]; then openICPSRID=$openicpsr; fi
             - if [ -z $ZenodoID ]; then ZenodoID=$zenodo; fi
             - projectID="${openICPSRID}"
-            - projectID="${projectID:-zenodo-$ZenodoID}"
-            - if [ -z "$jiraticket" ] && [ -n "${openICPSRID:-}" ]; then jiraticket=$(python3 tools/jira_find_task_by_icpsr.py "$openICPSRID" 2>/dev/null || true); fi
+            - if [ -z "$jiraticket" ] && [ -n "${openICPSRID:-}" ]; then jiraticket=$(python3.12 tools/jira_find_task_by_icpsr.py "$openICPSRID" 2>/dev/null || true); fi
             - ./tools/update_config.sh
             - ./automations/70_publish_comment.sh w-big-populate-from-icpsr started
-            - if [ -d $projectID ]; then \rm -rf $projectID; fi
-            - if [ ! -z $openICPSRID ]; then python3 tools/download_openicpsr-private.py $openICPSRID; fi
-            - if [ ! -z $ZenodoID ]; then python3 tools/download_zenodo_draft.py $ZenodoID; fi
+            - if [ -d "${projectID:-__none__}" ]; then \rm -rf $projectID; fi
+            - if [ ! -z $openICPSRID ]; then python3.12 tools/download_openicpsr-private.py $openICPSRID; fi
+            - if [ ! -z "$ZenodoID" ] || [ ! -z "$jiraticket" ]; then zenodo_dir=$(python3.12 tools/download_zenodo.py ${ZenodoID:+--zenodo-id "$ZenodoID"} ${jiraticket:+--jira-ticket "$jiraticket"} --print-id | tail -1 | grep -E '^zenodo-[0-9]+$' || true); fi  # capture stdout only (stderr stays in the log); keep the value only if it is a well-formed zenodo-NNNN id, else leave zenodo_dir empty
+            - if [ -z "$projectID" ] && [ ! -z "${zenodo_dir:-}" ]; then projectID="$zenodo_dir"; fi
             - chmod a+rx ./automations/*.sh
             - ./automations/00_prepare_aux.sh
             - ./automations/00_unpack_zip.sh  $projectID

diff --git a/docs/96-90-download_zenodo.md b/docs/96-90-download_zenodo.md
@@ -0,0 +1,71 @@
+(help-download_zenodo)=
+# download_zenodo.py — Zenodo download orchestrator
+
+::::{warning}
+
+This documentation was AI-generated by Claude Code and should be reviewed for accuracy. Please report any errors or inconsistencies.
+
+::::
+
+## Description
+
+Single entry point for all Zenodo downloads.  Parses any Zenodo URL, DOI,
+or record ID, determines whether the target is a public record or a private
+draft/community request, and delegates to the appropriate script.
+
+When no `--zenodo-id` is given, the orchestrator queries the Jira ticket
+passed via `--jira-ticket` for the "Replication package URL".
+
+## Usage
+
+```bash
+# Explicit ID or URL
+python3.12 tools/download_zenodo.py --zenodo-id 10848594
+python3.12 tools/download_zenodo.py --zenodo-id https://zenodo.org/records/10848594
+python3.12 tools/download_zenodo.py --zenodo-id https://zenodo.org/communities/aeajournals/requests/61cff0cb-b3ca-48aa-bfe6-5b17dc8eb665
+
+# From Jira ticket
+python3.12 tools/download_zenodo.py --jira-ticket AEAREP-8983
+
+# In a pipeline (capture directory name)
+zenodo_dir=$(python3.12 tools/download_zenodo.py --zenodo-id "$ZenodoID" --print-id 2>&1 | tail -1)
+```
+
+## Options
+
+| Option | Description |
+|--------|-------------|
+| `--zenodo-id URL_OR_ID` | Zenodo record ID, URL, DOI, or community request URL.  Skips Jira lookup. |
+| `--jira-ticket KEY` | Jira issue key; used when `--zenodo-id` is absent |
+| `--print-id` | Print `zenodo-NNNNN` to stdout (last line) for pipeline capture |
+| `--dry-run` | Passed through to the selected download script |
+| `--sandbox` | Use `sandbox.zenodo.org` |
+
+## URL Routing
+
+| URL pattern | Script called |
+|-------------|--------------|
+| `/records/NNNNN`, `/record/NNNNN`, `10.5281/zenodo.NNNNN`, bare ID | `download_zenodo_public.py` |
+| `/deposit/NNNNN` | `download_zenodo_draft.py` |
+| `/communities/.../requests/{uuid}` | `download_zenodo_draft.py` (resolves UUID → record ID via API) |
+
+## Environment Variables
+
+| Variable | Purpose |
+|----------|---------|
+| `JIRA_USERNAME`, `JIRA_API_KEY` | Required when `--jira-ticket` is used |
+| `ZENODO_ACCESS_TOKEN` | Required for draft/private downloads |
+| `CI` | Auto-commit behaviour in pipelines |
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | Success |
+| 1 | Error |
+| 2 | Replication package URL from Jira is not a Zenodo URL |
+
+## See Also
+
+- `tools/download_zenodo_public.py`
+- `tools/download_zenodo_draft.py`
diff --git a/docs/96-90-download_zenodo_draft.md b/docs/96-90-download_zenodo_draft.md
@@ -107,4 +107,21 @@ You need a Zenodo access token to access draft deposits:
 - Reports detailed error messages for API issues
 - Validates checksums for downloaded files
 
-This tool is essential for working with unpublished Zenodo deposits in research workflows that require access to draft materials.
+This tool is essential for working with unpublished Zenodo deposits in research workflows that require access to draft materials.
+
+## Community Request URLs
+
+Draft deposits under community review can be addressed using the request URL:
+
+```text
+https://zenodo.org/communities/<community>/requests/<uuid>
+```
+
+The script calls `GET /api/requests/{uuid}` to resolve the deposit record ID,
+then proceeds with the normal draft download.  An access token is required.
+
+```bash
+python3.12 tools/download_zenodo_draft.py \
+  https://zenodo.org/communities/aeajournals/requests/61cff0cb-b3ca-48aa-bfe6-5b17dc8eb665 \
+  --access-token $ZENODO_ACCESS_TOKEN
+```
diff --git a/docs/96-90-download_zenodo_public.md b/docs/96-90-download_zenodo_public.md
@@ -1,5 +1,6 @@
 (help-download_zenodo_public)=
-# download_zenodo_public.sh - Download files from public Zenodo repositories
+
+# download_zenodo_public.py — Download files from public Zenodo records
 
 ::::{warning}
 
@@ -9,93 +10,54 @@ This documentation was AI-generated by Claude Code and should be reviewed for ac
 
 ## Description
 
-This script downloads all files from a public Zenodo record using the zenodo_get command-line tool. It's designed for replication workflows where researchers need to download published datasets, code, and supplementary materials from Zenodo repositories for analysis and verification.
+Pure-Python script that downloads all files from a published Zenodo record,
+then writes SHA-256, MD5, and metadata manifests to `generated/` using the
+same format as `download_zenodo_draft.py`.
+
+The legacy shell wrapper `download_zenodo_public.sh` is retained for
+backwards compatibility but is deprecated; use the Python script instead.
 
 ## Usage
 
 ```bash
-./download_zenodo_public.sh <RECORD_ID>
-bash tools/download_zenodo_public.sh <RECORD_ID>
+python3.12 tools/download_zenodo_public.py RECORD_ID_OR_URL
+python3.12 tools/download_zenodo_public.py --dry-run RECORD_ID_OR_URL
 ```
 
 ## Arguments
 
-- **RECORD_ID** - Zenodo record identifier, can be:
-  - Numeric record ID (e.g., "1234567")
-  - Full Zenodo URL (e.g., "https://zenodo.org/record/1234567")
-  - Zenodo DOI (e.g., "10.5281/zenodo.1234567")
+- **RECORD_ID_OR_URL** — Zenodo identifier in any of these forms:
+  - Numeric ID: `12345678`
+  - Record URL: `https://zenodo.org/records/12345678`
+  - Legacy URL: `https://zenodo.org/record/12345678`
+  - DOI string: `10.5281/zenodo.12345678`
+  - DOI URL:    `https://doi.org/10.5281/zenodo.12345678`
 
-## Examples
+## Options
 
-```bash
-# Using Zenodo record ID
-./download_zenodo_public.sh 1234567
+| Option         | Description                       |
+| -------------- | --------------------------------- |
+| `--output DIR` | Parent directory (default: `.`)   |
+| `--dry-run`    | List files without downloading    |
+| `--sandbox`    | Use `sandbox.zenodo.org`          |
 
-# Using full Zenodo URL (script extracts ID automatically)
-./download_zenodo_public.sh https://zenodo.org/record/1234567
+## Output
 
-# Using Zenodo DOI (script extracts ID automatically)
-./download_zenodo_public.sh 10.5281/zenodo.1234567
 ```
-
-## Requirements
-
-- **zenodo_get**: Zenodo command-line client (`pip install zenodo_get`)
-- Internet connection to access Zenodo API
-- Read/write permissions in current directory
-
-## Features
-
-- Flexible input parsing (extracts record ID from URLs and DOIs)
-- Creates organized directory structure: `zenodo-[RECORD_ID]`
-- Downloads all files from the specified Zenodo record
-- Prevents overwriting existing downloads
-- Simple error handling and validation
-
-## Behavior
-
-- Parses input to extract Zenodo record ID
-- Creates target directory named "zenodo-[RECORD_ID]"
-- Checks if directory already exists (prevents accidental overwrites)
-- Downloads all files using zenodo_get tool
-- Maintains original file names and organization
-
-## Output Structure
-
-```
-Input: 1234567 (or https://zenodo.org/record/1234567)
-Output directory: ./zenodo-1234567/
-Contents: All files from the Zenodo record
+zenodo-12345678/           ← downloaded files
+generated/
+  manifest.zenodo-12345678.YYYY-MM-DD.sha256
+  manifest.zenodo-12345678.YYYY-MM-DD.md5
+  metadata.zenodo-12345678.txt
 ```
 
-## Error Handling
-
-- Validates command-line arguments (requires exactly one argument)
-- Checks for existing output directory
-- Reports download failures from zenodo_get
-- Exits with error code 2 on validation failures
-
-## Dependencies
-
-### zenodo_get installation:
-```bash
-pip install zenodo_get
-# or
-pip install -r requirements.txt  # if included in project requirements
-```
-
-## Zenodo API
-
-- Uses zenodo_get which interfaces with Zenodo's REST API
-- Works with public records (no authentication required)
-- Supports both published and pre-published public records
+## Environment Variables
 
-## How It Works
+| Variable | Purpose                                        |
+| -------- | ---------------------------------------------- |
+| `CI`     | Suppresses progress; auto-commits with `[skip ci]` |
 
-1. **Input Parsing**: Extracts numeric record ID from various input formats
-2. **Directory Creation**: Creates organized output directory
-3. **Validation**: Checks for existing downloads to prevent overwrites
-4. **Download**: Uses zenodo_get to download all files from the record
-5. **Organization**: Maintains original file structure and names
+## See Also
 
-This tool is essential for reproducible research workflows that rely on datasets and code hosted in Zenodo repositories.
+- `tools/download_zenodo_draft.py` — for draft or community-review deposits
+- `tools/download_zenodo.py` — orchestrator (recommended entry point)