Skip to content

Commit 15ce5d6

Browse files
Merge pull request #387 from RobokopU24/environment-config-refactor
Environment & Config Refactor
2 parents 37fa773 + a050f3c commit 15ce5d6

40 files changed

Lines changed: 473 additions & 447 deletions

.dockerignore

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
.git
2+
.env
3+
.idea
4+
.DS_Store
5+
.pytest_cache
6+
.venv
7+
__pycache__
8+
*.egg-info
9+
dist

.env

Lines changed: 0 additions & 5 deletions
This file was deleted.

.env.example

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Copy or rename this file to ".env" to use it for environment variable configurations.
2+
#
3+
# ATTENTION: The only required environment variables are ORION_STORAGE and ORION_GRAPHS. The rest are optional and it's
4+
# usually fine to leave them commented out or delete them, as the ORION config module will assign defaults.
5+
6+
# ---- Storage & Output ----
7+
8+
# Directory for source data downloads and ingest pipeline files
9+
ORION_STORAGE=~/ORION_storage/
10+
11+
# Directory for final graph releases
12+
ORION_GRAPHS=~/ORION_graphs/
13+
14+
# Directory for log files (if unset, logs go to stdout only)
15+
# ORION_LOGS=
16+
17+
# Base URL used to generate the URI identifiers that appear in metadata.
18+
# For example, ROBOKOP graphs use https://robokop.renci.org/
19+
# ORION_OUTPUT_URL=https://localhost
20+
21+
# ---- Graph Spec ----
22+
23+
# Local graph spec filename (set one of ORION_GRAPH_SPEC or ORION_GRAPH_SPEC_URL, not both)
24+
# ORION_GRAPH_SPEC=example-graph-spec.yaml
25+
26+
# URL pointing to a remote graph spec file
27+
# ORION_GRAPH_SPEC_URL=
28+
29+
# ---- Mode ----
30+
31+
# Enable test/debug mode (sets log level to DEBUG and runs ingests with a smaller subset of data if possible)
32+
# ORION_TEST_MODE=false
33+
34+
# ---- Biolink Model ----
35+
36+
# Biolink model version (optional - don't set this and ORION will use the latest)
37+
# BL_VERSION=v4.3.4
38+
39+
# ---- Normalization URLs ----
40+
41+
# Edge normalization / BioLink Lookup URL
42+
# EDGE_NORMALIZATION_URL=https://bl-lookup-sri.renci.org
43+
44+
# Node normalization URL
45+
# NODE_NORMALIZATION_URL=https://nodenormalization-sri.renci.org
46+
47+
# ---- LitCoin / Bagel (may be removed in the future) ----
48+
49+
# Name resolution service URL
50+
# NAMERES_URL=https://name-resolution-sri.renci.org
51+
52+
# SapBERT service URL
53+
# SAPBERT_URL=https://babel-sapbert.apps.renci.org
54+
55+
# Shared source data path for LitCoin pipeline
56+
# SHARED_SOURCE_DATA_PATH=/tmp/shared_data
57+
58+
# LitCoin predicate mapping service URL
59+
# LITCOIN_PRED_MAPPING_URL=https://pred-mapping.apps.renci.org
60+
61+
# Bagel service URL
62+
# BAGEL_URL=https://bagel.apps.renci.org
63+
64+
# Bagel service credentials
65+
# BAGEL_SERVICE_USERNAME=
66+
# BAGEL_SERVICE_PASSWORD=
67+
68+
# OpenAI credentials for LitCoin GPT features
69+
# OPENAI_API_KEY=
70+
# OPENAI_API_ORGANIZATION=

.github/workflows/release.yml

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,29 +11,28 @@ jobs:
1111
push_to_registry:
1212
name: Push Docker image to GitHub Packages tagged with "latest" and version number.
1313
runs-on: ubuntu-latest
14+
permissions:
15+
contents: read
16+
packages: write
1417
steps:
1518
- name: Check out the repo
1619
uses: actions/checkout@v4
17-
- name: Get the version
18-
id: get_version
19-
run: echo ::set-output name=VERSION::${GITHUB_REF/refs\/tags\//}
2020
- name: Login to ghcr
21-
uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a
21+
uses: docker/login-action@v3
2222
with:
2323
registry: ${{ env.REGISTRY }}
2424
username: ${{ github.actor }}
2525
password: ${{ secrets.GITHUB_TOKEN }}
2626
- name: Extract metadata (tags, labels) for Docker
2727
id: meta
28-
uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
28+
uses: docker/metadata-action@v5
2929
with:
3030
images:
3131
ghcr.io/${{ github.repository }}
3232
- name: Push to GitHub Packages
33-
uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671
33+
uses: docker/build-push-action@v6
3434
with:
3535
context: .
3636
push: true
3737
tags: ${{ steps.meta.outputs.tags }}
38-
labels: ${{ steps.meta.outputs.labels }}
39-
build-args: VERSION=${{ steps.get_version.outputs.VERSION }}
38+
labels: ${{ steps.meta.outputs.labels }}

.github/workflows/test.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,8 @@ jobs:
1414
- name: create env params
1515
run: |
1616
echo "ROBOKOP_HOME=$PWD" >> $GITHUB_ENV
17-
mkdir -p $PWD/tests/workspace/logs
1817
mkdir -p $PWD/tests/workspace/storage
1918
mkdir -p $PWD/tests/workspace/graphs
20-
echo "ORION_LOGS=$PWD/tests/workspace/logs" >> $GITHUB_ENV
2119
echo "ORION_STORAGE=$PWD/tests/workspace/storage" >> $GITHUB_ENV
2220
echo "ORION_GRAPHS=$PWD/tests/workspace/graphs" >> $GITHUB_ENV
2321

README.md

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -42,31 +42,34 @@ After installation, the following commands are available (prefix with `uv run` i
4242

4343
### Configuring ORION
4444

45-
ORION uses three directories for its data, configured via environment variables:
45+
ORION is configured via environment variables, which can be set directly or through an `.env` file.
4646

47-
| Variable | Purpose |
48-
|---|--------------------------------------|
49-
| `ORION_STORAGE` | Data ingest pipeline storage |
50-
| `ORION_GRAPHS` | Knowledge graph outputs |
51-
| `ORION_LOGS` | Log files |
52-
53-
You can set these up manually or use the provided script:
47+
In most cases, you can simply use this provided script to set up a local environment. It will create directories for ORION outputs next to where ORION was installed and set env vars pointing to them.
5448

5549
```bash
56-
source ./set_up_test_env.sh
50+
source ./set_up_dev_env.sh
5751
```
5852

59-
#### Graph Spec
53+
For more customization and settings, use an `.env` file. Copy or rename the `.env.example` file to `.env`.
6054

61-
A Graph Spec yaml file defines which sources to include in a knowledge graph. Set one of the following environment variables (not both):
55+
Then uncomment and edit `.env` as desired to set values for your environment.
6256

63-
```bash
64-
# Option 1: Name of a file in the graph_specs/ directory
65-
export ORION_GRAPH_SPEC=example-graph-spec.yaml
57+
| Variable | Purpose | Default |
58+
|---|------------------------------------------------------------|---|
59+
| `ORION_STORAGE` | Path to a directory for data ingest pipeline storage | (required) |
60+
| `ORION_GRAPHS` | Path to a directory for Knowledge Graph outputs | (required) |
61+
| `ORION_LOGS` | Path to a Log file directory (if unset, logs go to stdout) | `None` |
62+
| `ORION_GRAPH_SPEC` | Graph Spec filename from `graph_specs/` | `example-graph-spec.yaml` |
63+
| `ORION_GRAPH_SPEC_URL` | URL to a remote Graph Spec file | |
6664

67-
# Option 2: URL pointing to a Graph Spec yaml file
68-
export ORION_GRAPH_SPEC_URL=https://stars.renci.org/var/data_services/graph_specs/default-graph-spec.yaml
69-
```
65+
Configuration is managed by [pydantic-settings](https://docs.pydantic.dev/latest/concepts/pydantic_settings/) — environment variables override `.env` file values, and sensible defaults are provided where possible. See `orion/config.py` for the full list of settings.
66+
67+
#### Graph Spec
68+
69+
A Graph Spec yaml file defines which sources to include in a knowledge graph. Set one of the following (not both):
70+
71+
- `ORION_GRAPH_SPEC` - name of a file in the `graph_specs/` directory
72+
- `ORION_GRAPH_SPEC_URL` - URL pointing to a Graph Spec yaml file
7073

7174
Here is a simple Graph Spec example:
7275

@@ -100,6 +103,8 @@ See the `graph_specs/` directory for more examples.
100103

101104
### Running with Docker
102105

106+
Make sure environment variables are set or an `.env` file is configured with at least `ORION_STORAGE` and `ORION_GRAPHS` pointing to valid host directories. The compose file reads these env vars and mounts those directories as volumes in the container.
107+
103108
Build the image:
104109

105110
```bash
@@ -115,19 +120,19 @@ docker compose up
115120
Build a specific graph:
116121

117122
```bash
118-
docker compose run --rm orion orion-build Example_Graph
123+
docker compose run orion orion-build Example_Graph
119124
```
120125

121126
Run the ingest pipeline for a single data source:
122127

123128
```bash
124-
docker compose run --rm orion orion-ingest DrugCentral
129+
docker compose run orion orion-ingest DrugCentral
125130
```
126131

127132
See available data sources and options:
128133

129134
```bash
130-
docker compose run --rm orion orion-ingest -h
135+
docker compose run orion orion-ingest -h
131136
```
132137

133138
### Development

docker-compose-worker.yml

Lines changed: 9 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -5,40 +5,24 @@ services:
55
dockerfile: Dockerfile
66
container_name: orion-worker
77
command: [celery, "-A", "celery_worker.celery_app", "worker", "--loglevel=info", "-Q", "orion"]
8+
env_file:
9+
- path: .env
10+
required: false
811
environment:
9-
- CELERY_BROKER_URL=redis://redis:6379/0
10-
- CELERY_RESULT_BACKEND=redis://redis:6379/0
11-
- SHARED_SOURCE_DATA_PATH=/tmp/shared_data
12+
# override paths from env, use paths volumes are mounted to inside the container
1213
- ORION_STORAGE=/ORION_storage
1314
- ORION_GRAPHS=/ORION_graphs
1415
- ORION_LOGS=/ORION_logs
15-
- BAGEL_SERVICE_USERNAME=fake-username-do-not-commit-a-real-one!!!
16-
- BAGEL_SERVICE_PASSWORD=fake-password-do-not-commit-a-real-one!!!
17-
- ORION_GRAPH_SPEC
18-
- ORION_GRAPH_SPEC_URL
19-
- ORION_OUTPUT_URL
20-
- EDGE_NORMALIZATION_ENDPOINT
21-
- NODE_NORMALIZATION_ENDPOINT
22-
- NAMERES_URL
23-
- SAPBERT_URL
24-
- LITCOIN_PRED_MAPPING_URL
25-
- BL_VERSION
26-
- PHAROS_DB_HOST
27-
- PHAROS_DB_USER
28-
- PHAROS_DB_PASSWORD
29-
- PHAROS_DB_NAME
30-
- PHAROS_DB_PORT
31-
- DRUGCENTRAL_DB_HOST
32-
- DRUGCENTRAL_DB_USER
33-
- DRUGCENTRAL_DB_PASSWORD
34-
- DRUGCENTRAL_DB_NAME
35-
- DRUGCENTRAL_DB_PORT
16+
- SHARED_SOURCE_DATA_PATH=/tmp/shared_data
17+
# specific to celery
18+
- CELERY_BROKER_URL=redis://redis:6379/0
19+
- CELERY_RESULT_BACKEND=redis://redis:6379/0
3620
volumes:
3721
- .:/ORION
38-
- "${SHARED_SOURCE_DATA_PATH}:/tmp/shared_data"
3922
- "${ORION_STORAGE}:/ORION_storage"
4023
- "${ORION_GRAPHS}:/ORION_graphs"
4124
- "${ORION_LOGS}:/ORION_logs"
25+
- "${SHARED_SOURCE_DATA_PATH}:/tmp/shared_data"
4226
user: 1000:7474
4327
networks:
4428
- app-network

docker-compose.yml

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,33 +3,15 @@ services:
33
build:
44
context: .
55
command: [orion-build, all]
6+
env_file:
7+
- path: .env
8+
required: false
69
environment:
10+
# override paths from env, use paths volumes are mounted to inside the container
711
- ORION_STORAGE=/ORION_storage
812
- ORION_GRAPHS=/ORION_graphs
9-
- ORION_LOGS=/ORION_logs
10-
- ORION_GRAPH_SPEC
11-
- ORION_GRAPH_SPEC_URL
12-
- ORION_OUTPUT_URL
13-
- EDGE_NORMALIZATION_ENDPOINT
14-
- NODE_NORMALIZATION_ENDPOINT
15-
- NAMERES_URL
16-
- SAPBERT_URL
17-
- BL_VERSION
18-
- PHAROS_DB_HOST
19-
- PHAROS_DB_USER
20-
- PHAROS_DB_PASSWORD
21-
- PHAROS_DB_NAME
22-
- PHAROS_DB_PORT
23-
- DRUGCENTRAL_DB_HOST
24-
- DRUGCENTRAL_DB_USER
25-
- DRUGCENTRAL_DB_PASSWORD
26-
- DRUGCENTRAL_DB_NAME
27-
- DRUGCENTRAL_DB_PORT
2813
volumes:
2914
- .:/ORION
3015
- "${ORION_STORAGE}:/ORION_storage"
3116
- "${ORION_GRAPHS}:/ORION_graphs"
32-
- "${ORION_LOGS}:/ORION_logs"
3317
user: 7474:7474
34-
35-

docs/ORION.ipynb

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,11 @@
8585
{
8686
"cell_type": "code",
8787
"id": "g6i460bvtda",
88-
"source": "%%bash\ncd ~/ORION_root/ORION/\nsource ./set_up_test_env.sh",
88+
"source": [
89+
"%%bash\n",
90+
"cd ~/ORION_root/ORION/\n",
91+
"source ./set_up_dev_env.sh"
92+
],
8993
"metadata": {},
9094
"execution_count": null,
9195
"outputs": []
@@ -130,4 +134,4 @@
130134
},
131135
"nbformat": 4,
132136
"nbformat_minor": 5
133-
}
137+
}

helm/orion/templates/graph-builder.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,15 @@ spec:
7070
- name: BL_VERSION
7171
value: {{ .Values.orion.normalization.bl_version }}
7272
{{- if .Values.orion.normalization.nodeNormEndpoint }}
73-
- name: NODE_NORMALIZATION_ENDPOINT
73+
- name: NODE_NORMALIZATION_URL
7474
value: {{ .Values.orion.normalization.nodeNormEndpoint }}
7575
{{- end }}
7676
{{- if .Values.orion.normalization.edgeNormEndpoint }}
77-
- name: EDGE_NORMALIZATION_ENDPOINT
77+
- name: EDGE_NORMALIZATION_URL
7878
value: {{ .Values.orion.normalization.edgeNormEndpoint }}
7979
{{- end }}
8080
{{- if .Values.orion.normalization.nameResolverEndpoint }}
81-
- name: NAMERES_ENDPOINT
81+
- name: NAMERES_URL
8282
value: {{ .Values.orion.normalization.nameResolverEndpoint }}
8383
{{- end }}
8484
{{- if .Values.orion.normalization.sapbertEndpoint }}
@@ -157,15 +157,15 @@ spec:
157157
- name: BL_VERSION
158158
value: {{ .Values.orion.normalization.bl_version }}
159159
{{- if .Values.orion.normalization.nodeNormEndpoint }}
160-
- name: NODE_NORMALIZATION_ENDPOINT
160+
- name: NODE_NORMALIZATION_URL
161161
value: {{ .Values.orion.normalization.nodeNormEndpoint }}
162162
{{- end }}
163163
{{- if .Values.orion.normalization.edgeNormEndpoint }}
164-
- name: EDGE_NORMALIZATION_ENDPOINT
164+
- name: EDGE_NORMALIZATION_URL
165165
value: {{ .Values.orion.normalization.edgeNormEndpoint }}
166166
{{- end }}
167167
{{- if .Values.orion.normalization.nameResolverEndpoint }}
168-
- name: NAMERES_ENDPOINT
168+
- name: NAMERES_URL
169169
value: {{ .Values.orion.normalization.nameResolverEndpoint }}
170170
{{- end }}
171171
{{- if .Values.orion.normalization.sapbertEndpoint }}

0 commit comments

Comments
 (0)