From 53d52e4b622dc1b0b076a8014b4261c0bc8a49b2 Mon Sep 17 00:00:00 2001 From: Keenan Date: Tue, 28 Oct 2025 13:10:28 -0400 Subject: [PATCH 01/18] chore: azure migration --- pipelines/deploy_dv.yaml | 43 +++++++++++++ server/Dockerfile.prod | 126 +++++++++++++++++++++++++------------- server/entrypoint.prod.sh | 25 ++++++++ server/sshd_config | 13 ++++ 4 files changed, 166 insertions(+), 41 deletions(-) create mode 100644 pipelines/deploy_dv.yaml create mode 100644 server/entrypoint.prod.sh create mode 100644 server/sshd_config diff --git a/pipelines/deploy_dv.yaml b/pipelines/deploy_dv.yaml new file mode 100644 index 00000000..6f80d67b --- /dev/null +++ b/pipelines/deploy_dv.yaml @@ -0,0 +1,43 @@ +trigger: none + +resources: + repositories: + - repository: templates + type: github + name: PHACDataHub/ADO-Pipeline-Templates + # Service connection to GitHub configured in Azure DevOps + endpoint: DMIA + +parameters: + - name: Image + displayName: Image + type: string + default: 'App' + values: + - App + +jobs: +- template: django_azure_app_service_deployment.yaml@templates + parameters: + env: 'Dev' + repository: 'nsp/spib-sdse-hopic-dv' + appName: 'was-spib-sdse-hopic-dv' + subscription: 'SPIB-SDSE-HOPIC-CICDLZSP-DT' + resourceGroup: 'rg-spib-sdse-hopic-dv' + poolName: spib-sdse-hopic-agents-dv + image: ${{ parameters.Image }} + containerCommand: 'gunicorn --bind 0.0.0.0:8000 omd.wsgi --timeout 1000' + envars: | + DB_HOST=pgsql-spib-sdse-hopic-dv.postgres.database.azure.com + DB_PORT=5432 + DB_SSLMODE=require + SECRET_KEY=@Microsoft.KeyVault(SecretUri=https://kvspibsdsehopicdv.vault.azure.net/secrets/hopic-django-secret-key/) + WEBSITES_PORT=8000 + LATEST_COMMIT_SHA=$(Build.SourceVersion) + ALLOWED_HOSTS=was-spib-sdse-hopic-dv.azurewebsites.net + CSRF_TRUSTED_ORIGINS=https://was-spib-sdse-hopic-dv.azurewebsites.net + DB_NAME=hopicdb + DB_PASSWORD=@Microsoft.KeyVault(SecretUri=https://kvspibsdsehopicdv.vault.azure.net/secrets/hopicapp-pgsql-password/) + 
DB_USER=hopicapp + ENV=dev + AZCOPY_AUTO_LOGIN_TYPE=MSI diff --git a/server/Dockerfile.prod b/server/Dockerfile.prod index 38810aad..9573071d 100644 --- a/server/Dockerfile.prod +++ b/server/Dockerfile.prod @@ -1,60 +1,104 @@ -###################### -# DEPENDENCY BUILDER # -###################### -# NOTE: builder layer must mach python and distribution versions of distroless runtime layer! -FROM python:3.11-bookworm as build_env - -# MUST keep these envs in sync with the Dockerfile.prod "FINAL" layer AND with Dockerfile.dev-management -ENV HOME=/cpho -ENV APP_HOME=$HOME/web -ENV PYTHON_DEPS=$HOME/python_deps +# Builds a Prod Image expecting to write to ACR and run the image in an app service container -RUN mkdir "${HOME}" && \ - mkdir "${APP_HOME}" && \ - mkdir "${PYTHON_DEPS}" +########### +# BUILDER # +########### +FROM python:3.11-slim-bookworm as builder -# Update pip -RUN pip install --upgrade pip +# set work directory +WORKDIR /usr/src/app -COPY ./requirements.txt . -COPY ./requirements_dev.txt . -COPY ./requirements_formatting.txt . +# set environment variables +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 -ARG DEPENDENCY_SET="prod" +# Install build dependencies +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + libpq-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* -RUN [ "${DEPENDENCY_SET}" = "prod" ] \ - && pip install --no-cache-dir --target "${PYTHON_DEPS}" -r requirements.txt \ - || : +RUN pip install --upgrade pip wheel setuptools -RUN [ "${DEPENDENCY_SET}" = "test" ] \ - && pip install --no-cache-dir --target "${PYTHON_DEPS}" -r requirements.txt -r requirements_dev.txt -r requirements_formatting.txt \ - || : +# requirements and wheels +COPY requirements.txt . 
+RUN pip wheel --no-cache-dir --no-deps --wheel-dir /usr/src/app/wheels -r requirements.txt ######### # FINAL # ######### -FROM gcr.io/distroless/python3-debian12 +# pull official base image +FROM python:3.11-slim-bookworm + +# Make sure setuptools is always up to date +RUN pip install --upgrade pip setuptools -# MUST keep these envs in sync with the Dockerfile.prod "DEPENDENCY BUILDER" layer AND with Dockerfile.dev-management -ENV HOME=/cpho +# Environment Variables +ENV APP_NAME=hopicapp +ENV APP_USER=${APP_NAME}user +ENV HOME=/${APP_NAME} ENV APP_HOME=$HOME/web -ENV PYTHON_DEPS=$HOME/python_deps +ENV VIRTUALENV=$HOME/env +ENV WHEELDIR=$HOME/wheels +ENV PATH=$VIRTUALENV/bin:$PATH +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +EXPOSE 8000 + +# Install minimal runtime dependencies +RUN apt-get update && \ + apt-get upgrade -y && \ + apt-get install -y --no-install-recommends \ + gosu \ + libpq5 \ + netcat-traditional \ + openssh-server \ + postgresql-common \ + && addgroup --system $APP_NAME \ + && adduser --disabled-password --shell /bin/bash $APP_USER --system --ingroup $APP_NAME --home $HOME \ + && mkdir -p $APP_HOME $VIRTUALENV $WHEELDIR \ + && chown -R $APP_USER:$APP_NAME $HOME \ + && echo "root:Docker!" 
| chpasswd \ + && echo -e "HOPIC Container\n\nFor management commands run: \ncd $APP_HOME \npython manage\n\n" > /etc/motd \ + && echo -e "cd $APP_HOME\n" >> /etc/profile -# this is the ID of the distroless "nonroot" user, using ID instead of user name because the k8s runAsNonRoot security context -# can't verify non-rootness when the docker file sets user by name -# https://github.com/GoogleContainerTools/distroless/blob/9c5d2c431825d7aa21017551b2ec75c29c1f23c6/common/variables.bzl#L18 -ENV NONROOT_USER_ID=65532 +WORKDIR $APP_HOME -ENV PATH="${PYTHON_DEPS}/bin:${PATH}" -ENV PYTHONPATH="${PYTHON_DEPS}:${PYTHONPATH}" +# Copy wheels from builder stage +COPY --chown=$APP_USER:$APP_NAME --from=builder /usr/src/app/wheels $WHEELDIR -COPY --chown=$NONROOT_USER_ID:$NONROOT_USER_ID --from=build_env $HOME $HOME -COPY --chown=$NONROOT_USER_ID:$NONROOT_USER_ID . $APP_HOME -WORKDIR $APP_HOME -USER $NONROOT_USER_ID +# Create and configure virtual environment +RUN python -m venv $VIRTUALENV && \ + $VIRTUALENV/bin/pip install --no-cache-dir $WHEELDIR/* && \ + rm -rf $WHEELDIR + +# Make sure setuptools is always up to date +RUN pip install --upgrade setuptools + +# Copy project files +COPY --chown=$APP_USER:$APP_NAME src/ $APP_HOME + +# copy sshd_config file +COPY src/sshd_config /etc/ssh/ + +# copy version file +# COPY version.txt $APP_HOME + +# Setup entrypoint and create staticfiles +RUN chmod +x $APP_HOME/entrypoint.prod.sh && \ + rm -f $APP_HOME/sshd_config && \ + mkdir -p $APP_HOME/staticfiles && \ + SECRET_KEY=t ALLOWED_HOSTS=* DB_NAME=d DB_USER=d DB_PASSWORD=d DB_HOST=d DB_PORT=1 python -m manage collectstatic --no-input -EXPOSE 8080 +# Set entrypoint and default command +# Wrapping the dynamic entrypoint call to ensure parameters are passed through correctly +RUN echo '#!/bin/bash\n"${APP_HOME}/entrypoint.prod.sh" "$@"' > /entrypoint-wrapper.sh && \ + chmod +x /entrypoint-wrapper.sh -ENTRYPOINT [ "python", "./entrypoint.prod.py" ] +ENTRYPOINT ["/entrypoint-wrapper.sh"] 
+CMD ["gunicorn", "--bind", "0.0.0.0:8000", "server.wsgi"] diff --git a/server/entrypoint.prod.sh b/server/entrypoint.prod.sh new file mode 100644 index 00000000..7b106f4c --- /dev/null +++ b/server/entrypoint.prod.sh @@ -0,0 +1,25 @@ +#!/bin/bash +echo "Starting SSH server..." +service ssh start + +echo "Generating static files..." +python manage.py collectstatic --no-input + +if [ ! -z "$DB_HOST" ] && [ ! -z "$DB_PORT" ]; then + echo "Waiting for postgres ($DB_HOST:$DB_PORT)..." + + while ! nc -z $DB_HOST $DB_PORT; do + sleep 0.1 + done + sleep 1 + + echo "PostgreSQL started" + echo "applying migrations..." + python manage.py migrate + echo "migrations applied" + +fi + +eval $(printenv | sed -n "s/^\([^=]\+\)=\(.*\)$/export \1=\2/p" | sed 's/"/\\\"/g' | sed '/=/s//="/' | sed 's/$/"/' >> /etc/profile) + +exec gosu ${APP_USER} "$@" diff --git a/server/sshd_config b/server/sshd_config new file mode 100644 index 00000000..c6ceb581 --- /dev/null +++ b/server/sshd_config @@ -0,0 +1,13 @@ +Port 2222 +ListenAddress 0.0.0.0 +LoginGraceTime 180 +X11Forwarding yes +Ciphers aes128-cbc,3des-cbc,aes256-cbc,aes128-ctr,aes192-ctr,aes256-ctr +MACs hmac-sha1,hmac-sha1-96 +StrictModes yes +SyslogFacility DAEMON +PasswordAuthentication yes +PermitEmptyPasswords no +PermitRootLogin yes +Subsystem sftp internal-sftp + From 8b3b6a250db70bb10e227538d24ffafdbaecde32 Mon Sep 17 00:00:00 2001 From: Keenan Date: Tue, 28 Oct 2025 13:17:59 -0400 Subject: [PATCH 02/18] change deploy script --- pipelines/deploy_dv.yaml | 94 ++++++++++++++++++++++++++++++---------- 1 file changed, 71 insertions(+), 23 deletions(-) diff --git a/pipelines/deploy_dv.yaml b/pipelines/deploy_dv.yaml index 6f80d67b..5e6b8e98 100644 --- a/pipelines/deploy_dv.yaml +++ b/pipelines/deploy_dv.yaml @@ -1,33 +1,22 @@ trigger: none -resources: - repositories: - - repository: templates - type: github - name: PHACDataHub/ADO-Pipeline-Templates - # Service connection to GitHub configured in Azure DevOps - endpoint: DMIA - 
parameters: - name: Image - displayName: Image + displayName: Build Container Image type: string - default: 'App' + default: "prod" values: - - App + # - dev + - prod -jobs: -- template: django_azure_app_service_deployment.yaml@templates - parameters: - env: 'Dev' - repository: 'nsp/spib-sdse-hopic-dv' - appName: 'was-spib-sdse-hopic-dv' - subscription: 'SPIB-SDSE-HOPIC-CICDLZSP-DT' - resourceGroup: 'rg-spib-sdse-hopic-dv' - poolName: spib-sdse-hopic-agents-dv - image: ${{ parameters.Image }} - containerCommand: 'gunicorn --bind 0.0.0.0:8000 omd.wsgi --timeout 1000' - envars: | +variables: + registry: "hcsccrrc.azurecr.io" + repository: "nsp/sdse-spib-hopic-dv" + tag: "$(Build.SourceVersion)" + appName: was-spib-sdse-hopic-dv + subscription: SPIB-SDSE-HOPIC-CICDLZSP-DT + resourceGroup: rg-spib-sdse-hopic-dv + envars: > DB_HOST=pgsql-spib-sdse-hopic-dv.postgres.database.azure.com DB_PORT=5432 DB_SSLMODE=require @@ -41,3 +30,62 @@ jobs: DB_USER=hopicapp ENV=dev AZCOPY_AUTO_LOGIN_TYPE=MSI + + +pool: + name: spib-sdse-hopic-agents-dv + +jobs: + - job: Deploy_DV + steps: + - script: | + sudo apt-get update + sudo apt-get install unzip + displayName: "Install Unzip" + + - script: | + sudo apt install -y docker.io + sudo apt install docker-buildx + sudo systemctl start docker + sudo usermod -aG docker $(id -un) + sudo chmod 666 /var/run/docker.sock + displayName: "Install and Configure Docker" + + - script: | + curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash + displayName: "Install AZ CLI" + + - task: AzureCLI@2 + displayName: "Login to ACR" + inputs: + azureSubscription: $(subscription) + scriptType: bash + scriptLocation: inlineScript + inlineScript: | + az --version + az acr login --name hcsccrrc + + - ${{ if eq(parameters.Image, 'prod')}}: + - script: | + docker build -t $(registry)/$(repository):$(tag) -f server/Dockerfile.prod . 
+ docker push $(registry)/$(repository):$(tag) + displayName: "Build and Push $(repository) Image" + + - task: AzureWebAppContainer@1 + displayName: "Install $(repository) into $(appName)" + inputs: + azureSubscription: "$(subscription)" + appName: "$(appName)" + deployToSlotOrASE: true + resourceGroupName: "$(resourceGroup)" + containers: "$(registry)/$(repository):$(tag)" + containerCommand: "gunicorn --bind 0.0.0.0:8000 server.wsgi --timeout 1000" + + - task: AzureCLI@2 + displayName: "AppSettings for $(appName)" + inputs: + azureSubscription: $(subscription) + scriptType: bash + scriptLocation: inlineScript + inlineScript: | + az webapp config appsettings set -g $(resourceGroup) -n $(appName) --settings ${{ variables.envars }} From ccaabe445f4314b2f2c950a1b18b7e2ecf336a2e Mon Sep 17 00:00:00 2001 From: Keenan Date: Tue, 28 Oct 2025 13:41:03 -0400 Subject: [PATCH 03/18] move sshd config --- server/Dockerfile.prod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/Dockerfile.prod b/server/Dockerfile.prod index 9573071d..c8559864 100644 --- a/server/Dockerfile.prod +++ b/server/Dockerfile.prod @@ -84,7 +84,7 @@ RUN pip install --upgrade setuptools COPY --chown=$APP_USER:$APP_NAME src/ $APP_HOME # copy sshd_config file -COPY src/sshd_config /etc/ssh/ +COPY server/sshd_config /etc/ssh/ # copy version file # COPY version.txt $APP_HOME From 839ffcbb2964804c7ae6fab5dfc7db5b5c153151 Mon Sep 17 00:00:00 2001 From: Keenan Date: Tue, 28 Oct 2025 13:48:08 -0400 Subject: [PATCH 04/18] update dockerfile --- server/Dockerfile.prod | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/Dockerfile.prod b/server/Dockerfile.prod index c8559864..f01e89d1 100644 --- a/server/Dockerfile.prod +++ b/server/Dockerfile.prod @@ -23,7 +23,7 @@ RUN apt-get update && \ RUN pip install --upgrade pip wheel setuptools # requirements and wheels -COPY requirements.txt . +COPY server/requirements.txt . 
RUN pip wheel --no-cache-dir --no-deps --wheel-dir /usr/src/app/wheels -r requirements.txt ######### @@ -81,7 +81,7 @@ RUN python -m venv $VIRTUALENV && \ RUN pip install --upgrade setuptools # Copy project files -COPY --chown=$APP_USER:$APP_NAME src/ $APP_HOME +COPY --chown=$APP_USER:$APP_NAME server/ $APP_HOME # copy sshd_config file COPY server/sshd_config /etc/ssh/ From 04749a0d4cef04144e3b0bbd29e194bc9f86b67e Mon Sep 17 00:00:00 2001 From: Keenan Date: Tue, 28 Oct 2025 13:55:33 -0400 Subject: [PATCH 05/18] add quotes for some env vars --- pipelines/deploy_dv.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/deploy_dv.yaml b/pipelines/deploy_dv.yaml index 5e6b8e98..ccd0c260 100644 --- a/pipelines/deploy_dv.yaml +++ b/pipelines/deploy_dv.yaml @@ -20,13 +20,13 @@ variables: DB_HOST=pgsql-spib-sdse-hopic-dv.postgres.database.azure.com DB_PORT=5432 DB_SSLMODE=require - SECRET_KEY=@Microsoft.KeyVault(SecretUri=https://kvspibsdsehopicdv.vault.azure.net/secrets/hopic-django-secret-key/) + SECRET_KEY='@Microsoft.KeyVault(SecretUri=https://kvspibsdsehopicdv.vault.azure.net/secrets/hopic-django-secret-key/)' WEBSITES_PORT=8000 LATEST_COMMIT_SHA=$(Build.SourceVersion) ALLOWED_HOSTS=was-spib-sdse-hopic-dv.azurewebsites.net CSRF_TRUSTED_ORIGINS=https://was-spib-sdse-hopic-dv.azurewebsites.net DB_NAME=hopicdb - DB_PASSWORD=@Microsoft.KeyVault(SecretUri=https://kvspibsdsehopicdv.vault.azure.net/secrets/hopicapp-pgsql-password/) + DB_PASSWORD='@Microsoft.KeyVault(SecretUri=https://kvspibsdsehopicdv.vault.azure.net/secrets/hopicapp-pgsql-password/)' DB_USER=hopicapp ENV=dev AZCOPY_AUTO_LOGIN_TYPE=MSI From 95aa546d85643d3785c5a9017106fa70c00db286 Mon Sep 17 00:00:00 2001 From: Keenan Date: Tue, 28 Oct 2025 14:07:10 -0400 Subject: [PATCH 06/18] change CSRF_TRUSTED_ORIGINS --- server/server/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/server/settings.py b/server/server/settings.py index 03d2bf47..551fece3 
100644 --- a/server/server/settings.py +++ b/server/server/settings.py @@ -96,7 +96,7 @@ # Additional CORS allowed and CSRF trusted origins should be empty until if/when the app # is serving a REST/GraphQL API for external consumption CORS_ALLOWED_ORIGINS = [] -CSRF_TRUSTED_ORIGINS = [] +CSRF_TRUSTED_ORIGINS = config("CSRF_TRUSTED_ORIGINS", default="", cast=Csv()) # Prod only security settings if not IS_DEV: From 675525a0d371f2b5cc64bde93f62d101d6cb7712 Mon Sep 17 00:00:00 2001 From: Keenan Date: Tue, 28 Oct 2025 14:08:05 -0400 Subject: [PATCH 07/18] change csrf back for now --- server/server/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/server/settings.py b/server/server/settings.py index 551fece3..03d2bf47 100644 --- a/server/server/settings.py +++ b/server/server/settings.py @@ -96,7 +96,7 @@ # Additional CORS allowed and CSRF trusted origins should be empty until if/when the app # is serving a REST/GraphQL API for external consumption CORS_ALLOWED_ORIGINS = [] -CSRF_TRUSTED_ORIGINS = config("CSRF_TRUSTED_ORIGINS", default="", cast=Csv()) +CSRF_TRUSTED_ORIGINS = [] # Prod only security settings if not IS_DEV: From 6f7261704d2feadcfe5e8adfc00f3ac8ea1bbcaf Mon Sep 17 00:00:00 2001 From: Keenan Date: Wed, 29 Oct 2025 11:45:07 -0400 Subject: [PATCH 08/18] make CSRF_TRUSTED_ORIGINS an env var --- server/server/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/server/settings.py b/server/server/settings.py index 03d2bf47..551fece3 100644 --- a/server/server/settings.py +++ b/server/server/settings.py @@ -96,7 +96,7 @@ # Additional CORS allowed and CSRF trusted origins should be empty until if/when the app # is serving a REST/GraphQL API for external consumption CORS_ALLOWED_ORIGINS = [] -CSRF_TRUSTED_ORIGINS = [] +CSRF_TRUSTED_ORIGINS = config("CSRF_TRUSTED_ORIGINS", default="", cast=Csv()) # Prod only security settings if not IS_DEV: From 82cb74819e0bf1ae27bb313448cb630fb48d73bf Mon Sep 17 
00:00:00 2001 From: Keenan Date: Fri, 31 Oct 2025 10:04:16 -0400 Subject: [PATCH 09/18] updates --- pipelines/deploy_dv.yaml | 2 +- server/server/settings.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/deploy_dv.yaml b/pipelines/deploy_dv.yaml index ccd0c260..15fb4e2c 100644 --- a/pipelines/deploy_dv.yaml +++ b/pipelines/deploy_dv.yaml @@ -25,7 +25,7 @@ variables: LATEST_COMMIT_SHA=$(Build.SourceVersion) ALLOWED_HOSTS=was-spib-sdse-hopic-dv.azurewebsites.net CSRF_TRUSTED_ORIGINS=https://was-spib-sdse-hopic-dv.azurewebsites.net - DB_NAME=hopicdb + DB_NAME=hopicdb_migration_test DB_PASSWORD='@Microsoft.KeyVault(SecretUri=https://kvspibsdsehopicdv.vault.azure.net/secrets/hopicapp-pgsql-password/)' DB_USER=hopicapp ENV=dev diff --git a/server/server/settings.py b/server/server/settings.py index 551fece3..7b000370 100644 --- a/server/server/settings.py +++ b/server/server/settings.py @@ -100,7 +100,7 @@ # Prod only security settings if not IS_DEV: - SECURE_SSL_REDIRECT = True + # SECURE_SSL_REDIRECT = True # For K8S Health Check SECURE_REDIRECT_EXEMPT = [ "^healthcheck/", From 413406c24e84f875c75311b223ec555e050b7616 Mon Sep 17 00:00:00 2001 From: Keenan Date: Fri, 31 Oct 2025 10:14:26 -0400 Subject: [PATCH 10/18] put SECURE_SSL_REDIRECT back --- server/server/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/server/settings.py b/server/server/settings.py index 7b000370..551fece3 100644 --- a/server/server/settings.py +++ b/server/server/settings.py @@ -100,7 +100,7 @@ # Prod only security settings if not IS_DEV: - # SECURE_SSL_REDIRECT = True + SECURE_SSL_REDIRECT = True # For K8S Health Check SECURE_REDIRECT_EXEMPT = [ "^healthcheck/", From 15fdc1af7388fd1f5c9b616312fbc3b1f38d1e24 Mon Sep 17 00:00:00 2001 From: Keenan Date: Fri, 31 Oct 2025 10:47:01 -0400 Subject: [PATCH 11/18] remove get_project_config --- server/server/settings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/server/server/settings.py b/server/server/settings.py index 551fece3..d37826e1 100644 --- a/server/server/settings.py +++ b/server/server/settings.py @@ -16,7 +16,7 @@ from django.urls import reverse_lazy -from decouple import Csv +from decouple import Csv, config from server.config_util import get_project_config, is_running_tests @@ -25,7 +25,7 @@ BASE_DIR = Path(__file__).resolve().parent.parent -config = get_project_config() +# config = get_project_config() IS_LOCAL = config("IS_LOCAL", cast=bool, default=False) IS_DEV = config("IS_DEV", cast=bool, default=False) From 90bbe3b25e32fafd372ff87545b2c42f324fda31 Mon Sep 17 00:00:00 2001 From: Keenan Date: Wed, 11 Feb 2026 09:33:38 -0500 Subject: [PATCH 12/18] update dockerfile --- server/Dockerfile.prod | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/server/Dockerfile.prod b/server/Dockerfile.prod index f01e89d1..7a8539a8 100644 --- a/server/Dockerfile.prod +++ b/server/Dockerfile.prod @@ -74,12 +74,10 @@ COPY --chown=$APP_USER:$APP_NAME --from=builder /usr/src/app/wheels $WHEELDIR # Create and configure virtual environment RUN python -m venv $VIRTUALENV && \ + $VIRTUALENV/bin/pip install --upgrade pip setuptools && \ $VIRTUALENV/bin/pip install --no-cache-dir $WHEELDIR/* && \ rm -rf $WHEELDIR -# Make sure setuptools is always up to date -RUN pip install --upgrade setuptools - # Copy project files COPY --chown=$APP_USER:$APP_NAME server/ $APP_HOME From f57a43ce55da40d5cc635395075823e36a55e740 Mon Sep 17 00:00:00 2001 From: Keenan Date: Wed, 11 Feb 2026 09:41:59 -0500 Subject: [PATCH 13/18] try changing order --- server/Dockerfile.prod | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/Dockerfile.prod b/server/Dockerfile.prod index 7a8539a8..683a1433 100644 --- a/server/Dockerfile.prod +++ b/server/Dockerfile.prod @@ -74,8 +74,9 @@ COPY --chown=$APP_USER:$APP_NAME --from=builder /usr/src/app/wheels $WHEELDIR # Create and configure virtual environment RUN python -m venv 
$VIRTUALENV && \ - $VIRTUALENV/bin/pip install --upgrade pip setuptools && \ + $VIRTUALENV/bin/pip install --upgrade pip && \ $VIRTUALENV/bin/pip install --no-cache-dir $WHEELDIR/* && \ + $VIRTUALENV/bin/pip install --upgrade setuptools && \ rm -rf $WHEELDIR # Copy project files From 038905e67598deeca007befdc49dc5cfe06d5b42 Mon Sep 17 00:00:00 2001 From: Keenan Date: Wed, 11 Feb 2026 09:45:10 -0500 Subject: [PATCH 14/18] force re-install --- server/Dockerfile.prod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/Dockerfile.prod b/server/Dockerfile.prod index 683a1433..c7c5e913 100644 --- a/server/Dockerfile.prod +++ b/server/Dockerfile.prod @@ -76,7 +76,7 @@ COPY --chown=$APP_USER:$APP_NAME --from=builder /usr/src/app/wheels $WHEELDIR RUN python -m venv $VIRTUALENV && \ $VIRTUALENV/bin/pip install --upgrade pip && \ $VIRTUALENV/bin/pip install --no-cache-dir $WHEELDIR/* && \ - $VIRTUALENV/bin/pip install --upgrade setuptools && \ + $VIRTUALENV/bin/pip install --force-reinstall setuptools && \ rm -rf $WHEELDIR # Copy project files From 4ccf4a236a228e4a0e42aa68b1973852b271347d Mon Sep 17 00:00:00 2001 From: Keenan Date: Wed, 11 Feb 2026 09:49:11 -0500 Subject: [PATCH 15/18] remove the open telemetry call --- server/manage.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/server/manage.py b/server/manage.py index 96a699f6..a7eae46d 100755 --- a/server/manage.py +++ b/server/manage.py @@ -5,15 +5,12 @@ import sys from server.config_util import get_project_config -from server.open_telemetry_util import instrument_app_for_open_telemetry def main(): """Run administrative tasks.""" os.environ.setdefault("DJANGO_SETTINGS_MODULE", "server.settings") - instrument_app_for_open_telemetry() - try: from django.core.management import execute_from_command_line except ImportError as exc: From bd012180c1d2b4f85e1a43388dc41727be8cb52f Mon Sep 17 00:00:00 2001 From: Keenan Date: Wed, 11 Feb 2026 09:58:20 -0500 Subject: [PATCH 16/18] delete open 
telemetry stuff --- server/server/open_telemetry_util.py | 195 --------------------------- 1 file changed, 195 deletions(-) delete mode 100644 server/server/open_telemetry_util.py diff --git a/server/server/open_telemetry_util.py b/server/server/open_telemetry_util.py deleted file mode 100644 index 8a53ca7e..00000000 --- a/server/server/open_telemetry_util.py +++ /dev/null @@ -1,195 +0,0 @@ -import logging -import os -import sys -import time - -import requests -from opentelemetry import trace -from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter -from opentelemetry.instrumentation.django import DjangoInstrumentor -from opentelemetry.instrumentation.psycopg2 import Psycopg2Instrumentor -from opentelemetry.propagate import set_global_textmap -from opentelemetry.propagators.cloud_trace_propagator import ( - CloudTraceFormatPropagator, -) -from opentelemetry.resourcedetector.gcp_resource_detector import ( - GoogleCloudResourceDetector, -) -from opentelemetry.sdk.resources import ProcessResourceDetector -from opentelemetry.sdk.trace import TracerProvider, sampling -from opentelemetry.sdk.trace.export import ( - BatchSpanProcessor, - ConsoleSpanExporter, -) -from phac_aspc.django.helpers.logging.utils import ( - add_fields_to_all_logs_for_current_request, -) - -from server.config_util import get_project_config, is_running_tests - -logger = logging.getLogger() - - -def instrument_app_for_open_telemetry(): - config = get_project_config() - - IS_LOCAL = config("IS_LOCAL", cast=bool, default=False) - FORCE_LOCAL_OTEL_BEHAVIOUR = config( - "FORCE_LOCAL_OTEL_BEHAVIOUR", cast=bool, default=False - ) - - if IS_LOCAL or FORCE_LOCAL_OTEL_BEHAVIOUR: - project_id = "local" - - OUTPUT_TELEMETRY_TO_CONSOLE = config( - "OUTPUT_TELEMETRY_TO_CONSOLE", cast=bool, default=False - ) - - span_exporter = ConsoleSpanExporter( - out=( - sys.stdout - if OUTPUT_TELEMETRY_TO_CONSOLE - else open(os.devnull, "w") - ) - ) - - resource = 
ProcessResourceDetector(raise_on_error=True).detect() - else: - # In Google Cloud, we must request resources information from a metadata server (metadata.google.internal). - # In theory this is consistently reachable across GCP solutions (Cloud Run, App Engine, GKE, etc), - # but in practice there's a big gotcha in GKE. New pods do not immediately have access to the metadata - # server, and may not for a "few seconds" according to the docs linked below. - # https://cloud.google.com/kubernetes-engine/docs/concepts/workload-identity#project_metadata - # - # Open telemetry "resource" information is used to identify the source of a span, and is imutable once - # the corresponding trace provider has been initialized... which all needs to happen before the Django server - # is initialized. This is all not ideal for cold start times! Not the slowest part though, running collect static - # and migrations on pod start is a bigger slow down right now. Retrying the metadata.google.internal request with - # a short linear delay is the best solution for now. Don't bother with exponential backoff because we want to know - # asap and aren't worried about load on the metadata server (in theory; something to keep an eye on in practice). - # - # Note: we directly call metadata.google.internal below for the project ID, which _could_ be passed as - # an env var, _but_ the GoogleCloudResourceDetector call following that also requires metadata server - # access. GoogleCloudResourceDetector doesn't have the logic to wait for the metadata server so we need to - # implement logic to wait for metadata.google.internal access our selves either way. 
- retry_limit = 12 - retry_delay = 0.25 - - logger.info("Attempting to connect to Google Cloud metadata server...") - project_id = None - for retry_count in range(retry_limit): - try: - project_id = requests.get( - "http://metadata.google.internal/computeMetadata/v1/project/project-id", - headers={"Metadata-Flavor": "Google"}, - ).text - - break - except requests.ConnectionError as error: - if retry_count < retry_limit - 1: - time.sleep(retry_delay) - else: - logger.error(error) - logger.info("Metadata server reachable!") - - span_exporter = CloudTraceSpanExporter( - project_id=project_id, - # resource labels aren't exported in GCP by default, as the labels aren't actually supported - # by Cloud Trace. This regex pattern is used to select resource labels to pick out and convert - # to span attributes - resource_regex=".*", - ) - - # WARNING: you might see examples wrapping a list of resource detectors in - # `opentelemetry.sdk.resources.get_aggregated_resources`. This calls detect() and - # merges the results for you BUT it uses thread pools and may not be suited for all - # prod environments (Cloud Run, small k8s pods, etc). - # Manually call detect and merge as needed instead, not a big deal, this only happens once - # and isn't CPU intensive at all. - # Note: for merge, the order matters with priority given to preceding resource objects - if not project_id: - resource = ProcessResourceDetector(raise_on_error=True).detect() - else: - resource = ( - GoogleCloudResourceDetector(raise_on_error=True).detect() - ).merge(ProcessResourceDetector(raise_on_error=True).detect()) - - # Propagate the X-Cloud-Trace-Context header if present. 
Add it otherwise - set_global_textmap(CloudTraceFormatPropagator()) - - # A BatchSpanProcessor is significantly better for performance, but has some caveats: - # 1) gunicorn caveat: it uses a worker thread, which means instrumentation calls must happen post-gunicorn - # worker fork, or else multiple gunicron app worker threads will attempt to share one BatchSpanProcessor - # worker (and trip over eachother's process locks). Does not apply if gunicorn workers = threads = 1 - # 2) Cloud Run caveat: GCP docs say NOT to use BatchSpanProcessor in Cloud Run, as Cloud Run "does not - # support background processes". That is a simplification though, what they really mean is that a Cloud Run - # container will lose it's CPU when not actively processing a request, so background processes not tied to - # request handling may not have a chance to immediately finish all their work without interuption. They can - # still resume in the background when the container next receives a request. In the case that a container is - # terminated before receiving a new request, the container receives a SIGTERM signal and 10 seconds of grace time - # with a CPU to wrap things up (https://cloud.google.com/run/docs/container-contract#lifecycle-services). - # This caveat may apply in other auto-scalling environments - # The returned `flush_telemetry_callback` can be used to manage this if your environment requires. - span_processor = BatchSpanProcessor(span_exporter) - - tracer_provider = TracerProvider( - active_span_processor=span_processor, - resource=resource, - # Always sample, even if propagating a trace that wasn't sampled in earlier stages (load balancer, etc). 
- # This could be too noisy on a busier app, but should be fine for CPHO's expected usage - sampler=sampling.ALWAYS_ON, - ) - - def associate_request_logs_to_telemetry(span, request): - add_fields_to_all_logs_for_current_request( - { - # see https://cloud.google.com/trace/docs/trace-log-integration#associating - # and https://cloud.google.com/logging/docs/structured-logging#special-payload-fields - "logging.googleapis.com/trace": ( - f"projects/{project_id}/traces/{trace.span.format_trace_id(span.get_span_context().trace_id)}" - ), - "logging.googleapis.com/spanId": ( - trace.span.format_span_id(span.get_span_context().span_id) - ), - # This one's awkward, see: https://www.w3.org/TR/trace-context/#sampled-flag - # Right now the only trace flag is the "sampled flag", so `trace_flags` is either 0 or 1; - # the "correct" way to get `trace_sampled` would be `span.get_span_context().trace_flags == 1`, - # but that seems fragile and might not pick up on overrides, like sampler=sampling.ALWAYS_ON? - # `span.is_recording()` doesn't indicate that the _whole_ trace is sampled, but it should - # indicate that the current span within the trace is reporting/being sampled, which is what this - # log field is actually intended for - "logging.googleapis.com/trace_sampled": span.is_recording(), - } - ) - - Psycopg2Instrumentor().instrument( - tracer_provider=tracer_provider, - enable_commenter=True, - commenter_options={}, - # This instrumentor expects the `psycopg2` package. This repo uses the `psycopg2-binary` package. - # Compatible with both, but need to disable the instrumentor's dependency checking - skip_dep_check=True, - ) - DjangoInstrumentor().instrument( - tracer_provider=tracer_provider, - meter_provider=None, # TODO - request_hook=associate_request_logs_to_telemetry, - # GOTCHA: in Cloud Run, if we disable our own instrumentation, I believe it just falls back to using - # the default tracing Google has on Cloud Run instance... 
so you'll still get generic spans for excluded routes. - # The default tracing is much lighter weight, so disabling does server _some_ purpose. This will also work as - # expected in non-Cloud Run deployments - excluded_urls=config( - "OTEL_PYTHON_DJANGO_EXCLUDED_URLS", default="healthcheck" - ), - # Confusingly named (typo included), when True this actually adds a sqlcommenter django middleware. - # When enabled, trace metadata is inserted as comments in each SQL query, allowing the corresponding logging - # output on the DB side to be associated back to the initiating trace. - # Currently disabled; may have a performance impact and, more importantly, currently causes test_infobase_export.py to fail - is_sql_commentor_enabled=False, - ) - - def flush_telemetry_callback(): - tracer_provider.force_flush() - tracer_provider.shutdown() - - return flush_telemetry_callback From 56f2a253df7d2444171b440d53bc16e119639216 Mon Sep 17 00:00:00 2001 From: Keenan Date: Wed, 11 Feb 2026 10:07:08 -0500 Subject: [PATCH 17/18] remove telemetry gunicorn --- server/gunicorn.conf.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/server/gunicorn.conf.py b/server/gunicorn.conf.py index 0b3f2d78..f32fff92 100644 --- a/server/gunicorn.conf.py +++ b/server/gunicorn.conf.py @@ -3,8 +3,6 @@ import structlog -from server.open_telemetry_util import instrument_app_for_open_telemetry - # See https://cloud.google.com/run/docs/tips/python#optimize_gunicorn PORT = os.getenv("PORT", "8080") @@ -26,7 +24,7 @@ def post_fork(server, worker): # If NOT using BatchSpanProcessor (likely a bad idea, it's much more performant at run time) you can move instrumentation to wsgi.py and enable preload_app os.environ.setdefault("DJANGO_SETTINGS_MODULE", "server.settings") - worker.flush_telemetry_callback = instrument_app_for_open_telemetry() + # worker.flush_telemetry_callback = instrument_app_for_open_telemetry() def worker_exit(server, worker): From 
5dcf4f4b5e60cbba0b3fe1d9cfe0bcfda818abc7 Mon Sep 17 00:00:00 2001 From: Keenan Date: Wed, 11 Feb 2026 10:18:52 -0500 Subject: [PATCH 18/18] change db --- pipelines/deploy_dv.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/deploy_dv.yaml b/pipelines/deploy_dv.yaml index 15fb4e2c..7be7bf5a 100644 --- a/pipelines/deploy_dv.yaml +++ b/pipelines/deploy_dv.yaml @@ -25,7 +25,7 @@ variables: LATEST_COMMIT_SHA=$(Build.SourceVersion) ALLOWED_HOSTS=was-spib-sdse-hopic-dv.azurewebsites.net CSRF_TRUSTED_ORIGINS=https://was-spib-sdse-hopic-dv.azurewebsites.net - DB_NAME=hopicdb_migration_test + DB_NAME=hopicdb_migration DB_PASSWORD='@Microsoft.KeyVault(SecretUri=https://kvspibsdsehopicdv.vault.azure.net/secrets/hopicapp-pgsql-password/)' DB_USER=hopicapp ENV=dev