From dc83b98549e8c1a9b43ccd932719e7f1236e32f0 Mon Sep 17 00:00:00 2001 From: Maxence Haouari Date: Sun, 26 Apr 2026 20:11:22 +0200 Subject: [PATCH 1/2] =?UTF-8?q?=E2=9C=A8(helm)=20add=20extra=20Helm=20char?= =?UTF-8?q?t=20=20configurations=20to=20fix=20Tilt=20setup?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor backend environment variables to use secret references Update LLM configuration file path --- .dockerignore | 2 + CHANGELOG.md | 2 + Makefile | 11 +- README.md | 2 + bin/Tiltfile | 25 ++- docs/examples/conversations.values.yaml | 2 +- docs/tilt.md | 91 +++++++++ env.d/development/kube-secret.dist | 6 + .../values.conversations.yaml.gotmpl | 2 +- .../dev/values.conversations.yaml.gotmpl | 50 +++-- src/helm/extra/Chart.yaml | 5 + .../extra/templates/kc-postgresql-sts.yaml | 71 +++++++ src/helm/extra/templates/keycloak-sts.yaml | 113 +++++++++++ src/helm/extra/templates/minio.yaml | 180 ++++++++++++++++++ src/helm/extra/templates/postgresql-sts.yaml | 70 +++++++ src/helm/helmfile.yaml.gotmpl | 85 +-------- 16 files changed, 616 insertions(+), 101 deletions(-) create mode 100644 docs/tilt.md create mode 100644 env.d/development/kube-secret.dist create mode 100644 src/helm/extra/Chart.yaml create mode 100644 src/helm/extra/templates/kc-postgresql-sts.yaml create mode 100644 src/helm/extra/templates/keycloak-sts.yaml create mode 100644 src/helm/extra/templates/minio.yaml create mode 100644 src/helm/extra/templates/postgresql-sts.yaml diff --git a/.dockerignore b/.dockerignore index 19761902..c2ae78b0 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,6 +5,8 @@ __pycache__ **/*.pyc venv .venv +**/venv +**/.venv # System-specific files .DS_Store diff --git a/CHANGELOG.md b/CHANGELOG.md index 6bcb72fb..a78c5d6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,8 @@ and this project adheres to - πŸ’„(ui) review ui for part of the project - πŸ›(fix) Fix streaming crash with OpenAI-compatible APIs - 
πŸ›(fix) strip thinking part for models without reasoning support +- ✨(dev) setup Tilt for local development + ## [0.0.15] - 2026-03-31 diff --git a/Makefile b/Makefile index b65bfffd..d090cf2d 100644 --- a/Makefile +++ b/Makefile @@ -368,9 +368,13 @@ build-k8s-cluster: ## build the kubernetes cluster using kind ./bin/start-kind.sh .PHONY: build-k8s-cluster -start-tilt: ## start the kubernetes cluster using kind - tilt up -f ./bin/Tiltfile -.PHONY: build-k8s-cluster +start-tilt: ## start Tilt against the conversations kind cluster + tilt up --namespace=conversations -f ./bin/Tiltfile +.PHONY: start-tilt + +stop-tilt: ## stop Tilt and leave the kind cluster running + tilt down --namespace=conversations -f ./bin/Tiltfile +.PHONY: stop-tilt bump-packages-version: VERSION_TYPE ?= minor bump-packages-version: ## bump the version of the project - VERSION_TYPE can be "major", "minor", "patch" @@ -381,3 +385,4 @@ bump-packages-version: ## bump the version of the project - VERSION_TYPE can be cd ./src/frontend/packages/eslint-config-conversations/ && yarn version --no-git-tag-version --$(VERSION_TYPE) cd ./src/frontend/packages/i18n/ && yarn version --no-git-tag-version --$(VERSION_TYPE) .PHONY: bump-packages-version + diff --git a/README.md b/README.md index d8e0da08..9e814842 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,8 @@ $ make superuser To run the app locally with breakpoints, read: [Debug Mode](docs/debug_mode.md). +To run the app locally with Tilt instead of docker compose, read: [Running with Tilt](docs/tilt.md). 
+ ## Documentation πŸ“š Additional documentation is available in the `docs/` directory: diff --git a/bin/Tiltfile b/bin/Tiltfile index 78a3541b..e774a5da 100644 --- a/bin/Tiltfile +++ b/bin/Tiltfile @@ -1,7 +1,13 @@ load('ext://uibutton', 'cmd_button', 'bool_input', 'location') load('ext://namespace', 'namespace_create', 'namespace_inject') +load('ext://secret', 'secret_yaml_generic') namespace_create('conversations') +DEV_ENV = os.getenv('DEV_ENV', 'dev') + +def clean_old_images(image_name): + local('docker images -q %s | tail -n +2 | xargs -r docker rmi' % image_name) + docker_build( 'localhost:5001/conversations-backend:latest', context='..', @@ -10,8 +16,13 @@ docker_build( target = 'backend-production', live_update=[ sync('../src/backend', '/app'), + run( + 'pip install -r /app/requirements.txt', + trigger=['./src/backend/requirements.txt'] + ) ] ) +clean_old_images('localhost:5001/conversations-backend') docker_build( 'localhost:5001/conversations-frontend:latest', @@ -23,11 +34,21 @@ docker_build( sync('../src/frontend', '/home/frontend'), ] ) +clean_old_images('localhost:5001/conversations-frontend') + +k8s_yaml(secret_yaml_generic( + name='secret-dev', + from_env_file='../env.d/development/kube-secret' +)) + +k8s_yaml(local('cd ../src/helm && helmfile -n conversations -e %s template .' 
% DEV_ENV)) -k8s_resource('conversations-backend-migrate', resource_deps=['postgres-postgresql']) +k8s_resource('minio', port_forwards=['9000:9000', '9001:9001']) +k8s_resource('minio-bucket', resource_deps=['minio']) +k8s_resource('conversations-backend-migrate', resource_deps=['postgresql', 'minio', 'redis']) k8s_resource('conversations-backend-createsuperuser', resource_deps=['conversations-backend-migrate']) k8s_resource('conversations-backend', resource_deps=['conversations-backend-migrate']) -k8s_yaml(local('cd ../src/helm && helmfile -n conversations -e dev template .')) +k8s_resource('keycloak', resource_deps=['kc-postgresql']) migration = ''' set -eu diff --git a/docs/examples/conversations.values.yaml b/docs/examples/conversations.values.yaml index 5ac209b8..56c00d46 100644 --- a/docs/examples/conversations.values.yaml +++ b/docs/examples/conversations.values.yaml @@ -80,7 +80,7 @@ backend: # Extra volume to manage our local custom CA and avoid to set ssl_verify: false extraVolumeMounts: - name: certs - mountPath: /usr/local/lib/python3.12/site-packages/certifi/cacert.pem + mountPath: /app/.venv/lib/python3.13/site-packages/certifi/cacert.pem subPath: cacert.pem # Extra volume to manage our local custom CA and avoid to set ssl_verify: false diff --git a/docs/tilt.md b/docs/tilt.md new file mode 100644 index 00000000..03e9acc8 --- /dev/null +++ b/docs/tilt.md @@ -0,0 +1,91 @@ +# Running the app locally with Tilt + +[Tilt](https://tilt.dev) orchestrates the local Kubernetes development environment: it builds Docker images, deploys all services via Helm, and keeps everything in sync as you edit code. 
+ +## Prerequisites + +Install the following tools before getting started: + +- [Docker](https://docs.docker.com/get-docker/) +- [Kind](https://kind.sigs.k8s.io/docs/user/quick-start/#installation) — local Kubernetes cluster +- [kubectl](https://kubernetes.io/docs/tasks/tools/) +- [Helm](https://helm.sh/docs/intro/install/) + [Helmfile](https://helmfile.readthedocs.io/en/latest/#installation) +- [mkcert](https://github.com/FiloSottile/mkcert#installation) — local TLS certificates +- [Tilt](https://docs.tilt.dev/install.html) + +## Step 1 — Create the Kubernetes cluster + +```bash +make build-k8s-cluster +``` + +This runs `bin/start-kind.sh`, which: + +1. Creates a local Docker registry at `localhost:5001` +2. Creates a Kind cluster named `conversations` +3. Installs the ingress-nginx controller +4. Generates mkcert TLS certificates for `*.127.0.0.1.nip.io` + +All local domains resolve to `127.0.0.1` via [nip.io](https://nip.io) — no `/etc/hosts` edits needed. + +## Step 2 — Configure secrets + +Copy the secrets template and fill in the required values: + +```bash +cp env.d/development/kube-secret.dist env.d/development/kube-secret +``` + +Then edit `env.d/development/kube-secret`: + +| Variable | Required | Description | +|---|---|---| +| `AI_BASE_URL` | Yes | LLM provider base URL | +| `AI_API_KEY` | Yes | LLM provider API key | +| `ALBERT_API_URL` | No | Albert API URL (if using Albert provider) | +| `ALBERT_API_KEY` | No | Albert API key | +| `BRAVE_API_KEY` | No | Brave Search API key (web search tool) | + +## Step 3 — Start the app + +```bash +make start-tilt +``` + +Tilt will: + +1. Build the backend and frontend Docker images and push them to `localhost:5001` +2. Deploy supporting services (PostgreSQL, Keycloak, MinIO, Redis) via the `extra` Helm chart +3. Deploy the backend and frontend via the `conversations` Helm chart +4. Run database migrations and create a superuser (`admin@example.com` / `admin`) +5. 
Watch source files and sync changes live + +The Tilt dashboard opens at `http://localhost:10350`. Wait for all resources to turn green before accessing the app. + +## Accessing the services + +| Service | URL | Credentials | +|---|---|---| +| App | `https://conversations.127.0.0.1.nip.io` | via Keycloak | +| Keycloak admin | `https://conversations-keycloak.127.0.0.1.nip.io` | `su` / `su` | +| MinIO console | `http://localhost:9001` | `conversations` / `password` | +| Tilt dashboard | `http://localhost:10350` | β€” | + +## Django management commands + +The Tilt dashboard exposes two buttons on the `conversations-backend` resource: + +- **Run makemigration** β€” runs `python manage.py makemigrations` +- **Run database migration** β€” runs `python manage.py migrate --no-input` + +## Stopping + +```bash +make stop-tilt +``` + +This shuts down Tilt but leaves the Kind cluster running. To also delete the cluster: + +```bash +kind delete cluster --name conversations +``` diff --git a/env.d/development/kube-secret.dist b/env.d/development/kube-secret.dist new file mode 100644 index 00000000..0f470ac2 --- /dev/null +++ b/env.d/development/kube-secret.dist @@ -0,0 +1,6 @@ +# Secrets β€” copy this file to kube-secret and fill in real values +AI_BASE_URL=changeme +AI_API_KEY=changeme +ALBERT_API_URL=changeme +ALBERT_API_KEY=changeme +BRAVE_API_KEY=changeme \ No newline at end of file diff --git a/src/helm/env.d/dev-staging/values.conversations.yaml.gotmpl b/src/helm/env.d/dev-staging/values.conversations.yaml.gotmpl index a3b0aef4..cbb2dd0c 100644 --- a/src/helm/env.d/dev-staging/values.conversations.yaml.gotmpl +++ b/src/helm/env.d/dev-staging/values.conversations.yaml.gotmpl @@ -90,7 +90,7 @@ backend: # Extra volume mounts to manage our local custom CA and avoid to set ssl_verify: false extraVolumeMounts: - name: certs - mountPath: /usr/local/lib/python3.12/site-packages/certifi/cacert.pem + mountPath: /app/.venv/lib/python3.13/site-packages/certifi/cacert.pem subPath: 
cacert.pem # Extra volumes to manage our local custom CA and avoid to set ssl_verify: false diff --git a/src/helm/env.d/dev/values.conversations.yaml.gotmpl b/src/helm/env.d/dev/values.conversations.yaml.gotmpl index bed47233..a83f6e54 100644 --- a/src/helm/env.d/dev/values.conversations.yaml.gotmpl +++ b/src/helm/env.d/dev/values.conversations.yaml.gotmpl @@ -1,8 +1,5 @@ -djangoSecretKey: &djangoSecretKey "lkjsdlfkjsldkfjslkdfjslkdjfslkdjf" djangoSuperUserEmail: admin@example.com djangoSuperUserPass: admin -aiApiKey: changeme -aiBaseUrl: changeme oidc: clientId: conversations clientSecret: ThisIsAnExampleKeyForDevPurposeOnly @@ -15,13 +12,38 @@ image: backend: replicas: 1 envVars: + DJANGO_SECRET_KEY: ThisIsAnExampleKeyForDevPurposeOnly + OIDC_RP_CLIENT_SECRET: ThisIsAnExampleKeyForDevPurposeOnly + AWS_S3_SECRET_ACCESS_KEY: password + AI_BASE_URL: + secretKeyRef: + name: secret-dev + key: AI_BASE_URL + AI_API_KEY: + secretKeyRef: + name: secret-dev + key: AI_API_KEY + ALBERT_API_URL: + secretKeyRef: + name: secret-dev + key: ALBERT_API_URL + ALBERT_API_KEY: + secretKeyRef: + name: secret-dev + key: ALBERT_API_KEY + BRAVE_API_KEY: + secretKeyRef: + name: secret-dev + key: BRAVE_API_KEY COLLABORATION_SERVER_SECRET: my-secret DJANGO_CSRF_TRUSTED_ORIGINS: https://conversations.127.0.0.1.nip.io DJANGO_CONFIGURATION: Feature - DJANGO_ALLOWED_HOSTS: conversations.127.0.0.1.nip.io - DJANGO_SECRET_KEY: *djangoSecretKey + DJANGO_ALLOWED_HOSTS: conversations.127.0.0.1.nip.io,conversations-backend.conversations.svc.cluster.local DJANGO_SETTINGS_MODULE: conversations.settings DJANGO_SUPERUSER_PASSWORD: admin + PYTHONPATH: /app + CONVERSATIONS_BASE_URL: https://conversations.127.0.0.1.nip.io + MEDIA_BASE_URL: https://conversations.127.0.0.1.nip.io/media DJANGO_EMAIL_BRAND_NAME: "La Suite NumΓ©rique" DJANGO_EMAIL_HOST: "maildev" DJANGO_EMAIL_LOGO_IMG: https://conversations.127.0.0.1.nip.io/assets/logo-suite-numerique.png @@ -38,13 +60,12 @@ backend: OIDC_OP_USER_ENDPOINT: 
https://conversations-keycloak.127.0.0.1.nip.io/realms/conversations/protocol/openid-connect/userinfo OIDC_OP_LOGOUT_ENDPOINT: https://conversations-keycloak.127.0.0.1.nip.io/realms/conversations/protocol/openid-connect/logout OIDC_RP_CLIENT_ID: conversations - OIDC_RP_CLIENT_SECRET: ThisIsAnExampleKeyForDevPurposeOnly OIDC_RP_SIGN_ALGO: RS256 OIDC_RP_SCOPES: "openid email" LOGIN_REDIRECT_URL: https://conversations.127.0.0.1.nip.io LOGIN_REDIRECT_URL_FAILURE: https://conversations.127.0.0.1.nip.io LOGOUT_REDIRECT_URL: https://conversations.127.0.0.1.nip.io - DB_HOST: postgres-postgresql + DB_HOST: postgres DB_NAME: conversations DB_USER: dinum DB_PASSWORD: pass @@ -54,12 +75,16 @@ backend: POSTGRES_PASSWORD: pass REDIS_URL: redis://default:pass@redis-master:6379/1 DJANGO_CELERY_BROKER_URL: redis://default:pass@redis-master:6379/1 + AI_MODEL: mistral-medium-2508 AWS_S3_ENDPOINT_URL: http://minio.conversations.svc.cluster.local:9000 - AWS_S3_ACCESS_KEY_ID: root - AWS_S3_SECRET_ACCESS_KEY: password + AWS_S3_ACCESS_KEY_ID: conversations + AWS_S3_DOMAIN_REPLACE: https://minio-conversations.127.0.0.1.nip.io AWS_STORAGE_BUCKET_NAME: conversations-media-storage STORAGES_STATICFILES_BACKEND: django.contrib.staticfiles.storage.StaticFilesStorage - CACHES_KEY_PREFIX: "{{ now | unixEpoch }}" + CACHES_KEY_PREFIX: conversations-dev + FEATURE_FLAG_WEB_SEARCH: ENABLED + FEATURE_FLAG_DOCUMENT_UPLOAD: ENABLED + LLM_CONFIGURATION_FILE_PATH: /app/conversations/configuration/llm/custom_llm_configuration.json migrate: command: - "/bin/sh" @@ -104,13 +129,12 @@ backend: file_content: {{ readFile "./configuration/theme/demo.json" }} llmConfiguration: - enabled: true - file_content: {{ readFile "./configuration/llm/demo.json" }} + enabled: false # Extra volume mounts to manage our local custom CA and avoid to set ssl_verify: false extraVolumeMounts: - name: certs - mountPath: /usr/local/lib/python3.13/site-packages/certifi/cacert.pem + mountPath: 
/app/.venv/lib/python3.13/site-packages/certifi/cacert.pem subPath: cacert.pem # Extra volumes to manage our local custom CA and avoid to set ssl_verify: false diff --git a/src/helm/extra/Chart.yaml b/src/helm/extra/Chart.yaml new file mode 100644 index 00000000..15632345 --- /dev/null +++ b/src/helm/extra/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v2 +name: extra +description: A Helm chart to add some manifests to conversations +type: application +version: 0.1.0 diff --git a/src/helm/extra/templates/kc-postgresql-sts.yaml b/src/helm/extra/templates/kc-postgresql-sts.yaml new file mode 100644 index 00000000..28177b94 --- /dev/null +++ b/src/helm/extra/templates/kc-postgresql-sts.yaml @@ -0,0 +1,71 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: kc-postgres + namespace: {{ .Release.Namespace | quote }} +spec: + ports: + - name: tcp-postgresql + port: 5432 + protocol: TCP + targetPort: tcp-postgresql + selector: + app.kubernetes.io/instance: extra + app.kubernetes.io/name: kc-postgresql + type: ClusterIP +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: kc-postgresql + namespace: {{ .Release.Namespace | quote }} +spec: + selector: + matchLabels: + app.kubernetes.io/instance: extra + app.kubernetes.io/name: kc-postgresql + serviceName: "kc-postgres" + replicas: 1 + template: + metadata: + labels: + app.kubernetes.io/instance: extra + app.kubernetes.io/name: kc-postgresql + spec: + terminationGracePeriodSeconds: 10 + containers: + - name: pg + image: postgres:16-alpine + ports: + - containerPort: 5432 + name: tcp-postgresql + env: + - name: POSTGRES_PASSWORD + value: keycloak + - name: POSTGRES_USER + value: keycloak + - name: POSTGRES_DB + value: keycloak + readinessProbe: + exec: + command: ["pg_isready", "-U", "keycloak"] + initialDelaySeconds: 5 + periodSeconds: 5 + livenessProbe: + exec: + command: ["pg_isready", "-U", "keycloak"] + initialDelaySeconds: 30 + periodSeconds: 10 + failureThreshold: 6 + volumeMounts: + - name: data + mountPath: 
/var/lib/postgresql + volumeClaimTemplates: + - metadata: + name: data + spec: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 1Gi diff --git a/src/helm/extra/templates/keycloak-sts.yaml b/src/helm/extra/templates/keycloak-sts.yaml new file mode 100644 index 00000000..25e54173 --- /dev/null +++ b/src/helm/extra/templates/keycloak-sts.yaml @@ -0,0 +1,113 @@ +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: keycloak + namespace: {{ .Release.Namespace | quote }} +spec: + rules: + - host: "conversations-keycloak.127.0.0.1.nip.io" + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: keycloak + port: + number: 8080 + tls: + - hosts: + - conversations-keycloak.127.0.0.1.nip.io + secretName: conversations-tls +--- +apiVersion: v1 +kind: Service +metadata: + name: keycloak + namespace: {{ .Release.Namespace | quote }} +spec: + ports: + - name: tcp-keycloak + port: 8080 + protocol: TCP + targetPort: tcp-keycloak + selector: + app.kubernetes.io/instance: extra + app.kubernetes.io/name: keycloak + type: ClusterIP +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: conversations-keycloak + namespace: {{ .Release.Namespace | quote }} +data: + conversations.json: | +{{ .Values.realm | indent 4 }} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: keycloak + namespace: {{ .Release.Namespace | quote }} +spec: + selector: + matchLabels: + app.kubernetes.io/instance: extra + app.kubernetes.io/name: keycloak + serviceName: "keycloak" + replicas: 1 + template: + metadata: + labels: + app.kubernetes.io/instance: extra + app.kubernetes.io/name: keycloak + spec: + terminationGracePeriodSeconds: 10 + automountServiceAccountToken: false + containers: + - name: keycloak + image: quay.io/keycloak/keycloak:22.0.5 + args: + - start-dev + - --import-realm + - --proxy=edge + - --hostname=conversations-keycloak.127.0.0.1.nip.io + - --hostname-strict=false + - --hostname-strict-https=false + ports: + - 
containerPort: 8080 + name: tcp-keycloak + env: + - name: KEYCLOAK_ADMIN + value: su + - name: KEYCLOAK_ADMIN_PASSWORD + value: su + - name: KC_DB + value: postgres + - name: KC_DB_URL_HOST + value: kc-postgres + - name: KC_DB_URL_DATABASE + value: keycloak + - name: KC_DB_PASSWORD + value: keycloak + - name: KC_DB_USERNAME + value: keycloak + - name: KC_DB_SCHEMA + value: public + resources: + requests: + cpu: 500m + memory: 512Mi + limits: + cpu: "1" + memory: 1Gi + volumeMounts: + - name: realm + mountPath: "/opt/keycloak/data/import" + readOnly: true + volumes: + - name: realm + configMap: + name: conversations-keycloak diff --git a/src/helm/extra/templates/minio.yaml b/src/helm/extra/templates/minio.yaml new file mode 100644 index 00000000..5f3ea13b --- /dev/null +++ b/src/helm/extra/templates/minio.yaml @@ -0,0 +1,180 @@ +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: minio + namespace: {{ .Release.Namespace | quote }} + annotations: + nginx.ingress.kubernetes.io/proxy-body-size: 100m +spec: + rules: + - host: "minio-conversations.127.0.0.1.nip.io" + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: minio + port: + number: 9000 + tls: + - hosts: + - minio-conversations.127.0.0.1.nip.io + secretName: conversations-tls +--- +apiVersion: v1 +kind: Service +metadata: + name: minio + namespace: {{ .Release.Namespace | quote }} +spec: + ports: + - name: client + port: 9000 + protocol: TCP + targetPort: 9000 + - name: console + port: 9001 + protocol: TCP + targetPort: 9001 + selector: + app.kubernetes.io/instance: extra + app.kubernetes.io/name: minio + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: minio + namespace: {{ .Release.Namespace | quote }} + labels: + app.kubernetes.io/instance: extra + app.kubernetes.io/name: minio +spec: + selector: + matchLabels: + app.kubernetes.io/instance: extra + app.kubernetes.io/name: minio + replicas: 1 + template: + metadata: + labels: + 
app.kubernetes.io/instance: extra + app.kubernetes.io/name: minio + spec: + containers: + - name: minio + command: + - /bin/sh + - -c + - | + minio server --console-address :9001 /data + env: + - name: MINIO_ROOT_USER + value: conversations + - name: MINIO_ROOT_PASSWORD + value: password + image: "minio/minio:RELEASE.2025-09-07T16-13-09Z" + imagePullPolicy: IfNotPresent + ports: + - containerPort: 9000 + name: client + - containerPort: 9001 + name: console + volumeMounts: + - mountPath: /data + name: data + volumes: + - name: data + emptyDir: {} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: minio-bucket + namespace: {{ .Release.Namespace | quote }} +spec: + template: + spec: + containers: + - name: mc + image: minio/mc:RELEASE.2025-08-13T08-35-41Z + command: + - /bin/sh + - -c + - | + until /usr/bin/mc alias set conversations http://minio:9000 conversations password; do + echo "Waiting for minio..." + sleep 5 + done + /usr/bin/mc mb --ignore-existing conversations/conversations-media-storage + exit 0 + restartPolicy: Never + backoffLimit: 3 +--- +apiVersion: v1 +kind: Service +metadata: + name: redis-master + namespace: {{ .Release.Namespace | quote }} +spec: + ports: + - name: tcp-redis + port: 6379 + protocol: TCP + targetPort: tcp-redis + selector: + app.kubernetes.io/instance: extra + app.kubernetes.io/name: redis + type: ClusterIP +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: redis + namespace: {{ .Release.Namespace | quote }} +data: + redis.conf: | + bind 0.0.0.0 + port 6379 + user default on >pass ~* &* +@all +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: redis + namespace: {{ .Release.Namespace | quote }} + labels: + app.kubernetes.io/instance: extra + app.kubernetes.io/name: redis +spec: + selector: + matchLabels: + app.kubernetes.io/instance: extra + app.kubernetes.io/name: redis + replicas: 1 + template: + metadata: + labels: + app.kubernetes.io/instance: extra + app.kubernetes.io/name: redis + spec: + containers: + 
- name: redis + args: + - redis-server + - /usr/local/etc/redis/redis.conf + image: "redis:8.2-alpine" + imagePullPolicy: IfNotPresent + ports: + - containerPort: 6379 + name: tcp-redis + volumeMounts: + - name: redis + mountPath: "/usr/local/etc/redis" + readOnly: true + volumes: + - name: redis + configMap: + name: redis diff --git a/src/helm/extra/templates/postgresql-sts.yaml b/src/helm/extra/templates/postgresql-sts.yaml new file mode 100644 index 00000000..172b4f40 --- /dev/null +++ b/src/helm/extra/templates/postgresql-sts.yaml @@ -0,0 +1,70 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: postgres + namespace: {{ .Release.Namespace | quote }} +spec: + ports: + - name: tcp-postgresql + port: 5432 + protocol: TCP + targetPort: tcp-postgresql + selector: + app.kubernetes.io/instance: extra + app.kubernetes.io/name: postgresql + type: ClusterIP +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: postgresql + namespace: {{ .Release.Namespace | quote }} +spec: + selector: + matchLabels: + app.kubernetes.io/instance: extra + app.kubernetes.io/name: postgresql + serviceName: "postgres" + replicas: 1 + template: + metadata: + labels: + app.kubernetes.io/instance: extra + app.kubernetes.io/name: postgresql + spec: + terminationGracePeriodSeconds: 10 + containers: + - name: pg + image: postgres:16-alpine + readinessProbe: + exec: + command: [ "pg_isready", "-U", "dinum", "-d", "conversations", "-h", "127.0.0.1" ] + initialDelaySeconds: 5 + periodSeconds: 5 + livenessProbe: + exec: + command: [ "pg_isready", "-U", "dinum", "-d", "conversations", "-h", "127.0.0.1" ] + initialDelaySeconds: 15 + periodSeconds: 10 + ports: + - containerPort: 5432 + name: tcp-postgresql + env: + - name: POSTGRES_PASSWORD + value: pass + - name: POSTGRES_USER + value: dinum + - name: POSTGRES_DB + value: conversations + volumeMounts: + - name: data + mountPath: /var/lib/postgresql/data + volumeClaimTemplates: + - metadata: + name: data + spec: + accessModes: [ 
"ReadWriteOnce" ] + resources: + requests: + storage: 1Gi diff --git a/src/helm/helmfile.yaml.gotmpl b/src/helm/helmfile.yaml.gotmpl index 2aa933d2..ccc3cfe4 100644 --- a/src/helm/helmfile.yaml.gotmpl +++ b/src/helm/helmfile.yaml.gotmpl @@ -3,92 +3,15 @@ environments: values: - version: 0.0.1 --- -repositories: -- name: bitnami - url: registry-1.docker.io/bitnamicharts - oci: true ---- releases: - - name: keycloak - installed: {{ eq .Environment.Name "dev" | toYaml }} - missingFileHandler: Warn - namespace: {{ .Namespace }} - chart: bitnami/keycloak - version: 17.3.6 - values: - - postgresql: - auth: - username: keycloak - password: keycloak - database: keycloak - - extraEnvVars: - - name: KEYCLOAK_EXTRA_ARGS - value: "--import-realm" - - name: KC_HOSTNAME_URL - value: https://conversations-keycloak.127.0.0.1.nip.io - - extraVolumes: - - name: import - configMap: - name: conversations-keycloak - - extraVolumeMounts: - - name: import - mountPath: /opt/bitnami/keycloak/data/import/ - - auth: - adminUser: su - adminPassword: su - - proxy: edge - - ingress: - enabled: true - hostname: conversations-keycloak.127.0.0.1.nip.io - - extraDeploy: - - apiVersion: v1 - kind: ConfigMap - metadata: - name: conversations-keycloak - namespace: {{ .Namespace }} - data: - conversations.json: | -{{ readFile "../../docker/auth/realm.json" | replace "http://localhost:3200" "https://conversations.127.0.0.1.nip.io" | indent 14 }} - - - name: postgres - installed: {{ eq .Environment.Name "dev" | toYaml }} - namespace: {{ .Namespace }} - chart: bitnami/postgresql - version: 13.1.5 - values: - - auth: - username: dinum - password: pass - database: conversations - - tls: - enabled: true - autoGenerated: true - - - name: minio - installed: {{ eq .Environment.Name "dev" | toYaml }} - namespace: {{ .Namespace }} - chart: bitnami/minio - version: 12.10.10 - values: - - auth: - rootUser: root - rootPassword: password - - provisioning: - enabled: true - buckets: - - name: 
conversations-media-storage - versioning: true - - - name: redis + - name: extra installed: {{ eq .Environment.Name "dev" | toYaml }} namespace: {{ .Namespace }} - chart: bitnami/redis - version: 20.6.2 + chart: ./extra values: - - auth: - password: pass - architecture: standalone + - realm: | +{{ readFile "../../docker/auth/realm.json" | replace "http://localhost:3200" "https://conversations.127.0.0.1.nip.io" | indent 10 }} - name: conversations version: {{ .Values.version }} From 777225680e986eaff69b153ce2a8f6e6ea7422eb Mon Sep 17 00:00:00 2001 From: Maxence Haouari Date: Thu, 16 Apr 2026 16:15:59 +0200 Subject: [PATCH 2/2] =?UTF-8?q?=E2=9C=A8(backend)=20add=20audio=20transcri?= =?UTF-8?q?ption?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create connection to meet/summary service and webhook to receive transcript Store transcript in RAG --- Makefile | 4 + docs/tilt.md | 6 + env.d/development/common.dist | 13 + env.d/development/kube-secret.dist | 8 +- src/backend/chat/authentication.py | 37 ++ src/backend/chat/clients/pydantic_ai.py | 67 ++- src/backend/chat/malware_detection.py | 29 +- ...ersationattachment_transcription_job_id.py | 23 + src/backend/chat/models.py | 8 + .../pydantic_ai/test_parse_audio_documents.py | 174 ++++++ .../chat/tests/test_malware_detection.py | 69 ++- src/backend/chat/tests/test_transcription.py | 56 ++ .../test_conversation_with_audio_upload.py | 507 ++++++++++++++++++ .../test_conversation_with_document_upload.py | 6 +- .../test_conversation_with_document_url.py | 8 +- .../tests/views/test_transcription_webhook.py | 171 ++++++ src/backend/chat/transcription.py | 134 +++++ src/backend/chat/views.py | 79 +++ src/backend/chat/webhook_models.py | 69 +++ src/backend/conversations/settings.py | 23 + src/backend/core/file_upload/enums.py | 2 + src/backend/core/file_upload/utils.py | 12 + src/backend/core/urls.py | 6 + .../features/chat/components/MessageItem.tsx | 27 +- 
.../chat/components/ToolInvocationItem.tsx | 46 +- .../dev/values.conversations.yaml.gotmpl | 32 +- 26 files changed, 1570 insertions(+), 46 deletions(-) create mode 100644 src/backend/chat/authentication.py create mode 100644 src/backend/chat/migrations/0007_chatconversationattachment_transcription_job_id.py create mode 100644 src/backend/chat/tests/clients/pydantic_ai/test_parse_audio_documents.py create mode 100644 src/backend/chat/tests/test_transcription.py create mode 100644 src/backend/chat/tests/views/chat/conversations/test_conversation_with_audio_upload.py create mode 100644 src/backend/chat/tests/views/test_transcription_webhook.py create mode 100644 src/backend/chat/transcription.py create mode 100644 src/backend/chat/webhook_models.py diff --git a/Makefile b/Makefile index d090cf2d..86e8b805 100644 --- a/Makefile +++ b/Makefile @@ -169,6 +169,10 @@ stop: ## stop the development server using Docker @$(COMPOSE_E2E) stop .PHONY: stop +restart: ## restart the development server using Docker + @$(COMPOSE_E2E) restart +.PHONY: restart + # -- Backend demo: ## flush db then create a demo for load testing purpose diff --git a/docs/tilt.md b/docs/tilt.md index 03e9acc8..43a65585 100644 --- a/docs/tilt.md +++ b/docs/tilt.md @@ -45,6 +45,12 @@ Then edit `env.d/development/kube-secret`: | `ALBERT_API_URL` | No | Albert API URL (if using Albert provider) | | `ALBERT_API_KEY` | No | Albert API key | | `BRAVE_API_KEY` | No | Brave Search API key (web search tool) | +| `STT_SERVICE_URL` | No | Speech-to-text service URL | +| `STT_SERVICE_API_KEY` | No | Speech-to-text service API key | +| `STT_WEBHOOK_API_KEY` | No | Bearer token the STT service uses when calling back the transcription webhook | +| `LANGFUSE_SECRET_KEY` | No | Langfuse secret key | +| `LANGFUSE_PUBLIC_KEY` | No | Langfuse public key | +| `LANGFUSE_HOST` | No | Langfuse instance URL | ## Step 3 β€” Start the app diff --git a/env.d/development/common.dist b/env.d/development/common.dist index 
class AiWebhookAuthentication(BaseAuthentication):
    """
    Custom authentication class for AI webhook requests.

    Validates the API key sent in the ``Authorization`` header against
    ``settings.STT_WEBHOOK_API_KEY``. A leading ``"Bearer "`` prefix is
    stripped from the header before the comparison.
    """

    def authenticate(self, request):
        """
        Authenticate the request and return a two-tuple of (user, token).

        Returns:
            ``(AnonymousUser(), None)`` when the presented key matches the
            configured one.

        Raises:
            AuthenticationFailed: when ``STT_WEBHOOK_API_KEY`` is empty or
                unset, or when the header is missing, empty, or does not
                match the configured key.
        """
        # Function-scope import: keeps this block independent of the module's
        # top-level import list.
        import hmac  # pylint: disable=import-outside-toplevel

        expected_key = settings.STT_WEBHOOK_API_KEY
        if not expected_key:
            # Fail closed: without a configured key nothing can authenticate.
            raise AuthenticationFailed("STT_WEBHOOK_API_KEY is not configured.")

        authorization_header: str = request.headers.get("Authorization") or ""
        # NOTE(review): removeprefix() leaves a header without the "Bearer "
        # scheme untouched, so a bare key is accepted as well. Kept as-is to
        # stay compatible with existing callers -- confirm this is intended.
        token = authorization_header.removeprefix("Bearer ")

        # Compare in constant time so response timing does not reveal how many
        # leading characters of the secret an attacker guessed correctly.
        # Both sides are encoded to bytes because compare_digest() rejects
        # non-ASCII ``str`` arguments.
        token_matches = bool(token) and hmac.compare_digest(
            token.encode("utf-8"), str(expected_key).encode("utf-8")
        )
        if not token_matches:
            logger.warning(
                "Authentication failed: Bad Authorization header (ip: %s)",
                request.META.get("REMOTE_ADDR"),
            )
            raise AuthenticationFailed()

        # No users are associated with the transcribe webhooks
        return AnonymousUser(), None
""" self._last_stop_check = 0 + self._audio_document_names = [] await cache.adelete(self._stop_cache_key) # --------------------------------------------------------------------- # @@ -585,7 +589,7 @@ async def _process_agent_nodes( logger.debug("v: %s", dataclasses.asdict(node)) yield self._handle_end_node(node, langfuse, state) - async def _parse_input_documents(self, documents: List[BinaryContent | DocumentUrl]): + async def _parse_input_documents(self, documents: List[BinaryContent | DocumentUrl]): # pylint: disable=too-many-branches """ Parse and store input documents in the conversation's document store. @@ -644,17 +648,24 @@ async def _parse_input_documents(self, documents: List[BinaryContent | DocumentU # Security check: ensure the document belongs to the conversation if not key.startswith(f"{self.conversation.pk}/"): raise ValueError("Document URL does not belong to the conversation.") - # Retrieve the document data - with default_storage.open(key, "rb") as file: - document_data = file.read() - # Run in thread to avoid blocking the event loop during parsing - parsed_content = await asyncio.to_thread( - document_store.parse_and_store_document, - name=document.identifier, - content_type=document.media_type, - content=document_data, - user_sub=self.user.sub, - ) + if is_audio_content_type(document.media_type): + parsed_content = await wait_for_transcript(key, self.conversation) + await document_store.astore_document( + name=document.identifier, content=parsed_content + ) + self._audio_document_names.append(document.identifier) + else: + # Retrieve the document data + with default_storage.open(key, "rb") as file: + document_data = file.read() + # Run in thread to avoid blocking the event loop during parsing + parsed_content = await asyncio.to_thread( + document_store.parse_and_store_document, + name=document.identifier, + content_type=document.media_type, + content=document_data, + user_sub=self.user.sub, + ) else: # Remote URL raise ValueError("External 
document URL are not accepted yet.") @@ -668,7 +679,9 @@ async def _parse_input_documents(self, documents: List[BinaryContent | DocumentU user_sub=self.user.sub, ) - if not document.media_type.startswith(TEXT_MIME_PREFIX): + if not document.media_type.startswith(TEXT_MIME_PREFIX) and not is_audio_content_type( + document.media_type + ): md_attachment = await models.ChatConversationAttachment.objects.acreate( conversation=self.conversation, uploaded_by=self.user, @@ -718,8 +731,11 @@ def _prepare_prompt( # noqa: PLR0912 # pylint: disable=too-many-branches raise ValueError(f"Unsupported UserContent type: {type(content)}") if attachment_audio: - # Should be handled by the frontend, but just in case - raise ValueError("Audio attachments are not supported in the current implementation.") + # Inline audio data (base64 data URLs) not supported β€” uploaded audio + # files are handled as DocumentUrl and processed via transcript. + raise ValueError( + "Inline audio binary content is not supported. Upload audio files as attachments." + ) if attachment_video: # Should be handled by the frontend, but just in case raise ValueError("Video attachments are not supported in the current implementation.") @@ -794,6 +810,18 @@ def attached_documents_note() -> str: return base_note return f"{base_note}\n\n{document_context_instruction}" + @self.conversation_agent.instructions + def audio_transcripts_note() -> str: + if not self._audio_document_names: + return "" + names = ", ".join(self._audio_document_names) + return ( + f"[Internal context] The following audio file(s) have been transcribed " + f"and their transcripts are available in the document store: {names}. " + "Use the search tool to retrieve the transcript " + "before answering questions about them." 
def conversation_safe_attachment_callback(file_path, *, conversation_id, **kwargs):
    """
    Handle a malware scan that reported a conversation attachment as safe.

    Non-audio attachments are marked READY. Audio attachments are handed to
    ``trigger_audio_transcription``; if that call raises, the attachment is
    marked TRANSCRIPTION_FAILED instead. When no attachment matches the file
    key in the conversation, a warning is logged and nothing is saved.
    """
    logger.info("File %s for conversation %s is safe", file_path, conversation_id)

    candidates = ChatConversationAttachment.objects.filter(
        conversation_id=conversation_id, key=file_path
    )
    attachment = candidates.first()
    if attachment is None:
        logger.warning(
            "Attachment not found for file %s in conversation %s", file_path, conversation_id
        )
        return

    if not is_audio_content_type(attachment.content_type):
        next_state = AttachmentStatus.READY
    else:
        try:
            trigger_audio_transcription(attachment)
        except Exception:  # pylint: disable=broad-except
            logger.exception(
                "Failed to trigger transcription for attachment %s, "
                "marking as TRANSCRIPTION_FAILED",
                attachment.pk,
            )
            next_state = AttachmentStatus.TRANSCRIPTION_FAILED
        else:
            # Trigger succeeded: this callback does not touch the state itself.
            return

    attachment.upload_state = next_state
    attachment.save(update_fields=["upload_state", "updated_at"])
def test_audio_document_uses_transcript_not_parse_document(
    conversation, audio_attachment, transcript_attachment
):  # pylint: disable=unused-argument
    """An audio DocumentUrl is stored from its transcript; the parser is never invoked."""
    store = mock.MagicMock()
    store.collection_id = "test-collection"
    store.astore_document = mock.AsyncMock(return_value=None)
    store.parse_and_store_document = mock.MagicMock(return_value="")

    audio_document = DocumentUrl(
        url=f"/media-key/{audio_attachment.key}",
        media_type="application/ogg",
        identifier=audio_attachment.file_name,
    )

    async def parse_documents():
        # Bare instance: bypass __init__ and set only the attributes this test provides.
        service = AIAgentService.__new__(AIAgentService)
        service.conversation = conversation
        service.user = audio_attachment.uploaded_by
        service._is_document_upload_enabled = True
        service._audio_document_names = []

        backend_patch = mock.patch(
            "chat.clients.pydantic_ai.document_store_backend", return_value=store
        )
        storage_patch = mock.patch.object(
            default_storage,
            "open",
            return_value=mock.mock_open(read_data=TRANSCRIPT_CONTENT.encode())(),
        )
        with backend_patch, storage_patch:
            await service._parse_input_documents([audio_document])

    asyncio.run(parse_documents())

    store.parse_and_store_document.assert_not_called()
    store.astore_document.assert_called_once()

    # The transcript may arrive by keyword or as the second positional argument.
    recorded_call = store.astore_document.call_args
    content = recorded_call.kwargs.get("content")
    if not content:
        content = recorded_call.args[1] if len(recorded_call.args) > 1 else ""
    assert "# Transcription" in content
def test_audio_document_transcript_attachment_created_by_webhook(
    conversation, audio_attachment, transcript_attachment
):  # pylint: disable=unused-argument
    """Exactly one transcript attachment is linked to the audio file after parsing."""
    store = mock.MagicMock()
    store.collection_id = "test-collection"
    store.astore_document = mock.AsyncMock(return_value=None)

    audio_document = DocumentUrl(
        url=f"/media-key/{audio_attachment.key}",
        media_type="application/ogg",
        identifier=audio_attachment.file_name,
    )

    async def parse_documents():
        # Bare instance: bypass __init__ and set only the attributes this test provides.
        service = AIAgentService.__new__(AIAgentService)
        service.conversation = conversation
        service.user = audio_attachment.uploaded_by
        service._is_document_upload_enabled = True
        service._audio_document_names = []

        backend_patch = mock.patch(
            "chat.clients.pydantic_ai.document_store_backend", return_value=store
        )
        storage_patch = mock.patch.object(
            default_storage,
            "open",
            return_value=mock.mock_open(read_data=TRANSCRIPT_CONTENT.encode())(),
        )
        with backend_patch, storage_patch:
            await service._parse_input_documents([audio_document])

    asyncio.run(parse_documents())

    linked_transcripts = chat_models.ChatConversationAttachment.objects.filter(
        conversation=conversation,
        conversion_from=audio_attachment.key,
    )
    assert linked_transcripts.count() == 1
    store.astore_document.assert_called_once()
def test_malware_detection_callback_safe_audio_transcription_failure(safe_file):
    """A failing transcription trigger leaves the audio attachment in TRANSCRIPTION_FAILED."""
    attachment = ChatConversationAttachmentFactory(key=safe_file, content_type="audio/mpeg")

    # Dotted paths of the callbacks the dispatcher is told to use.
    callback_options = {
        "safe_callback": "chat.malware_detection.conversation_safe_attachment_callback",
        "unknown_callback": "chat.malware_detection.unknown_attachment_callback",
        "unsafe_callback": "chat.malware_detection.conversation_unsafe_attachment_callback",
    }
    failing_trigger = mock.patch(
        "chat.malware_detection.trigger_audio_transcription",
        side_effect=Exception("service unavailable"),
    )

    with failing_trigger:
        malware_detection_callback(
            safe_file,
            ReportStatus.SAFE,
            error_info={},
            conversation_id=attachment.conversation.pk,
            **callback_options,
        )

    attachment.refresh_from_db()
    assert attachment.upload_state == AttachmentStatus.TRANSCRIPTION_FAILED
@responses_lib.activate
@patch("chat.transcription.generate_retrieve_policy", return_value="https://presigned.test/audio")
def test_trigger_audio_transcription_sets_transcribing_status(_mock_policy, audio_attachment):
    """A successful trigger persists the TRANSCRIBING state and the returned job id."""
    transcribe_endpoint = urljoin(settings.STT_SERVICE_URL, "async-jobs/transcribe")
    job_payload = {"job_id": "test-job-123"}
    responses_lib.add(responses_lib.POST, transcribe_endpoint, json=job_payload, status=200)

    trigger_audio_transcription(audio_attachment)
    audio_attachment.refresh_from_db()

    persisted = (audio_attachment.upload_state, audio_attachment.transcription_job_id)
    assert persisted == (AttachmentStatus.TRANSCRIBING, "test-job-123")
def _expected_audio_instructions(today_prompt_date: str, audio_file_name: str) -> str:
    """
    Build the system-instruction text expected for a conversation with an audio file.

    The sections below are joined with a blank line: the fixed test
    instructions, the date prompt, the language directive, the three tool
    prompts, the attached-documents note and the audio-transcripts note that
    names ``audio_file_name``.
    """
    attached_documents_note = (
        "[Internal context] User documents are attached to this conversation. "
        "Do not request re-upload of documents; consider them already available "
        "via the internal store."
    )
    audio_transcripts_note = (
        "[Internal context] The following audio file(s) have been transcribed "
        f"and their transcripts are available in the document store: {audio_file_name}. "
        "Use the search tool to retrieve the transcript "
        "before answering questions about them."
    )
    sections = [
        "You are a helpful test assistant :)",
        f"{today_prompt_date}",
        "Answer in english.",
        f"{SELF_DOCUMENTATION_TOOL_DESCRIPTION}",
        f"{DOCUMENT_SEARCH_RAG_SYSTEM_PROMPT}",
        f"{DOCUMENT_SUMMARIZE_SYSTEM_PROMPT}",
        attached_documents_note,
        audio_transcripts_note,
    ]
    return "\n\n".join(sections)
+ """ + search_score = 0.9 + prompt_tokens = 10 + completion_tokens = 20 + + # Collection creation (Albert backend) + responses.post( + "https://albert.api.etalab.gouv.fr/v1/collections", + json={"id": "123", "name": "test-collection"}, + status=status.HTTP_200_OK, + ) + + # Direct document storage via httpx (astore_document is async) β€” no parse-beta needed + respx.post("https://albert.api.etalab.gouv.fr/v1/documents").mock( + return_value=httpx.Response(status.HTTP_201_CREATED, json={"id": 456}) + ) + + # Semantic search (Albert backend) + responses.post( + "https://albert.api.etalab.gouv.fr/v1/search", + json={ + "data": [ + { + "method": "semantic", + "chunk": { + "id": 123, + "content": TRANSCRIPT_CONTENT, + "metadata": {"document_name": "interview.ogg"}, + }, + "score": search_score, + } + ], + "usage": {"prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens}, + }, + status=status.HTTP_200_OK, + ) + + # Document indexing (Find backend) + responses.post( + "https://find.api.example.com/api/v1.0/documents/index/", + json={"id": "456", "status": "indexed"}, + status=status.HTTP_200_OK, + ) + + # Semantic search (Find backend) + responses.post( + "https://find.api.example.com/api/v1.0/documents/search/", + json=[ + { + "_source": { + "title.fr": "interview.ogg", + "content.fr": TRANSCRIPT_CONTENT, + }, + "_score": search_score, + } + ], + status=status.HTTP_200_OK, + ) + + +@responses.activate +@respx.mock +@freeze_time() +def test_post_conversation_with_audio_upload( + # pylint: disable=too-many-arguments,too-many-positional-arguments + api_client, + mock_audio_rag_api, # pylint: disable=unused-argument + today_prompt_date, + mock_ai_agent_service, +): + """ + Test POST to /api/v1/chats/{pk}/conversation/ with a ready audio file. + + The audio attachment and its linked transcript already exist in DB and S3 + (simulating a completed transcription webhook flow). 
Verifies: + - has_audio=true in the document_parsing tool call + - Transcript is stored in RAG without calling parse-beta + - System instructions include the audio transcripts note + - The LLM can search the transcript via document_search_rag + """ + chat_conversation = ChatConversationFactory(owner__language="en-us") + api_client.force_authenticate(user=chat_conversation.owner) + + # Simulate a completed transcription: audio attachment + linked transcript attachment + audio_attachment = ChatConversationAttachmentFactory( + conversation=chat_conversation, + uploaded_by=chat_conversation.owner, + file_name="interview.ogg", + content_type="audio/ogg", + upload_state=AttachmentStatus.READY, + ) + transcript_attachment = ChatConversationAttachmentFactory( + conversation=chat_conversation, + uploaded_by=chat_conversation.owner, + key=f"{chat_conversation.pk}/attachments/interview.ogg.md", + file_name="interview.ogg.md", + content_type="text/markdown", + conversion_from=audio_attachment.key, + upload_state=AttachmentStatus.READY, + ) + default_storage.save(transcript_attachment.key, BytesIO(TRANSCRIPT_CONTENT.encode())) + + message = UIMessage( + id="1", + role="user", + content="What was discussed in the interview?", + parts=[ + TextUIPart( + text="What was discussed in the interview?", + type="text", + ), + ], + experimental_attachments=[ + Attachment( + name="interview.ogg", + contentType="audio/ogg", + url=f"/media-key/{audio_attachment.key}", + ) + ], + ) + + async def agent_model(messages: list[ModelMessage], _info: AgentInfo): + if len(messages) == 1: + yield { + 0: DeltaToolCall( + name="document_search_rag", + json_args='{"query": "What was discussed in the interview?"}', + ) + } + else: + yield "The interview discussed Hello world." 
+ + with mock_ai_agent_service(FunctionModel(stream_function=agent_model)): + response = api_client.post( + f"/api/v1.0/chats/{chat_conversation.pk}/conversation/", + data={"messages": [message.model_dump(mode="json")]}, + format="json", + ) + + assert response.status_code == status.HTTP_200_OK + assert response.get("Content-Type") == "text/event-stream" + assert response.get("x-vercel-ai-data-stream") == "v1" + assert response.streaming + + response_content = b"".join(response.streaming_content).decode("utf-8") + response_content = replace_uuids_with_placeholder(response_content) + + assert response_content == ( + '9:{"toolCallId":"XXX","toolName":"document_parsing",' + '"args":{"documents":[{"identifier":"interview.ogg"}],"has_audio":true}}\n' + 'a:{"toolCallId":"XXX","result":{"state":"done"}}\n' + 'b:{"toolCallId":"pyd_ai_YYY","toolName":"document_search_rag"}\n' + '9:{"toolCallId":"pyd_ai_YYY","toolName":"document_search_rag",' + '"args":{"query":"What was discussed in the interview?"}}\n' + 'h:{"sourceType":"url","id":"","url":"interview.ogg","title":null,' + '"providerMetadata":{}}\n' + 'a:{"toolCallId":"pyd_ai_YYY","result":[{"url":"interview.ogg","content":' + '"Hello world from the interview.","score":0.9}]}\n' + '0:"The interview discussed Hello world."\n' + 'f:{"messageId":""}\n' + 'd:{"finishReason":"stop","usage":{"promptTokens":100,"completionTokens":15,' + '"co2Impact":0.0}}\n' + ) + + # Check that the conversation was updated + chat_conversation.refresh_from_db() + assert len(chat_conversation.messages) == 2 + + assert chat_conversation.messages[0].id == IsUUID(4) + assert chat_conversation.messages[0] == UIMessage( + id=chat_conversation.messages[0].id, + createdAt=timezone.now(), + content="What was discussed in the interview?", + reasoning=None, + experimental_attachments=None, + role="user", + annotations=None, + toolInvocations=None, + parts=[TextUIPart(type="text", text="What was discussed in the interview?")], + ) + + assert 
chat_conversation.messages[1].id == IsUUID(4) + assert chat_conversation.messages[1] == UIMessage( + id=chat_conversation.messages[1].id, + createdAt=timezone.now(), + content="The interview discussed Hello world.", + reasoning=None, + experimental_attachments=None, + role="assistant", + annotations=None, + toolInvocations=None, + parts=[ + ToolInvocationUIPart( + type="tool-invocation", + toolInvocation=ToolInvocationCall( + toolCallId=chat_conversation.messages[1].parts[0].toolInvocation.toolCallId, + toolName="document_search_rag", + args={"query": "What was discussed in the interview?"}, + state="call", + step=None, + ), + ), + TextUIPart(type="text", text="The interview discussed Hello world."), + SourceUIPart( + type="source", + source=LanguageModelV1Source( + sourceType="url", + id=chat_conversation.messages[1].parts[2].source.id, + url="interview.ogg", + title=None, + providerMetadata={}, + ), + ), + ], + ) + + timezone_now = timezone.now().isoformat().replace("+00:00", "Z") + + assert len(chat_conversation.pydantic_messages) == 4 + + _run_id = chat_conversation.pydantic_messages[0]["run_id"] + + assert chat_conversation.pydantic_messages[0] == { + "instructions": _expected_audio_instructions(today_prompt_date, "interview.ogg"), + "kind": "request", + "metadata": None, + "parts": [ + { + "content": ["What was discussed in the interview?"], + "part_kind": "user-prompt", + "timestamp": timezone_now, + }, + ], + "run_id": _run_id, + "timestamp": timezone_now, + } + assert chat_conversation.pydantic_messages[1] == { + "finish_reason": None, + "kind": "response", + "metadata": None, + "model_name": "function::agent_model", + "parts": [ + { + "args": '{"query": "What was discussed in the interview?"}', + "id": None, + "part_kind": "tool-call", + "tool_call_id": chat_conversation.pydantic_messages[1]["parts"][0]["tool_call_id"], + "tool_name": "document_search_rag", + "provider_details": None, + "provider_name": None, + } + ], + "provider_details": None, + 
"provider_name": None, + "provider_response_id": None, + "provider_url": None, + "timestamp": timezone_now, + "usage": { + "cache_audio_read_tokens": 0, + "cache_read_tokens": 0, + "cache_write_tokens": 0, + "details": {}, + "input_audio_tokens": 0, + "input_tokens": 50, + "output_audio_tokens": 0, + "output_tokens": 9, + }, + "run_id": _run_id, + } + assert chat_conversation.pydantic_messages[2] == { + "instructions": _expected_audio_instructions(today_prompt_date, "interview.ogg"), + "kind": "request", + "metadata": None, + "parts": [ + { + "content": [ + { + "content": TRANSCRIPT_CONTENT, + "score": 0.9, + "url": "interview.ogg", + } + ], + "metadata": {"sources": ["interview.ogg"]}, + "outcome": "success", + "part_kind": "tool-return", + "timestamp": timezone_now, + "tool_call_id": chat_conversation.pydantic_messages[2]["parts"][0]["tool_call_id"], + "tool_name": "document_search_rag", + } + ], + "run_id": _run_id, + "timestamp": timezone_now, + } + assert chat_conversation.pydantic_messages[3] == { + "finish_reason": None, + "kind": "response", + "metadata": None, + "model_name": "function::agent_model", + "parts": [ + { + "content": "The interview discussed Hello world.", + "id": None, + "part_kind": "text", + "provider_details": None, + "provider_name": None, + } + ], + "provider_details": None, + "provider_name": None, + "provider_response_id": None, + "provider_url": None, + "timestamp": timezone_now, + "usage": { + "cache_audio_read_tokens": 0, + "cache_read_tokens": 0, + "cache_write_tokens": 0, + "details": {}, + "input_audio_tokens": 0, + "input_tokens": 50, + "output_audio_tokens": 0, + "output_tokens": 6, + }, + "run_id": _run_id, + } + + +@responses.activate +@freeze_time() +def test_post_conversation_with_audio_transcription_failed( + api_client, + mock_ai_agent_service, +): + """ + Test POST to /api/v1/chats/{pk}/conversation/ when audio transcription has failed. 
+ + Verifies that when the audio attachment is in TRANSCRIPTION_FAILED state, the + document_parsing tool reports an error and the conversation is not updated. + """ + # Albert create_collection uses requests; mock it even though storage never completes + responses.post( + "https://albert.api.etalab.gouv.fr/v1/collections", + json={"id": "123", "name": "test-collection"}, + status=status.HTTP_200_OK, + ) + + chat_conversation = ChatConversationFactory(owner__language="en-us") + api_client.force_authenticate(user=chat_conversation.owner) + + audio_attachment = ChatConversationAttachmentFactory( + conversation=chat_conversation, + uploaded_by=chat_conversation.owner, + file_name="interview.ogg", + content_type="audio/ogg", + upload_state=AttachmentStatus.TRANSCRIPTION_FAILED, + ) + + message = UIMessage( + id="1", + role="user", + content="What was discussed in the interview?", + parts=[ + TextUIPart( + text="What was discussed in the interview?", + type="text", + ), + ], + experimental_attachments=[ + Attachment( + name="interview.ogg", + contentType="audio/ogg", + url=f"/media-key/{audio_attachment.key}", + ) + ], + ) + + async def agent_model(messages: list[ModelMessage], _info: AgentInfo): + raise RuntimeError("LLM should not be called when transcription failed") + + with mock_ai_agent_service(FunctionModel(stream_function=agent_model)): + response = api_client.post( + f"/api/v1.0/chats/{chat_conversation.pk}/conversation/", + data={"messages": [message.model_dump(mode="json")]}, + format="json", + ) + + assert response.status_code == status.HTTP_200_OK + assert response.get("Content-Type") == "text/event-stream" + assert response.get("x-vercel-ai-data-stream") == "v1" + assert response.streaming + + response_content = b"".join(response.streaming_content).decode("utf-8") + response_content = replace_uuids_with_placeholder(response_content) + + assert response_content == ( + '9:{"toolCallId":"XXX","toolName":"document_parsing",' + 
'"args":{"documents":[{"identifier":"interview.ogg"}],"has_audio":true}}\n' + 'a:{"toolCallId":"XXX","result":{"state":"error","error":"The transcription of ' + 'this audio failed. Please try again with another file."}}\n' + 'd:{"finishReason":"error","usage":{"promptTokens":0,"completionTokens":0,' + '"co2Impact":0.0}}\n' + ) + + # Conversation should not be updated on error + chat_conversation.refresh_from_db() + assert len(chat_conversation.messages) == 0 diff --git a/src/backend/chat/tests/views/chat/conversations/test_conversation_with_document_upload.py b/src/backend/chat/tests/views/chat/conversations/test_conversation_with_document_upload.py index f86f9b81..ede13577 100644 --- a/src/backend/chat/tests/views/chat/conversations/test_conversation_with_document_upload.py +++ b/src/backend/chat/tests/views/chat/conversations/test_conversation_with_document_upload.py @@ -466,7 +466,7 @@ async def agent_model(messages: list[ModelMessage], _info: AgentInfo): assert response_content == ( '9:{"toolCallId":"XXX","toolName":"document_parsing",' - '"args":{"documents":[{"identifier":"sample.pdf"}]}}\n' + '"args":{"documents":[{"identifier":"sample.pdf"}],"has_audio":false}}\n' 'a:{"toolCallId":"XXX","result":{"state":"done"}}\n' 'b:{"toolCallId":"pyd_ai_YYY","toolName":"document_search_rag"}\n' '9:{"toolCallId":"pyd_ai_YYY","toolName":"document_search_rag",' @@ -786,7 +786,7 @@ async def agent_model(messages: list[ModelMessage], _info: AgentInfo): assert response_content == ( '9:{"toolCallId":"XXX","toolName":"document_parsing",' - '"args":{"documents":[{"identifier":"sample.pdf"}]}}\n' + '"args":{"documents":[{"identifier":"sample.pdf"}],"has_audio":false}}\n' 'a:{"toolCallId":"XXX","result":{"state":"done"}}\n' 'b:{"toolCallId":"pyd_ai_YYY","toolName":"summarize"}\n' '9:{"toolCallId":"pyd_ai_YYY","toolName":"summarize","args":{}}\n' @@ -1030,7 +1030,7 @@ async def agent_model(messages: list[ModelMessage], _info: AgentInfo): assert response_content == ( 
'9:{"toolCallId":"XXX","toolName":"document_parsing",' - '"args":{"documents":[{"identifier":"sample.odt"}]}}\n' + '"args":{"documents":[{"identifier":"sample.odt"}],"has_audio":false}}\n' 'a:{"toolCallId":"XXX","result":{"state":"done"}}\n' 'b:{"toolCallId":"pyd_ai_YYY","toolName":"document_search_rag"}\n' '9:{"toolCallId":"pyd_ai_YYY","toolName":"document_search_rag",' diff --git a/src/backend/chat/tests/views/chat/conversations/test_conversation_with_document_url.py b/src/backend/chat/tests/views/chat/conversations/test_conversation_with_document_url.py index 7907a492..25ffbaea 100644 --- a/src/backend/chat/tests/views/chat/conversations/test_conversation_with_document_url.py +++ b/src/backend/chat/tests/views/chat/conversations/test_conversation_with_document_url.py @@ -244,7 +244,7 @@ async def agent_model(messages: list[ModelMessage], _info: AgentInfo): assert response_content == ( '9:{"toolCallId":"XXX","toolName":"document_parsing",' - '"args":{"documents":[{"identifier":"sample.pdf"}]}}\n' + '"args":{"documents":[{"identifier":"sample.pdf"}],"has_audio":false}}\n' 'a:{"toolCallId":"XXX","result":{"state":"done"}}\n' '0:"This is a document about a single pixel."\n' 'f:{"messageId":""}\n' @@ -399,7 +399,7 @@ async def agent_model(messages: list[ModelMessage], _info: AgentInfo): assert response_content == ( '9:{"toolCallId":"XXX","toolName":"document_parsing",' - '"args":{"documents":[{"identifier":"sample.pdf"}]}}\n' + '"args":{"documents":[{"identifier":"sample.pdf"}],"has_audio":false}}\n' 'a:{"toolCallId":"XXX",' '"result":{"state":"error","error":"Document ' 'URL does not belong to the conversation."}}\n' @@ -468,7 +468,7 @@ async def agent_model(messages: list[ModelMessage], _info: AgentInfo): assert response_content == ( '9:{"toolCallId":"XXX","toolName":"document_parsing",' - '"args":{"documents":[{"identifier":"sample.pdf"}]}}\n' + '"args":{"documents":[{"identifier":"sample.pdf"}],"has_audio":false}}\n' 'a:{"toolCallId":"XXX",' 
'"result":{"state":"error","error":"External document ' 'URL are not accepted yet."}}\n' @@ -954,7 +954,7 @@ async def agent_model(messages: list[ModelMessage], _info: AgentInfo): assert response_content == ( '9:{"toolCallId":"XXX","toolName":"document_parsing",' - f'"args":{{"documents":[{{"identifier":"{file_name}"}}]}}}}\n' + f'"args":{{"documents":[{{"identifier":"{file_name}"}}],"has_audio":false}}}}\n' 'a:{"toolCallId":"XXX","result":{"state":"done"}}\n' '0:"This is a document about you."\n' 'f:{"messageId":""}\n' diff --git a/src/backend/chat/tests/views/test_transcription_webhook.py b/src/backend/chat/tests/views/test_transcription_webhook.py new file mode 100644 index 00000000..0807048f --- /dev/null +++ b/src/backend/chat/tests/views/test_transcription_webhook.py @@ -0,0 +1,171 @@ +"""Tests for the transcription webhook endpoint.""" + +import json +from unittest import mock + +from django.core.files.storage import default_storage +from django.urls import reverse + +import pytest +import responses as responses_lib + +from core.file_upload.enums import AttachmentStatus + +from chat import models +from chat.factories import ChatConversationAttachmentFactory + +pytestmark = pytest.mark.django_db + +AUTH_HEADER = {"HTTP_AUTHORIZATION": "Bearer test-webhook-key"} + + +WHISPER_TRANSCRIPT_DATA = { + "segments": [ + { + "start": 0.0, + "end": 2.5, + "text": "Hello world", + "words": [], + "speaker": "SPEAKER_00", + }, + { + "start": 2.5, + "end": 5.0, + "text": "How are you?", + "words": [], + "speaker": "SPEAKER_01", + }, + ], + "word_segments": [], +} + + +@pytest.fixture(name="transcribing_attachment") +def fixture_transcribing_attachment(): + """Create a TRANSCRIBING audio attachment with a known job_id.""" + return ChatConversationAttachmentFactory( + content_type="audio/mpeg", + upload_state=AttachmentStatus.TRANSCRIBING, + transcription_job_id="job-abc-123", + ) + + +@responses_lib.activate +def test_transcription_webhook_success(client, 
transcribing_attachment): + """Successful webhook creates a text/markdown S3 attachment and sets audio to READY.""" + transcript_url = "http://ai-service.test/transcripts/job-abc-123.json" + responses_lib.add( + responses_lib.GET, + transcript_url, + json=WHISPER_TRANSCRIPT_DATA, + status=200, + ) + + payload = { + "job_id": "job-abc-123", + "type": "transcript", + "status": "success", + "transcription_data_url": transcript_url, + } + + url = reverse("transcription-webhook") + with mock.patch.object(default_storage, "save") as mock_save: + response = client.post( + url, + data=json.dumps(payload), + content_type="application/json", + **AUTH_HEADER, + ) + + assert response.status_code == 200 + transcribing_attachment.refresh_from_db() + assert transcribing_attachment.upload_state == AttachmentStatus.READY + + # A text/markdown attachment should have been created + text_attachment = models.ChatConversationAttachment.objects.get( + conversion_from=transcribing_attachment.key + ) + assert text_attachment.content_type == "text/markdown" + assert text_attachment.upload_state == AttachmentStatus.READY + + # Transcript saved to S3 with correct content + mock_save.assert_called_once() + saved_content = mock_save.call_args[0][1].read().decode("utf-8") + assert "# Transcription" in saved_content + assert "SPEAKER_00" in saved_content + assert "Hello world" in saved_content + assert "SPEAKER_01" in saved_content + + +def test_transcription_webhook_unknown_job_id(client): + """Webhook with unknown job_id returns 404.""" + payload = { + "job_id": "nonexistent-job", + "type": "transcript", + "status": "success", + "transcription_data_url": "https://example.com/data.json", + } + url = reverse("transcription-webhook") + response = client.post( + url, + data=json.dumps(payload), + content_type="application/json", + **AUTH_HEADER, + ) + assert response.status_code == 404 + + +def test_transcription_webhook_invalid_auth(client, transcribing_attachment): # pylint: 
disable=unused-argument + """Webhook with wrong API key returns 403.""" + payload = { + "job_id": "job-abc-123", + "type": "transcript", + "status": "failure", + "error_code": "unknown_error", + } + url = reverse("transcription-webhook") + response = client.post( + url, + data=json.dumps(payload), + content_type="application/json", + HTTP_AUTHORIZATION="Bearer wrong-key", + ) + assert response.status_code == 403 + + +def test_transcription_webhook_missing_auth(client, transcribing_attachment): # pylint: disable=unused-argument + """Webhook with no Authorization header returns 403.""" + payload = { + "job_id": "job-abc-123", + "type": "transcript", + "status": "failure", + "error_code": "unknown_error", + } + url = reverse("transcription-webhook") + response = client.post( + url, + data=json.dumps(payload), + content_type="application/json", + ) + assert response.status_code == 403 + + +def test_transcription_webhook_failure_payload(client, transcribing_attachment): + """Failure webhook payload marks attachment as TRANSCRIPTION_FAILED.""" + payload = { + "job_id": "job-abc-123", + "type": "transcript", + "status": "failure", + "error_code": "unknown_error", + } + url = reverse("transcription-webhook") + response = client.post( + url, + data=json.dumps(payload), + content_type="application/json", + **AUTH_HEADER, + ) + assert response.status_code == 200 + assert response.json() == {"status": "failed"} + transcribing_attachment.refresh_from_db() + assert transcribing_attachment.upload_state == AttachmentStatus.TRANSCRIPTION_FAILED diff --git a/src/backend/chat/transcription.py b/src/backend/chat/transcription.py new file mode 100644 index 00000000..2534b697 --- /dev/null +++ b/src/backend/chat/transcription.py @@ -0,0 +1,134 @@ +"""Audio transcription helpers for chat attachments.""" + +from __future__ import annotations + +import asyncio +import logging +from urllib.parse import urljoin + +from django.conf import settings +from django.core.exceptions import 
ImproperlyConfigured +from django.core.files.storage import default_storage + +import requests +from asgiref.sync import sync_to_async + +from core.file_upload.enums import AttachmentStatus +from core.file_upload.utils import generate_retrieve_policy + +from chat.models import ChatConversationAttachment +from chat.webhook_models import WhisperXResponse + +logger = logging.getLogger(__name__) + + +def parse_whisper_response(whisper_data: WhisperXResponse) -> str: + """Convert WhisperX segments into a markdown transcript string.""" + out_str = "# Transcription \n" + last_speaker = None + for chunk in whisper_data.segments: + speaker = chunk.speaker or "Unknown" + if speaker != last_speaker: + out_str += f"\n## {speaker}\n" + last_speaker = speaker + out_str += f"{chunk.text}\n" + return out_str + + +def trigger_audio_transcription(attachment: ChatConversationAttachment) -> None: + """ + Trigger async transcription for an audio attachment. + + Posts to the transcription service, stores the returned job_id on the + attachment, and sets its status to TRANSCRIBING. + + Args: + attachment: ChatConversationAttachment instance with an audio content_type. + + Raises: + requests.HTTPError: If the transcription service returns an error. 
+ """ + if not settings.STT_SERVICE_URL: + raise ImproperlyConfigured("STT_SERVICE_URL must be configured to use audio transcription.") + + presigned_url = generate_retrieve_policy(attachment.key) + + response = requests.post( + urljoin(settings.STT_SERVICE_URL, "async-jobs/transcribe"), + json={ + "user_sub": str(attachment.uploaded_by.sub), + "language": (attachment.uploaded_by.language or "fr-fr").split("-")[0], + "cloud_storage_url": presigned_url, + }, + headers={ + "Authorization": f"Bearer {settings.STT_SERVICE_API_KEY}", + }, + timeout=10, + ) + response.raise_for_status() + + data = response.json() + attachment.transcription_job_id = data["job_id"] + attachment.upload_state = AttachmentStatus.TRANSCRIBING + attachment.save(update_fields=["transcription_job_id", "upload_state", "updated_at"]) + + logger.info("Transcription job %s created for attachment %s", attachment.transcription_job_id, attachment.pk) + + +async def wait_for_transcript(attachment_key: str, conversation) -> str: + """ + Wait for an audio transcript to be ready and return the transcript text. + + Polls the DB every 10 seconds until the attachment reaches a terminal_states state + OR we reach the 20 minutes timeout. + + Args: + attachment_key: The S3 key of the audio attachment. + conversation: The ChatConversation instance the attachment belongs to. + + Returns: + The transcript text. + + Raises: + ValueError: If transcription times out or fails. + """ + poll_interval = 10.0 + timeout = 1200.0 + terminal_states = { + AttachmentStatus.READY, + AttachmentStatus.SUSPICIOUS, + AttachmentStatus.FILE_TOO_LARGE_TO_ANALYZE, + AttachmentStatus.TRANSCRIPTION_FAILED, + } + + attachment = await ChatConversationAttachment.objects.aget( + key=attachment_key, + conversation=conversation, + ) + elapsed = 0.0 + while attachment.upload_state not in terminal_states: + if elapsed >= timeout: + raise ValueError("The audio transcription took too long. 
Please try again.") + await asyncio.sleep(poll_interval) + elapsed += poll_interval + attachment = await ChatConversationAttachment.objects.aget( + key=attachment_key, + conversation=conversation, + ) + + if attachment.upload_state != AttachmentStatus.READY: + raise ValueError( + "The transcription of this audio failed. Please try again with another file." + ) + + text_attachment = await ChatConversationAttachment.objects.aget( + conversation=conversation, + conversion_from=attachment_key, + ) + + @sync_to_async + def read_transcript(): + with default_storage.open(text_attachment.key) as f: + return f.read().decode("utf-8") + + return await read_transcript() diff --git a/src/backend/chat/views.py b/src/backend/chat/views.py index 17297423..1ff201e2 100644 --- a/src/backend/chat/views.py +++ b/src/backend/chat/views.py @@ -2,6 +2,7 @@ import logging import os +from io import BytesIO from uuid import UUID, uuid4 from django.conf import settings @@ -14,9 +15,11 @@ import langfuse import magic import posthog +import requests as http_requests from drf_spectacular.utils import extend_schema from lasuite.malware_detection import malware_detection from lasuite.oidc_login.decorators import refresh_oidc_access_token +from pydantic import ValidationError as PydanticValidationError from rest_framework import decorators, filters, mixins, permissions, status, viewsets from rest_framework.exceptions import MethodNotAllowed, PermissionDenied, ValidationError from rest_framework.response import Response @@ -32,9 +35,17 @@ from activation_codes.permissions import IsActivatedUser from chat import models, serializers +from chat.authentication import AiWebhookAuthentication from chat.clients.pydantic_ai import AIAgentService from chat.keepalive import stream_with_keepalive_async, stream_with_keepalive_sync from chat.serializers import ChatConversationRequestSerializer +from chat.transcription import parse_whisper_response +from chat.webhook_models import ( + 
TranscribeWebhookFailurePayload, + TranscribeWebhookPendingPayload, + WhisperXResponse, + webhook_payload_adapter, +) logger = logging.getLogger(__name__) @@ -741,3 +752,71 @@ def perform_destroy(self, instance): """ instance.conversations.all().delete() instance.delete() + + +class TranscriptionWebhookView(APIView): + """Webhook endpoint that receives transcription results from the AI service.""" + + authentication_classes = [AiWebhookAuthentication] + permission_classes = [] + + def post(self, request): # pylint: disable=too-many-return-statements + """Handle transcription webhook payload.""" + try: + payload = webhook_payload_adapter.validate_python(request.data) + except PydanticValidationError: + return Response({"error": "Invalid payload"}, status=status.HTTP_400_BAD_REQUEST) + + job_id = payload.job_id + + try: + attachment = models.ChatConversationAttachment.objects.get(transcription_job_id=job_id) + except models.ChatConversationAttachment.DoesNotExist: + return Response({"error": "Attachment not found"}, status=status.HTTP_404_NOT_FOUND) + + if isinstance(payload, TranscribeWebhookFailurePayload): + attachment.upload_state = AttachmentStatus.TRANSCRIPTION_FAILED + attachment.save(update_fields=["upload_state", "updated_at"]) + logger.warning( + "Transcription job %s failed, marking attachment as TRANSCRIPTION_FAILED", + job_id, + ) + return Response({"status": "failed"}) + if isinstance(payload, TranscribeWebhookPendingPayload): + logger.warning( + "Transcription job %s is still in progress.", + job_id, + ) + return Response({"status": "ignored"}) + + try: + transcript_response = http_requests.get(payload.transcription_data_url, timeout=30) + transcript_response.raise_for_status() + whisper_data = WhisperXResponse.model_validate_json(transcript_response.content) + except Exception: # pylint: disable=broad-except + logger.exception("Failed to fetch or parse transcript for job %s", job_id) + return Response({"error": "Failed to fetch transcript"}, 
status=status.HTTP_200_OK) + + out_str = parse_whisper_response(whisper_data) + + # Save transcript to S3 first, then create the linked text attachment record. + # Writing S3 before the DB record ensures no row points to a missing object if + # the process crashes between the two writes. S3 overwrite is idempotent. + transcript_key = f"{attachment.conversation_id}/attachments/{attachment.file_name}.md" + default_storage.save(transcript_key, BytesIO(out_str.encode("utf-8"))) + models.ChatConversationAttachment.objects.get_or_create( + conversation=attachment.conversation, + conversion_from=attachment.key, + defaults={ + "uploaded_by": attachment.uploaded_by, + "key": transcript_key, + "file_name": f"{attachment.file_name}.md", + "content_type": "text/markdown", + "upload_state": AttachmentStatus.READY, + }, + ) + + attachment.upload_state = AttachmentStatus.READY + attachment.save(update_fields=["upload_state", "updated_at"]) + + return Response({"status": "ok"}) diff --git a/src/backend/chat/webhook_models.py b/src/backend/chat/webhook_models.py new file mode 100644 index 00000000..2e9aeeab --- /dev/null +++ b/src/backend/chat/webhook_models.py @@ -0,0 +1,69 @@ +"""Pydantic models for transcription webhook payloads.""" + +from typing import Annotated, Literal, Union + +from pydantic import BaseModel, Field, TypeAdapter + + +class WhisperXSegment(BaseModel): + """A single segment from a WhisperX transcription.""" + + start: float + end: float + text: str + words: list = Field(default_factory=list) + speaker: str | None = None + + +class WhisperXResponse(BaseModel): + """Full WhisperX transcription response.""" + + segments: list[WhisperXSegment] + word_segments: list = Field(default_factory=list) + + +class BaseWebhook(BaseModel): + """Base webhook payload.""" + + job_id: str = Field( + title="Job ID", + description="The ID of the job document in the receiver system.", + ) + + +class TranscribeWebhookSuccessPayload(BaseWebhook): + """Payload for a successful 
transcription webhook.""" + + type: Literal["transcript"] = Field(default="transcript") + status: Literal["success"] = Field(default="success") + transcription_data_url: str = Field( + title="Transcript", description="URL to the raw transcription data." + ) + + +class TranscribeWebhookPendingPayload(BaseWebhook): + """Payload for a pending transcription webhook-like response.""" + + type: Literal["transcript"] = Field(default="transcript") + status: Literal["pending"] = Field(default="pending") + + +class TranscribeWebhookFailurePayload(BaseWebhook): + """Payload for a failed transcription webhook.""" + + type: Literal["transcript"] = Field(default="transcript") + status: Literal["failure"] = Field(default="failure") + error_code: str = Field(title="Error code", description="The error code.") + + +TranscribeWebhookPayloads = Annotated[ + Union[ + TranscribeWebhookSuccessPayload, + TranscribeWebhookPendingPayload, + TranscribeWebhookFailurePayload, + ], + Field(discriminator="status"), +] + + +webhook_payload_adapter = TypeAdapter(TranscribeWebhookPayloads) diff --git a/src/backend/conversations/settings.py b/src/backend/conversations/settings.py index 1b5c0d3e..7ad6ee75 100755 --- a/src/backend/conversations/settings.py +++ b/src/backend/conversations/settings.py @@ -707,6 +707,15 @@ class Base(BraveSettings, Configuration): environ_prefix=None, ) + # Audio transcription service (meet/summary) + STT_SERVICE_URL = values.Value(None, environ_name="STT_SERVICE_URL", environ_prefix=None) + STT_SERVICE_API_KEY = values.Value( + None, environ_name="STT_SERVICE_API_KEY", environ_prefix=None + ) + STT_WEBHOOK_API_KEY = values.Value( + None, environ_name="STT_WEBHOOK_API_KEY", environ_prefix=None + ) + # Uploaded files RAG_FILES_ACCEPTED_FORMATS = values.ListValue( default=[ @@ -739,6 +748,14 @@ class Base(BraveSettings, Configuration): "image/gif", "image/webp", "application/vnd.oasis.opendocument.text", + # audio files supported by meet/transcribe + "audio/mp4", + 
"audio/x-m4a", + "audio/ogg", + "application/ogg", + "audio/webm", + "audio/opus", + "audio/wav", ], environ_name="RAG_FILES_ACCEPTED_FORMATS", environ_prefix=None, @@ -1299,6 +1316,9 @@ class Test(Base): AI_BASE_URL = None AI_API_KEY = None AI_MODEL = None + STT_SERVICE_URL = "http://ai-service.test/api/v2" + STT_SERVICE_API_KEY = "test-api-key" + STT_WEBHOOK_API_KEY = "test-webhook-key" POSTHOG_KEY = None @@ -1430,6 +1450,9 @@ class Feature(Production): nota bene: it should inherit from the Production environment. """ + # TLS is terminated at the nginx ingress; Django sees plain HTTP internally + SECURE_SSL_REDIRECT = False + class Staging(Production): """ diff --git a/src/backend/core/file_upload/enums.py b/src/backend/core/file_upload/enums.py index 6433dda1..87965bb0 100644 --- a/src/backend/core/file_upload/enums.py +++ b/src/backend/core/file_upload/enums.py @@ -14,6 +14,8 @@ class AttachmentStatus(StrEnum): ANALYZING = "analyzing" FILE_TOO_LARGE_TO_ANALYZE = "file_too_large_to_analyze" SUSPICIOUS = "suspicious" + TRANSCRIBING = "transcribing" + TRANSCRIPTION_FAILED = "transcription_failed" READY = "ready" @classmethod diff --git a/src/backend/core/file_upload/utils.py b/src/backend/core/file_upload/utils.py index 534e2d7b..c15e220e 100644 --- a/src/backend/core/file_upload/utils.py +++ b/src/backend/core/file_upload/utils.py @@ -13,6 +13,18 @@ logger = logging.getLogger(__name__) +def is_audio_content_type(content_type: str) -> bool: + """Return True if the content type represents an audio file. + + Covers standard audio/* types as well as container formats like application/ogg + that browsers may report for audio files. + """ + normalized = content_type.split(";")[0].strip().lower() + return normalized.startswith("audio/") or normalized in { + "application/ogg", + } + + def auth_get_original_url(request): """ Extracts and parses the original URL from the "HTTP_X_ORIGINAL_URL" header. 
diff --git a/src/backend/core/urls.py b/src/backend/core/urls.py index acf88627..3bcd2e4b 100644 --- a/src/backend/core/urls.py +++ b/src/backend/core/urls.py @@ -16,6 +16,7 @@ ChatViewSet, FileStreamView, LLMConfigurationView, + TranscriptionWebhookView, ) # - Main endpoints @@ -40,6 +41,11 @@ path( "llm-configuration/", LLMConfigurationView.as_view(), name="llm-configuration" ), + path( + "transcription-webhook/", + TranscriptionWebhookView.as_view(), + name="transcription-webhook", + ), path( "chats//", include(conversation_router.urls), diff --git a/src/frontend/apps/conversations/src/features/chat/components/MessageItem.tsx b/src/frontend/apps/conversations/src/features/chat/components/MessageItem.tsx index 0bd1f5ad..6cf2c65f 100644 --- a/src/frontend/apps/conversations/src/features/chat/components/MessageItem.tsx +++ b/src/frontend/apps/conversations/src/features/chat/components/MessageItem.tsx @@ -388,16 +388,24 @@ const MessageItemComponent: React.FC = ({ )} - {toolInvocationParts.map((part, partIndex) => - isCurrentlyStreaming && isLastAssistantMessage ? ( + {toolInvocationParts.map((part, partIndex) => { + const isErrorResult = + part.toolInvocation.state === 'result' && + ( + part.toolInvocation.result as { state?: string } | undefined + )?.state === 'error'; + const showDuringStream = + isCurrentlyStreaming && isLastAssistantMessage; + if (!showDuringStream && !isErrorResult) return null; + return ( - ) : null, - )} + ); + })} {message.role === 'assistant' && @@ -504,11 +512,16 @@ const arePropsEqual = ( } // Check parts changes (for streaming tool invocations and sources) - const prevPartsLength = prevProps.message.parts?.length ?? 0; - const nextPartsLength = nextProps.message.parts?.length ?? 0; - if (prevPartsLength !== nextPartsLength) { + const prevParts = prevProps.message.parts ?? []; + const nextParts = nextProps.message.parts ?? 
[]; + if (prevParts.length !== nextParts.length) { return false; } + for (let i = 0; i < prevParts.length; i++) { + if (prevParts[i] !== nextParts[i]) { + return false; + } + } // Check attachments const prevAttachmentsLength = diff --git a/src/frontend/apps/conversations/src/features/chat/components/ToolInvocationItem.tsx b/src/frontend/apps/conversations/src/features/chat/components/ToolInvocationItem.tsx index 1f5a2319..7617e31d 100644 --- a/src/frontend/apps/conversations/src/features/chat/components/ToolInvocationItem.tsx +++ b/src/frontend/apps/conversations/src/features/chat/components/ToolInvocationItem.tsx @@ -18,15 +18,39 @@ export const ToolInvocationItem: React.FC = ({ const { t } = useTranslation(); if (toolInvocation.toolName === 'document_parsing') { - if ( - toolInvocation.state === 'partial-call' || - toolInvocation.state === 'result' - ) { + if (toolInvocation.state === 'partial-call') { return null; } - const documents: unknown = (toolInvocation.args as { documents: unknown }) - ?.documents; + if (toolInvocation.state === 'result') { + const result = toolInvocation.result as { state: string; error?: string }; + if (result?.state !== 'error') { + return null; + } + return ( + + + {result.error ?? + t('An error occurred while processing the document.')} + + + ); + } + + const args = toolInvocation.args as { + documents: unknown; + has_audio?: boolean; + }; + const documents: unknown = args?.documents; + const hasAudio = args?.has_audio ?? false; const documentIdentifiers: string[] = Array.isArray(documents) && documents.every( @@ -48,9 +72,13 @@ export const ToolInvocationItem: React.FC = ({ > - {t('Extracting documents: {{documents}} ...', { - documents: documentIdentifiers.join(', '), - })} + {hasAudio + ? 
t('Waiting for audio transcript: {{documents}} ...', { + documents: documentIdentifiers.join(', '), + }) + : t('Extracting documents: {{documents}} ...', { + documents: documentIdentifiers.join(', '), + })} ); diff --git a/src/helm/env.d/dev/values.conversations.yaml.gotmpl b/src/helm/env.d/dev/values.conversations.yaml.gotmpl index a83f6e54..676a900e 100644 --- a/src/helm/env.d/dev/values.conversations.yaml.gotmpl +++ b/src/helm/env.d/dev/values.conversations.yaml.gotmpl @@ -13,8 +13,7 @@ backend: replicas: 1 envVars: DJANGO_SECRET_KEY: ThisIsAnExampleKeyForDevPurposeOnly - OIDC_RP_CLIENT_SECRET: ThisIsAnExampleKeyForDevPurposeOnly - AWS_S3_SECRET_ACCESS_KEY: password + OIDC_RP_CLIENT_SECRET: ThisIsAnExampleKeyForDevPurposeOnly AI_BASE_URL: secretKeyRef: name: secret-dev @@ -35,6 +34,33 @@ backend: secretKeyRef: name: secret-dev key: BRAVE_API_KEY + STT_SERVICE_URL: + secretKeyRef: + name: secret-dev + key: STT_SERVICE_URL + STT_SERVICE_API_KEY: + secretKeyRef: + name: secret-dev + key: STT_SERVICE_API_KEY + STT_WEBHOOK_API_KEY: + secretKeyRef: + name: secret-dev + key: STT_WEBHOOK_API_KEY + LANGFUSE_SECRET_KEY: + secretKeyRef: + name: secret-dev + key: LANGFUSE_SECRET_KEY + LANGFUSE_PUBLIC_KEY: + secretKeyRef: + name: secret-dev + key: LANGFUSE_PUBLIC_KEY + LANGFUSE_HOST: + secretKeyRef: + name: secret-dev + key: LANGFUSE_HOST + LANGFUSE_ENABLED: true + LANGFUSE_DEBUG: true + LANGFUSE_MEDIA_UPLOAD_ENABLED: true COLLABORATION_SERVER_SECRET: my-secret DJANGO_CSRF_TRUSTED_ORIGINS: https://conversations.127.0.0.1.nip.io DJANGO_CONFIGURATION: Feature @@ -78,6 +104,7 @@ backend: AI_MODEL: mistral-medium-2508 AWS_S3_ENDPOINT_URL: http://minio.conversations.svc.cluster.local:9000 AWS_S3_ACCESS_KEY_ID: conversations + AWS_S3_SECRET_ACCESS_KEY: password AWS_S3_DOMAIN_REPLACE: https://minio-conversations.127.0.0.1.nip.io AWS_STORAGE_BUCKET_NAME: conversations-media-storage STORAGES_STATICFILES_BACKEND: django.contrib.staticfiles.storage.StaticFilesStorage @@ -85,6 +112,7 @@ 
backend: FEATURE_FLAG_WEB_SEARCH: ENABLED FEATURE_FLAG_DOCUMENT_UPLOAD: ENABLED LLM_CONFIGURATION_FILE_PATH: /app/conversations/configuration/llm/custom_llm_configuration.json + ATTACHMENT_MAX_SIZE: "104857600" migrate: command: - "/bin/sh"