From 901619b1eafc9e382b63972d7c8c6aed7f1dd9a9 Mon Sep 17 00:00:00 2001 From: Jose Luis Franco Arza Date: Mon, 6 Apr 2026 16:55:41 +0200 Subject: [PATCH 1/3] Add support for collection export. Adds a new option, collectionExport, in the values which allows enabling collection export. Also, exposes the env var EXPORT_DEFAULT_BUCKET which points at the right bucket to export collections in. --- .cicd/test.sh | 7 +++++++ weaviate/templates/weaviateStatefulset.yaml | 9 +++++++++ weaviate/values.yaml | 8 ++++++++ 3 files changed, 24 insertions(+) diff --git a/.cicd/test.sh b/.cicd/test.sh index 42a593a..7815065 100755 --- a/.cicd/test.sh +++ b/.cicd/test.sh @@ -435,5 +435,12 @@ function check_creates_template() { check_string_existence "--set readinessProbe.probeType=exec --set readinessProbe.probe.exec.command={test-probe-cmd}" "command:" check_string_existence "--set readinessProbe.probeType=exec --set readinessProbe.probe.exec.command={test-probe-cmd}" "test-probe-cmd" + # Collection export tests + check_no_setting "" "name: EXPORT_ENABLED" + check_no_setting "" "name: EXPORT_DEFAULT_BUCKET" + check_setting_has_value "--set collectionExport.enabled=true" "name: EXPORT_ENABLED" "value: \"true\"" + check_setting_has_value "--set collectionExport.enabled=true" "name: EXPORT_DEFAULT_BUCKET" "value: \"weaviate-export\"" + check_setting_has_value "--set collectionExport.enabled=true --set collectionExport.envconfig.EXPORT_DEFAULT_BUCKET=my-custom-bucket" "name: EXPORT_DEFAULT_BUCKET" "value: \"my-custom-bucket\"" + echo "Tests successful." 
) diff --git a/weaviate/templates/weaviateStatefulset.yaml b/weaviate/templates/weaviateStatefulset.yaml index c1c7fbb..832892a 100644 --- a/weaviate/templates/weaviateStatefulset.yaml +++ b/weaviate/templates/weaviateStatefulset.yaml @@ -494,6 +494,15 @@ spec: - name: MCP_SERVER_CONFIG_PATH value: "/mcp-config/mcp-config.yaml" {{- end }} + {{- if index .Values "collectionExport" "enabled" }} + - name: EXPORT_ENABLED + value: "true" + {{- if index .Values "collectionExport" "envconfig" }} + {{- range $key, $value := index .Values "collectionExport" "envconfig" }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + {{- end }} {{- end }} - name: CLUSTER_JOIN value: {{ .Values.service.name }}-headless.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }} diff --git a/weaviate/values.yaml b/weaviate/values.yaml index ccc3768..a33eafc 100644 --- a/weaviate/values.yaml +++ b/weaviate/values.yaml @@ -607,6 +607,14 @@ backups: # AZURE_STORAGE_CONNECTION_STRING: name-of-the-k8s-secret-containing-connection-string +# Configure collection export +collectionExport: + enabled: false + envconfig: + # Configure bucket where exports should be saved, this setting is mandatory. + # The bucket must exist before enabling collection export, otherwise exports will fail. + EXPORT_DEFAULT_BUCKET: weaviate-export + # modules are extensions to Weaviate, they can be used to support various # ML-models, but also other features unrelated to model inference. # An inference/vectorizer module is not required, you can also run without any From 14b8c5210c0a020b3b1ee4780492774e90e994eb Mon Sep 17 00:00:00 2001 From: Jose Luis Franco Arza Date: Wed, 8 Apr 2026 10:42:29 +0200 Subject: [PATCH 2/3] Add support for EXPORT_DEFAULT_PATH. A new env var, EXPORT_DEFAULT_PATH, was added to specify the path inside the bucket where exports are saved. Add support for it in the Helm chart. 
--- .cicd/test.sh | 2 ++ weaviate/values.yaml | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/.cicd/test.sh b/.cicd/test.sh index 7815065..45d6cf5 100755 --- a/.cicd/test.sh +++ b/.cicd/test.sh @@ -441,6 +441,8 @@ function check_creates_template() { check_setting_has_value "--set collectionExport.enabled=true" "name: EXPORT_ENABLED" "value: \"true\"" check_setting_has_value "--set collectionExport.enabled=true" "name: EXPORT_DEFAULT_BUCKET" "value: \"weaviate-export\"" check_setting_has_value "--set collectionExport.enabled=true --set collectionExport.envconfig.EXPORT_DEFAULT_BUCKET=my-custom-bucket" "name: EXPORT_DEFAULT_BUCKET" "value: \"my-custom-bucket\"" + check_no_setting "--set collectionExport.enabled=true" "name: EXPORT_DEFAULT_PATH" + check_setting_has_value "--set collectionExport.enabled=true --set collectionExport.envconfig.EXPORT_DEFAULT_PATH=path/inside/bucket" "name: EXPORT_DEFAULT_PATH" "value: \"path/inside/bucket\"" echo "Tests successful." ) diff --git a/weaviate/values.yaml b/weaviate/values.yaml index a33eafc..f49185c 100644 --- a/weaviate/values.yaml +++ b/weaviate/values.yaml @@ -615,6 +615,10 @@ collectionExport: # The bucket must exist before enabling collection export, otherwise exports will fail. EXPORT_DEFAULT_BUCKET: weaviate-export + # Optional setting. Defaults to empty string. + # Set this option if you want to save exports to a given path inside the bucket. + # EXPORT_DEFAULT_PATH: path/inside/bucket + # modules are extensions to Weaviate, they can be used to support various # ML-models, but also other features unrelated to model inference. 
# An inference/vectorizer module is not required, you can also run without any From c6d90d39d28ecbc01726b9ac86baf1cf37d0e647 Mon Sep 17 00:00:00 2001 From: Jose Luis Franco Arza Date: Thu, 9 Apr 2026 10:11:09 +0200 Subject: [PATCH 3/3] Add support for EXPORT_PARALLELISM and fix pre-existing test failures - Add EXPORT_PARALLELISM commented-out env var to collectionExport config - Add tests for EXPORT_PARALLELISM (absent by default, settable via envconfig) - Fix pre-existing test failures for TRANSFORMERS_PASSAGE/QUERY_INFERENCE_API by passing --namespace default so .Release.Namespace resolves to "default" instead of a random UUID generated by helm template - Make EXPORT_DEFAULT_PATH a required setting that defaults to an empty string (it is now always rendered when collection export is enabled) and adapt tests. Co-Authored-By: Claude Sonnet 4.6 --- .cicd/test.sh | 9 ++++++--- weaviate/templates/weaviateStatefulset.yaml | 1 + weaviate/values.yaml | 10 +++++++--- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/.cicd/test.sh b/.cicd/test.sh index 45d6cf5..afcbaea 100755 --- a/.cicd/test.sh +++ b/.cicd/test.sh @@ -357,12 +357,12 @@ function check_creates_template() { check_setting_has_value "--set replicas=9 --set env.RAFT_METADATA_ONLY_VOTERS=true" "name: RAFT_METADATA_ONLY_VOTERS" "value: \"true\"" check_setting_has_value "--set replicas=3 --set env.RAFT_METADATA_ONLY_VOTERS=false" "name: RAFT_BOOTSTRAP_EXPECT" "value: \"3\"" - _settingPassageQueryOn="--set modules.text2vec-contextionary.enabled=false --set modules.text2vec-transformers.passageQueryServices.passage.enabled=true --set modules.text2vec-transformers.passageQueryServices.query.enabled=true" + _settingPassageQueryOn="--namespace default --set modules.text2vec-contextionary.enabled=false --set modules.text2vec-transformers.passageQueryServices.passage.enabled=true --set modules.text2vec-transformers.passageQueryServices.query.enabled=true" check_setting_has_value "$_settingPassageQueryOn" "name: TRANSFORMERS_PASSAGE_INFERENCE_API" "value: 
http://transformers-inference-passage.default.svc.cluster.local.:8080" check_setting_has_value "$_settingPassageQueryOn" "name: TRANSFORMERS_QUERY_INFERENCE_API" "value: http://transformers-inference-query.default.svc.cluster.local.:8080" check_no_setting "$_settingPassageQueryOn" "name: TRANSFORMERS_INFERENCE_API" - _settingPassageQueryOff="--set modules.text2vec-contextionary.enabled=false --set modules.text2vec-transformers.enabled=true" + _settingPassageQueryOff="--namespace default --set modules.text2vec-contextionary.enabled=false --set modules.text2vec-transformers.enabled=true" check_setting_has_value "$_settingPassageQueryOff" "name: TRANSFORMERS_INFERENCE_API" "value: http://transformers-inference.default.svc.cluster.local.:8080" check_no_setting "$_settingPassageQueryOff" "name: TRANSFORMERS_PASSAGE_INFERENCE_API" check_no_setting "$_settingPassageQueryOff" "name: TRANSFORMERS_QUERY_INFERENCE_API" @@ -438,11 +438,14 @@ function check_creates_template() { # Collection export tests check_no_setting "" "name: EXPORT_ENABLED" check_no_setting "" "name: EXPORT_DEFAULT_BUCKET" + check_no_setting "" "name: EXPORT_DEFAULT_PATH" check_setting_has_value "--set collectionExport.enabled=true" "name: EXPORT_ENABLED" "value: \"true\"" check_setting_has_value "--set collectionExport.enabled=true" "name: EXPORT_DEFAULT_BUCKET" "value: \"weaviate-export\"" check_setting_has_value "--set collectionExport.enabled=true --set collectionExport.envconfig.EXPORT_DEFAULT_BUCKET=my-custom-bucket" "name: EXPORT_DEFAULT_BUCKET" "value: \"my-custom-bucket\"" - check_no_setting "--set collectionExport.enabled=true" "name: EXPORT_DEFAULT_PATH" + check_setting_has_value "--set collectionExport.enabled=true" "name: EXPORT_DEFAULT_PATH" "value: \"\"" check_setting_has_value "--set collectionExport.enabled=true --set collectionExport.envconfig.EXPORT_DEFAULT_PATH=path/inside/bucket" "name: EXPORT_DEFAULT_PATH" "value: \"path/inside/bucket\"" + check_no_setting "--set 
collectionExport.enabled=true" "name: EXPORT_PARALLELISM" + check_setting_has_value "--set collectionExport.enabled=true --set collectionExport.envconfig.EXPORT_PARALLELISM=4" "name: EXPORT_PARALLELISM" "value: \"4\"" echo "Tests successful." ) diff --git a/weaviate/templates/weaviateStatefulset.yaml b/weaviate/templates/weaviateStatefulset.yaml index 832892a..117ef5a 100644 --- a/weaviate/templates/weaviateStatefulset.yaml +++ b/weaviate/templates/weaviateStatefulset.yaml @@ -494,6 +494,7 @@ spec: - name: MCP_SERVER_CONFIG_PATH value: "/mcp-config/mcp-config.yaml" {{- end }} + {{- end }} {{- if index .Values "collectionExport" "enabled" }} - name: EXPORT_ENABLED value: "true" diff --git a/weaviate/values.yaml b/weaviate/values.yaml index f49185c..711ea06 100644 --- a/weaviate/values.yaml +++ b/weaviate/values.yaml @@ -615,9 +615,13 @@ collectionExport: # The bucket must exist before enabling collection export, otherwise exports will fail. EXPORT_DEFAULT_BUCKET: weaviate-export - # Optional setting. Defaults to empty string. - # Set this option if you want to save exports to a given path inside the bucket. - # EXPORT_DEFAULT_PATH: path/inside/bucket + # Required setting. Path inside the bucket under which exports are saved. Defaults to the empty string. + # Change this value to save exports under a given path inside the bucket. Must be a valid bucket path. + EXPORT_DEFAULT_PATH: "" + + # Optional setting. Defaults to 0 (GOMAXPROCS at runtime). + # Set this option to control the number of concurrent scan workers per export. + # EXPORT_PARALLELISM: 0 # modules are extensions to Weaviate, they can be used to support various # ML-models, but also other features unrelated to model inference.