From 9030a22b763dbb8a55b0c0dd267094ac568f0135 Mon Sep 17 00:00:00 2001 From: Marcin Antas Date: Thu, 2 Nov 2023 21:47:13 +0100 Subject: [PATCH 1/4] Add Raft configuration --- .cicd/test.sh | 4 ++++ weaviate/templates/_helpers.tpl | 19 +++++++++++++++++++ weaviate/templates/weaviateStatefulset.yaml | 2 ++ weaviate/values.yaml | 8 ++++++++ 4 files changed, 33 insertions(+) diff --git a/.cicd/test.sh b/.cicd/test.sh index 5878ead2..1bcabd27 100755 --- a/.cicd/test.sh +++ b/.cicd/test.sh @@ -166,6 +166,10 @@ function check_creates_template() { check_string_existence "--set modules.text2vec-aws.enabled=true --set modules.text2vec-aws.envSecrets.AWS_ACCESS_KEY_ID=key --set modules.text2vec-aws.envSecrets.AWS_SECRET_ACCESS_KEY=secret" "name: AWS_ACCESS_KEY_ID" check_string_existence "--set modules.text2vec-aws.enabled=true --set modules.text2vec-aws.envSecrets.AWS_ACCESS_KEY_ID=key --set modules.text2vec-aws.envSecrets.AWS_SECRET_ACCESS_KEY=secret" "name: AWS_SECRET_ACCESS_KEY" + check_setting_has_value "--set replicas=3" "name: RAFT_JOIN" "value: \"weaviate-0,weaviate-1,weaviate-2\"" + check_setting_has_value "--set replicas=1" "name: RAFT_JOIN" "value: \"weaviate-0\"" + check_setting_has_value "--set replicas=4" "name: RAFT_BOOTSTRAP_EXPECT" "value: \"4\"" + _settingPassageQueryOn="--set modules.text2vec-contextionary.enabled=false --set modules.text2vec-transformers.passageQueryServices.passage.enabled=true --set modules.text2vec-transformers.passageQueryServices.query.enabled=true" check_setting_has_value "$_settingPassageQueryOn" "name: TRANSFORMERS_PASSAGE_INFERENCE_API" "value: http://transformers-inference-passage.default.svc.cluster.local.:8080" check_setting_has_value "$_settingPassageQueryOn" "name: TRANSFORMERS_QUERY_INFERENCE_API" "value: http://transformers-inference-query.default.svc.cluster.local.:8080" diff --git a/weaviate/templates/_helpers.tpl b/weaviate/templates/_helpers.tpl index c332b07b..6e96f74e 100644 --- a/weaviate/templates/_helpers.tpl +++ 
b/weaviate/templates/_helpers.tpl @@ -158,3 +158,22 @@ Usage: {{- printf "priorityClassName: %s" $priorityClassName -}} {{- end -}} {{- end -}} + + +{{/* +Raft cluster configuration settings +*/}} +{{- define "raft_configuration" -}} + {{- $replicas := .Values.replicas | int -}} + {{- $nodes := list -}} + {{- range $i := until $replicas -}} + {{- $node_name := list -}} + {{- $node_name = append $node_name "weaviate" -}} + {{- $node_name = append $node_name $i -}} + {{- $nodes = append $nodes (join "-" $node_name) -}} + {{- end }} + - name: RAFT_JOIN + value: "{{ join "," $nodes }}" + - name: RAFT_BOOTSTRAP_EXPECT + value: {{ $replicas | quote }} +{{- end -}} diff --git a/weaviate/templates/weaviateStatefulset.yaml b/weaviate/templates/weaviateStatefulset.yaml index 1eacfe71..f035526d 100644 --- a/weaviate/templates/weaviateStatefulset.yaml +++ b/weaviate/templates/weaviateStatefulset.yaml @@ -9,6 +9,7 @@ metadata: app.kubernetes.io/managed-by: helm spec: replicas: {{ .Values.replicas }} + podManagementPolicy: {{ .Values.podManagementPolicy }} updateStrategy: {{ toYaml .Values.updateStrategy | indent 4}} serviceName: {{ .Values.service.name }}-headless @@ -98,6 +99,7 @@ spec: - name: DEFAULT_VECTORIZER_MODULE value: {{ index .Values "modules" "default_vectorizer_module" | trim }} {{ template "enabled_modules" . }} + {{ template "raft_configuration" . }} {{- if index .Values "modules" "text2vec-transformers" "enabled" }} - name: TRANSFORMERS_INFERENCE_API value: http://{{ index .Values "modules" "text2vec-transformers" "fullnameOverride" }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}:8080 diff --git a/weaviate/values.yaml b/weaviate/values.yaml index 9d6145b4..8eff2cd5 100644 --- a/weaviate/values.yaml +++ b/weaviate/values.yaml @@ -62,6 +62,10 @@ initContainers: # to cases where no data is imported yet. Scaling down after importing data may # break usability. Full dynamic scalability will be added in a future release. 
replicas: 1 +# Define how pods will be created. Possible values: OrderedReady | Parallel +# OrderedReady - pods will be created one after another +# Parallel - all pods will be created at once +podManagementPolicy: Parallel updateStrategy: type: RollingUpdate resources: {} @@ -230,6 +234,10 @@ env: PROMETHEUS_MONITORING_ENABLED: false PROMETHEUS_MONITORING_GROUP: false + # Raft settings are generated automatically by "raft_configuration" template + # Set Raft cluster bootstrap timeout (in seconds), default is 10 (seconds) + # RAFT_BOOTSTRAP_TIMEOUT: 30 + # Set a MEM limit for the Weaviate Pod so it can help you both increase GC-related # performance as well as avoid GC-related out-of-memory (“OOM”) situations # GOMEMLIMIT: 6GiB From a94486d52852d7e322dfb7bbc30ca7bf949c2553 Mon Sep 17 00:00:00 2001 From: Redouan El Rhazouani <81578195+redouan-rhazouani@users.noreply.github.com> Date: Mon, 11 Dec 2023 10:49:51 +0100 Subject: [PATCH 2/4] Configure cluster with both voting and non-voting members Use BootstrapExpect to specify the number of voters --- weaviate/templates/_helpers.tpl | 6 ++---- weaviate/values.yaml | 9 ++++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/weaviate/templates/_helpers.tpl b/weaviate/templates/_helpers.tpl index 6e96f74e..d4745b5d 100644 --- a/weaviate/templates/_helpers.tpl +++ b/weaviate/templates/_helpers.tpl @@ -164,9 +164,9 @@ Usage: Raft cluster configuration settings */}} {{- define "raft_configuration" -}} - {{- $replicas := .Values.replicas | int -}} + {{- $voters := .Values.env.RAFT_BOOTSTRAP_EXPECT | int -}} {{- $nodes := list -}} - {{- range $i := until $replicas -}} + {{- range $i := until $voters -}} {{- $node_name := list -}} {{- $node_name = append $node_name "weaviate" -}} {{- $node_name = append $node_name $i -}} @@ -174,6 +174,4 @@ Raft cluster configuration settings {{- end }} - name: RAFT_JOIN value: "{{ join "," $nodes }}" - - name: RAFT_BOOTSTRAP_EXPECT - value: {{ $replicas | quote }} {{- end -}} 
diff --git a/weaviate/values.yaml b/weaviate/values.yaml index 8eff2cd5..d1723cbf 100644 --- a/weaviate/values.yaml +++ b/weaviate/values.yaml @@ -7,8 +7,8 @@ image: # of weaviate. In accordance with Infra-as-code, you should pin this value # down and only change it if you explicitly want to upgrade the Weaviate # version. - tag: 1.22.5 - repo: semitechnologies/weaviate + tag: latest + repo: library/module_test_image_raft # Image pull policy: https://kubernetes.io/docs/concepts/containers/images/#image-pull-policy pullPolicy: IfNotPresent pullSecrets: [] @@ -62,6 +62,7 @@ initContainers: # to cases where no data is imported yet. Scaling down after importing data may # break usability. Full dynamic scalability will be added in a future release. replicas: 1 + # Define how pods will be created. Possible values: OrderedReady | Parallel # OrderedReady - pods will be created one after another # Parallel - all pods will be created at once @@ -236,7 +237,9 @@ env: # Raft settings are generated automatically by "raft_configuration" template # Set Raft cluster bootstrap timeout (in seconds), default is 10 (seconds) - # RAFT_BOOTSTRAP_TIMEOUT: 30 + RAFT_BOOTSTRAP_TIMEOUT: 30 + RAFT_BOOTSTRAP_EXPECT: 1 + # Set a MEM limit for the Weaviate Pod so it can help you both increase GC-related # performance as well as avoid GC-related out-of-memory (“OOM”) situations From 76f31af0b7f4213a7e3dd23cefe1f2b945e10c07 Mon Sep 17 00:00:00 2001 From: Marcin Antas Date: Mon, 11 Dec 2023 12:49:59 +0100 Subject: [PATCH 3/4] Adjust RAFT settings generation --- .cicd/test.sh | 11 ++++++++--- weaviate/templates/_helpers.tpl | 25 ++++++++++++++++++------- weaviate/values.yaml | 26 ++++++++++++++++++-------- 3 files changed, 44 insertions(+), 18 deletions(-) diff --git a/.cicd/test.sh b/.cicd/test.sh index 1bcabd27..78f8e10f 100755 --- a/.cicd/test.sh +++ b/.cicd/test.sh @@ -166,9 +166,14 @@ function check_creates_template() { check_string_existence "--set modules.text2vec-aws.enabled=true --set 
modules.text2vec-aws.envSecrets.AWS_ACCESS_KEY_ID=key --set modules.text2vec-aws.envSecrets.AWS_SECRET_ACCESS_KEY=secret" "name: AWS_ACCESS_KEY_ID" check_string_existence "--set modules.text2vec-aws.enabled=true --set modules.text2vec-aws.envSecrets.AWS_ACCESS_KEY_ID=key --set modules.text2vec-aws.envSecrets.AWS_SECRET_ACCESS_KEY=secret" "name: AWS_SECRET_ACCESS_KEY" - check_setting_has_value "--set replicas=3" "name: RAFT_JOIN" "value: \"weaviate-0,weaviate-1,weaviate-2\"" - check_setting_has_value "--set replicas=1" "name: RAFT_JOIN" "value: \"weaviate-0\"" - check_setting_has_value "--set replicas=4" "name: RAFT_BOOTSTRAP_EXPECT" "value: \"4\"" + check_setting_has_value "--set replicas=3 --set env.RAFT_BOOTSTRAP_EXPECT=3" "name: RAFT_JOIN" "value: \"weaviate-0,weaviate-1,weaviate-2\"" + check_setting_has_value "--set replicas=8 --set env.RAFT_BOOTSTRAP_EXPECT=3" "name: RAFT_JOIN" "value: \"weaviate-0,weaviate-1,weaviate-2\"" + check_setting_has_value "--set replicas=1 --set env.RAFT_BOOTSTRAP_EXPECT=1" "name: RAFT_JOIN" "value: \"weaviate-0\"" + check_setting_has_value "--set replicas=4 --set env.RAFT_BOOTSTRAP_EXPECT=4" "name: RAFT_BOOTSTRAP_EXPECT" "value: \"4\"" + check_setting_has_value "--set replicas=4 --set env.RAFT_BOOTSTRAP_EXPECT=4" "name: RAFT_JOIN" "value: \"weaviate-0,weaviate-1,weaviate-2,weaviate-3\"" + check_setting_has_value "--set replicas=10 --set env.RAFT_BOOTSTRAP_EXPECT=4" "name: RAFT_JOIN" "value: \"weaviate-0,weaviate-1,weaviate-2,weaviate-3\"" + check_setting_has_value "--set replicas=10 --set env.RAFT_JOIN=weaviate-100" "name: RAFT_JOIN" "value: \"weaviate-100\"" + check_setting_has_value "--set replicas=10 --set env.RAFT_JOIN=weaviate-100 --set env.RAFT_BOOTSTRAP_EXPECT=1" "name: RAFT_JOIN" "value: \"weaviate-100\"" _settingPassageQueryOn="--set modules.text2vec-contextionary.enabled=false --set modules.text2vec-transformers.passageQueryServices.passage.enabled=true --set 
modules.text2vec-transformers.passageQueryServices.query.enabled=true" check_setting_has_value "$_settingPassageQueryOn" "name: TRANSFORMERS_PASSAGE_INFERENCE_API" "value: http://transformers-inference-passage.default.svc.cluster.local.:8080" diff --git a/weaviate/templates/_helpers.tpl b/weaviate/templates/_helpers.tpl index d4745b5d..e2f89675 100644 --- a/weaviate/templates/_helpers.tpl +++ b/weaviate/templates/_helpers.tpl @@ -164,14 +164,25 @@ Usage: Raft cluster configuration settings */}} {{- define "raft_configuration" -}} + {{- $replicas := .Values.replicas | int -}} {{- $voters := .Values.env.RAFT_BOOTSTRAP_EXPECT | int -}} - {{- $nodes := list -}} - {{- range $i := until $voters -}} - {{- $node_name := list -}} - {{- $node_name = append $node_name "weaviate" -}} - {{- $node_name = append $node_name $i -}} - {{- $nodes = append $nodes (join "-" $node_name) -}} - {{- end }} + {{- if gt $voters $replicas -}} + {{- fail "env.RAFT_BOOTSTRAP_EXPECT value cannot be greater than replicas value" -}} + {{- end -}} + {{- if empty .Values.env.RAFT_JOIN -}} + {{- $nodes := list -}} + {{- range $i := until $voters -}} + {{- $node_name := list -}} + {{- $node_name = append $node_name "weaviate" -}} + {{- $node_name = append $node_name $i -}} + {{- $nodes = append $nodes (join "-" $node_name) -}} + {{- end -}} - name: RAFT_JOIN value: "{{ join "," $nodes }}" + {{- else -}} + {{- $votersCount := len (split "," .Values.env.RAFT_JOIN) -}} + {{- if not (eq $votersCount $voters) -}} + {{- fail "env.RAFT_BOOTSTRAP_EXPECT value needs to be equal to number of env.RAFT_JOIN nodes" -}} + {{- end -}} + {{- end -}} {{- end -}} diff --git a/weaviate/values.yaml b/weaviate/values.yaml index d1723cbf..5ed5c6bf 100644 --- a/weaviate/values.yaml +++ b/weaviate/values.yaml @@ -7,8 +7,8 @@ image: # of weaviate. In accordance with Infra-as-code, you should pin this value # down and only change it if you explicitly want to upgrade the Weaviate # version. 
- tag: latest - repo: library/module_test_image_raft + tag: 1.22.6 + repo: semitechnologies/weaviate # Image pull policy: https://kubernetes.io/docs/concepts/containers/images/#image-pull-policy pullPolicy: IfNotPresent pullSecrets: [] @@ -228,6 +228,22 @@ debug: false env: CLUSTER_GOSSIP_BIND_PORT: 7000 CLUSTER_DATA_BIND_PORT: 7001 + + # Set RAFT cluster expected number of voter nodes at bootstrap + RAFT_BOOTSTRAP_EXPECT: 1 + + # Set RAFT cluster bootstrap timeout (in seconds), default is 10 (seconds) + # RAFT_BOOTSTRAP_TIMEOUT: 10 + + # Manually set the RAFT voter nodes. + # The RAFT_JOIN value is automatically generated by the "raft_configuration" + # template, but if you want to set this value manually, it can be done + # by setting the RAFT_JOIN environment variable, for example: RAFT_JOIN: "weaviate-0,weaviate-1" + # Please note that in this case the RAFT_BOOTSTRAP_EXPECT setting also needs to be adjusted manually + # to match the number of RAFT voters, so if 2 nodes are set using the RAFT_JOIN variable + # then RAFT_BOOTSTRAP_EXPECT needs to equal 2 as well. + # RAFT_JOIN: "weaviate-0" + # The aggressiveness of the Go Garbage Collector. 100 is the default value.
GOGC: 100 @@ -235,12 +251,6 @@ env: PROMETHEUS_MONITORING_ENABLED: false PROMETHEUS_MONITORING_GROUP: false - # Raft settings are generated automatically by "raft_configuration" template - # Set Raft cluster bootstrap timeout (in seconds), default is 10 (seconds) - RAFT_BOOTSTRAP_TIMEOUT: 30 - RAFT_BOOTSTRAP_EXPECT: 1 - - # Set a MEM limit for the Weaviate Pod so it can help you both increase GC-related # performance as well as avoid GC-related out-of-memory (“OOM”) situations # GOMEMLIMIT: 6GiB From 39f9994e156c0624b0de749038159006d56d1eb0 Mon Sep 17 00:00:00 2001 From: Redouan El Rhazouani <81578195+redouan-rhazouani@users.noreply.github.com> Date: Wed, 13 Dec 2023 17:11:42 +0100 Subject: [PATCH 4/4] RAFT: add election and snapshot configurations --- weaviate/values.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/weaviate/values.yaml b/weaviate/values.yaml index 5ed5c6bf..f983aaf7 100644 --- a/weaviate/values.yaml +++ b/weaviate/values.yaml @@ -231,7 +231,12 @@ env: # Set RAFT cluster expected number of voter nodes at bootstrap RAFT_BOOTSTRAP_EXPECT: 1 - + # RAFT_HEARTBEAT_TIMEOUT: 1 + # RAFT_ELECTION_TIMEOUT: 1 + # Control how often raft checks if it should perform a snapshot + # RAFT_SNAPSHOT_INTERVAL: 120 + # Number of outstanding log entries before performing a snapshot + # RAFT_SNAPSHOT_THRESHOLD: 8192 # Set RAFT cluster bootstrap timeout (in seconds), default is 10 (seconds) # RAFT_BOOTSTRAP_TIMEOUT: 10