diff --git a/.github/workflows/enterprise-patching.yaml b/.github/workflows/enterprise-patching.yaml deleted file mode 100644 index ff5ad6ed3..000000000 --- a/.github/workflows/enterprise-patching.yaml +++ /dev/null @@ -1,129 +0,0 @@ -name: Patch and Retag Images - -on: - workflow_dispatch: - workflow_run: - workflows: ["Migrate Images to QUAY"] - types: - - completed - branches: - - main - -jobs: - generate-matrix: - runs-on: ubuntu-latest - if: ${{ github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }} - outputs: - images: ${{ steps.generate-matrix.outputs.images }} - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - - - name: Generate Matrix - id: generate-matrix - run: | - images=$(jq -r '.[]' .original-images.json | jq -R -s -c 'split("\n") | map(select(length > 0))') - echo "images=$images" >> $GITHUB_OUTPUT - - patch-and-retag: - needs: generate-matrix - runs-on: ubuntu-latest - strategy: - matrix: - image: ${{ fromJson(needs.generate-matrix.outputs.images) }} - fail-fast: false - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - with: - driver: docker-container - driver-opts: | - image=moby/buildkit:master - network=host - - - name: Install Copacetic - run: | - wget https://github.com/project-copacetic/copacetic/releases/download/v0.9.0/copa_0.9.0_linux_amd64.tar.gz - tar -xzf copa_0.9.0_linux_amd64.tar.gz - chmod +x copa - sudo mv copa /usr/local/bin/ - - - name: Install Trivy - run: | - TRIVY_VERSION="0.55.0" - wget https://github.com/aquasecurity/trivy/releases/download/v${TRIVY_VERSION}/trivy_${TRIVY_VERSION}_Linux-64bit.tar.gz - tar -xzf trivy_${TRIVY_VERSION}_Linux-64bit.tar.gz - chmod +x trivy - sudo mv trivy /usr/local/bin/ - - - name: Login to Quay.io - uses: docker/login-action@v3 - with: - registry: quay.io - username: ${{ secrets.QUAY_USER }} - password: ${{ secrets.QUAY_TOKEN }} - - - name: Process 
Image - run: | - sudo apt-get update && sudo apt-get install -y jq python3-pip - image="${{ matrix.image }}" - echo "Processing $image" - base_name=$(echo "$image" | awk -F'/' '{print $NF}' | cut -d':' -f1) - tag=$(echo "$image" | awk -F':' '{print $NF}') - new_image="quay.io/rackspace/rackerlabs-${base_name}:${tag}" - patched_tag="${tag}-enterprise" - patched_image="quay.io/rackspace/rackerlabs-${base_name}:${patched_tag}" - - # Pull the image - docker pull "$new_image" || { echo "Failed to pull $new_image"; exit 1; } - - # Scan all vulnerabilities (OS and language-specific) - trivy image -f json -o "report-${base_name}-${tag}.json" "$new_image" || { echo "Failed to scan $new_image"; exit 1; } - - # Scan OS vulnerabilities with fixes for Copacetic - trivy image --vuln-type os --ignore-unfixed -f json -o "os-report-${base_name}-${tag}.json" "$new_image" || { echo "Failed to scan OS vulnerabilities for $new_image"; exit 1; } - - # Attempt to patch OS vulnerabilities; set intermediate image - if copa patch -i "$new_image" -r "os-report-${base_name}-${tag}.json" -t "$patched_tag"; then - echo "Patched OS vulnerabilities in $new_image" - intermediate_image="$patched_image" - else - echo "No OS vulnerabilities patched for $new_image" - intermediate_image="$new_image" - fi - - # Filter cve/requirements.txt to only update installed packages - docker run --rm -v "$(pwd):/output" "$intermediate_image" sh -c "/var/lib/openstack/bin/pip3 list --format=json > /output/installed.json 2>/dev/null || echo '[]' > /output/installed.json" - python3 cve/filter.py - - if [ -s "filtered-requirements.txt" ]; then - echo "Applying Python package updates from cve/requirements.txt" - echo "FROM $intermediate_image" > Dockerfile.temp - echo "COPY filtered-requirements.txt /tmp/filtered-requirements.txt" >> Dockerfile.temp - echo "RUN /var/lib/openstack/bin/pip3 install -r /tmp/filtered-requirements.txt" >> Dockerfile.temp - docker build -f Dockerfile.temp -t "$patched_image" . 
|| { echo "Failed to build $patched_image with Python patches"; exit 1; } - intermediate_image="$patched_image" - else - echo "No Python packages updated from cve/requirements.txt" - fi - - # Flatten the image - echo "Flattening $patched_image" - container_id=$(docker create "$intermediate_image") - docker export "$container_id" > "flattened-${base_name}-${patched_tag}.tar" - docker import "flattened-${base_name}-${patched_tag}.tar" "$patched_image" - docker rm "$container_id" - rm "flattened-${base_name}-${patched_tag}.tar" - - # Push the flattened image - docker push "$patched_image" || { echo "Failed to push $patched_image"; exit 1; } - echo "Pushed $patched_image" - - # Clean up - rm -f "report-${base_name}-${tag}.json" "os-report-${base_name}-${tag}.json" filtered-requirements.txt Dockerfile.temp installed.json requirements.txt - -env: - DOCKER_CLI_EXPERIMENTAL: enabled diff --git a/.github/workflows/release-cinder-netapp.yml b/.github/workflows/release-cinder-netapp.yml deleted file mode 100644 index abb3b491a..000000000 --- a/.github/workflows/release-cinder-netapp.yml +++ /dev/null @@ -1,111 +0,0 @@ -# -name: Create and publish a Cinder RXT compatible image - -# Configures this workflow to run every time a change is pushed to the branch called `release`. -on: - workflow_dispatch: - inputs: - imageTag: - description: 'Set tag for the image' - required: true - default: 'master-ubuntu_jammy' - type: choice - options: - - master-ubuntu_jammy - - 2023.1-ubuntu_jammy - - 2023.2-ubuntu_jammy - - 2024.1-ubuntu_jammy - -# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. -env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. 
-jobs: - build-and-push-image: - outputs: - MY_DATE: ${{ steps.mydate.outputs.MY_DATE }} - MY_CONTAINER: ${{ steps.mycontainer.outputs.MY_CONTAINER }} - runs-on: ubuntu-latest - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. - permissions: - contents: read - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. - - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - # ghcr only allows lowercase repository names - - name: lowercase repo name - run: | - echo "IMAGE_NAME=${GITHUB_REPOSITORY,,}" >>${GITHUB_ENV} - # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels. - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages. - # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository. 
- # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step. - - name: Dynamically set MY_DATE environment variable - run: echo "MY_DATE=$(date +%s)" >> $GITHUB_ENV - - name: Build and push Docker image - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 - with: - context: . - file: Containerfiles/Cinder-volume-netapp-Containerfile - push: true - tags: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/cinder-volume-rxt:${{ github.event.inputs.imageTag }} - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/cinder-volume-rxt:${{ github.event.inputs.imageTag }}-${{ env.MY_DATE }} - labels: ${{ steps.meta.outputs.labels }} - build-args: | - VERSION=${{ github.event.inputs.imageTag }} - - name: Dynamically set MY_CONTAINER output option - id: mycontainer - run: echo "MY_CONTAINER=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/cinder:${{ github.event.inputs.imageTag }}-${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - name: Dynamically set MY_DATE output option - id: mydate - run: echo "MY_DATE=${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - change-original-images: - runs-on: ubuntu-latest - needs: [build-and-push-image] - permissions: - contents: write - pull-requests: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Dynamically update the original images file - run: jq '. 
+ ["${{ needs.build-and-push-image.outputs.MY_CONTAINER }}"] | sort' .original-images.json | tee .original-images.json.new - - name: Rewrite original images file - run: mv .original-images.json.new .original-images.json - - name: Create Pull Request - id: cpr - uses: peter-evans/create-pull-request@v7 - with: - commit-message: Update original images with new container - committer: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> - author: ${{ github.actor }} <${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com> - signoff: false - branch: ${{ needs.build-and-push-image.outputs.MY_DATE }} - sign-commits: true - delete-branch: true - title: 'chore: Update original images' - body: | - Update container image - - Updated original image file with container ${{needs.build-and-push-image.outputs.MY_CONTAINER}} - change request Auto-generated - labels: | - container images - automated pr - draft: false diff --git a/.github/workflows/release-glance.yml b/.github/workflows/release-glance.yml deleted file mode 100644 index a92b36f1a..000000000 --- a/.github/workflows/release-glance.yml +++ /dev/null @@ -1,118 +0,0 @@ -# -name: Create and publish a Glance compatible image - -# Configures this workflow to run every time a change is pushed to the branch called `release`. -on: - workflow_dispatch: - inputs: - imageTag: - description: 'Set tag for the image' - required: true - default: 'master-ubuntu_jammy' - type: choice - options: - - master-ubuntu_jammy - - 2023.1-ubuntu_jammy - - 2023.2-ubuntu_jammy - - 2024.1-ubuntu_jammy - pluginTag: - description: 'Set release used for the build environment' - required: true - default: 'master' - type: choice - options: - - "master" - - "2023.1" - - "2023.2" - - "2024.1" - -# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. 
-env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. -jobs: - build-and-push-image: - outputs: - MY_DATE: ${{ steps.mydate.outputs.MY_DATE }} - MY_CONTAINER: ${{ steps.mycontainer.outputs.MY_CONTAINER }} - runs-on: ubuntu-latest - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. - permissions: - contents: read - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. - - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels. - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages. - # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository. 
- # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step. - - name: Dynamically set MY_DATE environment variable - run: echo "MY_DATE=$(date +%s)" >> $GITHUB_ENV - - name: Build and push Docker image - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 - with: - context: . - file: Containerfiles/Glance-Containerfile - push: true - tags: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/glance:${{ github.event.inputs.imageTag }} - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/glance:${{ github.event.inputs.imageTag }}-${{ env.MY_DATE }} - labels: ${{ steps.meta.outputs.labels }} - build-args: | - VERSION=${{ github.event.inputs.imageTag }} - PLUGIN_VERSION=${{ github.event.inputs.pluginTag }} - - name: Dynamically set MY_CONTAINER output option - id: mycontainer - run: echo "MY_CONTAINER=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/glance:${{ github.event.inputs.imageTag }}-${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - name: Dynamically set MY_DATE output option - id: mydate - run: echo "MY_DATE=${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - change-original-images: - runs-on: ubuntu-latest - needs: [build-and-push-image] - permissions: - contents: write - pull-requests: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Dynamically update the original images file - run: jq '. 
+ ["${{ needs.build-and-push-image.outputs.MY_CONTAINER }}"] | sort' .original-images.json | tee .original-images.json.new - - name: Rewrite original images file - run: mv .original-images.json.new .original-images.json - - name: Create Pull Request - id: cpr - uses: peter-evans/create-pull-request@v7 - with: - commit-message: Update original images with new container - committer: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> - author: ${{ github.actor }} <${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com> - signoff: false - branch: ${{ needs.build-and-push-image.outputs.MY_DATE }} - sign-commits: true - delete-branch: true - title: 'chore: Update original images' - body: | - Update container image - - Updated original image file with container ${{needs.build-and-push-image.outputs.MY_CONTAINER}} - change request Auto-generated - labels: | - container images - automated pr - draft: false diff --git a/.github/workflows/release-heat-rxt.yml b/.github/workflows/release-heat-rxt.yml deleted file mode 100644 index 4b1a2af45..000000000 --- a/.github/workflows/release-heat-rxt.yml +++ /dev/null @@ -1,115 +0,0 @@ -# -name: Create and Publish a Heat Image - -on: - push: - paths: - - '.github/workflows/release-heat-rxt.yml' - - 'Containerfiles/HeatRXT-Containerfile' - branches: - - development - - main - workflow_dispatch: - inputs: - imageTag: - description: 'Set tag for the image' - required: true - default: '2024.1-ubuntu_jammy' - type: choice - options: - - 2024.1-ubuntu_jammy - -# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. -env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - DEF_TAG_NAME: 2024.1-ubuntu_jammy - -# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. 
-jobs: - build-and-push-image: - outputs: - MY_DATE: ${{ steps.mydate.outputs.MY_DATE }} - MY_CONTAINER: ${{ steps.mycontainer.outputs.MY_CONTAINER }} - runs-on: ubuntu-latest - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. - permissions: - contents: read - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. - - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - # ghcr only allows lowercase repository names - - name: lowercase repo name - run: | - echo "IMAGE_NAME=${GITHUB_REPOSITORY,,}" >>${GITHUB_ENV} - # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels. - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages. - # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository. 
- # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step. - - name: Dynamically set MY_DATE environment variable - run: echo "MY_DATE=$(date +%s)" >> $GITHUB_ENV - - name: Build and push Docker image - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 - with: - context: . - file: Containerfiles/HeatRXT-Containerfile - push: true - tags: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/heat:${{ github.event.inputs.imageTag || env.DEF_TAG_NAME }} - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/heat:${{ github.event.inputs.imageTag || env.DEF_TAG_NAME }}-${{ env.MY_DATE }} - labels: ${{ steps.meta.outputs.labels }} - build-args: | - VERSION=${{ github.event.inputs.imageTag || env.DEF_TAG_NAME }} - - name: Dynamically set MY_CONTAINER output option - id: mycontainer - run: echo "MY_CONTAINER=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/heat:${{ github.event.inputs.imageTag }}-${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - name: Dynamically set MY_DATE output option - id: mydate - run: echo "MY_DATE=${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - change-original-images: - runs-on: ubuntu-latest - needs: [build-and-push-image] - permissions: - contents: write - pull-requests: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Dynamically update the original images file - run: jq '. 
+ ["${{ needs.build-and-push-image.outputs.MY_CONTAINER }}"] | sort' .original-images.json | tee .original-images.json.new - - name: Rewrite original images file - run: mv .original-images.json.new .original-images.json - - name: Create Pull Request - id: cpr - uses: peter-evans/create-pull-request@v7 - with: - commit-message: Update original images with new container - committer: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> - author: ${{ github.actor }} <${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com> - signoff: false - branch: ${{ needs.build-and-push-image.outputs.MY_DATE }} - sign-commits: true - delete-branch: true - title: 'chore: Update original images' - body: | - Update container image - - Updated original image file with container ${{needs.build-and-push-image.outputs.MY_CONTAINER}} - change request Auto-generated - labels: | - container images - automated pr - draft: false diff --git a/.github/workflows/release-horizon-rxt.yml b/.github/workflows/release-horizon-rxt.yml deleted file mode 100644 index 9d7f7d8b6..000000000 --- a/.github/workflows/release-horizon-rxt.yml +++ /dev/null @@ -1,118 +0,0 @@ -# -name: Create and publish a Horizon RXT compatible image - -# Configures this workflow to run every time a change is pushed to the branch called `release`. -on: - workflow_dispatch: - inputs: - imageTag: - description: 'Set tag for the image' - required: true - default: 'master-ubuntu_jammy' - type: choice - options: - - master-ubuntu_jammy - - 2023.1-ubuntu_jammy - - 2023.2-ubuntu_jammy - - 2024.1-ubuntu_jammy - pluginTag: - description: 'Set release used for the build environment' - required: true - default: 'master' - type: choice - options: - - "master" - - "2023.1" - - "2023.2" - - "2024.1" - -# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. 
-env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. -jobs: - build-and-push-image: - outputs: - MY_DATE: ${{ steps.mydate.outputs.MY_DATE }} - MY_CONTAINER: ${{ steps.mycontainer.outputs.MY_CONTAINER }} - runs-on: ubuntu-latest - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. - permissions: - contents: read - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. - - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels. - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages. - # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository. 
- # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step. - - name: Dynamically set MY_DATE environment variable - run: echo "MY_DATE=$(date +%s)" >> $GITHUB_ENV - - name: Build and push Docker image - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 - with: - context: . - file: Containerfiles/HorizonRXT-Containerfile - push: true - tags: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/horizon-rxt:${{ github.event.inputs.pluginTag }} - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/horizon-rxt:${{ github.event.inputs.pluginTag }}-${{ env.MY_DATE }} - labels: ${{ steps.meta.outputs.labels }} - build-args: | - VERSION=${{ github.event.inputs.imageTag }} - PLUGIN_VERSION=${{ github.event.inputs.pluginTag }} - - name: Dynamically set MY_CONTAINER output option - id: mycontainer - run: echo "MY_CONTAINER=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/horizon:${{ github.event.inputs.imageTag }}-${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - name: Dynamically set MY_DATE output option - id: mydate - run: echo "MY_DATE=${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - change-original-images: - runs-on: ubuntu-latest - needs: [build-and-push-image] - permissions: - contents: write - pull-requests: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Dynamically update the original images file - run: jq '. 
+ ["${{ needs.build-and-push-image.outputs.MY_CONTAINER }}"] | sort' .original-images.json | tee .original-images.json.new - - name: Rewrite original images file - run: mv .original-images.json.new .original-images.json - - name: Create Pull Request - id: cpr - uses: peter-evans/create-pull-request@v7 - with: - commit-message: Update original images with new container - committer: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> - author: ${{ github.actor }} <${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com> - signoff: false - branch: ${{ needs.build-and-push-image.outputs.MY_DATE }} - sign-commits: true - delete-branch: true - title: 'chore: Update original images' - body: | - Update container image - - Updated original image file with container ${{needs.build-and-push-image.outputs.MY_CONTAINER}} - change request Auto-generated - labels: | - container images - automated pr - draft: false diff --git a/.github/workflows/release-magnum-rxt.yml b/.github/workflows/release-magnum-rxt.yml deleted file mode 100644 index 36f58d782..000000000 --- a/.github/workflows/release-magnum-rxt.yml +++ /dev/null @@ -1,124 +0,0 @@ -# -name: Create and Publish a Magnum Image - -on: - push: - paths: - - .github/workflows/release-magnum-rxt.yml - - Containerfiles/MagnumRXT-Containerfile - branches: - - development - - main - workflow_dispatch: - inputs: - imageTag: - description: Set tag for the image - required: true - default: 2024.1-ubuntu_jammy - type: choice - options: - - master - - 2024.1-ubuntu_jammy - pluginTag: - description: 'Set release used for the build environment' - required: true - default: master - type: choice - options: - - master - -# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. 
-env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - DEF_TAG_NAME: 2024.1-ubuntu_jammy - -# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. -jobs: - build-and-push-image: - outputs: - MY_DATE: ${{ steps.mydate.outputs.MY_DATE }} - MY_CONTAINER: ${{ steps.mycontainer.outputs.MY_CONTAINER }} - runs-on: ubuntu-latest - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. - permissions: - contents: read - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. - - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - # ghcr only allows lowercase repository names - - name: lowercase repo name - run: | - echo "IMAGE_NAME=${GITHUB_REPOSITORY,,}" >>${GITHUB_ENV} - # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels. - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages. - # It uses the `context` parameter to define the build's context as the set of files located in the specified path. 
For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository. - # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step. - - name: Dynamically set MY_DATE environment variable - run: echo "MY_DATE=$(date +%s)" >> $GITHUB_ENV - - name: Build and push Docker image - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 - with: - context: . - file: Containerfiles/MagnumRXT-Containerfile - push: true - tags: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/magnum:${{ github.event.inputs.imageTag || env.DEF_TAG_NAME }} - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/magnum:${{ github.event.inputs.imageTag || env.DEF_TAG_NAME }}-${{ env.MY_DATE }} - labels: ${{ steps.meta.outputs.labels }} - build-args: | - VERSION=${{ github.event.inputs.imageTag || env.DEF_TAG_NAME }} - PLUGIN_VERSION=${{ github.event.inputs.pluginTag }} - - name: Dynamically set MY_CONTAINER output option - id: mycontainer - run: echo "MY_CONTAINER=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/magnum:${{ github.event.inputs.imageTag }}-${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - name: Dynamically set MY_DATE output option - id: mydate - run: echo "MY_DATE=${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - change-original-images: - runs-on: ubuntu-latest - needs: [build-and-push-image] - permissions: - contents: write - pull-requests: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Dynamically update the original images file - run: jq '. 
+ ["${{ needs.build-and-push-image.outputs.MY_CONTAINER }}"] | sort' .original-images.json | tee .original-images.json.new - - name: Rewrite original images file - run: mv .original-images.json.new .original-images.json - - name: Create Pull Request - id: cpr - uses: peter-evans/create-pull-request@v7 - with: - commit-message: Update original images with new container - committer: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> - author: ${{ github.actor }} <${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com> - signoff: false - branch: ${{ needs.build-and-push-image.outputs.MY_DATE }} - sign-commits: true - delete-branch: true - title: 'chore: Update original images' - body: | - Update container image - - Updated original image file with container ${{needs.build-and-push-image.outputs.MY_CONTAINER}} - change request Auto-generated - labels: | - container images - automated pr - draft: false diff --git a/.github/workflows/release-neutron-oslodb.yaml b/.github/workflows/release-neutron-oslodb.yaml deleted file mode 100644 index 3386e9221..000000000 --- a/.github/workflows/release-neutron-oslodb.yaml +++ /dev/null @@ -1,123 +0,0 @@ -# -name: Create and publish a Neutron oslodb patched image - -# Configures this workflow to run every time a change is pushed to the branch called `release`. -on: - workflow_dispatch: - inputs: - imageTag: - description: 'Set tag for the image' - required: true - default: 'master-ubuntu_jammy' - type: choice - options: - - master-ubuntu_jammy - - 2024.1-ubuntu_jammy - pluginTag: - description: 'Set release used for the build environment' - required: true - default: 'master' - type: choice - options: - - "master" - - "2024.1" - NeutronTag: - description: 'Set Neutron version' - required: true - default: 'sync-add-mode' - type: choice - options: - - 'sync-add-mode' - - 'sync-add-mode-2024.1' - -# Defines two custom environment variables for the workflow. 
These are used for the Container registry domain, and a name for the Docker image that this workflow builds. -env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. -jobs: - build-and-push-image: - outputs: - MY_DATE: ${{ steps.mydate.outputs.MY_DATE }} - MY_CONTAINER: ${{ steps.mycontainer.outputs.MY_CONTAINER }} - runs-on: ubuntu-latest - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. - permissions: - contents: read - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. - - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels. - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages. - # It uses the `context` parameter to define the build's context as the set of files located in the specified path. 
For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository. - # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step. - - name: Dynamically set MY_DATE environment variable - run: echo "MY_DATE=$(date +%s)" >> $GITHUB_ENV - - name: Build and push Docker image - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 - with: - context: . - file: Containerfiles/Neutron-oslo_db-Containerfile - push: true - tags: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/neutron-oslodb:${{ github.event.inputs.imageTag }} - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/neutron-oslodb:${{ github.event.inputs.imageTag }}-${{ env.MY_DATE }} - labels: ${{ steps.meta.outputs.labels }} - build-args: | - VERSION=${{ github.event.inputs.imageTag }} - PLUGIN_VERSION=${{ github.event.inputs.pluginTag }} - NEUTRON_VERSION=${{ github.event.inputs.NeutronTag }} - - name: Dynamically set MY_CONTAINER output option - id: mycontainer - run: echo "MY_CONTAINER=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/neutron-oslodb:${{ github.event.inputs.imageTag }}-${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - name: Dynamically set MY_DATE output option - id: mydate - run: echo "MY_DATE=${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - change-original-images: - runs-on: ubuntu-latest - needs: [build-and-push-image] - permissions: - contents: write - pull-requests: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Dynamically update the original images file - run: jq '. 
+ ["${{ needs.build-and-push-image.outputs.MY_CONTAINER }}"] | sort' .original-images.json | tee .original-images.json.new - - name: Rewrite original images file - run: mv .original-images.json.new .original-images.json - - name: Create Pull Request - id: cpr - uses: peter-evans/create-pull-request@v7 - with: - commit-message: Update original images with new container - committer: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> - author: ${{ github.actor }} <${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com> - signoff: false - branch: ${{ needs.build-and-push-image.outputs.MY_DATE }} - sign-commits: true - delete-branch: true - title: 'chore: Update original images' - body: | - Update container image - - Updated original image file with container ${{needs.build-and-push-image.outputs.MY_CONTAINER}} - change request Auto-generated - labels: | - container images - automated pr - draft: false diff --git a/.github/workflows/release-nova-oslodb.yaml b/.github/workflows/release-nova-oslodb.yaml deleted file mode 100644 index d8c54a984..000000000 --- a/.github/workflows/release-nova-oslodb.yaml +++ /dev/null @@ -1,119 +0,0 @@ -# -name: Create and publish a Nova oslodb patched image - -# Configures this workflow to run every time a change is pushed to the branch called `release`. -on: - workflow_dispatch: - inputs: - imageTag: - description: 'Set tag for the image' - required: true - default: 'master-ubuntu_jammy' - type: choice - options: - - master-ubuntu_jammy - - 2023.1-ubuntu_jammy - - 2023.2-ubuntu_jammy - - 2024.1-ubuntu_jammy - pluginTag: - description: 'Set release used for the build environment' - required: true - default: 'master' - type: choice - options: - - "master" - - "2023.1" - - "2023.2" - - "2024.1" - -# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. 
-env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. -jobs: - build-and-push-image: - outputs: - MY_DATE: ${{ steps.mydate.outputs.MY_DATE }} - MY_CONTAINER: ${{ steps.mycontainer.outputs.MY_CONTAINER }} - runs-on: ubuntu-latest - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. - permissions: - contents: read - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. - - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels. - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages. - # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository. 
- # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step. - - name: Dynamically set MY_DATE environment variable - run: echo "MY_DATE=$(date +%s)" >> $GITHUB_ENV - - name: Build and push Docker image - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 - with: - context: . - file: Containerfiles/Nova-oslo_db-Containerfile - push: true - tags: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/nova-oslodb:${{ github.event.inputs.imageTag }} - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/nova-oslodb:${{ github.event.inputs.imageTag }}-${{ env.MY_DATE }} - labels: ${{ steps.meta.outputs.labels }} - build-args: | - VERSION=${{ github.event.inputs.imageTag }} - PLUGIN_VERSION=${{ github.event.inputs.pluginTag }} - - name: Dynamically set MY_CONTAINER output option - id: mycontainer - run: echo "MY_CONTAINER=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/nova-oslodb:${{ github.event.inputs.imageTag }}-${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - name: Dynamically set MY_DATE output option - id: mydate - run: echo "MY_DATE=${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - - change-original-images: - runs-on: ubuntu-latest - needs: [build-and-push-image] - permissions: - contents: write - pull-requests: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Dynamically update the original images file - run: jq '. 
+ ["${{ needs.build-and-push-image.outputs.MY_CONTAINER }}"] | sort' .original-images.json | tee .original-images.json.new - - name: Rewrite original images file - run: mv .original-images.json.new .original-images.json - - name: Create Pull Request - id: cpr - uses: peter-evans/create-pull-request@v7 - with: - commit-message: Update original images with new container - committer: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> - author: ${{ github.actor }} <${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com> - signoff: false - branch: ${{ needs.build-and-push-image.outputs.MY_DATE }} - sign-commits: true - delete-branch: true - title: 'chore: Update original images' - body: | - Update container image - - Updated original image file with container ${{needs.build-and-push-image.outputs.MY_CONTAINER}} - change request Auto-generated - labels: | - container images - automated pr - draft: false diff --git a/.github/workflows/release-nova-uefi.yml b/.github/workflows/release-nova-uefi.yml deleted file mode 100644 index 8a6b88397..000000000 --- a/.github/workflows/release-nova-uefi.yml +++ /dev/null @@ -1,118 +0,0 @@ -# -name: Create and publish a the Nova EFI compatible image - -# Configures this workflow to run every time a change is pushed to the branch called `release`. -on: - workflow_dispatch: - inputs: - imageTag: - description: 'Set tag for the image' - required: true - default: 'master-ubuntu_jammy' - type: choice - options: - - master-ubuntu_jammy - - 2023.1-ubuntu_jammy - - 2023.2-ubuntu_jammy - - 2024.1-ubuntu_jammy - pluginTag: - description: 'Set release used for the build environment' - required: true - default: 'master' - type: choice - options: - - "master" - - "2023.1" - - "2023.2" - - "2024.1" - -# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. 
-env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. -jobs: - build-and-push-image: - outputs: - MY_DATE: ${{ steps.mydate.outputs.MY_DATE }} - MY_CONTAINER: ${{ steps.mycontainer.outputs.MY_CONTAINER }} - runs-on: ubuntu-latest - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. - permissions: - contents: read - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. - - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels. - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages. - # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository. 
- # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step. - - name: Dynamically set MY_DATE environment variable - run: echo "MY_DATE=$(date +%s)" >> $GITHUB_ENV - - name: Build and push Docker image - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 - with: - context: . - file: Containerfiles/NovaEFI-Containerfile - push: true - tags: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/nova-efi:${{ github.event.inputs.imageTag }} - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/nova-efi:${{ github.event.inputs.imageTag }}-${{ env.MY_DATE }} - labels: ${{ steps.meta.outputs.labels }} - build-args: | - VERSION=${{ github.event.inputs.imageTag }} - PLUGIN_VERSION=${{ github.event.inputs.pluginTag }} - - name: Dynamically set MY_CONTAINER output option - id: mycontainer - run: echo "MY_CONTAINER=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/nova-efi:${{ github.event.inputs.imageTag }}-${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - name: Dynamically set MY_DATE output option - id: mydate - run: echo "MY_DATE=${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - change-original-images: - runs-on: ubuntu-latest - needs: [build-and-push-image] - permissions: - contents: write - pull-requests: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Dynamically update the original images file - run: jq '. 
+ ["${{ needs.build-and-push-image.outputs.MY_CONTAINER }}"] | sort' .original-images.json | tee .original-images.json.new - - name: Rewrite original images file - run: mv .original-images.json.new .original-images.json - - name: Create Pull Request - id: cpr - uses: peter-evans/create-pull-request@v7 - with: - commit-message: Update original images with new container - committer: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> - author: ${{ github.actor }} <${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com> - signoff: false - branch: ${{ needs.build-and-push-image.outputs.MY_DATE }} - sign-commits: true - delete-branch: true - title: 'chore: Update original images' - body: | - Update container image - - Updated original image file with container ${{needs.build-and-push-image.outputs.MY_CONTAINER}} - change request Auto-generated - labels: | - container images - automated pr - draft: false diff --git a/.github/workflows/release-octavia-ovn.yml b/.github/workflows/release-octavia-ovn.yml deleted file mode 100644 index 1221fa84b..000000000 --- a/.github/workflows/release-octavia-ovn.yml +++ /dev/null @@ -1,124 +0,0 @@ -name: Create and publish an Octavia compatible image - -# Configures this workflow to run every time a change is pushed to the branch called `release`. -on: - workflow_dispatch: - inputs: - imageTag: - description: 'Set tag for the image' - required: true - default: 'master-ubuntu_jammy' - type: choice - options: - - master-ubuntu_jammy - - 2024.1-ubuntu_jammy - pluginTag: - description: 'Set release used for the build environment' - required: true - default: 'master' - type: choice - options: - - "master" - - "2024.1" - ovnPluginTag: - description: 'Set OVN plugin version' - required: true - default: 'master' - type: choice - options: - - 'master' - - '5.0.0' - - '6.0.0' - - '7.0.0' - -# Defines two custom environment variables for the workflow. 
These are used for the Container registry domain, and a name for the Docker image that this workflow builds. -env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. -jobs: - build-and-push-image: - outputs: - MY_DATE: ${{ steps.mydate.outputs.MY_DATE }} - MY_CONTAINER: ${{ steps.mycontainer.outputs.MY_CONTAINER }} - runs-on: ubuntu-latest - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. - permissions: - contents: read - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. - - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels. - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages. - # It uses the `context` parameter to define the build's context as the set of files located in the specified path. 
For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository. - # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step. - - name: Dynamically set MY_DATE environment variable - run: echo "MY_DATE=$(date +%s)" >> $GITHUB_ENV - - name: Build and push Docker image - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 - with: - context: . - file: Containerfiles/OctaviaOVN-Containerfile - push: true - tags: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/octavia-ovn:${{ github.event.inputs.imageTag }} - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/octavia-ovn:${{ github.event.inputs.imageTag }}-${{ env.MY_DATE }} - labels: ${{ steps.meta.outputs.labels }} - build-args: | - VERSION=${{ github.event.inputs.imageTag }} - PLUGIN_VERSION=${{ github.event.inputs.pluginTag }} - OVN_PLUGIN_VERSION=${{ github.event.inputs.ovnPluginTag }} - - name: Dynamically set MY_CONTAINER output option - id: mycontainer - run: echo "MY_CONTAINER=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/nova-efi:${{ github.event.inputs.imageTag }}-${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - name: Dynamically set MY_DATE output option - id: mydate - run: echo "MY_DATE=${{ env.MY_DATE }}" >> $GITHUB_OUTPUT - - change-original-images: - runs-on: ubuntu-latest - needs: [build-and-push-image] - permissions: - contents: write - pull-requests: write - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Dynamically update the original images file - run: jq '. 
+ ["${{ needs.build-and-push-image.outputs.MY_CONTAINER }}"] | sort' .original-images.json | tee .original-images.json.new - - name: Rewrite original images file - run: mv .original-images.json.new .original-images.json - - name: Create Pull Request - id: cpr - uses: peter-evans/create-pull-request@v7 - with: - commit-message: Update original images with new container - committer: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> - author: ${{ github.actor }} <${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com> - signoff: false - branch: ${{ needs.build-and-push-image.outputs.MY_DATE }} - sign-commits: true - delete-branch: true - title: 'chore: Update original images' - body: | - Update container image - - Updated original image file with container ${{needs.build-and-push-image.outputs.MY_CONTAINER}} - change request Auto-generated - labels: | - container images - automated pr - draft: false diff --git a/.github/workflows/smoke-cinder-netapp.yml b/.github/workflows/smoke-cinder-netapp.yml deleted file mode 100644 index 36dc23b92..000000000 --- a/.github/workflows/smoke-cinder-netapp.yml +++ /dev/null @@ -1,45 +0,0 @@ -# -name: Run build check for the Cinder Volume Netapp RXT compatible image - -on: - pull_request: - paths: - - Containerfiles/Cinder-volume-netapp-Containerfile - -# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. -env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. -jobs: - build-and-push-image: - runs-on: ubuntu-latest - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. 
- permissions: - contents: read - packages: read - steps: - - name: Checkout repository - uses: actions/checkout@v4 - # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. - - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - # ghcr only allows lowercase repository names - - name: lowercase repo name - run: | - echo "IMAGE_NAME=${GITHUB_REPOSITORY,,}" >>${GITHUB_ENV} - - name: Build Docker image - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 - with: - context: . - file: Containerfiles/Cinder-volume-netapp-Containerfile - push: false - tags: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/keystone-rxt:master-ubuntu_jammy - build-args: | - VERSION=master-ubuntu_jammy diff --git a/.github/workflows/smoke-glance.yml b/.github/workflows/smoke-glance.yml deleted file mode 100644 index 7d0567f6f..000000000 --- a/.github/workflows/smoke-glance.yml +++ /dev/null @@ -1,42 +0,0 @@ -# -name: Run build check for the Glance compatible image - -on: - pull_request: - paths: - - Containerfiles/Glance-Containerfile - -# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. -env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. -jobs: - build-and-push-image: - runs-on: ubuntu-latest - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. 
- permissions: - contents: read - packages: read - steps: - - name: Checkout repository - uses: actions/checkout@v4 - # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. - - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Build Docker image - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 - with: - context: . - file: Containerfiles/Glance-Containerfile - push: false - tags: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/glance:master-ubuntu_jammy - build-args: | - VERSION=master-ubuntu_jammy - PLUGIN_VERSION=master diff --git a/.github/workflows/smoke-heat-rxt.yml b/.github/workflows/smoke-heat-rxt.yml deleted file mode 100644 index f038661d0..000000000 --- a/.github/workflows/smoke-heat-rxt.yml +++ /dev/null @@ -1,41 +0,0 @@ -# -name: Run build check for the Heat image - -on: - pull_request: - paths: - - Containerfiles/HeatRXT-Containerfile - -# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. -env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. -jobs: - build-and-push-image: - runs-on: ubuntu-latest - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. - permissions: - contents: read - packages: read - steps: - - name: Checkout repository - uses: actions/checkout@v4 - # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. 
Once published, the packages are scoped to the account defined here. - - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Build Docker image - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 - with: - context: . - file: Containerfiles/HeatRXT-Containerfile - push: false - tags: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/heat:2024.1-ubuntu_jammy - build-args: | - VERSION=2024.1-ubuntu_jammy diff --git a/.github/workflows/smoke-horizon-rxt.yml b/.github/workflows/smoke-horizon-rxt.yml deleted file mode 100644 index 5c96b5072..000000000 --- a/.github/workflows/smoke-horizon-rxt.yml +++ /dev/null @@ -1,42 +0,0 @@ -# -name: Run build check for the Horizon RXT compatible image - -on: - pull_request: - paths: - - Containerfiles/HorizonRXT-Containerfile - -# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. -env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. -jobs: - build-and-push-image: - runs-on: ubuntu-latest - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. - permissions: - contents: read - packages: read - steps: - - name: Checkout repository - uses: actions/checkout@v4 - # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. 
- - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Build Docker image - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 - with: - context: . - file: Containerfiles/HorizonRXT-Containerfile - push: false - tags: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/horizon-rxt:master-ubuntu_jammy - build-args: | - VERSION=master-ubuntu_jammy - PLUGIN_VERSION=master diff --git a/.github/workflows/smoke-nova-uefi.yml b/.github/workflows/smoke-nova-uefi.yml deleted file mode 100644 index f9aa31d8d..000000000 --- a/.github/workflows/smoke-nova-uefi.yml +++ /dev/null @@ -1,42 +0,0 @@ -# -name: Run build check for the Nova EFI compatible image - -on: - pull_request: - paths: - - Containerfiles/NovaEFI-Containerfile - -# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. -env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. -jobs: - build-and-push-image: - runs-on: ubuntu-latest - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. - permissions: - contents: read - packages: read - steps: - - name: Checkout repository - uses: actions/checkout@v4 - # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. 
- - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Build Docker image - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 - with: - context: . - file: Containerfiles/NovaEFI-Containerfile - push: false - tags: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/nova-efi:master-ubuntu_jammy - build-args: | - VERSION=master-ubuntu_jammy - PLUGIN_VERSION=master diff --git a/.github/workflows/smoke-octavia-ovn.yml b/.github/workflows/smoke-octavia-ovn.yml deleted file mode 100644 index ffebf1def..000000000 --- a/.github/workflows/smoke-octavia-ovn.yml +++ /dev/null @@ -1,42 +0,0 @@ -# -name: Run build check for the Octavia OVN compatible image - -on: - pull_request: - paths: - - Containerfiles/OctaviaOVN-Containerfile - -# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. -env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. -jobs: - build-and-push-image: - runs-on: ubuntu-latest - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. - permissions: - contents: read - packages: read - steps: - - name: Checkout repository - uses: actions/checkout@v4 - # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. 
- - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Build Docker image - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 - with: - context: . - file: Containerfiles/OctaviaOVN-Containerfile - push: false - tags: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/octavia-ovn:master-ubuntu_jammy - build-args: | - VERSION=master-ubuntu_jammy - PLUGIN_VERSION=master diff --git a/.github/workflows/testing-deploy-openstack.yaml b/.github/workflows/testing-deploy-openstack.yaml deleted file mode 100644 index 60517178f..000000000 --- a/.github/workflows/testing-deploy-openstack.yaml +++ /dev/null @@ -1,46 +0,0 @@ -name: testing-openstack-deploy - -on: - workflow_run: - workflows: - - Migrate Images to QUAY - types: - - completed - workflow_dispatch: - -jobs: - deploy: - - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Set up Python environment - run: sudo apt-get update && sudo apt-get install -y python3-pip - - - name: Install requirements - run: pip3 install -r testing/requirements.txt - - - name: Create OpenStack config directory - run: mkdir -p ~/.config/openstack - - - name: Retrieve clouds.yaml from GitHub secrets - env: - CLOUDS_YAML: ${{ secrets.CLOUDS_YAML }} - run: | - echo "$CLOUDS_YAML" > ~/.config/openstack/clouds.yaml - - - name: Retrieve env.yaml from GitHub secrets - env: - ENV_YAML: ${{ secrets.ENV_YAML }} - run: | - echo "$ENV_YAML" > ~/env.yaml - - - name: Run deployment script - run: bash testing/doit.sh - - - name: Cleanup - if: always() - run: bash testing/cleanup.sh diff --git a/.original-images.json b/.original-images.json index 876901b0e..74abf9c4a 100644 --- a/.original-images.json +++ b/.original-images.json @@ -1,6 +1,6 @@ [ + "cr.fluentbit.io/fluent/fluent-bit:4.0.7", 
"docker.io/docker:17.07.0", - "docker.io/kolla/centos-source-openvswitch-vswitchd:master", "docker.io/kolla/ubuntu-source-nova-compute-ironic:master", "docker.io/library/postgres:14.5", "docker.io/library/postgres:14.5", @@ -11,13 +11,15 @@ "docker.io/openstackhelm/designate:2024.1-ubuntu_jammy", "docker.io/openstackhelm/glance:2024.1-ubuntu_jammy", "docker.io/openstackhelm/horizon:2023.1-ubuntu_jammy", + "docker.io/openstackhelm/ironic:2024.1-ubuntu_jammy", "docker.io/openstackhelm/magnum:2024.1-ubuntu_jammy", - "docker.io/openstackhelm/masakari:2024.1-ubuntu_jammy", "docker.io/openstackhelm/masakari-monitors:2024.1-ubuntu_jammy", + "docker.io/openstackhelm/masakari:2024.1-ubuntu_jammy", "docker.io/openstackhelm/neutron:2024.1-ubuntu_jammy", "docker.io/openstackhelm/osh-selenium:latest-ubuntu_jammy", "docker.io/openstackhelm/ospurge:latest", "docker.io/openstackhelm/placement:2024.1-ubuntu_jammy", + "docker.io/pbandark/barbican-exporter", "docker.io/rabbitmq:3.13-management", "docker.io/wrouesnel/postgres_exporter:v0.4.6", "docker.io/xrally/xrally-openstack:2.0.0", @@ -27,18 +29,22 @@ "ghcr.io/rackerlabs/genestack/glance:2024.1-ubuntu_jammy-1740121591", "ghcr.io/rackerlabs/genestack/gnocchi:2024.1-ubuntu_jammy-1738626728", "ghcr.io/rackerlabs/genestack/heat:2024.1-ubuntu_jammy-1738626724", + "ghcr.io/rackerlabs/genestack/magnum:2024.1-ubuntu_jammy-1742991496", "ghcr.io/rackerlabs/genestack/neutron-oslodb:2024.1-ubuntu_jammy-1738626982", "ghcr.io/rackerlabs/genestack/neutron-oslodb:2024.1-ubuntu_jammy-1739651767", "ghcr.io/rackerlabs/genestack/neutron-oslodb:2024.1-ubuntu_jammy-1742943886", + "ghcr.io/rackerlabs/genestack/neutron-oslodb:2024.1-ubuntu_jammy-1750715539", + "ghcr.io/rackerlabs/genestack/neutron-oslodb:2024.1-ubuntu_jammy-1750723622", + "ghcr.io/rackerlabs/genestack/neutron-oslodb:2024.1-ubuntu_jammy-1750797661", "ghcr.io/rackerlabs/genestack/nova-efi:2024.1-ubuntu_jammy-1737928811", + 
"ghcr.io/rackerlabs/genestack/nova-efi:2025.1-ubuntu_jammy-1750943616", "ghcr.io/rackerlabs/genestack/octavia-ovn:2024.1-ubuntu_jammy-1737651745", "ghcr.io/rackerlabs/keystone-rxt:2024.1-ubuntu_jammy-1747958291", - "ghcr.io/rackerlabs/keystone-rxt/shibd:1747958286", "ghcr.io/rackerlabs/skyline-rxt:master-ubuntu_jammy-1748595671", - "docker.io/openstackhelm/ironic:2024.1-ubuntu_jammy", "ghcr.io/vexxhost/netoffload:v1.0.1", "quay.io/airshipit/kubernetes-entrypoint:latest-ubuntu_jammy", "quay.io/airshipit/porthole-postgresql-utility:latest-ubuntu_bionic", - "cr.fluentbit.io/fluent/fluent-bit", - "docker.io/pbandark/barbican-exporter" + "quay.io/airshipit/freezer:2025.1-ubuntu_jammy", + "quay.io/airshipit/freezer-api:2025.1-ubuntu_jammy", + "quay.io/airshipit/blazar:2025.1-ubuntu_jammy" ] diff --git a/Containerfiles/Glance-Containerfile b/Containerfiles/Glance-Containerfile deleted file mode 100644 index a710b16b7..000000000 --- a/Containerfiles/Glance-Containerfile +++ /dev/null @@ -1,15 +0,0 @@ -# Patch oslo_db to help with deadlocks -ARG VERSION=master-ubuntu_jammy -FROM openstackhelm/glance:$VERSION as build -ARG PLUGIN_VERSION=master -RUN apt update && apt install -y git -RUN export ORIG_PLUGIN_VERSION="${PLUGIN_VERSION}"; \ -if [ "${PLUGIN_VERSION}" != 'master' ]; then export PLUGIN_VERSION=stable/${PLUGIN_VERSION}; fi; \ -. /var/lib/openstack/bin/activate; \ -/var/lib/openstack/bin/pip install boto3 os-brick \ - git+https://github.com/openstack/python-cinderclient@${PLUGIN_VERSION}#egg=python-cinderclient \ - git+https://github.com/openstack/oslo.db@${PLUGIN_VERSION}#egg=oslo_db \ - git+https://github.com/openstack/glance@${PLUGIN_VERSION}#egg=glance - -FROM openstackhelm/glance:${VERSION} -COPY --from=build /var/lib/openstack/. 
/var/lib/openstack/ diff --git a/Containerfiles/HeatRXT-Containerfile b/Containerfiles/HeatRXT-Containerfile deleted file mode 100644 index 3a4d47fe7..000000000 --- a/Containerfiles/HeatRXT-Containerfile +++ /dev/null @@ -1,13 +0,0 @@ -ARG VERSION=master-ubuntu_jammy -FROM openstackhelm/heat:${VERSION} as build -RUN apt-get update && apt-get install -y git && apt clean -RUN /var/lib/openstack/bin/pip install git+https://opendev.org/openstack/heat.git@stable/2024.1 -RUN /var/lib/openstack/bin/pip install --upgrade --force-reinstall pip -RUN find /var/lib/openstack -regex '^.*\(__pycache__\|\.py[co]\)$' -delete - -FROM openstackhelm/heat:${VERSION} -COPY --from=build /var/lib/openstack/. /var/lib/openstack/ -COPY Containerfiles/patches/heat_keystone_v3_patch.diff /tmp/heat_keystone_v3_patch.diff -RUN apt-get update && apt-get install -y git -RUN cd /var/lib/openstack/lib/python3.10/site-packages/ && git apply /tmp/heat_keystone_v3_patch.diff -RUN rm /tmp/heat_keystone_v3_patch.diff diff --git a/Containerfiles/HorizonRXT-Containerfile b/Containerfiles/HorizonRXT-Containerfile deleted file mode 100644 index 696b2c971..000000000 --- a/Containerfiles/HorizonRXT-Containerfile +++ /dev/null @@ -1,15 +0,0 @@ -ARG VERSION=master-ubuntu_jammy -FROM openstackhelm/horizon:${VERSION} as build -ARG PLUGIN_VERSION=master -RUN apt update && apt install -y git -RUN /var/lib/openstack/bin/pip install --upgrade --force-reinstall pip -RUN export ORIG_PLUGIN_VERSION="${PLUGIN_VERSION}"; \ - if [ "${PLUGIN_VERSION}" != 'master' ]; then export PLUGIN_VERSION=stable/${PLUGIN_VERSION}; fi; \ - . 
/var/lib/openstack/bin/activate; \ - /var/lib/openstack/bin/pip install --constraint=https://releases.openstack.org/constraints/upper/${ORIG_PLUGIN_VERSION} \ - git+https://opendev.org/openstack/heat-dashboard@${PLUGIN_VERSION}#egg=heat_dashboard \ - git+https://opendev.org/openstack/octavia-dashboard@${PLUGIN_VERSION}#egg=octavia_dashboard -RUN find /var/lib/openstack -regex '^.*\(__pycache__\|\.py[co]\)$' -delete - -FROM openstackhelm/horizon:${VERSION} -COPY --from=build /var/lib/openstack/. /var/lib/openstack/ diff --git a/Containerfiles/MagnumRXT-Containerfile b/Containerfiles/MagnumRXT-Containerfile deleted file mode 100644 index 5b6c78b36..000000000 --- a/Containerfiles/MagnumRXT-Containerfile +++ /dev/null @@ -1,14 +0,0 @@ -ARG VERSION=master-ubuntu_jammy -FROM openstackhelm/magnum:${VERSION} as build -ARG PLUGIN_VERSION=master -RUN apt-get update && apt-get install -y git && apt clean -RUN export ORIG_PLUGIN_VERSION="${PLUGIN_VERSION}"; \ -if [ "${PLUGIN_VERSION}" != 'master' ]; then export PLUGIN_VERSION=stable/${PLUGIN_VERSION}; fi; \ -/var/lib/openstack/bin/activate; \ -/var/lib/openstack/bin/pip install git+https://github.com/openstack/oslo.db@${PLUGIN_VERSION}#egg=oslo_db \ - git+https://opendev.org/openstack/magnum-capi-helm@${PLUGIN_VERSION}#egg=magnum_capi_helm -RUN /var/lib/openstack/bin/pip install --upgrade --force-reinstall pip -RUN find /var/lib/openstack -regex '^.*\(__pycache__\|\.py[co]\)$' -delete - -FROM openstackhelm/magnum:${VERSION} -COPY --from=build /var/lib/openstack/. 
/var/lib/openstack/ diff --git a/Containerfiles/Neutron-oslo_db-Containerfile b/Containerfiles/Neutron-oslo_db-Containerfile deleted file mode 100644 index 45a9647cd..000000000 --- a/Containerfiles/Neutron-oslo_db-Containerfile +++ /dev/null @@ -1,15 +0,0 @@ -# Patch oslo_db to help with deadlocks -ARG VERSION=master-ubuntu_jammy -FROM openstackhelm/neutron:$VERSION as build -ARG PLUGIN_VERSION=master -ARG NEUTRON_VERSION=master -RUN apt update && apt install -y git -RUN export ORIG_PLUGIN_VERSION="${PLUGIN_VERSION}"; \ -if [ "${PLUGIN_VERSION}" != 'master' ]; then export PLUGIN_VERSION=stable/${PLUGIN_VERSION}; fi; \ -. /var/lib/openstack/bin/activate; \ -/var/lib/openstack/bin/pip install --upgrade \ -git+https://github.com/openstack/oslo.db@${PLUGIN_VERSION}#egg=oslo_db \ -git+https://github.com/rackerlabs/neutron@${NEUTRON_VERSION}#egg=neutron - -FROM openstackhelm/neutron:${VERSION} -COPY --from=build /var/lib/openstack/. /var/lib/openstack/ diff --git a/Containerfiles/Nova-oslo_db-Containerfile b/Containerfiles/Nova-oslo_db-Containerfile deleted file mode 100644 index a335e8fa7..000000000 --- a/Containerfiles/Nova-oslo_db-Containerfile +++ /dev/null @@ -1,12 +0,0 @@ -# Patch oslo_db to help with deadlocks -ARG VERSION=master-ubuntu_jammy -FROM openstackhelm/nova:$VERSION as build -ARG PLUGIN_VERSION=master -RUN apt update && apt install -y git -RUN export ORIG_PLUGIN_VERSION="${PLUGIN_VERSION}"; \ -if [ "${PLUGIN_VERSION}" != 'master' ]; then export PLUGIN_VERSION=stable/${PLUGIN_VERSION}; fi; \ -. /var/lib/openstack/bin/activate; \ -/var/lib/openstack/bin/pip install git+https://github.com/openstack/oslo.db@${PLUGIN_VERSION}#egg=oslo_db - -FROM openstackhelm/nova:${VERSION} -COPY --from=build /var/lib/openstack/. 
/var/lib/openstack/ diff --git a/Containerfiles/NovaEFI-Containerfile b/Containerfiles/NovaEFI-Containerfile deleted file mode 100644 index 0c7ab5e2d..000000000 --- a/Containerfiles/NovaEFI-Containerfile +++ /dev/null @@ -1,22 +0,0 @@ -ARG VERSION=master-ubuntu_jammy -FROM openstackhelm/nova:$VERSION as build -ARG PLUGIN_VERSION=master -RUN apt update && apt install -y git -RUN export ORIG_PLUGIN_VERSION="${PLUGIN_VERSION}"; \ -if [ "${PLUGIN_VERSION}" != 'master' ]; then export PLUGIN_VERSION=stable/${PLUGIN_VERSION}; fi; \ -. /var/lib/openstack/bin/activate; \ -/var/lib/openstack/bin/pip install git+https://github.com/openstack/oslo.db@${PLUGIN_VERSION}#egg=oslo_db && \ -/var/lib/openstack/bin/pip install python-barbicanclient - -FROM openstackhelm/nova:${VERSION} -COPY --from=build /var/lib/openstack/. /var/lib/openstack/ -# Packages for the following features: -# - Nova: EFI -# - Nova: iSCSI -# Py Packages for the following features: -# - Nova: Libosinfo -RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y ovmf open-iscsi multipath-tools libgirepository-1.0-1 libgirepository1.0-dev \ - libcairo2-dev python3-dev gcc libosinfo-bin gir1.2-libosinfo-1.0 nfs-common cryptsetup nvme-cli; \ - rm -rf /var/cache/apt/archives /var/lib/apt/lists; \ - apt clean; /var/lib/openstack/bin/pip install pygobject; \ - find /var/lib/openstack -regex '^.*\(__pycache__\|\.py[co]\)$' -delete diff --git a/Containerfiles/OctaviaOVN-Containerfile b/Containerfiles/OctaviaOVN-Containerfile deleted file mode 100644 index c0d618a8c..000000000 --- a/Containerfiles/OctaviaOVN-Containerfile +++ /dev/null @@ -1,18 +0,0 @@ -ARG VERSION=master-ubuntu_jammy -FROM openstackhelm/octavia:$VERSION as build -ARG PLUGIN_VERSION=master -ARG OVN_PLUGIN_VERSION=master -RUN apt update && apt install -y git -RUN /var/lib/openstack/bin/pip install --index-url https://pypi.python.org/simple --upgrade pip -RUN if [ "${PLUGIN_VERSION}" != 'master' ]; then export 
PLUGIN_VERSION=stable/${PLUGIN_VERSION}; fi; \ -. /var/lib/openstack/bin/activate; \ -/var/lib/openstack/bin/pip install git+https://github.com/openstack/oslo.db@${PLUGIN_VERSION}#egg=oslo_db -RUN . /var/lib/openstack/bin/activate; \ -if [ "${OVN_PLUGIN_VERSION}" = 'master' ]; then \ -/var/lib/openstack/bin/pip install git+https://github.com/openstack/ovn-octavia-provider@${OVN_PLUGIN_VERSION}#egg=ovn_octavia_provider; \ -else \ -/var/lib/openstack/bin/pip install --index-url https://pypi.python.org/simple ovn-octavia-provider==${OVN_PLUGIN_VERSION}; \ -fi - -FROM openstackhelm/octavia:${VERSION} -COPY --from=build /var/lib/openstack/. /var/lib/openstack/ diff --git a/ansible/playbooks/deploy-cinder-netapp-volumes-reference.yaml b/ansible/playbooks/deploy-cinder-netapp-volumes-reference.yaml index 50cf5db3b..4c1414ac0 100644 --- a/ansible/playbooks/deploy-cinder-netapp-volumes-reference.yaml +++ b/ansible/playbooks/deploy-cinder-netapp-volumes-reference.yaml @@ -8,7 +8,13 @@ cinder_storage_network_interface: ansible_br_storage cinder_storage_network_interface_secondary: ansible_br_storage_secondary cinder_backend_name: "block-ha-performance-at-rest-encrypted,block-ha-standard-at-rest-encrypted,block-ha-performance-end-to-end-encrypted,block-ha-standard-end-to-end-encrypted" + virtualenv_path: "/opt/cinder" storage_network_multipath: false + enable_netapp_ssl: false + netapp_cert_src_dir: "/opt/genestack/ansible/playbooks/templates/" + netapp_cert_filenames: + - "ontap-cluster-host.crt" + - "ontap-vserver-host.crt" handlers: - name: Restart cinder-volume-netapp systemd services ansible.builtin.systemd: @@ -65,6 +71,44 @@ regexp: '^InitiatorName=.*|^GenerateName=.*' line: "InitiatorName={{ initiator_name }}" + - name: Copy NetApp client certificates Debian + ansible.builtin.copy: + src: "{{ netapp_cert_src_dir }}/{{ item }}" + dest: "/usr/local/share/ca-certificates/{{ item }}" + owner: root + group: root + mode: '0644' + when: + - enable_netapp_ssl | bool + - 
ansible_os_family | lower == "debian" + loop: "{{ netapp_cert_filenames }}" + + - name: Copy NetApp client certificates Redhat + ansible.builtin.copy: + src: "{{ netapp_cert_src_dir }}/{{ item }}" + dest: "/etc/pki/ca-trust/source/anchors/{{ item }}" + owner: root + group: root + mode: '0644' + when: + - enable_netapp_ssl | bool + - ansible_os_family | lower == "redhat" + loop: "{{ netapp_cert_filenames }}" + + - name: Update CA certificate trust Debian + ansible.builtin.command: + cmd: /usr/sbin/update-ca-certificates + when: + - enable_netapp_ssl | bool + - ansible_os_family | lower == "debian" + + - name: Update CA certificate trust Redhat + ansible.builtin.command: + cmd: /usr/sbin/update-ca-trust extract + when: + - enable_netapp_ssl | bool + - ansible_os_family | lower == "redhat" + - name: Upgrade pip and install required packages ansible.builtin.pip: name: @@ -73,9 +117,39 @@ - "git+https://github.com/openstack/cinder@stable/{{ cinder_release }}" - "git+https://github.com/rackerlabs/cinder-rxt.git" state: present - virtualenv: /opt/cinder + virtualenv: "{{ virtualenv_path }}" virtualenv_command: python3 -m venv + - name: "Get Python site-packages path from virtualenv" + command: "{{ virtualenv_path }}/bin/python -c 'import site; print(site.getsitepackages()[0])'" + register: venv_site + changed_when: false + + - name: "Normalize site-packages path" + set_fact: + venv_site_packages: "{{ venv_site.stdout | trim }}" + + - name: "Ensure site-packages exists" + file: + path: "{{ venv_site_packages }}" + state: directory + when: venv_site_packages != "" + + - name: Install eventlet SSL patch + ansible.builtin.copy: + src: "{{ item.src }}" + dest: "{{ item.dest }}" + owner: root + group: root + mode: "{{ item.mode | default('0644') }}" + loop: + - src: "{{ playbook_dir }}/templates/zzz_eventlet_ssl_patch.pth" + dest: "{{ venv_site_packages }}/zzz_eventlet_ssl_patch.pth" + mode: "0644" + - src: "{{ playbook_dir }}/templates/eventlet_ssl_patch.py" + dest: "{{ 
venv_site_packages }}/eventlet_ssl_patch.py" + mode: "0644" + - name: Create the cinder system user ansible.builtin.user: name: cinder diff --git a/ansible/playbooks/extra/custom_exporters/md_info_detail.sh b/ansible/playbooks/extra/custom_exporters/md_info_detail.sh index 5c21b4c5a..c85eb0461 100644 --- a/ansible/playbooks/extra/custom_exporters/md_info_detail.sh +++ b/ansible/playbooks/extra/custom_exporters/md_info_detail.sh @@ -79,7 +79,7 @@ for MD_DEVICE in /dev/md*; do if echo "$line" | grep -E -qv "^/|Array Size|Used Dev Size|Events|Update Time|Check Status|Rebuild Status" ; then echo -n ", " MDADM_DETAIL_KEY=$(echo "$line" | cut -d ":" -f 1 | tr -cd '[a-zA-Z0-9]._-') - MDADM_DETAIL_VALUE=$(echo "$line" | cut -d ":" -f 2- | sed 's:^ ::') + MDADM_DETAIL_VALUE=$(echo "$line" | cut -d ":" -f 2- | sed 's:^ ::' | sed 's: $::') echo -n "${MDADM_DETAIL_KEY}=\"${MDADM_DETAIL_VALUE}\"" fi fi diff --git a/ansible/playbooks/octavia-preconf-main.yaml b/ansible/playbooks/octavia-preconf-main.yaml index 618a1c7ba..e0bc5076d 100644 --- a/ansible/playbooks/octavia-preconf-main.yaml +++ b/ansible/playbooks/octavia-preconf-main.yaml @@ -14,6 +14,9 @@ octavia_os_identity_api_version: 3 octavia_os_auth_version: 3 octavia_nova_endpoint_type: "{{ octavia_os_endpoint_type }}" + octavia_helm_values_file: "{{ octavia_helm_file }}" + interface: "{{ interface }}" + endpoint_type: "{{ endpoint_type }}" environment: OS_ENDPOINT_TYPE: "{{ octavia_os_endpoint_type }}" OS_INTERFACE: "{{ octavia_os_interface}}" diff --git a/ansible/playbooks/templates/eventlet_ssl_patch.py b/ansible/playbooks/templates/eventlet_ssl_patch.py new file mode 100644 index 000000000..4b5f03098 --- /dev/null +++ b/ansible/playbooks/templates/eventlet_ssl_patch.py @@ -0,0 +1,13 @@ +import os, ssl + +os.environ.setdefault("EVENTLET_NO_GREENDNS", "yes") +try: + from eventlet.green import ssl as gssl + + def _safe_green_create_default_context(*a, **kw): + return ssl._create_default_https_context(*a, **kw) + + 
gssl.green_create_default_context = _safe_green_create_default_context +except Exception as e: + # don't crash the process if eventlet isn't here yet + pass diff --git a/ansible/playbooks/templates/zzz_eventlet_ssl_patch.pth b/ansible/playbooks/templates/zzz_eventlet_ssl_patch.pth new file mode 100644 index 000000000..2bc84d6c5 --- /dev/null +++ b/ansible/playbooks/templates/zzz_eventlet_ssl_patch.pth @@ -0,0 +1 @@ +import eventlet_ssl_patch diff --git a/ansible/roles/host_setup/files/queue_max.sh b/ansible/roles/host_setup/files/queue_max.sh index 3f593c64b..f341e3f1e 100644 --- a/ansible/roles/host_setup/files/queue_max.sh +++ b/ansible/roles/host_setup/files/queue_max.sh @@ -16,7 +16,7 @@ set -e function ethernetDevs () { # Returns all physical devices ip -details -json link show | jq -r '.[] | - if .linkinfo.info_kind // .link_type == "loopback" or (.ifname | test("idrac+")) then + if .linkinfo.info_kind // .link_type == "loopback" or (.ifname | test("idrac+")) or (.ifname | test("wlp+")) then empty else .ifname diff --git a/ansible/roles/host_setup/handlers/main.yml b/ansible/roles/host_setup/handlers/main.yml index 0c814b8c7..bcb81b0b1 100644 --- a/ansible/roles/host_setup/handlers/main.yml +++ b/ansible/roles/host_setup/handlers/main.yml @@ -65,3 +65,9 @@ ansible.builtin.apt: update_cache: yes cache_valid_time: 600 + +- name: Restart lldpd + ansible.builtin.systemd: + name: "lldpd.service" + state: "restarted" + enabled: true diff --git a/ansible/roles/host_setup/tasks/custom_multipath.yml b/ansible/roles/host_setup/tasks/custom_multipath.yml index 14c6aa96c..188225d3a 100644 --- a/ansible/roles/host_setup/tasks/custom_multipath.yml +++ b/ansible/roles/host_setup/tasks/custom_multipath.yml @@ -16,6 +16,7 @@ - name: Copy over multipath Round Robin configuration file when: - custom_multipath | default(false) | bool + - ('openstack_compute_nodes' in group_names) block: - name: Install Packages ansible.builtin.package: @@ -36,6 +37,8 @@ - name: Install 
open-iscsi and multipath on nova compute nodes when: - enable_iscsi | default(false) | bool + - custom_multipath | default(false) | bool + - ('openstack_compute_nodes' in group_names) block: - name: Install Packages ansible.builtin.package: @@ -61,3 +64,32 @@ notify: - Restart iscsid - Restart multipathd and multipath-tools service + +- name: Install open-iscsi on block nodes + when: + - enable_iscsi | default(false) | bool + - ('cinder_storage_nodes' in group_names) + block: + - name: Install Packages + ansible.builtin.package: + name: + - open-iscsi + state: "{{ iscsi_package_state | default('present') }}" + update_cache: true + - name: Determine initiator name + set_fact: + initiator_name: > + {% set _iqn = "iqn.2004-10.com." + ansible_distribution |lower() + ":" + ansible_hostname -%} + {% if ansible_iscsi_iqn is defined -%} + {% if (ansible_iscsi_iqn |length >= 15) -%} + {% set _iqn = ansible_iscsi_iqn -%} + {% endif -%} + {% endif -%} + {{ _iqn }} + - name: Set iscsi initiator name + ansible.builtin.lineinfile: + path: /etc/iscsi/initiatorname.iscsi + regexp: '^InitiatorName=.*|^GenerateName=.*' + line: "InitiatorName={{ initiator_name }}" + notify: + - Restart iscsid diff --git a/ansible/roles/host_setup/tasks/main.yml b/ansible/roles/host_setup/tasks/main.yml index 91fcafeef..1b54c5ee5 100644 --- a/ansible/roles/host_setup/tasks/main.yml +++ b/ansible/roles/host_setup/tasks/main.yml @@ -137,6 +137,17 @@ retries: 5 delay: 2 +# NOTE(cloudnull): This configuration will ensure that LLDP is working on all interfaces +# except our overlay and tenant networks. 
+- name: Create base LLDPD configuration + ansible.builtin.copy: + content: | + DAEMON_ARGS="-c -I *,!tap*,!ovn*,!genev*,!mirror*,!o-hm*" + dest: /etc/default/lldpd + mode: "0644" + notify: + - Restart lldpd + - name: Ensure timesyncd is running ansible.builtin.service: name: systemd-timesyncd diff --git a/ansible/roles/host_setup/tasks/raid_cli_tools.yml b/ansible/roles/host_setup/tasks/raid_cli_tools.yml index fdc4c082e..34dd68921 100644 --- a/ansible/roles/host_setup/tasks/raid_cli_tools.yml +++ b/ansible/roles/host_setup/tasks/raid_cli_tools.yml @@ -3,6 +3,10 @@ ansible.builtin.package_facts: manager: auto +- name: Load additional variables + include_vars: + file: "{{ role_path }}/vars/raid_cli_tools.yml" + - name: Install PERCCLI command line tool for DELL servers when: - ansible_system_vendor | lower == "dell inc." @@ -19,10 +23,13 @@ http_agent: Chrome/1337 validate_certs: false dest: "{{ dell_tools.download_path }}" + status_code: [200, 304] + mode: '0755' - name: Extract PERCCLI tar.gz ansible.builtin.unarchive: src: "{{ dell_tools.download_path }}" dest: "{{ dell_tools.tmp_dir }}" + remote_src: true - name: Install perccli APT when: ansible_os_family | lower == "debian" ansible.builtin.apt: @@ -64,27 +71,69 @@ - (ansible_system_vendor | lower == "hp" or ansible_system_vendor | lower == "hpe") - "'ssacli' not in ansible_facts.packages" block: + - name: Ensure keyring dir and staging dir exist + ansible.builtin.file: + path: "{{ item }}" + state: directory + mode: '0755' + loop: + - "{{ hp_tools.apt.gpg_keyring | dirname }}" + - "/usr/share/keyrings/hpe-keys.d" + become: true - name: Download HP tools apt keys ansible.builtin.uri: url: "{{ item.url }}" dest: "{{ item.download_file }}" - with_items: "{{ hp_tools.repo_keys }}" - when: - - hp_tools.repo_keys is defined + status_code: [200, 304] # accept Not Modified + mode: "0644" + loop: "{{ hp_tools.repo_keys }}" + when: hp_tools.repo_keys is defined register: download_keys_url - until: download_keys_url is 
success + changed_when: download_keys_url.status == 200 # 304 -> not changed retries: 2 delay: 4 - - name: Add HP tools apt keys to gpg + until: download_keys_url is success + - name: Remove existing consolidated HPE keyring (if corrupted) + ansible.builtin.file: + path: "{{ hp_tools.apt.gpg_keyring }}" + state: absent + become: true + - name: Ensure staging dir for ASCII keys exists + ansible.builtin.file: + path: "/tmp/hpe-keys.asc.d" + state: directory + mode: "0755" + - name: Stage downloaded ASCII keys + ansible.builtin.copy: + src: "{{ item.download_file }}" + dest: "/tmp/hpe-keys.asc.d/{{ item.url | basename }}" + remote_src: true + mode: "0644" + loop: "{{ hp_tools.repo_keys }}" + when: hp_tools.repo_keys is defined + - name: Concatenate ASCII keys + ansible.builtin.assemble: + src: "/tmp/hpe-keys.asc.d" + dest: "/tmp/hpe-keys.asc" + regexp: ".*\\.pub$" + mode: "0644" + - name: Build consolidated HPE keyring from ASCII bundle ansible.builtin.command: - cmd: "cat {{ item.download_file }} | gpg --dearmor | sudo tee -a {{ hp_tools.apt.gpg_keyring }} > /dev/null" - with_items: "{{ hp_tools.repo_keys }}" - when: - - hp_tools.repo_keys is defined - register: add_keys_url - until: add_keys_url is success - retries: 2 - delay: 2 + cmd: >- + gpg --batch --yes --dearmor + --output '{{ hp_tools.apt.gpg_keyring }}' + '/tmp/hpe-keys.asc' + register: dearmor_out + changed_when: dearmor_out.rc == 0 + become: true + - name: Remove ASCII staging files + ansible.builtin.file: + path: "/tmp/hpe-keys.asc.d" + state: absent + - name: Remove ASCII bundle + ansible.builtin.file: + path: "/tmp/hpe-keys.asc" + state: absent - name: Add HP tools MCP apt repositories ansible.builtin.apt_repository: repo: "{{ hp_tools.apt.deb_repo }}" diff --git a/ansible/roles/host_setup/vars/debian.yml b/ansible/roles/host_setup/vars/debian.yml index 7cf625a0c..e1adb329a 100644 --- a/ansible/roles/host_setup/vars/debian.yml +++ b/ansible/roles/host_setup/vars/debian.yml @@ -41,8 +41,8 @@ 
_host_distro_packages: - apt-utils - bridge-utils - cgroup-tools - - curl - cryptsetup + - curl - dmeventd - dstat - ebtables @@ -50,6 +50,8 @@ _host_distro_packages: - iptables - irqbalance - libkmod2 + - lldpd + - lsscsi - lvm2 - nfs-client - nvme-cli diff --git a/ansible/roles/host_setup/vars/raid-cli-tools.yml b/ansible/roles/host_setup/vars/raid-cli-tools.yml deleted file mode 100644 index ac8d87a39..000000000 --- a/ansible/roles/host_setup/vars/raid-cli-tools.yml +++ /dev/null @@ -1,33 +0,0 @@ ---- -hp_tools: - sdr_url: "https://downloads.linux.hpe.com/SDR" - repo_keys: - - {url: "{{hp_tools.sdr_url}}/hpPublicKey2048_key1.pub", download_file: "/tmp/hpPublicKey2048_key1.pub"} - - {url: "{{hp_tools.sdr_url}}/hpePublicKey2048_key1.pub", download_file: "/tmp/hpePublicKey2048_key1.pub"} - - {url: "{{hp_tools.sdr_url}}/hpePublicKey2048_key2.pub", download_file: "/tmp/hpePublicKey2048_key2.pub"} - apt: - mcp_version: "current" - gpg_keyring: "/usr/share/keyrings/hpePublicKey.gpg" - repo_str: "[signed-by={{hp_tools.apt.gpg_keyring}}] https://downloads.linux.hpe.com/SDR/repo/mcp" - deb_repo: "deb {{hp_tools.apt.repo_str}} {{ansible_lsb.codename}}/{{hp_tools.apt.mcp_version}} non-free" - deb_src_repo: "deb-src {{hp_tools.apt.repo_str}} {{ansible_lsb.codename}}/{{hp_tools.apt.mcp_version}} non-free" - state: "present" - repo_list_file: "mcp" - rpm: - url: "https://downloads.linux.hpe.com/repo/mcp" - dist: "{{ansible_distribution}}" - dist_ver: "{{ansible_distribution_version}}" - arch: "{{ansible_architecture}}" - filename: "mcp" - package: ssacli - -dell_tools: - file_tar_gz: "PERCCLI_7.2616.0_Linux.tar.gz" - url: "https://dl.dell.com/FOLDER11213122M/1/{{dell_tools.file_tar_gz}}" - tmp_dir: "/tmp/perccli" - download_path: "{{dell_tools.tmp_dir}}/{{dell_tools.file_tar_gz}}" - deb_file: "{{dell_tools.tmp_dir}}/perccli_007.2616.0000.0000_all.deb" - rpm_file: "{{dell_tools.tmp_dir}}/perccli-007.2616.0000.0000-1.noarch.rpm" - perccli_path: "/opt/MegaRAID/perccli/perccli64" 
- symlink_path: "/usr/sbin/perccli" - symlink_64_path: "/usr/sbin/perccli64" diff --git a/ansible/roles/host_setup/vars/raid_cli_tools.yml b/ansible/roles/host_setup/vars/raid_cli_tools.yml new file mode 100644 index 000000000..6a9f81bcc --- /dev/null +++ b/ansible/roles/host_setup/vars/raid_cli_tools.yml @@ -0,0 +1,47 @@ +--- +# --------- HPE / HP --------- +hpe_sdr_url: "https://downloads.linux.hpe.com/SDR" +hpe_gpg_keyring: "/usr/share/keyrings/hpePublicKey.gpg" +hpe_repo_base: "https://downloads.linux.hpe.com/SDR/repo/mcp" +hpe_mcp_version: "current" + +hpe_repo_str: "[signed-by={{ hpe_gpg_keyring }}] {{ hpe_repo_base }}" + +hpe_repo_keys: + - {url: "{{ hpe_sdr_url }}/hpPublicKey2048_key1.pub", download_file: "/tmp/hpPublicKey2048_key1.pub"} + - {url: "{{ hpe_sdr_url }}/hpePublicKey2048_key1.pub", download_file: "/tmp/hpePublicKey2048_key1.pub"} + - {url: "{{ hpe_sdr_url }}/hpePublicKey2048_key2.pub", download_file: "/tmp/hpePublicKey2048_key2.pub"} + +hp_tools: + sdr_url: "{{ hpe_sdr_url }}" + repo_keys: "{{ hpe_repo_keys }}" + apt: + mcp_version: "{{ hpe_mcp_version }}" + gpg_keyring: "{{ hpe_gpg_keyring }}" + repo_str: "{{ hpe_repo_str }}" + deb_repo: "deb {{ hpe_repo_str }} {{ ansible_lsb.codename }}/{{ hpe_mcp_version }} non-free" + deb_src_repo: "deb-src {{ hpe_repo_str }} {{ ansible_lsb.codename }}/{{ hpe_mcp_version }} non-free" + state: "present" + repo_list_file: "mcp" + rpm: + url: "https://downloads.linux.hpe.com/repo/mcp" + dist: "{{ ansible_distribution }}" + dist_ver: "{{ ansible_distribution_version }}" + arch: "{{ ansible_architecture }}" + filename: "mcp" + package: "ssacli" + +# --------- Dell --------- +dell_perc_file_tar_gz: "PERCCLI_7.2616.0_Linux.tar.gz" +dell_tmp_dir: "/tmp/perccli" + +dell_tools: + file_tar_gz: "{{ dell_perc_file_tar_gz }}" + url: "https://dl.dell.com/FOLDER11213122M/1/{{ dell_perc_file_tar_gz }}" + tmp_dir: "{{ dell_tmp_dir }}" + download_path: "{{ dell_tmp_dir }}/{{ dell_perc_file_tar_gz }}" + deb_file: "{{ 
dell_tmp_dir }}/perccli_007.2616.0000.0000_all.deb" + rpm_file: "{{ dell_tmp_dir }}/perccli-007.2616.0000.0000-1.noarch.rpm" + perccli_path: "/opt/MegaRAID/perccli/perccli64" + symlink_path: "/usr/sbin/perccli" + symlink_64_path: "/usr/sbin/perccli64" diff --git a/ansible/roles/host_setup/vars/ubuntu.yml b/ansible/roles/host_setup/vars/ubuntu.yml index 3e547cac9..429cc103c 100644 --- a/ansible/roles/host_setup/vars/ubuntu.yml +++ b/ansible/roles/host_setup/vars/ubuntu.yml @@ -49,6 +49,8 @@ _host_distro_packages: - iptables - irqbalance - libkmod2 + - lldpd + - lsscsi - lvm2 - nfs-client - nvme-cli diff --git a/ansible/roles/octavia_preconf/defaults/main.yml b/ansible/roles/octavia_preconf/defaults/main.yml index ff2e3b6bb..670b020a8 100644 --- a/ansible/roles/octavia_preconf/defaults/main.yml +++ b/ansible/roles/octavia_preconf/defaults/main.yml @@ -26,6 +26,7 @@ lb_mgmt_subnet_gateway: '172.16.29.1' amphora_icmp_enabled: true amphora_ssh_enabled: true lb_health_mgr_secgrp_name: "lb-health-mgr-secgroup" +lb_worker_secgrp_name: "lb-worker-secgroup" lb_mgmt_secgrp_name: "lb-mgmt-secgroup" # these are the defaults for the flavor, image and ssh keypair diff --git a/ansible/roles/octavia_preconf/files/create_worker_ports.sh b/ansible/roles/octavia_preconf/files/create_worker_ports.sh new file mode 100755 index 000000000..d4cb7149c --- /dev/null +++ b/ansible/roles/octavia_preconf/files/create_worker_ports.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# We need to create the ports with shell scripts +# the ansible module currently doesn't provide +# --host argument + +set -xe + +# Obtain the network_id and secgroup_id from and +# cloud name from ansible task +NET_ID=$1 +SECGRP_ID=$2 +CLOUD_NAME=$3 + +export OS_CLOUD=$CLOUD_NAME + +# Obtain the list of kubernetes nodes with +# "openstack-control-plane=enabled" label +CONTROLLER_IP_PORT_LIST=() +CTRLS=$(kubectl get nodes -l openstack-control-plane=enabled -o name | awk -F"/" '{print $2}') +for node in $CTRLS +do + node_short=$(echo 
"$node" | awk -F"." '{print $1}') + PORTNAME=octavia-worker-port-$node_short + if ! PORT_DATA=$(openstack port show "$PORTNAME" -c fixed_ips -f json); then + PORT_DATA=$(openstack port create "$PORTNAME" --security-group "$SECGRP_ID" \ + --device-owner Octavia:worker \ + --host="$node" \ + --network "$NET_ID" \ + -c fixed_ips \ + -f json) + fi + for IP in $(echo "$PORT_DATA" | awk 'BEGIN { FS = "\"" } /ip_address/ { print $(NF - 1) }'); do + CONTROLLER_IP_PORT_LIST+=("$IP:5555") + done +done + +readarray -t sorted < <(for item in "${CONTROLLER_IP_PORT_LIST[@]}"; do echo "${item}"; done | sort) +echo $(IFS=,; echo "${sorted[*]}") > /tmp/octavia_worker_controller_ip_port_list diff --git a/ansible/roles/octavia_preconf/tasks/main.yml b/ansible/roles/octavia_preconf/tasks/main.yml index 05ba0195f..afe6911d3 100644 --- a/ansible/roles/octavia_preconf/tasks/main.yml +++ b/ansible/roles/octavia_preconf/tasks/main.yml @@ -21,6 +21,11 @@ tags: - always +- name: import tasks to create worker ports + import_tasks: octavia_worker_ports.yml + tags: + - always + - name: import tasks to create amphora image, flavor and ssh keypair import_tasks: octavia_amphora_keypair_image_flavor.yml tags: diff --git a/ansible/roles/octavia_preconf/tasks/octavia_amphora_keypair_image_flavor.yml b/ansible/roles/octavia_preconf/tasks/octavia_amphora_keypair_image_flavor.yml index 2dfcdaa14..7ade3dc41 100644 --- a/ansible/roles/octavia_preconf/tasks/octavia_amphora_keypair_image_flavor.yml +++ b/ansible/roles/octavia_preconf/tasks/octavia_amphora_keypair_image_flavor.yml @@ -49,10 +49,10 @@ filename: /tmp/test-only-amphora-x64-haproxy-ubuntu-{{ amphora_image_version }}.qcow2 container_format: bare disk_format: qcow2 - visibility: public + visibility: private protected: true tags: - - amphora + - amphora interface: public register: push_amphora_image until: push_amphora_image is success diff --git a/ansible/roles/octavia_preconf/tasks/octavia_lb_net_setup.yml 
b/ansible/roles/octavia_preconf/tasks/octavia_lb_net_setup.yml index 679cdf46e..e1313b44f 100644 --- a/ansible/roles/octavia_preconf/tasks/octavia_lb_net_setup.yml +++ b/ansible/roles/octavia_preconf/tasks/octavia_lb_net_setup.yml @@ -30,3 +30,21 @@ until: create_lb_mgmt_subnet is success retries: 5 delay: 5 + +# Bit of a hack to ensure gateway is disabled until ansible-playbook releases the fix.. +# disabling the gateway works on 'updates' as-is but not on creates yet.. +- name: Update subnet for lb-mgmt-net + openstack.cloud.subnet: + name: lb-mgmt-subnet + state: present + enable_dhcp: true + cidr: "{{ lb_mgmt_subnet_cidr }}" + allocation_pool_start: "{{ lb_mgmt_subnet_pool_start }}" + allocation_pool_end: "{{ lb_mgmt_subnet_pool_end }}" + disable_gateway_ip: true + network_name: lb-mgmt-net + interface: public + register: create_lb_mgmt_subnet + until: create_lb_mgmt_subnet is success + retries: 5 + delay: 5 diff --git a/ansible/roles/octavia_preconf/tasks/octavia_sec_group.yml b/ansible/roles/octavia_preconf/tasks/octavia_sec_group.yml index d7180176f..3ed5c0070 100644 --- a/ansible/roles/octavia_preconf/tasks/octavia_sec_group.yml +++ b/ansible/roles/octavia_preconf/tasks/octavia_sec_group.yml @@ -81,3 +81,27 @@ until: lb_health_mgr_secgroup_r1 is success retries: 5 delay: 5 + +- name: Create security group for worker ports + openstack.cloud.security_group: + name: "{{ lb_worker_secgrp_name }}" + state: present + description: "security group for worker ports" + interface: public + register: create_lb_worker_secgroup + until: create_lb_worker_secgroup is success + retries: 5 + delay: 5 + +- name: Create Security group rule to allow traffic on port 5555 for worker + openstack.cloud.security_group_rule: + security_group: "{{ create_lb_worker_secgroup.security_group.id }}" + state: present + protocol: udp + port_range_min: 5555 + port_range_max: 5555 + interface: public + register: lb_worker_secgroup_r1 + until: lb_worker_secgroup_r1 is success + retries: 5 + 
delay: 5 diff --git a/ansible/roles/octavia_preconf/tasks/octavia_worker_ports.yml b/ansible/roles/octavia_preconf/tasks/octavia_worker_ports.yml new file mode 100644 index 000000000..96d92524f --- /dev/null +++ b/ansible/roles/octavia_preconf/tasks/octavia_worker_ports.yml @@ -0,0 +1,22 @@ +--- +# These are the tasks for creating worker +# ports for octavia; the ports are created with +# a shell script as the ansible modules currently +# don't support all the required params for creating +# ports +- name: Obtain the UUID of the lb-mgmt-net + openstack.cloud.networks_info: + name: lb-mgmt-net + interface: public + register: lb_mgmt_info + +- name: Obtain the UUID of the worker secgroup + openstack.cloud.security_group_info: + name: "{{ lb_worker_secgrp_name }}" + interface: public + register: lb_worker_secgrp_info + +- name: run the shell script to create worker ports if required + script: + cmd: create_worker_ports.sh {{ lb_mgmt_info.networks[0].id }} {{ lb_worker_secgrp_info.security_groups[0].id }} {{ lookup('env', 'OS_CLOUD') | default('openstack_helm', true) }} + creates: /tmp/octavia_worker_controller_ip_port_list diff --git a/base-helm-configs/barbican/barbican-helm-overrides.yaml b/base-helm-configs/barbican/barbican-helm-overrides.yaml index 543ef7074..5b7346e29 100644 --- a/base-helm-configs/barbican/barbican-helm-overrides.yaml +++ b/base-helm-configs/barbican/barbican-helm-overrides.yaml @@ -1,18 +1,18 @@ --- images: tags: - barbican_api: "quay.io/rackspace/rackerlabs-barbican:2024.1-ubuntu_jammy" - barbican_db_sync: "quay.io/rackspace/rackerlabs-barbican:2024.1-ubuntu_jammy" - bootstrap: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_drop: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_init: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - dep_check: "quay.io/rackspace/rackerlabs-kubernetes-entrypoint:latest-ubuntu_jammy" - image_repo_sync: "quay.io/rackspace/rackerlabs-docker:17.07.0" - ks_endpoints:
"quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_service: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_user: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - rabbit_init: "quay.io/rackspace/rackerlabs-rabbitmq:3.13-management" - scripted_test: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" + barbican_api: "ghcr.io/rackerlabs/genestack-images/barbican:2024.1-latest" + barbican_db_sync: "ghcr.io/rackerlabs/genestack-images/barbican:2024.1-latest" + bootstrap: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_drop: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_init: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + dep_check: "ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest" + image_repo_sync: null + ks_endpoints: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_service: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_user: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + rabbit_init: null + scripted_test: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" # NOTE: (brew) CPU requests values based on a three node # hyperconverged lab (/scripts/hyperconverged-lab.sh). 
@@ -25,6 +25,14 @@ pod: memory: "256Mi" cpu: "100m" limits: {} + lifecycle: + upgrades: + deployments: + revision_history: 3 + pod_replacement_strategy: RollingUpdate + rolling_update: + max_unavailable: 20% + max_surge: 3 dependencies: static: diff --git a/base-helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/base-helm-configs/ceilometer/ceilometer-helm-overrides.yaml index 34a66a2cf..8808dbd6f 100644 --- a/base-helm-configs/ceilometer/ceilometer-helm-overrides.yaml +++ b/base-helm-configs/ceilometer/ceilometer-helm-overrides.yaml @@ -1,17 +1,17 @@ --- images: tags: - test: "quay.io/rackspace/rackerlabs-xrally-openstack:2.0.0" + test: null ceilometer_db_sync: "quay.io/rackspace/rackerlabs-ceilometer:2024.1-ubuntu_jammy" - rabbit_init: "quay.io/rackspace/rackerlabs-rabbitmq:3.13-management" - ks_user: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_service: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" + rabbit_init: null + ks_user: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_service: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" ceilometer_central: "quay.io/rackspace/rackerlabs-ceilometer:2024.1-ubuntu_jammy" ceilometer_compute: "quay.io/rackspace/rackerlabs-ceilometer:2024.1-ubuntu_jammy" ceilometer_ipmi: "quay.io/rackspace/rackerlabs-ceilometer:2024.1-ubuntu_jammy" ceilometer_notification: "quay.io/rackspace/rackerlabs-ceilometer:2024.1-ubuntu_jammy" - dep_check: "quay.io/rackspace/rackerlabs-kubernetes-entrypoint:latest-ubuntu_jammy" - image_repo_sync: "quay.io/rackspace/rackerlabs-docker:17.07.0" + dep_check: "ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest" + image_repo_sync: null pull_policy: "Always" conf: @@ -504,7 +504,9 @@ conf: name: fields: payload.router.name resource_id: - fields: ["payload.router.id", "payload.id"] + fields: payload.router.id + event_type: + fields: event_type - event_type: floatingip.* traits: <<: *network_traits @@ -1051,9 +1053,13 @@ conf: ip.floating: 
event_create: - floatingip.create.end - event_delete: floatingip.delete.end + - router.create.end + event_delete: + - floatingip.delete.end + - router.delete.end event_update: - floatingip.update.end + - router.update.end event_attributes: id: resource_id user_id: user_id @@ -1691,7 +1697,6 @@ dependencies: central: jobs: - ceilometer-db-sync - - ceilometer-rabbit-init - ceilometer-ks-user services: - endpoint: internal @@ -1701,7 +1706,6 @@ dependencies: ipmi: jobs: - ceilometer-db-sync - - ceilometer-rabbit-init - ceilometer-ks-user services: - endpoint: internal @@ -1711,7 +1715,6 @@ dependencies: compute: jobs: - ceilometer-db-sync - - ceilometer-rabbit-init - ceilometer-ks-user services: - endpoint: internal @@ -1724,7 +1727,6 @@ dependencies: notification: jobs: - ceilometer-db-sync - - ceilometer-rabbit-init - ceilometer-ks-user services: - endpoint: internal @@ -1776,34 +1778,13 @@ endpoints: pod: resources: enabled: true - compute: - requests: - memory: {} - cpu: {} - limits: - memory: {} - cpu: {} notification: requests: - memory: {} - cpu: {} + memory: "256Mi" + cpu: "500m" limits: - memory: {} - cpu: {} - central: - requests: - memory: {} - cpu: {} - limits: - memory: {} - cpu: {} - ipmi: - requests: - memory: {} - cpu: {} - limits: - memory: {} - cpu: {} + memory: 2Gi + cpu: "2000m" replicas: central: 1 notification: 1 @@ -1813,7 +1794,7 @@ pod: revision_history: 3 pod_replacement_strategy: RollingUpdate rolling_update: - max_unavailable: 1 + max_unavailable: 20% max_surge: 3 daemonsets: pod_replacement_strategy: RollingUpdate @@ -1821,12 +1802,19 @@ pod: enabled: true min_ready_seconds: 0 max_unavailable: 20% + disruption_budget: + api: + min_available: 0 + termination_grace_period: + api: + timeout: 600 manifests: deployment_api: false deployment_collector: false ingress_api: false # using gnocchi so no db init + job_rabbit_init: false job_db_init: false job_db_init_mongodb: false job_ks_endpoints: false diff --git
a/base-helm-configs/cinder/cinder-helm-overrides.yaml b/base-helm-configs/cinder/cinder-helm-overrides.yaml index 3b9df2bdc..b1d3c70a7 100644 --- a/base-helm-configs/cinder/cinder-helm-overrides.yaml +++ b/base-helm-configs/cinder/cinder-helm-overrides.yaml @@ -7,29 +7,43 @@ labels: images: tags: - bootstrap: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - cinder_api: "quay.io/rackspace/rackerlabs-cinder:2024.1-ubuntu_jammy" - cinder_backup: "quay.io/rackspace/rackerlabs-cinder:2024.1-ubuntu_jammy" + bootstrap: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + cinder_api: "ghcr.io/rackerlabs/genestack-images/cinder:2024.1-latest" + cinder_backup: "ghcr.io/rackerlabs/genestack-images/cinder:2024.1-latest" cinder_backup_storage_init: "quay.io/rackspace/rackerlabs-ceph-config-helper:latest-ubuntu_jammy" - cinder_db_sync: "quay.io/rackspace/rackerlabs-cinder:2024.1-ubuntu_jammy" - cinder_scheduler: "quay.io/rackspace/rackerlabs-cinder:2024.1-ubuntu_jammy" + cinder_db_sync: "ghcr.io/rackerlabs/genestack-images/cinder:2024.1-latest" + cinder_scheduler: "ghcr.io/rackerlabs/genestack-images/cinder:2024.1-latest" cinder_storage_init: "quay.io/rackspace/rackerlabs-ceph-config-helper:latest-ubuntu_jammy" - cinder_volume: "quay.io/rackspace/rackerlabs-cinder:2024.1-ubuntu_jammy" - cinder_volume_usage_audit: "quay.io/rackspace/rackerlabs-cinder:2024.1-ubuntu_jammy" - db_drop: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_init: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - dep_check: "quay.io/rackspace/rackerlabs-kubernetes-entrypoint:latest-ubuntu_jammy" - image_repo_sync: "quay.io/rackspace/rackerlabs-docker:17.07.0" - ks_endpoints: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_service: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_user: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - rabbit_init: "quay.io/rackspace/rackerlabs-rabbitmq:3.13-management" - test: 
"quay.io/rackspace/rackerlabs-xrally-openstack:2.0.0" + cinder_volume: "ghcr.io/rackerlabs/genestack-images/cinder:2024.1-latest" + cinder_volume_usage_audit: "ghcr.io/rackerlabs/genestack-images/cinder:2024.1-latest" + db_drop: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_init: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + dep_check: "ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest" + image_repo_sync: null + ks_endpoints: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_service: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_user: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + rabbit_init: null + test: null # NOTE: (brew) requests cpu/mem values based on a three node # hyperconverged lab (/scripts/hyperconverged-lab.sh). # limit values based on defaults from the openstack-helm charts unless defined pod: + lifecycle: + upgrades: + deployments: + revision_history: 3 + pod_replacement_strategy: RollingUpdate + rolling_update: + max_unavailable: 20% + max_surge: 3 + disruption_budget: + api: + min_available: 0 + termination_grace_period: + api: + timeout: 60 resources: enabled: true api: @@ -89,6 +103,8 @@ conf: volume_clear: zero volume_driver: cinder_rxt.rackspace.RXTLVM volume_group: cinder-volumes-1 + policy: + "volume_extension:types_extra_specs:read_sensitive": "rule:xena_system_admin_or_project_reader" cinder: DEFAULT: allow_availability_zone_fallback: true diff --git a/base-helm-configs/designate/designate-helm-overrides.yaml b/base-helm-configs/designate/designate-helm-overrides.yaml index f7bfd57b5..8ca01d795 100644 --- a/base-helm-configs/designate/designate-helm-overrides.yaml +++ b/base-helm-configs/designate/designate-helm-overrides.yaml @@ -43,22 +43,22 @@ labels: images: tags: - bootstrap: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_init: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_drop: 
"quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - rabbit_init: "quay.io/rackspace/rackerlabs-rabbitmq:3.13-management" - ks_user: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_service: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_endpoints: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - dep_check: "quay.io/rackspace/rackerlabs-kubernetes-entrypoint:latest-ubuntu_jammy" - designate_db_sync: "quay.io/rackspace/rackerlabs-designate:2024.1-ubuntu_jammy" - designate_api: "quay.io/rackspace/rackerlabs-designate:2024.1-ubuntu_jammy" - designate_central: "quay.io/rackspace/rackerlabs-designate:2024.1-ubuntu_jammy" - designate_mdns: "quay.io/rackspace/rackerlabs-designate:2024.1-ubuntu_jammy" - designate_worker: "quay.io/rackspace/rackerlabs-designate:2024.1-ubuntu_jammy" - designate_producer: "quay.io/rackspace/rackerlabs-designate:2024.1-ubuntu_jammy" - designate_sink: "quay.io/rackspace/rackerlabs-designate:2024.1-ubuntu_jammy" - image_repo_sync: "quay.io/rackspace/rackerlabs-docker:17.07.0" + bootstrap: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_init: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_drop: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + rabbit_init: null + ks_user: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_service: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_endpoints: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + dep_check: "ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest" + designate_db_sync: "ghcr.io/rackerlabs/genestack-images/designate:2024.1-latest" + designate_api: "ghcr.io/rackerlabs/genestack-images/designate:2024.1-latest" + designate_central: "ghcr.io/rackerlabs/genestack-images/designate:2024.1-latest" + designate_mdns: "ghcr.io/rackerlabs/genestack-images/designate:2024.1-latest" + designate_worker: "ghcr.io/rackerlabs/genestack-images/designate:2024.1-latest" + designate_producer: 
"ghcr.io/rackerlabs/genestack-images/designate:2024.1-latest" + designate_sink: "ghcr.io/rackerlabs/genestack-images/designate:2024.1-latest" + image_repo_sync: null pull_policy: "IfNotPresent" local_registry: active: false @@ -121,7 +121,7 @@ pod: revision_history: 3 pod_replacement_strategy: RollingUpdate rolling_update: - max_unavailable: 1 + max_unavailable: 20% max_surge: 3 disruption_budget: api: @@ -138,9 +138,9 @@ pod: min_available: 0 termination_grace_period: api: - timeout: 30 + timeout: 60 mdns: - timeout: 30 + timeout: 60 resources: enabled: true api: diff --git a/base-helm-configs/glance/glance-helm-overrides.yaml b/base-helm-configs/glance/glance-helm-overrides.yaml index 7c09c62ed..43314bbc7 100644 --- a/base-helm-configs/glance/glance-helm-overrides.yaml +++ b/base-helm-configs/glance/glance-helm-overrides.yaml @@ -4,21 +4,21 @@ storage: pvc images: tags: - test: "quay.io/rackspace/rackerlabs-xrally-openstack:2.0.0" + test: null glance_storage_init: "quay.io/rackspace/rackerlabs-ceph-config-helper:latest-ubuntu_jammy" - glance_metadefs_load: "quay.io/rackspace/rackerlabs-glance:2024.1-ubuntu_jammy" - db_init: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - glance_db_sync: "quay.io/rackspace/rackerlabs-glance:2024.1-ubuntu_jammy" - db_drop: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_user: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_service: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_endpoints: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - rabbit_init: "quay.io/rackspace/rackerlabs-rabbitmq:3.13-management" - glance_api: "quay.io/rackspace/rackerlabs-glance:2024.1-ubuntu_jammy" + glance_metadefs_load: "ghcr.io/rackerlabs/genestack-images/glance:2024.1-latest" + db_init: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + glance_db_sync: "ghcr.io/rackerlabs/genestack-images/glance:2024.1-latest" + db_drop: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + 
ks_user: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_service: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_endpoints: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + rabbit_init: null + glance_api: "ghcr.io/rackerlabs/genestack-images/glance:2024.1-latest" # Bootstrap image requires curl - bootstrap: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - dep_check: "quay.io/rackspace/rackerlabs-kubernetes-entrypoint:latest-ubuntu_jammy" - image_repo_sync: "quay.io/rackspace/rackerlabs-docker:17.07.0" + bootstrap: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + dep_check: "ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest" + image_repo_sync: null bootstrap: enabled: true @@ -255,14 +255,14 @@ pod: revision_history: 3 pod_replacement_strategy: RollingUpdate rolling_update: - max_unavailable: 1 + max_unavailable: 20% max_surge: 3 disruption_budget: api: min_available: 0 termination_grace_period: api: - timeout: 30 + timeout: 60 probes: api: glance-api: diff --git a/base-helm-configs/gnocchi/gnocchi-helm-overrides.yaml b/base-helm-configs/gnocchi/gnocchi-helm-overrides.yaml index d3adf7d9c..1b413091e 100644 --- a/base-helm-configs/gnocchi/gnocchi-helm-overrides.yaml +++ b/base-helm-configs/gnocchi/gnocchi-helm-overrides.yaml @@ -4,16 +4,16 @@ images: db_init: "quay.io/rackspace/rackerlabs-gnocchi:2024.1-ubuntu_jammy" db_init_indexer: "quay.io/rackspace/rackerlabs-postgres:14.5" db_sync: "quay.io/rackspace/rackerlabs-gnocchi:2024.1-ubuntu_jammy" - dep_check: "quay.io/rackspace/rackerlabs-kubernetes-entrypoint:latest-ubuntu_jammy" + dep_check: "ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest" gnocchi_api: "quay.io/rackspace/rackerlabs-gnocchi:2024.1-ubuntu_jammy" gnocchi_metricd: "quay.io/rackspace/rackerlabs-gnocchi:2024.1-ubuntu_jammy" gnocchi_resources_cleaner: "quay.io/rackspace/rackerlabs-gnocchi:2024.1-ubuntu_jammy" gnocchi_statsd: 
"quay.io/rackspace/rackerlabs-gnocchi:2024.1-ubuntu_jammy" gnocchi_storage_init: "quay.io/rackspace/rackerlabs-ceph-config-helper:latest-ubuntu_jammy" - image_repo_sync: "quay.io/rackspace/rackerlabs-docker:17.07.0" - ks_endpoints: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_service: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_user: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" + image_repo_sync: null + ks_endpoints: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_service: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_user: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" ceph_client: user_secret_name: gnocchi-temp-keyring @@ -45,7 +45,7 @@ conf: gnocchi_api_wsgi: wsgi: processes: 2 - threads: 4 + threads: 1 paste: "app:gnocchiv1": paste.app_factory: "gnocchi.rest.app:app_factory" @@ -100,14 +100,28 @@ pod: limits: {} lifecycle: upgrades: + deployments: + revision_history: 3 + pod_replacement_strategy: RollingUpdate + rolling_update: + max_unavailable: 20% + max_surge: 3 daemonsets: + pod_replacement_strategy: RollingUpdate metricd: - enabled: true + enabled: false + min_ready_seconds: 0 max_unavailable: 20% - pod_replacement_strategy: RollingUpdate statsd: - enabled: true + enabled: false + min_ready_seconds: 0 max_unavailable: 20% + disruption_budget: + api: + min_available: 0 + termination_grace_period: + api: + timeout: 60 endpoints: fluentd: diff --git a/base-helm-configs/grafana/grafana-helm-overrides.yaml b/base-helm-configs/grafana/grafana-helm-overrides.yaml index df672ec91..6662ba0fa 100644 --- a/base-helm-configs/grafana/grafana-helm-overrides.yaml +++ b/base-helm-configs/grafana/grafana-helm-overrides.yaml @@ -1,16 +1,26 @@ --- + +# Set the custom_host variable to the desired hostname for Grafana +# This is used to set the domain and root_url in the grafana.ini file +# and the value of the custom_host variable must be a valid domain. 
custom_host: grafana.example.com + persistence: type: pvc enabled: true + storageClassName: general accessModes: - ReadWriteMany + nodeSelector: openstack-control-plane: enabled + ingress: enabled: false + image: - tag: 10.3.3 + tag: "10.3.3" + grafana.ini: paths: data: /var/lib/grafana/ @@ -24,14 +34,15 @@ grafana.ini: grafana_net: url: https://grafana.net server: - domain: "{{ .Values.custom_host }}" - root_url: https://{{ .Values.custom_host }} + domain: "{{ .Values.custom_host }}" # Ref: custom_host variable above + root_url: "https://{{ .Values.custom_host }}" # Ref: custom_host variable above database: type: mysql host: mariadb-cluster.grafana.svc:3306 user: $__file{/etc/secrets/grafana-db/username} password: $__file{/etc/secrets/grafana-db/password} name: grafana + datasources: datasources.yaml: apiversion: 1 @@ -46,6 +57,7 @@ datasources: access: proxy url: http://loki-gateway.{{ $.Release.Namespace }}.svc.cluster.local:80 editable: false + alerting: rules.yaml: groups: @@ -106,13 +118,66 @@ alerting: notifications: - uid: prom-alertmanager-notification annotations: - description: Checks app=ovs (ovs-ovn) pod logs for lines with string + description: >- + Checks app=ovs (ovs-ovn) pod logs for lines with string 'binding|INFO|cr-lrp' - summary: This alerts on rapid port claims for cr-lrp ports on OVN gateway nodes, - which overloads the OVN south database and interferes with the - function of the affected ports. + summary: >- + This alerts on rapid port claims for cr-lrp ports on OVN + gateway nodes, which overloads the OVN south database and + interferes with the function of the affected ports. 
labels: {} isPaused: false + # Generated UUID using 'uuidgen' + - uid: c14dd8fd-54ec-4e15-9813-e02cc3269899 + title: Neutron IPAM Duplicate Entry Error + condition: B + data: + - refId: A + queryType: instant + relativeTimeRange: + from: 60 + to: 0 + # Using same loki datasource as rule#ba943125-33ca-4e4e-85f8-13359a8e4d65 + datasourceUid: P8E80F9AEF21F6940 + model: + expr: rate({app="fluentbit"} |= `Duplicate entry|ERROR` [1m]) + queryType: instant + refId: A + - refId: B + relativeTimeRange: + # Past 60 seconds (can be adjusted further) + from: 60 + # 0 denotes till current time + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 1 + - 0 + type: gt + operator: + type: and + reducer: + type: avg + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: A + refId: B + type: threshold + noDataState: OK + execErrState: Error + for: 0s + notifications: + - uid: prom-alertmanager-notification + annotations: + summary: > + Checks for log lines containing 'Duplicate entry|ERROR' in nova logs. 
+ isPaused: false contactpoints.yaml: secret: apiVersion: 1 @@ -134,11 +199,13 @@ alerting: group_wait: 1s group_interval: 1s repeat_interval: 1s + plugins: - camptocamp-prometheus-alertmanager-datasource + extraSecretMounts: - name: grafana-db-secret-mount secretName: grafana-db - defaultMode: 440 + defaultMode: 0440 mountPath: /etc/secrets/grafana-db readOnly: true diff --git a/base-helm-configs/heat/heat-helm-overrides.yaml b/base-helm-configs/heat/heat-helm-overrides.yaml index 31c5d1f31..0fd78df91 100644 --- a/base-helm-configs/heat/heat-helm-overrides.yaml +++ b/base-helm-configs/heat/heat-helm-overrides.yaml @@ -1,28 +1,52 @@ --- images: tags: - bootstrap: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_drop: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_init: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - dep_check: "quay.io/rackspace/rackerlabs-kubernetes-entrypoint:latest-ubuntu_jammy" - heat_api: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - heat_cfn: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - heat_cloudwatch: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - heat_db_sync: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - heat_engine: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - heat_engine_cleaner: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - heat_purge_deleted: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - image_repo_sync: "quay.io/rackspace/rackerlabs-docker:17.07.0" - ks_endpoints: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_service: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_user: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - rabbit_init: "quay.io/rackspace/rackerlabs-rabbitmq:3.13-management" - test: "quay.io/rackspace/rackerlabs-xrally-openstack:2.0.0" + bootstrap: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_drop: 
"ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_init: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + dep_check: "ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest" + heat_api: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + heat_cfn: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + heat_cloudwatch: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + heat_db_sync: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + heat_engine: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + heat_engine_cleaner: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + heat_purge_deleted: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + image_repo_sync: null + ks_endpoints: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_service: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_user: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + rabbit_init: null + test: null # NOTE: (brew) requests cpu/mem values based on a three node # hyperconverged lab (/scripts/hyperconverged-lab.sh). 
# limit values based on defaults from the openstack-helm charts unless defined pod: + lifecycle: + upgrades: + deployments: + revision_history: 3 + pod_replacement_strategy: RollingUpdate + rolling_update: + max_unavailable: 20% + max_surge: 3 + disruption_budget: + api: + min_available: 0 + cfn: + min_available: 0 + cloudwatch: + min_available: 0 + termination_grace_period: + api: + timeout: 60 + cfn: + timeout: 60 + cloudwatch: + timeout: 60 + engine: + timeout: 60 resources: enabled: true api: diff --git a/base-helm-configs/horizon/horizon-helm-overrides.yaml b/base-helm-configs/horizon/horizon-helm-overrides.yaml index f543a492f..f27c28f78 100644 --- a/base-helm-configs/horizon/horizon-helm-overrides.yaml +++ b/base-helm-configs/horizon/horizon-helm-overrides.yaml @@ -1,13 +1,12 @@ --- images: tags: - db_init: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_drop: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - horizon_db_sync: "quay.io/rackspace/rackerlabs-horizon:2024.1-ubuntu_jammy" - horizon: "quay.io/rackspace/rackerlabs-horizon:2024.1-ubuntu_jammy" - test: "quay.io/rackspace/rackerlabs-osh-selenium:latest-ubuntu_jammy" - dep_check: "quay.io/rackspace/rackerlabs-kubernetes-entrypoint:latest-ubuntu_jammy" - image_repo_sync: "quay.io/rackspace/rackerlabs-docker:17.07.0" + db_init: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_drop: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + horizon_db_sync: "ghcr.io/rackerlabs/genestack-images/horizon:2024.1-latest" + horizon: "ghcr.io/rackerlabs/genestack-images/horizon:2024.1-latest" + dep_check: "ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest" + image_repo_sync: null # NOTE: (brew) requests cpu/mem values based on a three node # hyperconverged lab (/scripts/hyperconverged-lab.sh). 
diff --git a/base-helm-configs/ironic/ironic-helm-overrides.yaml b/base-helm-configs/ironic/ironic-helm-overrides.yaml index 249d89cc1..97a599842 100644 --- a/base-helm-configs/ironic/ironic-helm-overrides.yaml +++ b/base-helm-configs/ironic/ironic-helm-overrides.yaml @@ -6,33 +6,33 @@ --- images: tags: - ironic_manage_cleaning_network: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ironic_retrive_cleaning_network: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ironic_retrive_swift_config: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - bootstrap: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_drop: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_init: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ironic_db_sync: "quay.io/rackspace/rackerlabs-ironic:2024.1-ubuntu_jammy" - ks_user: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_service: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_endpoints: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - rabbit_init: "quay.io/rackspace/rackerlabs-rabbitmq:3.13-management" - ironic_api: "quay.io/rackspace/rackerlabs-ironic:2024.1-ubuntu_jammy" - ironic_conductor: "quay.io/rackspace/rackerlabs-ironic:2024.1-ubuntu_jammy" - ironic_pxe: "quay.io/rackspace/rackerlabs-ironic:2024.1-ubuntu_jammy" - ironic_pxe_init: "quay.io/rackspace/rackerlabs-ironic:2024.1-ubuntu_jammy" + ironic_manage_cleaning_network: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ironic_retrive_cleaning_network: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ironic_retrive_swift_config: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + bootstrap: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_drop: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_init: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ironic_db_sync: "ghcr.io/rackerlabs/genestack-images/ironic-api:2024.1-latest" + ks_user: 
"ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_service: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_endpoints: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + rabbit_init: null + ironic_api: "ghcr.io/rackerlabs/genestack-images/ironic-api:2024.1-latest" + ironic_conductor: "ghcr.io/rackerlabs/genestack-images/ironic-conductor:2024.1-latest" + ironic_pxe: "ghcr.io/rackerlabs/genestack-images/ironic-pxe:2024.1-latest" + ironic_pxe_init: "ghcr.io/rackerlabs/genestack-images/ironic-api:2024.1-latest" ironic_pxe_http: "docker.io/nginx:1.13.3" # Retained from openstack-helm default - ironic_inspector: "quay.io/rackspace/rackerlabs-ironic-inspector:2024.1-ubuntu_jammy" - ironic_inspector_db_sync: "quay.io/rackspace/rackerlabs-ironic-inspector:2024.1-ubuntu_jammy" - dep_check: "quay.io/rackspace/rackerlabs-kubernetes-entrypoint:latest-ubuntu_jammy" - image_repo_sync: "quay.io/rackspace/rackerlabs-docker:17.07.0" + ironic_inspector: "ghcr.io/rackerlabs/genestack-images/ironic-inspector:2024.1-latest" + ironic_inspector_db_sync: "ghcr.io/rackerlabs/genestack-images/ironic-inspector:2024.1-latest" + dep_check: "ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest" + image_repo_sync: null pull_policy: "IfNotPresent" conf: ironic: DEFAULT: log_config_append: /etc/ironic/logging.conf - tempdir: /var/lib/openstack-helm/tmp # Matches openstack-helm default + tempdir: /tmp default_deploy_interface: "direct" default_inspect_interface: "inspector" default_network_interface: "neutron" @@ -41,6 +41,7 @@ conf: enabled_deploy_interfaces: "direct,ramdisk" enabled_inspect_interfaces: "inspector,no-inspect" enabled_management_interfaces: "ipmitool,redfish" + enabled_network_interfaces: "flat,neutron" enabled_power_interfaces: "ipmitool,redfish" enabled_raid_interfaces: "no-raid" database: @@ -112,6 +113,32 @@ network: neutron_subnet_alloc_end: 172.24.6.200 neutron_subnet_dns_nameserver: 8.8.8.8 # Aligned with Neutron's OVN DNS 
+bootstrap: + image: + enabled: true + openstack: + enabled: true + ks_user: ironic + # NOTE: if source_base is null the source will be used as is + source_base: http://tarballs.openstack.org/ironic-python-agent/tinyipa/files + structured: + ironic-agent.initramfs: + source: tinyipa-stable-2024.1.gz + disk_format: ari + container_format: ari + ironic-agent.kernel: + source: tinyipa-stable-2024.1.vmlinuz + disk_format: aki + container_format: aki + network: + enabled: false + openstack: + enabled: false + object_store: + enabled: false + openstack: + enabled: false + dependencies: static: api: @@ -119,8 +146,6 @@ dependencies: - ironic-db-sync - ironic-ks-user - ironic-ks-endpoints - - ironic-manage-cleaning-network - - ironic-rabbit-init services: - endpoint: internal service: oslo_db @@ -133,8 +158,6 @@ dependencies: - ironic-db-sync - ironic-ks-user - ironic-ks-endpoints - - ironic-manage-cleaning-network - - ironic-rabbit-init services: - endpoint: internal service: oslo_db @@ -233,12 +256,12 @@ manifests: ingress_api: false job_bootstrap: false job_db_drop: false - job_db_init: true + job_db_init: false job_db_sync: true job_ks_endpoints: true job_ks_service: true job_ks_user: true - job_manage_cleaning_network: true - job_rabbit_init: true + job_manage_cleaning_network: false + job_rabbit_init: false service_ingress_api: false statefulset_conductor: true diff --git a/base-helm-configs/keystone/keystone-helm-overrides.yaml b/base-helm-configs/keystone/keystone-helm-overrides.yaml index f2c34a22d..4abc23dbc 100644 --- a/base-helm-configs/keystone/keystone-helm-overrides.yaml +++ b/base-helm-configs/keystone/keystone-helm-overrides.yaml @@ -1,27 +1,41 @@ --- images: tags: - bootstrap: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_drop: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_init: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - dep_check: "quay.io/rackspace/rackerlabs-kubernetes-entrypoint:latest-ubuntu_jammy" - 
image_repo_sync: "quay.io/rackspace/rackerlabs-docker:17.07.0" - keystone_api: "quay.io/rackspace/rackerlabs-keystone-rxt:2024.1-ubuntu_jammy-1747958291" - keystone_credential_cleanup: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - keystone_credential_rotate: "quay.io/rackspace/rackerlabs-keystone-rxt:2024.1-ubuntu_jammy-1747958291" - keystone_credential_setup: "quay.io/rackspace/rackerlabs-keystone-rxt:2024.1-ubuntu_jammy-1747958291" - keystone_db_sync: "quay.io/rackspace/rackerlabs-keystone-rxt:2024.1-ubuntu_jammy-1747958291" - keystone_domain_manage: "quay.io/rackspace/rackerlabs-keystone-rxt:2024.1-ubuntu_jammy-1747958291" - keystone_fernet_rotate: "quay.io/rackspace/rackerlabs-keystone-rxt:2024.1-ubuntu_jammy-1747958291" - keystone_fernet_setup: "quay.io/rackspace/rackerlabs-keystone-rxt:2024.1-ubuntu_jammy-1747958291" - ks_user: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - rabbit_init: "quay.io/rackspace/rackerlabs-rabbitmq:3.13-management" - test: "quay.io/rackspace/rackerlabs-xrally-openstack:2.0.0" + bootstrap: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_drop: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_init: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + dep_check: "ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest" + image_repo_sync: null + keystone_api: "ghcr.io/rackerlabs/genestack-images/keystone:2024.1-latest" + keystone_credential_cleanup: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + keystone_credential_rotate: "ghcr.io/rackerlabs/genestack-images/keystone:2024.1-latest" + keystone_credential_setup: "ghcr.io/rackerlabs/genestack-images/keystone:2024.1-latest" + keystone_db_sync: "ghcr.io/rackerlabs/genestack-images/keystone:2024.1-latest" + keystone_domain_manage: "ghcr.io/rackerlabs/genestack-images/keystone:2024.1-latest" + keystone_fernet_rotate: "ghcr.io/rackerlabs/genestack-images/keystone:2024.1-latest" + keystone_fernet_setup: 
"ghcr.io/rackerlabs/genestack-images/keystone:2024.1-latest" + ks_user: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + rabbit_init: null + test: null # NOTE: (brew) requests cpu/mem values based on a three node # hyperconverged lab (/scripts/hyperconverged-lab.sh). # limit values based on defaults from the openstack-helm charts unless defined pod: + lifecycle: + upgrades: + deployments: + revision_history: 3 + pod_replacement_strategy: RollingUpdate + rolling_update: + max_unavailable: 20% + max_surge: 3 + disruption_budget: + api: + min_available: 0 + termination_grace_period: + api: + timeout: 60 resources: enabled: true api: @@ -85,6 +99,11 @@ conf: rackspace: role_attribute: os_flex role_attribute_enforcement: false + # NOTE(cloudnull): See https://docs.openstack.org/keystone/latest/admin/configuration.html#security-compliance-and-pci-dss for more + security_compliance: + lockout_failure_attempts: 6 + lockout_duration: 1800 + disable_user_account_days_inactive: 90 logging: logger_root: handlers: diff --git a/base-helm-configs/kube-ovn/kube-ovn-helm-overrides.yaml b/base-helm-configs/kube-ovn/kube-ovn-helm-overrides.yaml index 961c044ff..3296fea38 100644 --- a/base-helm-configs/kube-ovn/kube-ovn-helm-overrides.yaml +++ b/base-helm-configs/kube-ovn/kube-ovn-helm-overrides.yaml @@ -3,13 +3,13 @@ # Declare variables to be passed into your templates. 
global: registry: - address: docker.io/kubeovn + address: ghcr.io/rackerlabs/genestack-images imagePullSecrets: [] images: kubeovn: repository: kube-ovn vpcRepository: vpc-nat-gateway - tag: v1.13.13 + tag: v1.13.14-latest support_arm: true thirdparty: true @@ -41,27 +41,27 @@ networking: #VLAN_NAME: "ovn-vlan" #VLAN_ID: "100" EXCHANGE_LINK_NAME: false - ENABLE_EIP_SNAT: true + ENABLE_EIP_SNAT: false DEFAULT_SUBNET: "ovn-default" DEFAULT_VPC: "ovn-cluster" NODE_SUBNET: "join" #mesh network - ENABLE_ECMP: false + ENABLE_ECMP: true ENABLE_METRICS: true # comma-separated string of nodelocal DNS ip addresses NODE_LOCAL_DNS_IP: "" - PROBE_INTERVAL: 180000 - OVN_NORTHD_PROBE_INTERVAL: 5000 - OVN_LEADER_PROBE_INTERVAL: 5 - OVN_REMOTE_PROBE_INTERVAL: 10000 + PROBE_INTERVAL: 60000 + OVN_NORTHD_PROBE_INTERVAL: 15000 + OVN_LEADER_PROBE_INTERVAL: 15 + OVN_REMOTE_PROBE_INTERVAL: 30000 OVN_REMOTE_OPENFLOW_INTERVAL: 180 - OVN_NORTHD_N_THREADS: 1 - ENABLE_COMPACT: false + OVN_NORTHD_N_THREADS: 4 # Number of threads for ovn-northd, default is 4 production environments could set it to a higher value. + ENABLE_COMPACT: true func: ENABLE_LB: true ENABLE_NP: true ENABLE_EXTERNAL_VPC: true - HW_OFFLOAD: false + HW_OFFLOAD: false # Enable hardware offload, if supported by the underlying network hardware. 
ENABLE_LB_SVC: false ENABLE_KEEP_VM_IP: true LS_DNAT_MOD_DL_DST: true @@ -77,8 +77,8 @@ func: ENABLE_OVN_IPSEC: false ENABLE_ANP: false SET_VXLAN_TX_OFF: false - OVSDB_CON_TIMEOUT: 3 - OVSDB_INACTIVITY_TIMEOUT: 10 + OVSDB_CON_TIMEOUT: 5 + OVSDB_INACTIVITY_TIMEOUT: 30 ENABLE_LIVE_MIGRATION_OPTIMIZE: true ipv4: @@ -91,8 +91,8 @@ ipv4: performance: GC_INTERVAL: 0 - INSPECT_INTERVAL: 20 - OVS_VSCTL_CONCURRENCY: 100 + INSPECT_INTERVAL: 300 + OVS_VSCTL_CONCURRENCY: 150 debug: ENABLE_MIRROR: false diff --git a/base-helm-configs/libvirt/libvirt-helm-overrides.yaml b/base-helm-configs/libvirt/libvirt-helm-overrides.yaml index fb36e8157..1ad90d51d 100644 --- a/base-helm-configs/libvirt/libvirt-helm-overrides.yaml +++ b/base-helm-configs/libvirt/libvirt-helm-overrides.yaml @@ -1,9 +1,9 @@ --- images: tags: - libvirt: docker.io/openstackhelm/libvirt:2024.1-ubuntu_jammy + libvirt: ghcr.io/rackerlabs/genestack-images/libvirt:latest ceph_config_helper: docker.io/openstackhelm/ceph-config-helper:ubuntu_jammy_18.2.2-1-20240312 - dep_check: quay.io/airshipit/kubernetes-entrypoint:latest-ubuntu_jammy + dep_check: ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest network: backend: - ovn @@ -18,3 +18,12 @@ dependencies: ovn: libvirt: pod: [] # In a hybrid deployment, we don't want to run ovn-controller on the same node as libvirt +pod: + lifecycle: + upgrades: + daemonsets: + pod_replacement_strategy: RollingUpdate + libvirt: + enabled: true + min_ready_seconds: 0 + max_unavailable: 20% diff --git a/base-helm-configs/magnum/magnum-helm-overrides.yaml b/base-helm-configs/magnum/magnum-helm-overrides.yaml index 047bf3481..d2cc00d26 100644 --- a/base-helm-configs/magnum/magnum-helm-overrides.yaml +++ b/base-helm-configs/magnum/magnum-helm-overrides.yaml @@ -1,23 +1,37 @@ --- images: tags: - bootstrap: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_drop: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_init: 
"quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - dep_check: "quay.io/rackspace/rackerlabs-kubernetes-entrypoint:latest-ubuntu_jammy" - image_repo_sync: "quay.io/rackspace/rackerlabs-docker:17.07.0" - ks_endpoints: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_service: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_user: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - magnum_api: "quay.io/rackspace/rackerlabs-magnum:2024.1-ubuntu_jammy" - magnum_conductor: "quay.io/rackspace/rackerlabs-magnum:2024.1-ubuntu_jammy" - magnum_db_sync: "quay.io/rackspace/rackerlabs-magnum:2024.1-ubuntu_jammy" - rabbit_init: "quay.io/rackspace/rackerlabs-rabbitmq:3.13-management" + bootstrap: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_drop: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_init: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + dep_check: "ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest" + image_repo_sync: null + ks_endpoints: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_service: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_user: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + magnum_api: "ghcr.io/rackerlabs/genestack-images/magnum:2024.1-latest" + magnum_conductor: "ghcr.io/rackerlabs/genestack-images/magnum:2024.1-latest" + magnum_db_sync: "ghcr.io/rackerlabs/genestack-images/magnum:2024.1-latest" + rabbit_init: null # NOTE: (brew) requests cpu/mem values based on a three node # hyperconverged lab (/scripts/hyperconverged-lab.sh). 
# limit values based on defaults from the openstack-helm charts unless defined pod: + lifecycle: + upgrades: + deployments: + revision_history: 3 + pod_replacement_strategy: RollingUpdate + rolling_update: + max_unavailable: 20% + max_surge: 3 + disruption_budget: + api: + min_available: 0 + termination_grace_period: + api: + timeout: 60 resources: enabled: true api: diff --git a/base-helm-configs/masakari/masakari-helm-overrides.yaml b/base-helm-configs/masakari/masakari-helm-overrides.yaml index 530fc4e9a..61fc9948d 100644 --- a/base-helm-configs/masakari/masakari-helm-overrides.yaml +++ b/base-helm-configs/masakari/masakari-helm-overrides.yaml @@ -13,26 +13,50 @@ --- images: tags: - db_init: quay.io/airshipit/heat:2024.1-ubuntu_jammy + db_init: ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest db_sync: docker.io/openstackhelm/masakari:2024.1-ubuntu_jammy - db_drop: quay.io/airshipit/heat:2024.1-ubuntu_jammy - ks_endpoints: quay.io/airshipit/heat:2024.1-ubuntu_jammy - ks_service: quay.io/airshipit/heat:2024.1-ubuntu_jammy - ks_user: quay.io/airshipit/heat:2024.1-ubuntu_jammy - masakari_api: quay.io/rackspace/rackerlabs-masakari:2024.1-ubuntu_jammy - masakari_engine: quay.io/rackspace/rackerlabs-masakari:2024.1-ubuntu_jammy + db_drop: ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest + ks_endpoints: ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest + ks_service: ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest + ks_user: ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest + masakari_api: ghcr.io/rackerlabs/genestack-images/masakari:2024.1-latest + masakari_engine: ghcr.io/rackerlabs/genestack-images/masakari:2024.1-latest # TEMP HOST-MONITOR IMAGE TO FIX: https://review.opendev.org/c/openstack/masakari-monitors/+/951336 masakari_host_monitor: kernelpanic53/rackerlabs-masakari-monitors:zhmarvi-ubuntu_jammy_v1.0 - masakari_process_monitor: quay.io/rackspace/rackerlabs-masakari-monitors:2024.1-ubuntu_jammy - masakari_instance_monitor: 
quay.io/rackspace/rackerlabs-masakari-monitors:2024.1-ubuntu_jammy + masakari_process_monitor: ghcr.io/rackerlabs/genestack-images/masakari-monitors:2024.1-latest + masakari_instance_monitor: ghcr.io/rackerlabs/genestack-images/masakari-monitors:2024.1-latest rabbit_init: docker.io/rabbitmq:3.13-management - dep_check: quay.io/airshipit/kubernetes-entrypoint:latest-ubuntu_focal + dep_check: ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest pull_policy: "IfNotPresent" # NOTE: (brew) requests cpu/mem values based on a three node # hyperconverged lab (/scripts/hyperconverged-lab.sh). # limit values based on defaults from the openstack-helm charts unless defined pod: + lifecycle: + upgrades: + deployments: + revision_history: 3 + pod_replacement_strategy: RollingUpdate + rolling_update: + max_unavailable: 20% + max_surge: 3 + daemonsets: + pod_replacement_strategy: RollingUpdate + compute: + enabled: true + min_ready_seconds: 0 + max_unavailable: 20% + disruption_budget: + masakari_api: + min_available: 0 + masakari_engine: + min_available: 0 + termination_grace_period: + masakari_api: + timeout: 60 + masakari_engine: + timeout: 60 resources: enabled: true masakari_api: diff --git a/base-helm-configs/neutron/neutron-helm-overrides.yaml b/base-helm-configs/neutron/neutron-helm-overrides.yaml index 2ccf08bd0..69956bf77 100644 --- a/base-helm-configs/neutron/neutron-helm-overrides.yaml +++ b/base-helm-configs/neutron/neutron-helm-overrides.yaml @@ -1,35 +1,35 @@ --- images: tags: - bootstrap: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_init: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_drop: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_user: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_service: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_endpoints: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - neutron_db_sync: 
"quay.io/rackspace/rackerlabs-neutron:2024.1-ubuntu_jammy" - neutron_dhcp: "quay.io/rackspace/rackerlabs-neutron:2024.1-ubuntu_jammy" - neutron_l3: "quay.io/rackspace/rackerlabs-neutron:2024.1-ubuntu_jammy" - neutron_l2gw: "quay.io/rackspace/rackerlabs-neutron:2024.1-ubuntu_jammy" - neutron_linuxbridge_agent: "quay.io/rackspace/rackerlabs-neutron:2024.1-ubuntu_jammy" - neutron_metadata: "quay.io/rackspace/rackerlabs-neutron:2024.1-ubuntu_jammy" - neutron_ovn_metadata: "quay.io/rackspace/rackerlabs-neutron:2024.1-ubuntu_jammy" - neutron_ovn_vpn: "quay.io/rackspace/rackerlabs-neutron:2024.1-ubuntu_jammy" - neutron_openvswitch_agent: "quay.io/rackspace/rackerlabs-neutron:2024.1-ubuntu_jammy" - neutron_server: "quay.io/rackspace/rackerlabs-neutron:2024.1-ubuntu_jammy" - neutron_rpc_server: "quay.io/rackspace/rackerlabs-neutron:2024.1-ubuntu_jammy" - neutron_bagpipe_bgp: "quay.io/rackspace/rackerlabs-neutron:2024.1-ubuntu_jammy" - neutron_netns_cleanup_cron: "quay.io/rackspace/rackerlabs-neutron:2024.1-ubuntu_jammy" - test: "quay.io/rackspace/rackerlabs-xrally-openstack:2.0.0" + bootstrap: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_init: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_drop: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_user: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_service: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_endpoints: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + neutron_db_sync: "ghcr.io/rackerlabs/genestack-images/neutron:2024.1-latest" + neutron_dhcp: "ghcr.io/rackerlabs/genestack-images/neutron:2024.1-latest" + neutron_l3: "ghcr.io/rackerlabs/genestack-images/neutron:2024.1-latest" + neutron_l2gw: "ghcr.io/rackerlabs/genestack-images/neutron:2024.1-latest" + neutron_linuxbridge_agent: "ghcr.io/rackerlabs/genestack-images/neutron:2024.1-latest" + neutron_metadata: "ghcr.io/rackerlabs/genestack-images/neutron:2024.1-latest" + 
neutron_ovn_metadata: "ghcr.io/rackerlabs/genestack-images/neutron:2024.1-latest" + neutron_ovn_vpn: "ghcr.io/rackerlabs/genestack-images/neutron:2024.1-latest" + neutron_openvswitch_agent: "ghcr.io/rackerlabs/genestack-images/neutron:2024.1-latest" + neutron_server: "ghcr.io/rackerlabs/genestack-images/neutron:2024.1-latest" + neutron_rpc_server: "ghcr.io/rackerlabs/genestack-images/neutron:2024.1-latest" + neutron_bagpipe_bgp: "ghcr.io/rackerlabs/genestack-images/neutron:2024.1-latest" + neutron_netns_cleanup_cron: "ghcr.io/rackerlabs/genestack-images/neutron:2024.1-latest" + test: null purge_test: "quay.io/rackspace/rackerlabs-ospurge:latest" - rabbit_init: "quay.io/rackspace/rackerlabs-rabbitmq:3.13-management" - netoffload: "quay.io/rackspace/rackerlabs-netoffload:v1.0.1" - neutron_sriov_agent: "quay.io/rackspace/rackerlabs-neutron:2024.1-ubuntu_jammy" - neutron_sriov_agent_init: "quay.io/rackspace/rackerlabs-neutron:2024.1-ubuntu_jammy" - neutron_bgp_dragent: "quay.io/rackspace/rackerlabs-neutron:2024.1-ubuntu_jammy" - neutron_ironic_agent: "quay.io/rackspace/rackerlabs-neutron:2024.1-ubuntu_jammy" - dep_check: "quay.io/rackspace/rackerlabs-kubernetes-entrypoint:latest-ubuntu_jammy" - image_repo_sync: "quay.io/rackspace/rackerlabs-docker:17.07.0" + rabbit_init: null + netoffload: null + neutron_sriov_agent: "ghcr.io/rackerlabs/genestack-images/neutron:2024.1-latest" + neutron_sriov_agent_init: "ghcr.io/rackerlabs/genestack-images/neutron:2024.1-latest" + neutron_bgp_dragent: "ghcr.io/rackerlabs/genestack-images/neutron:2024.1-latest" + neutron_ironic_agent: "ghcr.io/rackerlabs/genestack-images/neutron:2024.1-latest" + dep_check: "ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest" + image_repo_sync: null labels: ovs: @@ -53,6 +53,8 @@ dependencies: jobs: null ovn_metadata: pod: [] + ovn_vpn_agent: + pod: [] ovs_agent: jobs: null rpc_server: @@ -86,6 +88,62 @@ pod: cpu: "3000m" use_fqdn: neutron_agent: false + lifecycle: + upgrades: + 
deployments: + revision_history: 3 + pod_replacement_strategy: RollingUpdate + rolling_update: + max_unavailable: 20% + max_surge: 3 + daemonsets: + pod_replacement_strategy: RollingUpdate + dhcp_agent: + enabled: true + min_ready_seconds: 0 + max_unavailable: 20% + l3_agent: + enabled: true + min_ready_seconds: 0 + max_unavailable: 20% + lb_agent: + enabled: true + min_ready_seconds: 0 + max_unavailable: 20% + metadata_agent: + enabled: true + min_ready_seconds: 0 + max_unavailable: 20% + ovn_metadata_agent: + enabled: true + min_ready_seconds: 0 + max_unavailable: 20% + ovn_vpn_agent: + enabled: true + min_ready_seconds: 0 + max_unavailable: 20% + ovs_agent: + enabled: true + min_ready_seconds: 0 + max_unavailable: 20% + sriov_agent: + enabled: true + min_ready_seconds: 0 + max_unavailable: 20% + netns_cleanup_cron: + enabled: true + min_ready_seconds: 0 + max_unavailable: 20% + disruption_budget: + server: + min_available: 0 + termination_grace_period: + server: + timeout: 60 + rpc_server: + timeout: 60 + ironic_agent: + timeout: 60 conf: dhcp_agent: @@ -111,8 +169,21 @@ conf: router_scheduler_driver: neutron.scheduler.l3_agent_scheduler.AZLeastRoutersScheduler rpc_state_report_workers: 2 rpc_workers: 2 - service_plugins: "ovn-router,qos,metering,trunk,segments" + # NOTE(cloudnull): in 2025.1 we can add firewall_v2 + service_plugins: "ovn-router,ovn-vpnaas,qos,metering,trunk,segments" + service_providers: + service_provider: + type: multistring + values: + - "VPN:strongswan:neutron_vpnaas.services.vpn.service_drivers.ovn_ipsec.IPsecOvnVPNDriver:default" + # - "FIREWALL_V2:fwaas_db:neutron_fwaas.services.firewall.service_drivers.ovn.firewall_l3_driver.OVNFwaasDriver:default" + fwaas: + agent_version: v2 + driver: neutron_fwaas.services.firewall.service_drivers.ovn.firewall_l3_driver.OVNFwaasDriver + # NOTE(cloudnull): in 2025.1 we can enable this + enabled: False agent: + extensions: vpnaas availability_zone: az1 database: connection_debug: 0 @@ -121,6 +192,8 @@ 
conf: idle_timeout: 3600 mysql_sql_mode: {} use_db_reconnect: true + max_pool_size: 30 + max_overflow: 60 pool_timeout: 60 max_retries: -1 oslo_messaging_rabbit: @@ -149,6 +222,13 @@ conf: ovn_l3_scheduler: leastloaded ovn_nb_connection: "tcp:127.0.0.1:6641" ovn_sb_connection: "tcp:127.0.0.1:6642" + metadata_rate_limiting: + rate_limit_enabled: true + ip_versions: 4 + base_window_duration: 60 + base_query_rate_limit: 6 + burst_window_duration: 10 + burst_query_rate_limit: 2 neutron_api_uwsgi: uwsgi: processes: 2 @@ -160,11 +240,17 @@ conf: metadata_workers: 2 ovs: ovsdb_connection: "tcp:127.0.0.1:6640" + ovn_vpn_agent: + ovs: + ovsdb_connection: "tcp:127.0.0.1:6640" plugins: ml2_conf: agent: availability_zone: az1 + # NOTE(cloudnull): in 2025.1 we can add fwaas_v2 extensions: "fip_qos,gateway_ip_qos" + fwaas: + firewall_l2_driver: noop ml2: extension_drivers: "port_security,qos" mechanism_drivers: ovn @@ -257,6 +343,7 @@ manifests: daemonset_metadata_agent: false daemonset_ovn_metadata_agent: true daemonset_ovs_agent: false + daemonset_ovn_vpn_agent: true ingress_server: false job_db_init: false job_rabbit_init: false @@ -265,3 +352,4 @@ manifests: secret_ingress_tls: false secret_rabbitmq: false service_ingress_server: false + deployment_rpc_server: false diff --git a/base-helm-configs/nova/nova-helm-overrides.yaml b/base-helm-configs/nova/nova-helm-overrides.yaml index eb1ef1fbe..24c84c968 100644 --- a/base-helm-configs/nova/nova-helm-overrides.yaml +++ b/base-helm-configs/nova/nova-helm-overrides.yaml @@ -1,43 +1,37 @@ --- images: tags: - bootstrap: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_drop: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_init: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - dep_check: "quay.io/rackspace/rackerlabs-kubernetes-entrypoint:latest-ubuntu_jammy" - image_repo_sync: "quay.io/rackspace/rackerlabs-docker:17.07.0" - ks_endpoints: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - 
ks_service: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_user: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - nova_api: "quay.io/rackspace/rackerlabs-nova-efi:2024.1-ubuntu_jammy-1737928811" - nova_archive_deleted_rows: "quay.io/rackspace/rackerlabs-nova-efi:2024.1-ubuntu_jammy-1737928811" - nova_cell_setup: "quay.io/rackspace/rackerlabs-nova-efi:2024.1-ubuntu_jammy-1737928811" - nova_cell_setup_init: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - nova_compute: "quay.io/rackspace/rackerlabs-nova-efi:2024.1-ubuntu_jammy-1737928811" + bootstrap: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_drop: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_init: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + dep_check: "ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest" + image_repo_sync: null + ks_endpoints: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_service: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_user: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + nova_api: "ghcr.io/rackerlabs/genestack-images/nova:2024.1-latest" + nova_archive_deleted_rows: "ghcr.io/rackerlabs/genestack-images/nova:2024.1-latest" + nova_cell_setup: "ghcr.io/rackerlabs/genestack-images/nova:2024.1-latest" + nova_cell_setup_init: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + nova_compute: "ghcr.io/rackerlabs/genestack-images/nova:2024.1-latest" nova_compute_ironic: "docker.io/kolla/ubuntu-source-nova-compute-ironic:wallaby" - nova_compute_ssh: "quay.io/rackspace/rackerlabs-nova-efi:2024.1-ubuntu_jammy-1737928811" - nova_conductor: "quay.io/rackspace/rackerlabs-nova-efi:2024.1-ubuntu_jammy-1737928811" - nova_db_sync: "quay.io/rackspace/rackerlabs-nova-efi:2024.1-ubuntu_jammy-1737928811" - nova_novncproxy: "quay.io/rackspace/rackerlabs-nova-efi:2024.1-ubuntu_jammy-1737928811" - nova_novncproxy_assets: 
"quay.io/rackspace/rackerlabs-nova-efi:2024.1-ubuntu_jammy-1737928811" - nova_scheduler: "quay.io/rackspace/rackerlabs-nova-efi:2024.1-ubuntu_jammy-1737928811" + nova_compute_ssh: "ghcr.io/rackerlabs/genestack-images/nova:2024.1-latest" + nova_conductor: "ghcr.io/rackerlabs/genestack-images/nova:2024.1-latest" + nova_db_sync: "ghcr.io/rackerlabs/genestack-images/nova:2024.1-latest" + nova_novncproxy: "ghcr.io/rackerlabs/genestack-images/nova:2024.1-latest" + nova_novncproxy_assets: "ghcr.io/rackerlabs/genestack-images/nova:2024.1-latest" + nova_scheduler: "ghcr.io/rackerlabs/genestack-images/nova:2024.1-latest" nova_service_cleaner: "quay.io/rackspace/rackerlabs-ceph-config-helper:latest-ubuntu_jammy" - nova_spiceproxy: "quay.io/rackspace/rackerlabs-nova-efi:2024.1-ubuntu_jammy-1737928811" - nova_spiceproxy_assets: "quay.io/rackspace/rackerlabs-nova-efi:2024.1-ubuntu_jammy-1737928811" + nova_spiceproxy: "ghcr.io/rackerlabs/genestack-images/nova:2024.1-latest" + nova_spiceproxy_assets: "ghcr.io/rackerlabs/genestack-images/nova:2024.1-latest" nova_storage_init: "quay.io/rackspace/rackerlabs-ceph-config-helper:latest-ubuntu_jammy" nova_wait_for_computes_init: "quay.io/rackspace/rackerlabs-hyperkube-amd64:v1.11.6" - rabbit_init: "quay.io/rackspace/rackerlabs-rabbitmq:3.13-management" - test: "quay.io/rackspace/rackerlabs-xrally-openstack:2.0.0" + rabbit_init: null + test: null network: backend: - ovn - lifecycle: - upgrades: - daemonsets: - compute: - enabled: true - max_unavailable: 20% ssh: enabled: true @@ -88,6 +82,8 @@ conf: handlers: - stdout level: INFO + ceph: + enabled: false nova: DEFAULT: block_device_allocate_retries: 180 @@ -104,6 +100,9 @@ conf: vif_plugging_timeout: 300 cross_az_attach: true network_allocate_retries: 3 + api: + vendordata_providers: ['StaticJSON'] + vendordata_jsonfile_path: /etc/nova/vendor_data.json api_database: connection_debug: 0 connection_recycle_time: 600 @@ -170,7 +169,7 @@ conf: # 
https://lists.openstack.org/pipermail/openstack-discuss/2023-April/033314.html # https://review.opendev.org/c/openstack/oslo.messaging/+/866617 kombu_reconnect_delay: 0.5 - schedule: + scheduler: workers: 2 workarounds: skip_cpu_compare_at_startup: false @@ -280,6 +279,30 @@ endpoints: # hyperconverged lab (/scripts/hyperconverged-lab.sh). # limit values based on defaults from the openstack-helm charts unless defined pod: + lifecycle: + upgrades: + deployments: + revision_history: 3 + pod_replacement_strategy: RollingUpdate + rolling_update: + max_unavailable: 20% + max_surge: 3 + daemonsets: + pod_replacement_strategy: RollingUpdate + compute: + enabled: true + min_ready_seconds: 0 + max_unavailable: 20% + disruption_budget: + metadata: + min_available: 0 + osapi: + min_available: 0 + termination_grace_period: + metadata: + timeout: 60 + osapi: + timeout: 60 resources: enabled: true compute: @@ -326,6 +349,31 @@ pod: readOnlyRootFilesystem: false use_fqdn: compute: false + mounts: + nova_compute: + init_container: null + nova_compute: + volumeMounts: + - name: metadata-api-static-vendordata + mountPath: /etc/nova/vendor_data.json + subPath: vendor_data.json + readOnly: true + volumes: + - name: metadata-api-static-vendordata + configMap: + name: static-vendor-data + nova_api_metadata: + init_container: null + nova_api_metadata: + volumeMounts: + - name: metadata-api-static-vendordata + mountPath: /etc/nova/vendor_data.json + subPath: vendor_data.json + readOnly: true + volumes: + - name: metadata-api-static-vendordata + configMap: + name: static-vendor-data manifests: deployment_spiceproxy: false diff --git a/base-helm-configs/octavia/octavia-helm-overrides.yaml b/base-helm-configs/octavia/octavia-helm-overrides.yaml index d7d65b2f6..3b0adf587 100644 --- a/base-helm-configs/octavia/octavia-helm-overrides.yaml +++ b/base-helm-configs/octavia/octavia-helm-overrides.yaml @@ -1,23 +1,23 @@ --- images: tags: - bootstrap: 
"quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_drop: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_init: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - dep_check: "quay.io/rackspace/rackerlabs-kubernetes-entrypoint:latest-ubuntu_jammy" - image_repo_sync: "quay.io/rackspace/rackerlabs-docker:17.07.0" - ks_endpoints: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_service: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_user: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - octavia_api: "quay.io/rackspace/rackerlabs-octavia-ovn:2024.1-ubuntu_jammy-1737651745" - octavia_db_sync: "quay.io/rackspace/rackerlabs-octavia-ovn:2024.1-ubuntu_jammy-1737651745" - octavia_health_manager: "quay.io/rackspace/rackerlabs-octavia-ovn:2024.1-ubuntu_jammy-1737651745" - octavia_health_manager_init: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - octavia_housekeeping: "quay.io/rackspace/rackerlabs-octavia-ovn:2024.1-ubuntu_jammy-1737651745" - octavia_worker: "quay.io/rackspace/rackerlabs-octavia-ovn:2024.1-ubuntu_jammy-1737651745" - openvswitch_vswitchd: "docker.io/kolla/centos-source-openvswitch-vswitchd:rocky" - rabbit_init: "quay.io/rackspace/rackerlabs-rabbitmq:3.13-management" - test: "quay.io/rackspace/rackerlabs-xrally-openstack:2.0.0" + bootstrap: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_drop: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_init: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + dep_check: "ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest" + image_repo_sync: null + ks_endpoints: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_service: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_user: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + octavia_api: "ghcr.io/rackerlabs/genestack-images/octavia:2024.1-latest" + octavia_db_sync: 
"ghcr.io/rackerlabs/genestack-images/octavia:2024.1-latest" + octavia_health_manager: "ghcr.io/rackerlabs/genestack-images/octavia:2024.1-latest" + octavia_health_manager_init: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + octavia_housekeeping: "ghcr.io/rackerlabs/genestack-images/octavia:2024.1-latest" + octavia_worker: "ghcr.io/rackerlabs/genestack-images/octavia:2024.1-latest" + openvswitch_vswitchd: "ghcr.io/rackerlabs/genestack-images/ovs:v3.5.1-latest" + rabbit_init: null + test: null dependencies: static: @@ -75,6 +75,8 @@ conf: use_db_reconnect: true pool_timeout: 60 max_retries: -1 + max_overflow: 60 + max_pool_size: 30 driver_agent: enabled_provider_agents: ovn glance: @@ -181,6 +183,26 @@ endpoints: # hyperconverged lab (/scripts/hyperconverged-lab.sh). # limit values based on defaults from the openstack-helm charts unless defined pod: + lifecycle: + upgrades: + deployments: + revision_history: 3 + pod_replacement_strategy: RollingUpdate + rolling_update: + max_unavailable: 20% + max_surge: 3 + daemonsets: + pod_replacement_strategy: RollingUpdate + health_manager: + enabled: true + min_ready_seconds: 0 + max_unavailable: 20% + disruption_budget: + api: + min_available: 0 + termination_grace_period: + api: + timeout: 60 resources: enabled: true api: diff --git a/base-helm-configs/placement/placement-helm-overrides.yaml b/base-helm-configs/placement/placement-helm-overrides.yaml index 8b9fa436b..1a8aff451 100644 --- a/base-helm-configs/placement/placement-helm-overrides.yaml +++ b/base-helm-configs/placement/placement-helm-overrides.yaml @@ -1,20 +1,34 @@ --- images: tags: - db_drop: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - db_init: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - dep_check: "quay.io/rackspace/rackerlabs-kubernetes-entrypoint:latest-ubuntu_jammy" - image_repo_sync: "quay.io/rackspace/rackerlabs-docker:17.07.0" - ks_endpoints: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_service: 
"quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - ks_user: "quay.io/rackspace/rackerlabs-heat:2024.1-ubuntu_jammy" - placement: "quay.io/rackspace/rackerlabs-placement:2024.1-ubuntu_jammy" - placement_db_sync: "quay.io/rackspace/rackerlabs-placement:2024.1-ubuntu_jammy" + db_drop: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + db_init: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + dep_check: "ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest" + image_repo_sync: null + ks_endpoints: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_service: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + ks_user: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" + placement: "ghcr.io/rackerlabs/genestack-images/placement:2024.1-latest" + placement_db_sync: "ghcr.io/rackerlabs/genestack-images/placement:2024.1-latest" # NOTE: (brew) requests cpu/mem values based on a three node # hyperconverged lab (/scripts/hyperconverged-lab.sh). 
# limit values based on defaults from the openstack-helm charts unless defined pod: + lifecycle: + upgrades: + deployments: + revision_history: 3 + pod_replacement_strategy: RollingUpdate + rolling_update: + max_unavailable: 20% + max_surge: 3 + disruption_budget: + api: + min_available: 0 + termination_grace_period: + api: + timeout: 60 resources: enabled: true api: diff --git a/base-helm-configs/prometheus-mysql-exporter/values.yaml b/base-helm-configs/prometheus-mysql-exporter/values.yaml index b6718430e..5d2205c69 100644 --- a/base-helm-configs/prometheus-mysql-exporter/values.yaml +++ b/base-helm-configs/prometheus-mysql-exporter/values.yaml @@ -137,36 +137,36 @@ config: {} # logFormat: "logger:stderr" collectors: - # auto_increment.columns: false - # binlog_size: false + auto_increment.columns: true + binlog_size: true # engine_innodb_status: false # engine_tokudb_status: false # global_status: true # global_variables: true # info_schema.clientstats: false - # info_schema.innodb_metrics: false + info_schema.innodb_metrics: true # info_schema.innodb_tablespaces: false # info_schema.innodb_cmp: false # info_schema.innodb_cmpmem: false - # info_schema.processlist: false + info_schema.processlist: true # info_schema.processlist.min_time: 0 - # info_schema.query_response_time: false + info_schema.query_response_time: true # info_schema.tables: true # info_schema.tables.databases: '*' - # info_schema.tablestats: false + info_schema.tablestats: true # info_schema.schemastats: false - # info_schema.userstats: false + info_schema.userstats: true # perf_schema.eventsstatements: false # perf_schema.eventsstatements.digest_text_limit: 120 # perf_schema.eventsstatements.limit: false # perf_schema.eventsstatements.timelimit: 86400 - # perf_schema.eventswaits: false - # perf_schema.file_events: false + perf_schema.eventswaits: true + perf_schema.file_events: true # perf_schema.file_instances: false - # perf_schema.indexiowaits: false - # perf_schema.tableiowaits: false - # 
perf_schema.tablelocks: false - perf_schema.replication_group_member_stats: true + perf_schema.indexiowaits: true + perf_schema.tableiowaits: true + perf_schema.tablelocks: true + # perf_schema.replication_group_member_stats: true # slave_status: true # slave_hosts: false # heartbeat: false @@ -188,7 +188,7 @@ mysql: # secret with full config my.cnf existingConfigSecret: name: "mariadb-monitor" - key: "my.conf" + key: "my.cnf" # secret only containing the password existingPasswordSecret: name: "mariadb-monitoring" diff --git a/base-helm-configs/redis-operator/redis-operator-helm-overrides.yaml b/base-helm-configs/redis-operator/redis-operator-helm-overrides.yaml new file mode 100644 index 000000000..7c465cd11 --- /dev/null +++ b/base-helm-configs/redis-operator/redis-operator-helm-overrides.yaml @@ -0,0 +1,226 @@ +# Redis Operator and Cluster Overrides +# Defines custom settings to override defaults in base values.yaml + +# -- Cluster DNS name +clusterName: ${CLUSTER_NAME} + +# Namespace configuration +# Controls the namespace for the Redis operator and cluster +namespace: + create: true + name: redis-systems + +# Redis Cluster Configuration Overrides +redisCluster: + # -- Name of the Redis cluster (optional, defaults to empty) + name: "redis-cluster" + # -- Number of shards in the cluster (implied by leader/follower replicas) + clusterSize: 3 + # -- Redis version to use + clusterVersion: v7 + # -- Enable persistence for the cluster + persistenceEnabled: true + # -- Image configuration for Redis pods + image: quay.io/opstree/redis + tag: v7.0.15 + imagePullPolicy: IfNotPresent + # -- Secrets for image pull (optional) + imagePullSecrets: [] + # - name: Secret with Registry credentials + # -- Redis authentication secret (optional) + redisSecret: + secretName: "" + secretKey: "" + # -- Resource requests and limits (optional) + resources: {} + # requests: + # cpu: 100m + # memory: 128Mi + # limits: + # cpu: 100m + # memory: 128Mi + # -- Minimum seconds a pod must be 
ready before considered available + minReadySeconds: 0 + # -- Some fields of statefulset are immutable, such as volumeClaimTemplates. + # When set to true, the operator will delete the statefulset and recreate it. + #Default is false. + recreateStatefulSetOnUpdateInvalid: false + # -- Enable pod anti-affinity between leader and follower pods by adding the + # appropriate label. + # Notice that this requires the operator to have its mutating webhook enabled, + # otherwise it will only add an annotation to the RedisCluster CR. Default is + # false. + enableMasterSlaveAntiAffinity: false + # -- Leader configuration + leader: + replicas: 3 + serviceType: ClusterIP + affinity: {} + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: disktype + # operator: In + # values: + # - ssd + tolerations: [] + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + nodeSelector: {} + # memory: medium + securityContext: {} + pdb: + enabled: false + maxUnavailable: 1 + minAvailable: 1 + livenessProbe: {} + # timeoutSeconds: 30 + # periodSeconds: 45 + # successThreshold: 1 + # failureThreshold: 4 + # initialDelaySeconds: 15 + readinessProbe: {} + # timeoutSeconds: 30 + # periodSeconds: 45 + # successThreshold: 1 + # failureThreshold: 4 + # initialDelaySeconds: 15 + # -- Follower configuration + follower: + replicas: 3 + serviceType: ClusterIP + affinity: {} + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: disktype + # operator: In + # values: + # - ssd + tolerations: [] + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + nodeSelector: {} + # memory: medium + securityContext: {} + pdb: + enabled: false + maxUnavailable: 1 + minAvailable: 1 + livenessProbe: {} + # timeoutSeconds: 30 + # periodSeconds: 45 + # successThreshold: 1 + # failureThreshold: 4 + # initialDelaySeconds: 
15 + readinessProbe: {} + # timeoutSeconds: 30 + # periodSeconds: 45 + # successThreshold: 1 + # failureThreshold: 4 + # initialDelaySeconds: 15 + +# -- Labels and Annotations +labels: {} + # foo: bar + # test: echo + +# -- External Configuration +externalConfig: + enabled: false + data: {} + # tcp-keepalive 400 + # slowlog-max-len 158 + # stream-node-max-bytes 2048 + +# -- External Service Configuration +externalService: + enabled: false + serviceType: {} + port: {} + annotations: {} + # foo: bar + +# -- Monitoring and Exporter +serviceMonitor: + enabled: false + interval: {} + scrapeTimeout: {} + namespace: {} + extraLabels: {} + # foo: bar + # team: devops +redisExporter: + enabled: false + image: {} + tag: {} + imagePullPolicy: {} + resources: {} + # requests: + # cpu: 100m + # memory: 128Mi + # limits: + # cpu: 100m + # memory: 128Mi + env: [] + # - name: VAR_NAME + # value: "value1" + securityContext: {} + +# -- Sidecars and Init Containers +sidecars: {} +initContainer: + enabled: false + image: {} + imagePullPolicy: {} + resources: {} + # requests: + # memory: "64Mi" + # cpu: "250m" + # limits: + # memory: "128Mi" + # cpu: "500m" + env: [] + command: [] + args: [] + +# -- Priority and Security +priorityClassName: "" +podSecurityContext: {} +TLS: + ca: {} + cert: {} + key: {} + secret: + secretName: "" +acl: + secret: + secretName: "" +env: [] + # - name: VAR_NAME + # value: "value1" +serviceAccountName: "" + +# -- Storage Specification +storageSpec: + volumeClaimTemplate: + spec: + # storageClassName: standard + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 1Gi + nodeConfVolume: true + nodeConfVolumeClaimTemplate: + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 1Gi + # selector: {} diff --git a/base-kustomize/barbican/base/barbican-rabbitmq-queue.yaml b/base-kustomize/barbican/base/barbican-rabbitmq-queue.yaml index dcbe1893d..9dd6f2505 100644 --- a/base-kustomize/barbican/base/barbican-rabbitmq-queue.yaml 
+++ b/base-kustomize/barbican/base/barbican-rabbitmq-queue.yaml @@ -4,14 +4,16 @@ kind: User metadata: name: barbican namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" annotations: helm.sh/resource-policy: keep meta.helm.sh/release-name: "barbican" meta.helm.sh/release-namespace: "openstack" spec: tags: - - management # available tags are 'management', 'policymaker', 'monitoring' and 'administrator' - - policymaker + - management # available tags are 'management', 'policymaker', 'monitoring' and 'administrator' + - policymaker rabbitmqClusterReference: name: rabbitmq # rabbitmqCluster must exist in the same namespace as this resource namespace: openstack diff --git a/base-kustomize/cinder/netapp/kustomization.yaml b/base-kustomize/cinder/netapp/kustomization.yaml index 91f2e9c4d..eb6cddc10 100644 --- a/base-kustomize/cinder/netapp/kustomization.yaml +++ b/base-kustomize/cinder/netapp/kustomization.yaml @@ -1,10 +1,10 @@ images: - name: image-kubernetes-entrypoint-init - newName: quay.io/airshipit/kubernetes-entrypoint - newTag: v1.0.0 + newName: ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint + newTag: latest - name: image-heat-conf-init - newName: docker.io/openstackhelm/heat - newTag: 2024.1-ubuntu_jammy + newName: ghcr.io/rackerlabs/genestack-images/heat + newTag: 2024.1-latest - name: image-cinder-volume-netapp-init newName: ghcr.io/rackerlabs/genestack/cinder-volume-rxt newTag: 2024.1-ubuntu_jammy diff --git a/base-kustomize/envoyproxy-gateway/base/envoy-service-monitor.yaml b/base-kustomize/envoyproxy-gateway/base/envoy-service-monitor.yaml new file mode 100644 index 000000000..a2e67f338 --- /dev/null +++ b/base-kustomize/envoyproxy-gateway/base/envoy-service-monitor.yaml @@ -0,0 +1,15 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: envoy-gateway-monitor + namespace: prometheus +spec: + endpoints: + - interval: 15s + port: metrics + namespaceSelector: + matchNames: + - envoyproxy-gateway-system + 
selector: + matchLabels: + app.kubernetes.io/instance: envoyproxy-gateway diff --git a/base-kustomize/envoyproxy-gateway/base/kustomization.yaml b/base-kustomize/envoyproxy-gateway/base/kustomization.yaml index 9befd997c..aa708aa44 100644 --- a/base-kustomize/envoyproxy-gateway/base/kustomization.yaml +++ b/base-kustomize/envoyproxy-gateway/base/kustomization.yaml @@ -7,3 +7,4 @@ resources: - envoy-gatewayclass.yaml - envoy-gateway.yaml - envoy-endpoint-policies.yaml + - envoy-service-monitor.yaml diff --git a/base-kustomize/glance/base/glance-rabbitmq-queue.yaml b/base-kustomize/glance/base/glance-rabbitmq-queue.yaml index f3066cf28..2c48dc15d 100644 --- a/base-kustomize/glance/base/glance-rabbitmq-queue.yaml +++ b/base-kustomize/glance/base/glance-rabbitmq-queue.yaml @@ -4,15 +4,16 @@ kind: User metadata: name: glance namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" annotations: helm.sh/resource-policy: keep - app.kubernetes.io/managed-by: "Helm" meta.helm.sh/release-name: "glance" meta.helm.sh/release-namespace: "openstack" spec: tags: - - management # available tags are 'management', 'policymaker', 'monitoring' and 'administrator' - - policymaker + - management # available tags are 'management', 'policymaker', 'monitoring' and 'administrator' + - policymaker rabbitmqClusterReference: name: rabbitmq # rabbitmqCluster must exist in the same namespace as this resource namespace: openstack diff --git a/base-kustomize/heat/base/heat-rabbitmq-queue.yaml b/base-kustomize/heat/base/heat-rabbitmq-queue.yaml index d9b6e0f87..1daaa7a87 100644 --- a/base-kustomize/heat/base/heat-rabbitmq-queue.yaml +++ b/base-kustomize/heat/base/heat-rabbitmq-queue.yaml @@ -4,10 +4,16 @@ kind: User metadata: name: heat namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" + annotations: + helm.sh/resource-policy: keep + meta.helm.sh/release-name: "heat" + meta.helm.sh/release-namespace: "openstack" spec: tags: - - management # available tags are 
'management', 'policymaker', 'monitoring' and 'administrator' - - policymaker + - management # available tags are 'management', 'policymaker', 'monitoring' and 'administrator' + - policymaker rabbitmqClusterReference: name: rabbitmq # rabbitmqCluster must exist in the same namespace as this resource namespace: openstack @@ -19,6 +25,12 @@ kind: Vhost metadata: name: heat-vhost namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" + annotations: + helm.sh/resource-policy: keep + meta.helm.sh/release-name: "heat" + meta.helm.sh/release-namespace: "openstack" spec: name: "heat" # vhost name; required and cannot be updated defaultQueueType: quorum # default queue type for this vhost; require RabbitMQ version 3.11.12 or above @@ -38,6 +50,12 @@ kind: Queue metadata: name: heat-queue namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" + annotations: + helm.sh/resource-policy: keep + meta.helm.sh/release-name: "heat" + meta.helm.sh/release-namespace: "openstack" spec: name: heat-qq # name of the queue vhost: "heat" # default to '/' if not provided @@ -53,6 +71,12 @@ kind: Permission metadata: name: heat-permission namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" + annotations: + helm.sh/resource-policy: keep + meta.helm.sh/release-name: "heat" + meta.helm.sh/release-namespace: "openstack" spec: vhost: "heat" # name of a vhost userReference: diff --git a/base-kustomize/heat/base/kustomization.yaml b/base-kustomize/heat/base/kustomization.yaml index 8da8bc0bf..c686f86e4 100644 --- a/base-kustomize/heat/base/kustomization.yaml +++ b/base-kustomize/heat/base/kustomization.yaml @@ -37,7 +37,7 @@ patches: failureThreshold: 3 httpGet: path: / - port: 8004 + port: 8000 scheme: HTTP initialDelaySeconds: 10 periodSeconds: 60 diff --git a/base-kustomize/ironic/aoi/kustomization.yaml b/base-kustomize/ironic/aio/kustomization.yaml similarity index 100% rename from base-kustomize/ironic/aoi/kustomization.yaml rename to 
base-kustomize/ironic/aio/kustomization.yaml diff --git a/base-kustomize/ironic/base/hpa-ironic-api.yaml b/base-kustomize/ironic/base/hpa-ironic-api.yaml index dfe154f48..f3b498042 100644 --- a/base-kustomize/ironic/base/hpa-ironic-api.yaml +++ b/base-kustomize/ironic/base/hpa-ironic-api.yaml @@ -4,7 +4,7 @@ kind: HorizontalPodAutoscaler metadata: name: ironic-api namespace: openstack - spec: +spec: maxReplicas: 9 minReplicas: 2 metrics: diff --git a/base-kustomize/ironic/base/hpa-iconic-conductor.yaml b/base-kustomize/ironic/base/hpa-ironic-conductor.yaml similarity index 100% rename from base-kustomize/ironic/base/hpa-iconic-conductor.yaml rename to base-kustomize/ironic/base/hpa-ironic-conductor.yaml diff --git a/base-kustomize/ironic/base/ironic-mariadb-database.yaml b/base-kustomize/ironic/base/ironic-mariadb-database.yaml index 332004146..7bea3bae6 100644 --- a/base-kustomize/ironic/base/ironic-mariadb-database.yaml +++ b/base-kustomize/ironic/base/ironic-mariadb-database.yaml @@ -4,8 +4,12 @@ kind: Database metadata: name: ironic namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" annotations: helm.sh/resource-policy: keep + meta.helm.sh/release-name: "ironic" + meta.helm.sh/release-namespace: "openstack" spec: # If you want the database to be created with a different name than the resource name # name: data-custom @@ -20,8 +24,12 @@ kind: User metadata: name: ironic namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" annotations: helm.sh/resource-policy: keep + meta.helm.sh/release-name: "ironic" + meta.helm.sh/release-namespace: "openstack" spec: # If you want the user to be created with a different name than the resource name # name: user-custom @@ -40,8 +48,12 @@ kind: Grant metadata: name: ironic-grant namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" annotations: helm.sh/resource-policy: keep + meta.helm.sh/release-name: "ironic" + meta.helm.sh/release-namespace: "openstack" spec: mariaDbRef: 
name: mariadb-cluster diff --git a/base-kustomize/ironic/base/ironic-rabbitmq-queue.yaml b/base-kustomize/ironic/base/ironic-rabbitmq-queue.yaml index 2846e95c6..89d710459 100644 --- a/base-kustomize/ironic/base/ironic-rabbitmq-queue.yaml +++ b/base-kustomize/ironic/base/ironic-rabbitmq-queue.yaml @@ -4,8 +4,12 @@ kind: User metadata: name: ironic namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" annotations: helm.sh/resource-policy: keep + meta.helm.sh/release-name: "ironic" + meta.helm.sh/release-namespace: "openstack" spec: tags: - management # available tags are 'management', 'policymaker', 'monitoring' and 'administrator' @@ -21,8 +25,12 @@ kind: Vhost metadata: name: ironic-vhost namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" annotations: helm.sh/resource-policy: keep + meta.helm.sh/release-name: "ironic" + meta.helm.sh/release-namespace: "openstack" spec: name: "ironic" # vhost name; required and cannot be updated defaultQueueType: quorum # default queue type for this vhost; require RabbitMQ version 3.11.12 or above @@ -42,8 +50,12 @@ kind: Queue metadata: name: ironic-queue namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" annotations: helm.sh/resource-policy: keep + meta.helm.sh/release-name: "ironic" + meta.helm.sh/release-namespace: "openstack" spec: name: ironic-qq # name of the queue vhost: "ironic" # default to '/' if not provided @@ -59,8 +71,12 @@ kind: Permission metadata: name: ironic-permission namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" annotations: helm.sh/resource-policy: keep + meta.helm.sh/release-name: "ironic" + meta.helm.sh/release-namespace: "openstack" spec: vhost: "ironic" # name of a vhost userReference: diff --git a/base-kustomize/keystone/base/keystone-rabbitmq-queue.yaml b/base-kustomize/keystone/base/keystone-rabbitmq-queue.yaml index 9d129a725..50a350e17 100644 --- a/base-kustomize/keystone/base/keystone-rabbitmq-queue.yaml +++ 
b/base-kustomize/keystone/base/keystone-rabbitmq-queue.yaml @@ -2,40 +2,41 @@ apiVersion: rabbitmq.com/v1beta1 kind: User metadata: - name: keystone - namespace: openstack - annotations: - helm.sh/resource-policy: keep - app.kubernetes.io/managed-by: "Helm" - meta.helm.sh/release-name: "keystone" - meta.helm.sh/release-namespace: "openstack" + name: keystone + namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" + annotations: + helm.sh/resource-policy: keep + meta.helm.sh/release-name: "keystone" + meta.helm.sh/release-namespace: "openstack" spec: - tags: - - management - - policymaker - rabbitmqClusterReference: - name: rabbitmq # rabbitmqCluster must exist in the same namespace as this resource - namespace: openstack - importCredentialsSecret: - name: keystone-rabbitmq-password + tags: + - management + - policymaker + rabbitmqClusterReference: + name: rabbitmq # rabbitmqCluster must exist in the same namespace as this resource + namespace: openstack + importCredentialsSecret: + name: keystone-rabbitmq-password --- apiVersion: rabbitmq.com/v1beta1 kind: Vhost metadata: - name: keystone-vhost - namespace: openstack - labels: - app.kubernetes.io/managed-by: "Helm" - annotations: - helm.sh/resource-policy: keep - meta.helm.sh/release-name: "keystone" - meta.helm.sh/release-namespace: "openstack" + name: keystone-vhost + namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" + annotations: + helm.sh/resource-policy: keep + meta.helm.sh/release-name: "keystone" + meta.helm.sh/release-namespace: "openstack" spec: - name: "keystone" # vhost name; required and cannot be updated - defaultQueueType: quorum # default queue type for this vhost; require RabbitMQ version 3.11.12 or above - rabbitmqClusterReference: - name: rabbitmq # rabbitmqCluster must exist in the same namespace as this resource - namespace: openstack + name: "keystone" # vhost name; required and cannot be updated + defaultQueueType: quorum # default queue type for this 
vhost; require RabbitMQ version 3.11.12 or above + rabbitmqClusterReference: + name: rabbitmq # rabbitmqCluster must exist in the same namespace as this resource + namespace: openstack # status: # conditions: # - lastTransitionTime: "" @@ -47,50 +48,50 @@ spec: apiVersion: rabbitmq.com/v1beta1 kind: Queue metadata: - name: keystone-queue - namespace: openstack - labels: - app.kubernetes.io/managed-by: "Helm" - annotations: - helm.sh/resource-policy: keep - meta.helm.sh/release-name: "keystone" - meta.helm.sh/release-namespace: "openstack" + name: keystone-queue + namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" + annotations: + helm.sh/resource-policy: keep + meta.helm.sh/release-name: "keystone" + meta.helm.sh/release-namespace: "openstack" spec: - name: keystone-qq # name of the queue - vhost: "keystone" # default to '/' if not provided - type: quorum # without providing a queue type, rabbitmq creates a classic queue - autoDelete: false - durable: true # setting 'durable' to false means this queue won't survive a server restart - rabbitmqClusterReference: - name: rabbitmq # rabbitmqCluster must exist in the same namespace as this resource - namespace: openstack + name: keystone-qq # name of the queue + vhost: "keystone" # default to '/' if not provided + type: quorum # without providing a queue type, rabbitmq creates a classic queue + autoDelete: false + durable: true # setting 'durable' to false means this queue won't survive a server restart + rabbitmqClusterReference: + name: rabbitmq # rabbitmqCluster must exist in the same namespace as this resource + namespace: openstack --- apiVersion: rabbitmq.com/v1beta1 kind: Permission metadata: - name: keystone-permission - namespace: openstack - labels: - app.kubernetes.io/managed-by: "Helm" - annotations: - helm.sh/resource-policy: keep - meta.helm.sh/release-name: "keystone" - meta.helm.sh/release-namespace: "openstack" + name: keystone-permission + namespace: openstack + labels: + 
app.kubernetes.io/managed-by: "Helm" + annotations: + helm.sh/resource-policy: keep + meta.helm.sh/release-name: "keystone" + meta.helm.sh/release-namespace: "openstack" spec: - vhost: "keystone" # name of a vhost - userReference: - name: "keystone" # name of a user.rabbitmq.com in the same namespace; must specify either spec.userReference or spec.user - permissions: - write: ".*" - configure: ".*" - read: ".*" - rabbitmqClusterReference: - name: rabbitmq # rabbitmqCluster must exist in the same namespace as this resource - namespace: openstack + vhost: "keystone" # name of a vhost + userReference: + name: "keystone" # name of a user.rabbitmq.com in the same namespace; must specify either spec.userReference or spec.user + permissions: + write: ".*" + configure: ".*" + read: ".*" + rabbitmqClusterReference: + name: rabbitmq # rabbitmqCluster must exist in the same namespace as this resource + namespace: openstack # status: # conditions: # - lastTransitionTime: "" # status: "True" # true, false, or unknown # type: Ready # Reason: "SuccessfulCreateOrUpdate" # status false result in reason FailedCreateOrUpdate -# Message: "" # set when status is false \ No newline at end of file +# Message: "" # set when status is false diff --git a/base-kustomize/keystone/federation/kustomization.yaml b/base-kustomize/keystone/federation/kustomization.yaml index 34a394d6f..630078f64 100644 --- a/base-kustomize/keystone/federation/kustomization.yaml +++ b/base-kustomize/keystone/federation/kustomization.yaml @@ -6,8 +6,8 @@ resources: images: - name: keystone-shib - newName: ghcr.io/rackerlabs/keystone-rxt/shibd - newTag: "1747958286" + newName: ghcr.io/rackerlabs/genestack-images/shibd + newTag: "latest" patches: - target: diff --git a/base-kustomize/masakari/base/masakari-rabbitmq-queue.yaml b/base-kustomize/masakari/base/masakari-rabbitmq-queue.yaml index a770034ef..3a5bdbfd0 100644 --- a/base-kustomize/masakari/base/masakari-rabbitmq-queue.yaml +++ 
b/base-kustomize/masakari/base/masakari-rabbitmq-queue.yaml @@ -4,8 +4,12 @@ kind: User metadata: name: masakari namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" annotations: helm.sh/resource-policy: keep + meta.helm.sh/release-name: "masakari" + meta.helm.sh/release-namespace: "openstack" spec: tags: - management # available tags are 'management', 'policymaker', 'monitoring' and 'administrator' @@ -21,8 +25,12 @@ kind: Vhost metadata: name: masakari-vhost namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" annotations: helm.sh/resource-policy: keep + meta.helm.sh/release-name: "masakari" + meta.helm.sh/release-namespace: "openstack" spec: name: "masakari" # vhost name; required and cannot be updated defaultQueueType: quorum # default queue type for this vhost; require RabbitMQ version 3.11.12 or above @@ -35,8 +43,12 @@ kind: Queue metadata: name: masakari-queue namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" annotations: helm.sh/resource-policy: keep + meta.helm.sh/release-name: "masakari" + meta.helm.sh/release-namespace: "openstack" spec: name: masakari-qq # name of the queue vhost: "masakari" # default to '/' if not provided @@ -52,8 +64,12 @@ kind: Permission metadata: name: masakari-permission namespace: openstack + labels: + app.kubernetes.io/managed-by: "Helm" annotations: helm.sh/resource-policy: keep + meta.helm.sh/release-name: "masakari" + meta.helm.sh/release-namespace: "openstack" spec: vhost: "masakari" # name of a vhost userReference: diff --git a/base-kustomize/neutron/base/hpa-neutron-rpc-server.yaml b/base-kustomize/neutron/base/hpa-neutron-rpc-server.yaml deleted file mode 100644 index 0ac8c8d1e..000000000 --- a/base-kustomize/neutron/base/hpa-neutron-rpc-server.yaml +++ /dev/null @@ -1,26 +0,0 @@ ---- -apiVersion: autoscaling/v2 -kind: HorizontalPodAutoscaler -metadata: - name: neutron-rpc-server - namespace: openstack -spec: - maxReplicas: 9 - minReplicas: 2 - metrics: - - resource: 
- name: cpu - target: - averageUtilization: 80 - type: Utilization - type: Resource - - resource: - name: memory - target: - averageUtilization: 80 - type: Utilization - type: Resource - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: neutron-rpc-server diff --git a/base-kustomize/neutron/base/kustomization.yaml b/base-kustomize/neutron/base/kustomization.yaml index 677186a8f..98617a65a 100644 --- a/base-kustomize/neutron/base/kustomization.yaml +++ b/base-kustomize/neutron/base/kustomization.yaml @@ -5,7 +5,6 @@ resources: - neutron-rabbitmq-queue.yaml - all.yaml - hpa-neutron-server.yaml - - hpa-neutron-rpc-server.yaml - policies.yaml patches: diff --git a/base-kustomize/nova/base/kustomization.yaml b/base-kustomize/nova/base/kustomization.yaml index 570c66098..9766f8f2c 100644 --- a/base-kustomize/nova/base/kustomization.yaml +++ b/base-kustomize/nova/base/kustomization.yaml @@ -11,3 +11,4 @@ resources: - hpa-nova-novncproxy.yaml - hpa-nova-scheduler.yaml - policies.yaml + - static-vendordata-configmap.yaml diff --git a/base-kustomize/nova/base/static-vendordata-configmap.yaml b/base-kustomize/nova/base/static-vendordata-configmap.yaml new file mode 100644 index 000000000..291f523ea --- /dev/null +++ b/base-kustomize/nova/base/static-vendordata-configmap.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: static-vendor-data + namespace: openstack +data: + vendor_data.json: '{}' diff --git a/base-kustomize/octavia/base/kustomization.yaml b/base-kustomize/octavia/base/kustomization.yaml index e6c15ac33..3377cd48b 100644 --- a/base-kustomize/octavia/base/kustomization.yaml +++ b/base-kustomize/octavia/base/kustomization.yaml @@ -4,11 +4,11 @@ sortOptions: images: - name: image-kubernetes-entrypoint-init - newName: quay.io/airshipit/kubernetes-entrypoint - newTag: v1.0.0 + newName: ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint + newTag: latest - name: image-octavia-ovn - newName: 
quay.io/rackspace/rackerlabs-octavia-ovn - newTag: 2024.1-ubuntu_jammy-1737651745 + newName: ghcr.io/rackerlabs/genestack-images/octavia + newTag: 2024.1-latest resources: - octavia-mariadb-database.yaml @@ -20,6 +20,24 @@ resources: # To run the OVN driver, the octavia-api container must have an agent container within the same pod. patches: + - target: + kind: Secret + name: octavia-etc + patch: |- + - op: add + path: /data/policy.yaml + value: b3NfbG9hZC1iYWxhbmNlcl9hcGk6Zmxhdm9yLXByb2ZpbGU6Z2V0X29uZTogcnVsZTpsb2FkLWJhbGFuY2VyOnJlYWQKb3NfbG9hZC1iYWxhbmNlcl9hcGk6Zmxhdm9yLXByb2ZpbGU6Z2V0X2FsbDogcnVsZTpsb2FkLWJhbGFuY2VyOnJlYWQ= + - target: + kind: Deployment + name: octavia-api + patch: |- + - op: add + path: /spec/template/spec/containers/0/volumeMounts/- + value: + name: octavia-etc + mountPath: /etc/octavia/policy.yaml + subPath: policy.yaml + readOnly: true - target: kind: Deployment name: octavia-api diff --git a/base-kustomize/ovn/base/ovn-setup.yaml b/base-kustomize/ovn/base/ovn-setup.yaml index bbb539b9e..94cf706e2 100644 --- a/base-kustomize/ovn/base/ovn-setup.yaml +++ b/base-kustomize/ovn/base/ovn-setup.yaml @@ -140,7 +140,7 @@ spec: emptyDir: {} initContainers: - name: init - image: "quay.io/airshipit/kubernetes-entrypoint:latest-ubuntu_jammy" + image: "ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest" imagePullPolicy: IfNotPresent securityContext: allowPrivilegeEscalation: true @@ -213,7 +213,7 @@ spec: - 'echo "$$SCRIPT" > /tmp/script && ash /tmp/script' containers: - name: ovn-setup-exec - image: "docker.io/openstackhelm/ovn:ubuntu_jammy" + image: "ghcr.io/rackerlabs/genestack-images/ovs:v3.5.1-latest" imagePullPolicy: IfNotPresent command: - bash diff --git a/base-kustomize/skyline/base/deployment-apiserver.yaml b/base-kustomize/skyline/base/deployment-apiserver.yaml index 8145908b5..a438485ca 100644 --- a/base-kustomize/skyline/base/deployment-apiserver.yaml +++ b/base-kustomize/skyline/base/deployment-apiserver.yaml @@ -64,7 +64,7 @@ 
spec: defaultMode: 0555 initContainers: - name: init - image: "quay.io/airshipit/kubernetes-entrypoint:latest-ubuntu_jammy" + image: "ghcr.io/rackerlabs/genestack-images/kubernetes-entrypoint:latest" imagePullPolicy: IfNotPresent securityContext: allowPrivilegeEscalation: true @@ -101,7 +101,7 @@ spec: - kubernetes-entrypoint volumeMounts: [] - name: skyline-apiserver-service-init - image: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy" + image: "ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest" imagePullPolicy: IfNotPresent resources: limits: @@ -335,11 +335,11 @@ spec: key: prometheus_endpoint optional: true - name: skyline-apiserver-db-migrate - image: "quay.io/rackspace/rackerlabs-skyline-rxt:master-ubuntu_jammy-1748595671" + image: "ghcr.io/rackerlabs/genestack-images/skyline:2024.2-latest" imagePullPolicy: IfNotPresent resources: requests: - memory: "64Mi" + memory: "256Mi" cpu: "100m" limits: memory: "4096Mi" @@ -358,14 +358,14 @@ spec: readOnly: true containers: - name: skyline-apiserver - image: "quay.io/rackspace/rackerlabs-skyline-rxt:master-ubuntu_jammy-1748595671" + image: "ghcr.io/rackerlabs/genestack-images/skyline:2024.2-latest" imagePullPolicy: IfNotPresent resources: - limits: - memory: "1Gi" requests: - cpu: "0.25" - memory: "64Mi" + memory: "256Mi" + cpu: "100m" + limits: + memory: "4096Mi" command: - bash - -c diff --git a/bin/create-secrets.sh b/bin/create-secrets.sh index 01d2fbc30..eda025b04 100755 --- a/bin/create-secrets.sh +++ b/bin/create-secrets.sh @@ -2,7 +2,7 @@ # shellcheck disable=SC2086 usage() { - echo "Usage: $0 [--region default: RegionOne]" + echo "Usage: $0 [--region " exit 1 } @@ -66,7 +66,7 @@ designate_admin_password=$(generate_password 32) neutron_rabbitmq_password=$(generate_password 64) neutron_db_password=$(generate_password 32) neutron_admin_password=$(generate_password 32) -horizon_secret_key_password=$(generate_password 64) +horizon_secret_key=$(generate_password 64) horizon_db_password=$(generate_password 
32) skyline_service_password=$(generate_password 32) skyline_db_password=$(generate_password 32) @@ -102,6 +102,14 @@ ironic_rabbitmq_password=$(generate_password 32) OUTPUT_FILE="/etc/genestack/kubesecrets.yaml" +if [[ -f ${OUTPUT_FILE} ]]; then + echo "Error: ${OUTPUT_FILE} already exists. Please remove it before running this script." + echo " This will replace an existing file and will lead to mass rotation, which is" + echo " likely not what you want to do. If you really want to break your system, please" + echo " make sure you know what you're doing." + exit 99 +fi + cat < $OUTPUT_FILE --- apiVersion: v1 @@ -390,8 +398,7 @@ metadata: namespace: openstack type: Opaque data: - username: $(echo -n "horizon" | base64) - password: $(echo -n $horizon_secret_key_password | base64 -w0) + horizon_secret_key: $(echo -n $horizon_secret_key | base64 -w0) --- apiVersion: v1 kind: Secret diff --git a/bin/install-envoy-gateway.sh b/bin/install-envoy-gateway.sh index f3102550a..ead616d5a 100755 --- a/bin/install-envoy-gateway.sh +++ b/bin/install-envoy-gateway.sh @@ -4,7 +4,7 @@ GLOBAL_OVERRIDES_DIR="/etc/genestack/helm-configs/global_overrides" SERVICE_CONFIG_DIR="/etc/genestack/helm-configs/envoyproxy-gateway" BASE_OVERRIDES="/opt/genestack/base-helm-configs/envoyproxy-gateway/envoy-gateway-helm-overrides.yaml" -ENVOY_VERSION="v1.3.0" +ENVOY_VERSION="v1.4.2" HELM_CMD="helm upgrade --install envoyproxy-gateway oci://docker.io/envoyproxy/gateway-helm \ --version ${ENVOY_VERSION} \ --namespace envoyproxy-gateway-system \ diff --git a/bin/install-fluentbit.sh b/bin/install-fluentbit.sh index bbb2c759d..8bf442464 100755 --- a/bin/install-fluentbit.sh +++ b/bin/install-fluentbit.sh @@ -3,8 +3,12 @@ GLOBAL_OVERRIDES_DIR="/etc/genestack/helm-configs/global_overrides" SERVICE_CONFIG_DIR="/etc/genestack/helm-configs/fluentbit" +FLUENTBIT_CHART_VERSION="0.52.0" -HELM_CMD="helm upgrade --install --namespace fluentbit --create-namespace fluentbit fluent/fluent-bit" +HELM_CMD="helm 
upgrade --install \ + --version $FLUENTBIT_CHART_VERSION \ + --namespace fluentbit \ + --create-namespace fluentbit fluent/fluent-bit" HELM_CMD+=" -f /opt/genestack/base-helm-configs/fluentbit/fluentbit-helm-overrides.yaml" diff --git a/bin/install-horizon.sh b/bin/install-horizon.sh index be49ad4e4..2cd9968da 100755 --- a/bin/install-horizon.sh +++ b/bin/install-horizon.sh @@ -23,7 +23,7 @@ done HELM_CMD+=" --set endpoints.identity.auth.admin.password=\"\$(kubectl --namespace openstack get secret keystone-admin -o jsonpath='{.data.password}' | base64 -d)\"" HELM_CMD+=" --set endpoints.oslo_cache.auth.memcache_secret_key=\"\$(kubectl --namespace openstack get secret os-memcached -o jsonpath='{.data.memcache_secret_key}' | base64 -d)\"" -HELM_CMD+=" --set conf.horizon.local_settings.config.horizon_secret_key=\"\$(kubectl --namespace openstack get secret horizon-secret-key -o jsonpath='{.data.root-password}' | base64 -d)\"" +HELM_CMD+=" --set conf.horizon.local_settings.config.horizon_secret_key=\"\$(kubectl --namespace openstack get secret horizon-secret-key -o jsonpath='{.data.horizon_secret_key}' | base64 -d)\"" HELM_CMD+=" --set endpoints.oslo_db.auth.admin.password=\"\$(kubectl --namespace openstack get secret mariadb -o jsonpath='{.data.root-password}' | base64 -d)\"" HELM_CMD+=" --set endpoints.oslo_db.auth.horizon.password=\"\$(kubectl --namespace openstack get secret horizon-db-password -o jsonpath='{.data.password}' | base64 -d)\"" diff --git a/bin/install-kube-ovn.sh b/bin/install-kube-ovn.sh index 7590ef284..3a29573fd 100755 --- a/bin/install-kube-ovn.sh +++ b/bin/install-kube-ovn.sh @@ -4,7 +4,7 @@ GLOBAL_OVERRIDES_DIR="/etc/genestack/helm-configs/global_overrides" SERVICE_CONFIG_DIR="/etc/genestack/helm-configs/kube-ovn" BASE_OVERRIDES="/opt/genestack/base-helm-configs/kube-ovn/kube-ovn-helm-overrides.yaml" -KUBE_OVN_VERSION="v1.13.13" +KUBE_OVN_VERSION="v1.13.14" MASTER_NODES=$(kubectl get nodes -l kube-ovn/role=master -o json | jq -r 
'[.items[].status.addresses[] | select(.type == "InternalIP") | .address] | join(",")' | sed 's/,/\\,/g') MASTER_NODE_COUNT=$(kubectl get nodes -l kube-ovn/role=master -o json | jq -r '.items[].status.addresses[] | select(.type=="InternalIP") | .address' | wc -l) diff --git a/bin/install-redis-operator.sh b/bin/install-redis-operator.sh new file mode 100644 index 000000000..598f3b5b9 --- /dev/null +++ b/bin/install-redis-operator.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# shellcheck disable=SC2124,SC2145,SC2294 + +export VERSION="${VERSION:-0.21.0}" + +# Default parameter value +export CLUSTER_NAME=${CLUSTER_NAME:-cluster.local} + +# Directory to check for YAML files +CONFIG_DIR="/etc/genestack/helm-configs/redis-operator" + +# 'cluster.local' is the default value in base helm values file +if [ "${CLUSTER_NAME}" != "cluster.local" ]; then + CONFIG_FILE="$CONFIG_DIR/redis-operator-helm-overrides.yaml" + + mkdir -p "$CONFIG_DIR" + touch "$CONFIG_FILE" + + # Check if the file is empty and add/modify content accordingly + if [ ! 
-s "$CONFIG_FILE" ]; then + echo "clusterName: $CLUSTER_NAME" > "$CONFIG_FILE" + else + # If the clusterName line exists, modify it, otherwise add it at the end + if grep -q "^clusterName:" "$CONFIG_FILE"; then + sed -i -e "s/^clusterName: .*/clusterName: ${CLUSTER_NAME}/" "$CONFIG_FILE" + else + echo "clusterName: $CLUSTER_NAME" >> "$CONFIG_FILE" + fi + fi +fi + +# Add the redis-operator helm repository +helm repo add ot-helm https://ot-container-kit.github.io/helm-charts/ +helm repo update + +# Install the CRDs that match the version defined +helm upgrade --install --namespace=redis-systems --create-namespace redis-operator ot-helm/redis-operator --version "${VERSION}" + +# Helm command setup for Redis operator and cluster +HELM_CMD="helm upgrade --install redis-cluster ot-helm/redis-cluster \ + --namespace=redis-systems \ + --timeout 120m \ + -f /opt/genestack/base-helm-configs/redis-operator/redis-operator-helm-overrides.yaml" + +# Check if YAML files exist in the specified directory +if compgen -G "${CONFIG_DIR}/*.yaml" > /dev/null; then + # Add all YAML files from the directory to the helm command + for yaml_file in "${CONFIG_DIR}"/*.yaml; do + HELM_CMD+=" -f ${yaml_file}" + done +fi + +HELM_CMD+=" $@" + +# Run the helm command +echo "Executing Helm command:" +echo "${HELM_CMD}" +eval "${HELM_CMD}" diff --git a/bin/setup-openstack.sh b/bin/setup-openstack.sh index 60fe22c00..c8d14dc2a 100755 --- a/bin/setup-openstack.sh +++ b/bin/setup-openstack.sh @@ -1,8 +1,6 @@ #!/usr/bin/env bash set -e -export GENESTACK_INSTALL_TELEMETRY=${GENESTACK_INSTALL_TELEMETRY:-false} - # Track the PIDs of the services deploying in parallel pids=() @@ -11,7 +9,7 @@ function runTrackErator() { pids+=($!) } -function waitErator () { +function waitErator() { for pid in ${pids[*]}; do if ! 
timeout --preserve-status --verbose 30m tail --pid=${pid} -f /dev/null; then echo "==== PROCESS TIMEOUT =====================================" @@ -23,28 +21,65 @@ function waitErator () { done } +# Function to prompt user for component installation +prompt_component() { + local component=$1 + local prompt=$2 + read -p "Install ${prompt}? (y/n): " answer + if [[ "$answer" =~ ^[Yy]$ ]]; then + echo " ${component}: true" >> /etc/genestack/openstack-components.yaml + else + echo " ${component}: false" >> /etc/genestack/openstack-components.yaml + fi +} + +# Function to check if a component is set to true in the YAML file +is_component_enabled() { + local component=$1 + grep -qi "^[[:space:]]*${component}:[[:space:]]*true" "$CONFIG_FILE" +} + +# Check for YAML file and create if it doesn't exist +CONFIG_FILE="/etc/genestack/openstack-components.yaml" +if [ ! -f "$CONFIG_FILE" ]; then + echo "Configuration file $CONFIG_FILE not found. Creating it..." + cat > "$CONFIG_FILE" << EOF +components: + keystone: true +EOF + prompt_component "glance" "Glance (Image Service)" + prompt_component "heat" "Heat (Orchestration)" + prompt_component "barbican" "Barbican (Key Manager)" + prompt_component "cinder" "Cinder (Block Storage)" + prompt_component "placement" "Placement" + prompt_component "nova" "Nova (Compute)" + prompt_component "neutron" "Neutron (Networking)" + prompt_component "magnum" "Magnum (Container Orchestration)" + prompt_component "octavia" "Octavia (Load Balancer)" + prompt_component "masakari" "Masakari (Instance High Availability)" + prompt_component "ceilometer" "Ceilometer (Telemetry)" + prompt_component "gnocchi" "Gnocchi (Time Series Database)" + prompt_component "skyline" "Skyline (Dashboard)" +fi + # Block on Keystone /opt/genestack/bin/install-keystone.sh -# Run the rest of the services in parallel -runTrackErator /opt/genestack/bin/install-glance.sh -runTrackErator /opt/genestack/bin/install-heat.sh -runTrackErator /opt/genestack/bin/install-barbican.sh 
-runTrackErator /opt/genestack/bin/install-cinder.sh -runTrackErator /opt/genestack/bin/install-placement.sh -runTrackErator /opt/genestack/bin/install-nova.sh -runTrackErator /opt/genestack/bin/install-neutron.sh -runTrackErator /opt/genestack/bin/install-magnum.sh -runTrackErator /opt/genestack/bin/install-octavia.sh -runTrackErator /opt/genestack/bin/install-masakari.sh - -# Install telemetry services -if [ "${GENESTACK_INSTALL_TELEMETRY}" = true ]; then - runTrackErator /opt/genestack/bin/install-ceilometer.sh - runTrackErator /opt/genestack/bin/install-gnocchi.sh -fi +# Run selected services in parallel +is_component_enabled "glance" && runTrackErator /opt/genestack/bin/install-glance.sh +is_component_enabled "heat" && runTrackErator /opt/genestack/bin/install-heat.sh +is_component_enabled "barbican" && runTrackErator /opt/genestack/bin/install-barbican.sh +is_component_enabled "cinder" && runTrackErator /opt/genestack/bin/install-cinder.sh +is_component_enabled "placement" && runTrackErator /opt/genestack/bin/install-placement.sh +is_component_enabled "nova" && runTrackErator /opt/genestack/bin/install-nova.sh +is_component_enabled "neutron" && runTrackErator /opt/genestack/bin/install-neutron.sh +is_component_enabled "magnum" && runTrackErator /opt/genestack/bin/install-magnum.sh +is_component_enabled "octavia" && runTrackErator /opt/genestack/bin/install-octavia.sh +is_component_enabled "masakari" && runTrackErator /opt/genestack/bin/install-masakari.sh +is_component_enabled "ceilometer" && runTrackErator /opt/genestack/bin/install-ceilometer.sh +is_component_enabled "gnocchi" && runTrackErator /opt/genestack/bin/install-gnocchi.sh waitErator # Install skyline after all services are up -/opt/genestack/bin/install-skyline.sh +is_component_enabled "skyline" && /opt/genestack/bin/install-skyline.sh diff --git a/bootstrap.sh b/bootstrap.sh index 891f6f03c..42d48e034 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -21,27 +21,42 @@ cd "${BASEDIR}" || error "Could 
not change to ${BASEDIR}" source scripts/lib/functions.sh -set -e - success "Environment variables:" env | grep -E '^(SUDO|RPC_|ANSIBLE_|GENESTACK_|K8S|CONTAINER_|OPENSTACK_|OSH_)' | sort -u -success "Installing base packages (git):" -apt update +# Explicitly do not exit script on non-zero returns +set +e + +# Wait until cloud-init is finished before proceeding +echo "Waiting for cloud-init to finish..." +wait_for_cloud_init -DEBIAN_FRONTEND=noninteractive \ - apt-get -o "Dpkg::Options::=--force-confdef" \ - -o "Dpkg::Options::=--force-confold" \ - -qy install git python3-pip python3-venv python3-dev jq build-essential > ~/genestack-base-package-install.log 2>&1 +if [[ $? -eq 0 ]]; then + echo "Cloud-init completed successfully!" +elif [[ $? -eq 1 ]]; then + echo "Cloud-init crashed or experienced a serious issue." +elif [[ $? -eq 2 ]]; then + echo "Cloud-init completed with errors." +else + echo "Cloud-init command not found." +fi + +# NOTE: (brew) This function will determine whether DNF or APT should be used +# to install packages and will install them. +# Package: scripts/lib/functions.sh ['apt_packages', 'dnf_packages'] +wait_and_install_packages if [ $? -gt 1 ]; then error "Check for ansible errors at ~/genestack-base-package-install.log" else - success "Local base OS packages installed" + success "Local base OS packages installed." 
fi +# Set script to exit on any non-zero error code +set -e + # Install project dependencies -success "Installing genestack dependencies" +success "Configuring genestack directory and overrides directory structure:" test -L "$GENESTACK_CONFIG" 2>&1 || mkdir -p "${GENESTACK_CONFIG}" # Set config @@ -60,14 +75,14 @@ test -d "$GENESTACK_CONFIG/gateway-api" || cp -a "${BASEDIR}/etc/gateway-api" "$ # Create venv and prepare Ansible python3 -m venv "${HOME}/.venvs/genestack" "${HOME}/.venvs/genestack/bin/pip" install pip --upgrade -source "${HOME}/.venvs/genestack/bin/activate" && success "Switched to venv ~/.venvs/genestack" -pip install -r "${BASEDIR}/requirements.txt" && success "Installed ansible package" +source "${HOME}/.venvs/genestack/bin/activate" && success "Switched to venv ~/.venvs/genestack." +pip install -r "${BASEDIR}/requirements.txt" && success "Installed ansible package." ansible-playbook "${BASEDIR}/scripts/get-ansible-collection-requirements.yml" \ -e collections_file="${ANSIBLE_COLLECTION_FILE}" \ -e user_collections_file="${USER_COLLECTION_FILE}" source "${BASEDIR}/scripts/genestack.rc" -success "Environment sourced per ${BASEDIR}/scripts/genestack.rc" +success "Environment sourced per ${BASEDIR}/scripts/genestack.rc." message "OpenStack Release: ${OPENSTACK_RELEASE}" message "Target OS Distro: ${CONTAINER_DISTRO_NAME}:${CONTAINER_DISTRO_VERSION}" @@ -90,24 +105,24 @@ for service in "$base_source_dir"/*; do # If no subdirectories, symlink the service directly under the target dir if [ ! -L "$base_target_dir/$service_name" ]; then ln -s "$service" "$base_target_dir/$service_name" - success "Created symlink for $service_name directly under $base_target_dir" + success "Created symlink for $service_name directly under $base_target_dir." else - message "Symlink for $service_name already exists directly under $base_target_dir" + message "Symlink for $service_name already exists directly under $base_target_dir." 
fi else if [ -d "$base_target_dir/$service_name" ]; then - message "$base_target_dir/$service_name already exists" + message "$base_target_dir/$service_name already exists." else - message "Creating $base_target_dir/$service_name" + message "Creating $base_target_dir/$service_name." mkdir -p "$base_target_dir/$service_name" fi for item in "$service"/*; do item_name=$(basename "$item") if [ ! -L "$base_target_dir/$service_name/$item_name" ]; then ln -s "$item" "$base_target_dir/$service_name/$item_name" - success "Created symlink for $service_name/$item_name" + success "Created symlink for $service_name/$item_name." else - message "Symlink for $service_name/$item_name already exists" + message "Symlink for $service_name/$item_name already exists." fi done fi @@ -137,12 +152,12 @@ for service in "$overlay_target_dir"/*; do if [ ! -d "$overlay_path" ]; then mkdir -p "$overlay_path" - success "Creating overlay path $overlay_path" + success "Creating overlay path $overlay_path." fi if [ ! -f "$overlay_path/kustomization.yaml" ]; then echo "$kustomization_content" > "$overlay_path/kustomization.yaml" - success "Created overlay and kustomization.yaml for $(basename "$service")" + success "Created overlay and kustomization.yaml for $(basename "$service")." else message "kustomization.yaml already exists for $(basename "$service"), skipping..." fi @@ -155,7 +170,7 @@ done if [ ! -d "/etc/genestack/helm-configs" ]; then mkdir -p /etc/genestack/helm-configs - success "Created /etc/genestack/helm-configs" + success "Created /etc/genestack/helm-configs." else message "/etc/genestack/helm-configs already exists, skipping creation." fi @@ -166,7 +181,7 @@ for src_dir in /opt/genestack/base-helm-configs/*; do dest_dir="/etc/genestack/helm-configs/$dir_name" if [ ! -d "$dest_dir" ]; then mkdir -p "$dest_dir" - success "Created $dest_dir" + success "Created $dest_dir." else message "$dest_dir already exists, skipping creation." fi @@ -175,7 +190,7 @@ done if [ ! 
-d "/etc/genestack/helm-configs/global_overrides" ]; then mkdir -p /etc/genestack/helm-configs/global_overrides - echo "Created /etc/genestack/helm-configs/global_overrides" + echo "Created /etc/genestack/helm-configs/global_overrides." else echo "/etc/genestack/helm-configs/global_overrides already exists, skipping creation." fi @@ -183,9 +198,12 @@ fi # Copy manifests if it does not already exist if [ ! -d "/etc/genestack/manifests" ]; then cp -r /opt/genestack/manifests /etc/genestack/ - success "Copied manifests to /etc/genestack/" + success "Copied manifests to /etc/genestack/." else message "manifests already exists in /etc/genestack, skipping copy." fi +# Copy yaml editor to /usr/local/bin +cp /opt/genestack/yaml-editor/ye /usr/local/bin/ye + echo diff --git a/cve/filter.py b/cve/filter.py deleted file mode 100644 index a9f4f3ffe..000000000 --- a/cve/filter.py +++ /dev/null @@ -1,36 +0,0 @@ -import json - -try: - with open("installed.json") as f: - # Only store package names for comparison, ignore versions - installed = {pkg["name"].lower() for pkg in json.load(f)} -except (json.JSONDecodeError, FileNotFoundError): - installed = set() - -print("Installed packages:") -print("\n".join(sorted(installed)) if installed else "No installed packages found") -print("\n" + "=" * 50 + "\n") - -with open("cve/requirements.txt") as f: - requirements = [ - line.strip() for line in f if line.strip() and not line.startswith("#") - ] - -print("Requirements from requirements.txt:") -print("\n".join(requirements) if requirements else "No requirements found") -print("\n" + "=" * 50 + "\n") - -filtered = [] -for req in requirements: - # Only get package name for comparison - pkg_name = req.split("==")[0].strip().lower() - if pkg_name in installed: - # Add the full original requirement (including version) - filtered.append(req) - -print("Filtered requirements (matching installed packages):") -print("\n".join(filtered) if filtered else "No matching packages found") -print("\n" + 
"=" * 50 + "\n") - -with open("filtered-requirements.txt", "w") as f: - f.write("\n".join(filtered)) diff --git a/cve/requirements.txt b/cve/requirements.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/docs/2024.1-to-2025.1.md b/docs/2024.1-to-2025.1.md new file mode 100644 index 000000000..45bb880b3 --- /dev/null +++ b/docs/2024.1-to-2025.1.md @@ -0,0 +1,115 @@ +# Upgrading Genestack from OpenStack 2024.1 (Caracal) to 2025.1 (Epoxy) + +This guide outlines the process for upgrading a Genestack deployment from OpenStack 2024.1 (Caracal) to 2025.1 (Epoxy). Genestack leverages OpenStack-Helm charts for deployment and management, making upgrades primarily a matter of updating the underlying charts and configurations via Git and Helm. + +OpenStack 2025.1 (Epoxy) is a Skip Level Upgrade Release Process (SLURP) release, which supports direct upgrades from the previous SLURP release (Caracal), skipping the intermediate 2024.2 (Dalmatian) release if desired. This simplifies the upgrade path for stable environments. + +## Prerequisites + +- A full backup of your current deployment, including databases, configurations. +- Familiarity with the Genestack installation process and operator documentation. +- Minimum downtime window planned, as some services will require restarts. +- Verify that no active jobs or migrations are running that could conflict with the upgrade. + +## Upgrade Steps + +The upgrade process is similar to a fresh installation but focuses on updating charts and applying revisions. Perform these steps on the management node. + +1. **Navigate to the Genestack Directory:** + + ```bash + cd /opt/genestack + ``` + +2. 
**Update the Git Repository:** + - Fetch the latest changes: + + ```bash + git fetch origin + git tag + git checkout <release-tag> + ``` + - If there is no release tag for Epoxy, then you will need to update the images manually in the overrides. + + ```bash + *** Example *** + + keystone-helm-overrides.yaml + + images: + + tags: + + bootstrap: ghcr.io/aedan/genestack-images/heat:2025.1-1750442748 + + db_drop: ghcr.io/aedan/genestack-images/heat:2025.1-1750442748 + + db_init: ghcr.io/aedan/genestack-images/heat:2025.1-1750442748 + + keystone_api: ghcr.io/aedan/genestack-images/keystone:2025.1-1750442703 + + keystone_credential_cleanup: ghcr.io/aedan/genestack-images/heat:2025.1-1750442748 + + keystone_credential_rotate: ghcr.io/aedan/genestack-images/keystone:2025.1-1750442703 + + keystone_credential_setup: ghcr.io/aedan/genestack-images/keystone:2025.1-1750442703 + + keystone_db_sync: ghcr.io/aedan/genestack-images/keystone:2025.1-1750442703 + + keystone_domain_manage: ghcr.io/aedan/genestack-images/keystone:2025.1-1750442703 + + keystone_fernet_rotate: ghcr.io/aedan/genestack-images/keystone:2025.1-1750442703 + + keystone_fernet_setup: ghcr.io/aedan/genestack-images/keystone:2025.1-1750442703 + + ks_user: ghcr.io/aedan/genestack-images/heat:2025.1-1750442748 +``` + +3. **Handle Pre-Upgrade Cleanups:** + - Clean up Nova jobs to avoid conflicts: + + ```bash + kubectl --namespace openstack delete jobs $(kubectl --namespace openstack get jobs --no-headers -o custom-columns=":metadata.name" | grep nova) + ``` + +4. **Apply Updated Helm Charts:** + - Re-run the Helm deployments for OpenStack components, following your original installation guide. + + ```bash + /opt/genestack/bin/setup-openstack.sh + ``` + - Or apply in sequence: Keystone, Glance, Nova, Neutron, etc., monitoring each for completion. + + Example for a basic OpenStack chart upgrade: + + ```bash + /opt/genestack/bin/install-keystone.sh + /opt/genestack/bin/install-glance.sh + /opt/genestack/bin/install-nova.sh + ``` + +5. 
**Post-Upgrade Verification:** + - Check pod status: + + ```bash + kubectl get pods --all-namespaces + ``` + - Run OpenStack health checks: + + ```bash + openstack endpoint list + openstack compute service list + openstack network agent list + openstack volume service list + ``` + - Test key functionalities (e.g., instance provisioning, networking). + - Monitor logs for errors: + + ```bash + kubectl logs -f + ``` +## Additional Resources + +- [Genestack General Upgrade Guide](https://docs.rackspacecloud.com/genestack-upgrade/) +- [OpenStack-Helm Upgrades](https://docs.openstack.org/openstack-helm/latest/devref/upgrades.html) +- [OpenStack Epoxy Announcement](https://www.prnewswire.com/news-releases/openinfra-foundation-openstack-epoxy-arrives-strengthening-position-as-vmware-alternative-support-for-ai-as-global-demand-surges-302418295.html) diff --git a/docs/alerting-info.md b/docs/alerting-info.md index c79944dc7..c768a697d 100644 --- a/docs/alerting-info.md +++ b/docs/alerting-info.md @@ -81,11 +81,5 @@ We can now take all this information and build out an alerting workflow that sui ## Genestack alerts -This section contains some information on individual Genestack alert. - -### MariaDB backup alert - -Based on a schedule of 6 hours by default, it allows 1 hour to upload and -alerts when MySQL doesn't successfully complete a backup. - -It alerts at warning level the first time this happens, and at critical level the second time this happens. +Genestack supplies default alerts, some of which are configured as part of the prometheus install and some of them come from the exporters deployments directly and are not controlled by Genestack. +View the list of currently defined alerts supplied by genestack at [Genestack Alerts](genestack-alerts.md). 
diff --git a/docs/assets/images/keycloak-client-config.png b/docs/assets/images/keycloak-client-config.png new file mode 100644 index 000000000..efab4b7cb Binary files /dev/null and b/docs/assets/images/keycloak-client-config.png differ diff --git a/docs/assets/images/keycloak-group-mapping.png b/docs/assets/images/keycloak-group-mapping.png new file mode 100644 index 000000000..587b9de17 Binary files /dev/null and b/docs/assets/images/keycloak-group-mapping.png differ diff --git a/docs/genestack-alerts.md b/docs/genestack-alerts.md new file mode 100644 index 000000000..03ce9d130 --- /dev/null +++ b/docs/genestack-alerts.md @@ -0,0 +1,428 @@ + +

Genestack Prometheus Alerts

+ +## Blackbox Alerts +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **Service Down** | Service probe has failed for more than two minutes on (instance {{ $labels.instance }}) | Service probe has failed for more than two minutes.
LABELS = {{ $labels }}
| critical | +| **TLS certificate expiring** | SSL certificate will expire soon on (instance {{ $labels.instance }}) | SSL certificate expires within 30 days.
VALUE = {{ $value }}
LABELS = {{ $labels }}
| warning | +| **TLS certificate expiring** | SSL certificate will expire soon on (instance {{ $labels.instance }}) | SSL certificate expires within 15 days.
VALUE = {{ $value }}
LABELS = {{ $labels }}
| critical | +

🔝 Back to Top

+ +--- + +## Compute Resource Alerts +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **AbnormalInstanceFailures** | Instance build failure rate is abnormally high | This indicates a major problem building compute instances.
View logs and take action to resolve the build failures.
| critical | +| **InstancesStuckInFailureState** | Instances stuck in failure state for a prolonged period | There are instances stuck in a building or error state for a prolonged period
that need to be cleaned up.
| warning | +

🔝 Back to Top

+ +--- + +## Image Resource Alerts +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **AbnormalImageFailures** | Image create failure rate is abnormally high | This indicates a major problem creating images.
View logs and take action to resolve the build failures.
| critical | +| **ImagesStuckInFailureState** | Images stuck in failure state for a prolonged period | There are images stuck in a failure state for a prolonged period
that need to be cleaned up.
| warning | +

🔝 Back to Top

+ +--- + +## Linux MDM device and RAID alerts +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **NodeMdInfoFailedDeviceCritical** | NVME device on Linux software RAID failure info | {{ $labels.name }}
Number MD Failed:{{ $labels.FailedDevices }}
LABELS: {{ $labels }} | critical | +| **NodeMdInfoStateCritical** | Linux software MD RAID State is NOT active\|clean | {{ $labels.name }}
State:{{ $labels.State }}
LABELS: {{ $labels }} | critical | +| **NodeMdInfoSuperblockPersistenceCritical** | Linux software MD Superblock is NOT persistent | {{ $labels.name }}
Persistence:{{ $labels.Persistence }}
LABELS: {{ $labels }} | critical | +| **NodeMdStateCritical** | Linux MDM RAID State is {{ $labels.state }} | {{ $labels.name }}
MD RAID status:{{ $value }}
MD RAID device:{{ $labels.device }}
LABELS: {{ $labels }} | critical | +

🔝 Back to Top

+ +--- + +## MariaDB backup alerts +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **mariadbBackupCritical** | Second successive MariaDB backup not successful within 1 hour of scheduled run | Second successive MariaDB backup not successful within 1 hour of scheduled run.
| critical | +| **mariadbBackupWarning** | Last MariaDB backup not successful within 1 hour of scheduled run | Last MariaDB backup not successful within 1 hour of scheduled run.
| warning | +

🔝 Back to Top

+ +--- + +## Multipath path checker alerts +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **NodeDmpathInfoMultipathCritical** | Multipathd paths are NOT active\|ready and paths are likely orphaned | {{ $labels.name }}
labels: {{ $labels }} | critical | +

🔝 Back to Top

+ +--- + +## Mysql Alerts +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **MysqlDown** | MariaDB down (instance {{ $labels.instance }}) | MariaDB instance is down on {{ $labels.instance }}
VALUE = {{ $value }}
LABELS = {{ $labels }}
| critical | +| **MysqlRestarted** | MySQL restarted (instance {{ $labels.instance }}) | MySQL has just been restarted, less than one minute ago on {{ $labels.instance }}.
VALUE = {{ $value }}
LABELS = {{ $labels }}
| info | +| **MysqlSlowQueries** | MySQL slow queries (instance {{ $labels.instance }}) | MySQL server has some new slow queries.
VALUE = {{ $value }}
LABELS = {{ $labels }}
| warning | +| **MysqlTooManyConnections(>80%)** | Database too many connections (> 90%) (instance {{ $labels.instance }}) | More than 90% of MySQL connections are in use on {{ $labels.instance }}
VALUE = {{ $value }}
LABELS = {{ $labels }}
| warning | +

🔝 Back to Top

+ +--- + +## OVN backup alerts +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **ovnBackupDiskUsageCritical** | OVN backup volume >= 90% disk usage | OVN backup volume >= 90% disk usage.
| critical | +| **ovnBackupDiskUsageWarning** | OVN backup volume >= 80% disk usage | OVN backup volume >= 80% disk usage.
| warning | +| **ovnBackupUploadCritical** | Second successive OVN backup not uploaded within 1 hour of scheduled run | Second successive OVN backup not uploaded within 1 hour of scheduled run.
| critical | +| **ovnBackupUploadWarning** | Last OVN backup not uploaded within 1 hour of scheduled run | Last OVN backup not uploaded within 1 hour of scheduled run.
| warning | +

🔝 Back to Top

+ +--- + +## Octavia Resource Alerts +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **LoadbalancersInError** | Loadbalancer stuck in error state for a prolonged period | This may indicate a potential problem with failover and/or health manager services.
This could also indicate other problems building load balancers in general.
| critical | +

🔝 Back to Top

+ +--- + +## Volume Alerts +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **KubernetesVolumeOutOfDiskSpace** | Kubernetes Volume out of disk space (instance {{ $labels.instance }}) | Volume is almost full (< 20% left).
VALUE = {{ $value }}
LABELS = {{ $labels }}
| warning | +

🔝 Back to Top

+ +--- + +## alertmanager.rules +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **AlertmanagerClusterCrashlooping** | Half or more of the Alertmanager instances within the same cluster are crashlooping. | {{ $value \| humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have restarted at least 5 times in the last 10m. | critical | +| **AlertmanagerClusterDown** | Half or more of the Alertmanager instances within the same cluster are down. | {{ $value \| humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have been up for less than half of the last 5m. | critical | +| **AlertmanagerClusterFailedToSendAlerts** | All Alertmanager instances in a cluster failed to send notifications to a critical integration. | The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value \| humanizePercentage }}. | critical | +| **AlertmanagerClusterFailedToSendAlerts** | All Alertmanager instances in a cluster failed to send notifications to a non-critical integration. | The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value \| humanizePercentage }}. | warning | +| **AlertmanagerConfigInconsistent** | Alertmanager instances within the same cluster have different configurations. | Alertmanager instances within the {{$labels.job}} cluster have different configurations. | critical | +| **AlertmanagerFailedReload** | Reloading an Alertmanager configuration has failed. | Configuration has failed to load for {{ $labels.namespace }}/{{ $labels.pod}}. | critical | +| **AlertmanagerFailedToSendAlerts** | An Alertmanager instance failed to send notifications. | Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed to send {{ $value \| humanizePercentage }} of notifications to {{ $labels.integration }}. 
| warning | +| **AlertmanagerMembersInconsistent** | A member of an Alertmanager cluster has not found all other cluster members. | Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only found {{ $value }} members of the {{$labels.job}} cluster. | critical | +

🔝 Back to Top

+ +--- + +## config-reloaders +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **ConfigReloaderSidecarErrors** | config-reloader sidecar has not had a successful reload for 10m | Errors encountered while the {{$labels.pod}} config-reloader sidecar attempts to sync config in {{$labels.namespace}} namespace.
As a result, configuration for service running in {{$labels.pod}} may be stale and cannot be updated anymore. | warning | +

🔝 Back to Top

+ +--- + +## etcd +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **etcdDatabaseHighFragmentationRatio** | etcd database size in use is less than 50% of the actual allocated storage. | etcd cluster "{{ $labels.job }}": database size in use on instance {{ $labels.instance }} is {{ $value \| humanizePercentage }} of the actual allocated disk space, please run defragmentation (e.g. etcdctl defrag) to retrieve the unused fragmented disk space. | warning | +| **etcdDatabaseQuotaLowSpace** | etcd cluster database is running full. | etcd cluster "{{ $labels.job }}": database size exceeds the defined quota on etcd instance {{ $labels.instance }}, please defrag or increase the quota as the writes to etcd will be disabled when it is full. | critical | +| **etcdExcessiveDatabaseGrowth** | etcd cluster database growing very fast. | etcd cluster "{{ $labels.job }}": Predicting running out of disk space in the next four hours, based on write observations within the past four hours on etcd instance {{ $labels.instance }}, please check as it might be disruptive. | warning | +| **etcdGRPCRequestsSlow** | etcd grpc requests are slow | etcd cluster "{{ $labels.job }}": 99th percentile of gRPC requests is {{ $value }}s on etcd instance {{ $labels.instance }} for {{ $labels.grpc_method }} method. | critical | +| **etcdHighCommitDurations** | etcd cluster 99th percentile commit durations are too high. | etcd cluster "{{ $labels.job }}": 99th percentile commit durations {{ $value }}s on etcd instance {{ $labels.instance }}. | warning | +| **etcdHighFsyncDurations** | etcd cluster 99th percentile fsync durations are too high. | etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}. | warning | +| **etcdHighFsyncDurations** | etcd cluster 99th percentile fsync durations are too high. 
| etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}. | critical | +| **etcdHighNumberOfFailedGRPCRequests** | etcd cluster has high number of failed grpc requests. | etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}. | warning | +| **etcdHighNumberOfFailedGRPCRequests** | etcd cluster has high number of failed grpc requests. | etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}. | critical | +| **etcdHighNumberOfFailedProposals** | etcd cluster has high number of proposal failures. | etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within the last 30 minutes on etcd instance {{ $labels.instance }}. | warning | +| **etcdHighNumberOfLeaderChanges** | etcd cluster has high number of leader changes. | etcd cluster "{{ $labels.job }}": {{ $value }} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated. | warning | +| **etcdInsufficientMembers** | etcd cluster has insufficient number of members. | etcd cluster "{{ $labels.job }}": insufficient members ({{ $value }}). | critical | +| **etcdMemberCommunicationSlow** | etcd cluster member communication is slow. | etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}. | warning | +| **etcdMembersDown** | etcd cluster members are down. | etcd cluster "{{ $labels.job }}": members are down ({{ $value }}). | warning | +| **etcdNoLeader** | etcd cluster has no leader. | etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no leader. | critical | +

🔝 Back to Top

+ +--- + +## fluentbit serviceMonitor alert +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **MissingFluentbitServiceMonitor** | ServiceMonitor 'fluentbit-fluent-bit' is either down or missing. | Check if the Fluentbit ServiceMonitor is properly configured and deployed.
| critical | +

🔝 Back to Top

+ +--- + +## general.rules +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **InfoInhibitor** | Info-level alert inhibition. | This is an alert that is used to inhibit info alerts.
By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with
other alerts.
This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a
severity of 'warning' or 'critical' starts firing on the same namespace.
This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
| none | +| **TargetDown** | One or more targets are unreachable. | {{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down. | warning | +| **Watchdog** | An alert that should always be firing to certify that Alertmanager is working properly. | This is an alert meant to ensure that the entire alerting pipeline is functional.
This alert is always firing, therefore it should always be firing in Alertmanager
and always fire against a receiver. There are integrations with various notification
mechanisms that send a notification when this alert is not firing. For example the
"DeadMansSnitch" integration in PagerDuty.
| none | +

🔝 Back to Top

+ +--- + +## kube-apiserver-slos +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **KubeAPIErrorBudgetBurn** | The API server is burning too much error budget. | The API server is burning too much error budget on cluster {{ $labels.cluster }}. | critical | +| **KubeAPIErrorBudgetBurn** | The API server is burning too much error budget. | The API server is burning too much error budget on cluster {{ $labels.cluster }}. | critical | +| **KubeAPIErrorBudgetBurn** | The API server is burning too much error budget. | The API server is burning too much error budget on cluster {{ $labels.cluster }}. | warning | +| **KubeAPIErrorBudgetBurn** | The API server is burning too much error budget. | The API server is burning too much error budget on cluster {{ $labels.cluster }}. | warning | +

🔝 Back to Top

+ +--- + +## kube-state-metrics +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **KubeStateMetricsListErrors** | kube-state-metrics is experiencing errors in list operations. | kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. | critical | +| **KubeStateMetricsShardingMismatch** | kube-state-metrics sharding is misconfigured. | kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all. | critical | +| **KubeStateMetricsShardsMissing** | kube-state-metrics shards are missing. | kube-state-metrics shards are missing, some Kubernetes objects are not being exposed. | critical | +| **KubeStateMetricsWatchErrors** | kube-state-metrics is experiencing errors in watch operations. | kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. | critical | +

🔝 Back to Top

+ +--- + +## kubernetes-apps +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **KubeContainerWaiting** | Pod container waiting longer than 1 hour | pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour. (reason: "{{ $labels.reason }}") on cluster {{ $labels.cluster }}. | warning | +| **KubeDaemonSetMisScheduled** | DaemonSet pods are misscheduled. | {{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run on cluster {{ $labels.cluster }}. | warning | +| **KubeDaemonSetNotScheduled** | DaemonSet pods are not scheduled. | {{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled on cluster {{ $labels.cluster }}. | warning | +| **KubeDaemonSetRolloutStuck** | DaemonSet rollout is stuck. | DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15m on cluster {{ $labels.cluster }}. | warning | +| **KubeDeploymentGenerationMismatch** | Deployment generation mismatch due to possible roll-back | Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back on cluster {{ $labels.cluster }}. | warning | +| **KubeDeploymentReplicasMismatch** | Deployment has not matched the expected number of replicas. | Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes on cluster {{ $labels.cluster }}. | warning | +| **KubeDeploymentRolloutStuck** | Deployment rollout is not progressing. | Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment }} is not progressing for longer than 15 minutes on cluster {{ $labels.cluster }}. 
| warning | +| **KubeHpaMaxedOut** | HPA is running at max replicas | HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has been running at max replicas for longer than 15 minutes on cluster {{ $labels.cluster }}. | warning | +| **KubeHpaReplicasMismatch** | HPA has not matched desired number of replicas. | HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has not matched the desired number of replicas for longer than 15 minutes on cluster {{ $labels.cluster }}. | warning | +| **KubeJobFailed** | Job failed to complete. | Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert on cluster {{ $labels.cluster }}. | warning | +| **KubeJobNotCompleted** | Job did not complete in time | Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than {{ "43200" \| humanizeDuration }} to complete on cluster {{ $labels.cluster }}. | warning | +| **KubePodCrashLooping** | Pod is crash looping. | Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is in waiting state (reason: "CrashLoopBackOff") on cluster {{ $labels.cluster }}. | warning | +| **KubePodNotReady** | Pod has been in a non-ready state for more than 15 minutes. | Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes on cluster {{ $labels.cluster }}. | warning | +| **KubeStatefulSetGenerationMismatch** | StatefulSet generation mismatch due to possible roll-back | StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back on cluster {{ $labels.cluster }}. | warning | +| **KubeStatefulSetReplicasMismatch** | StatefulSet has not matched the expected number of replicas. 
| StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes on cluster {{ $labels.cluster }}. | warning | +| **KubeStatefulSetUpdateNotRolledOut** | StatefulSet update has not been rolled out. | StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out on cluster {{ $labels.cluster }}. | warning | +

🔝 Back to Top

+ +--- + +## kubernetes-resources +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **CPUThrottlingHigh** | Processes experience elevated CPU throttling. | {{ $value \| humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }} on cluster {{ $labels.cluster }}. | info | +| **KubeCPUOvercommit** | Cluster has overcommitted CPU resource requests. | Cluster {{ $labels.cluster }} has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure. | warning | +| **KubeCPUQuotaOvercommit** | Cluster has overcommitted CPU resource requests. | Cluster {{ $labels.cluster }} has overcommitted CPU resource requests for Namespaces. | warning | +| **KubeMemoryOvercommit** | Cluster has overcommitted memory resource requests. | Cluster {{ $labels.cluster }} has overcommitted memory resource requests for Pods by {{ $value \| humanize }} bytes and cannot tolerate node failure. | warning | +| **KubeMemoryQuotaOvercommit** | Cluster has overcommitted memory resource requests. | Cluster {{ $labels.cluster }} has overcommitted memory resource requests for Namespaces. | warning | +| **KubeQuotaAlmostFull** | Namespace quota is going to be full. | Namespace {{ $labels.namespace }} is using {{ $value \| humanizePercentage }} of its {{ $labels.resource }} quota on cluster {{ $labels.cluster }}. | info | +| **KubeQuotaExceeded** | Namespace quota has exceeded the limits. | Namespace {{ $labels.namespace }} is using {{ $value \| humanizePercentage }} of its {{ $labels.resource }} quota on cluster {{ $labels.cluster }}. | warning | +| **KubeQuotaFullyUsed** | Namespace quota is fully used. | Namespace {{ $labels.namespace }} is using {{ $value \| humanizePercentage }} of its {{ $labels.resource }} quota on cluster {{ $labels.cluster }}. | info | +

🔝 Back to Top

+ +--- + +## kubernetes-storage +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **KubePersistentVolumeErrors** | PersistentVolume is having issues with provisioning. | The persistent volume {{ $labels.persistentvolume }} {{ with $labels.cluster -}} on Cluster {{ . }} {{- end }} has status {{ $labels.phase }}. | critical | +| **KubePersistentVolumeFillingUp** | PersistentVolume is filling up. | The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster {{ . }} {{- end }} is only {{ $value \| humanizePercentage }} free. | critical | +| **KubePersistentVolumeFillingUp** | PersistentVolume is filling up. | Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster {{ . }} {{- end }} is expected to fill up within four days. Currently {{ $value \| humanizePercentage }} is available. | warning | +| **KubePersistentVolumeInodesFillingUp** | PersistentVolumeInodes are filling up. | The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster {{ . }} {{- end }} only has {{ $value \| humanizePercentage }} free inodes. | critical | +| **KubePersistentVolumeInodesFillingUp** | PersistentVolumeInodes are filling up. | Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster {{ . }} {{- end }} is expected to run out of inodes within four days. Currently {{ $value \| humanizePercentage }} of its inodes are free. | warning | +

🔝 Back to Top

+ +--- + +## kubernetes-system +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **KubeClientErrors** | Kubernetes API server client is experiencing errors. | Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value \| humanizePercentage }} errors on cluster {{ $labels.cluster }}. | warning | +| **KubeVersionMismatch** | Different semantic versions of Kubernetes components running. | There are {{ $value }} different semantic versions of Kubernetes components running on cluster {{ $labels.cluster }}. | warning | +

🔝 Back to Top

+ +--- + +## kubernetes-system-apiserver +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **KubeAPIDown** | Target disappeared from Prometheus target discovery. | KubeAPI has disappeared from Prometheus target discovery. | critical | +| **KubeAPITerminatedRequests** | The kubernetes apiserver has terminated {{ $value \| humanizePercentage }} of its incoming requests. | The kubernetes apiserver has terminated {{ $value \| humanizePercentage }} of its incoming requests on cluster {{ $labels.cluster }}. | warning | +| **KubeAggregatedAPIDown** | Kubernetes aggregated API is down. | Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value \| humanize }}% available over the last 10m on cluster {{ $labels.cluster }}. | warning | +| **KubeAggregatedAPIErrors** | Kubernetes aggregated API has reported errors. | Kubernetes aggregated API {{ $labels.instance }}/{{ $labels.name }} has reported {{ $labels.reason }} errors on cluster {{ $labels.cluster }}. | warning | +| **KubeClientCertificateExpiration** | Client certificate is about to expire. | A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days on cluster {{ $labels.cluster }}. | warning | +| **KubeClientCertificateExpiration** | Client certificate is about to expire. | A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours on cluster {{ $labels.cluster }}. | critical | +

🔝 Back to Top

+ +--- + +## kubernetes-system-controller-manager +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **KubeControllerManagerDown** | Target disappeared from Prometheus target discovery. | KubeControllerManager has disappeared from Prometheus target discovery. | critical | +

🔝 Back to Top

+ +--- + +## kubernetes-system-kube-proxy +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **KubeProxyDown** | Target disappeared from Prometheus target discovery. | KubeProxy has disappeared from Prometheus target discovery. | critical | +

🔝 Back to Top

+ +--- + +## kubernetes-system-kubelet +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **KubeNodeEviction** | Node is evicting pods. | Node {{ $labels.node }} on {{ $labels.cluster }} is evicting Pods due to {{ $labels.eviction_signal }}. Eviction occurs when eviction thresholds are crossed, typically caused by Pods exceeding RAM/ephemeral-storage limits. | info | +| **KubeNodeNotReady** | Node is not ready. | {{ $labels.node }} has been unready for more than 15 minutes on cluster {{ $labels.cluster }}. | warning | +| **KubeNodePressure** | Node has an active Condition. | {{ $labels.node }} on cluster {{ $labels.cluster }} has active Condition {{ $labels.condition }}. This is caused by resource usage exceeding eviction thresholds. | info | +| **KubeNodeReadinessFlapping** | Node readiness status is flapping. | The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes on cluster {{ $labels.cluster }}. | warning | +| **KubeNodeUnreachable** | Node is unreachable. | {{ $labels.node }} is unreachable and some workloads may be rescheduled on cluster {{ $labels.cluster }}. | warning | +| **KubeletClientCertificateExpiration** | Kubelet client certificate is about to expire. | Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value \| humanizeDuration }} on cluster {{ $labels.cluster }}. | warning | +| **KubeletClientCertificateExpiration** | Kubelet client certificate is about to expire. | Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value \| humanizeDuration }} on cluster {{ $labels.cluster }}. | critical | +| **KubeletClientCertificateRenewalErrors** | Kubelet has failed to renew its client certificate. | Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value \| humanize }} errors in the last 5 minutes) on cluster {{ $labels.cluster }}. 
| warning | +| **KubeletDown** | Target disappeared from Prometheus target discovery. | Kubelet has disappeared from Prometheus target discovery. | critical | +| **KubeletPlegDurationHigh** | Kubelet Pod Lifecycle Event Generator is taking too long to relist. | The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }} on cluster {{ $labels.cluster }}. | warning | +| **KubeletPodStartUpLatencyHigh** | Kubelet Pod startup latency is too high. | Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }} on cluster {{ $labels.cluster }}. | warning | +| **KubeletServerCertificateExpiration** | Kubelet server certificate is about to expire. | Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value \| humanizeDuration }} on cluster {{ $labels.cluster }}. | warning | +| **KubeletServerCertificateExpiration** | Kubelet server certificate is about to expire. | Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value \| humanizeDuration }} on cluster {{ $labels.cluster }}. | critical | +| **KubeletServerCertificateRenewalErrors** | Kubelet has failed to renew its server certificate. | Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value \| humanize }} errors in the last 5 minutes) on cluster {{ $labels.cluster }}. | warning | +| **KubeletTooManyPods** | Kubelet is running at capacity. | Kubelet '{{ $labels.node }}' is running at {{ $value \| humanizePercentage }} of its Pod capacity on cluster {{ $labels.cluster }}. | info | +

🔝 Back to Top

+ +--- + +## kubernetes-system-scheduler +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **KubeSchedulerDown** | Target disappeared from Prometheus target discovery. | KubeScheduler has disappeared from Prometheus target discovery. | critical | +

🔝 Back to Top

+ +--- + +## mariadb-alerts +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **MariaDBDown** | MariaDB not up and running, immediate attention is required. | MariaDB {{$labels.job}} on {{$labels.instance}} is not up. | critical | +| **MariaDBReplicationErrors** | MariaDB is reporting replication errors from {{$labels.instance}}, immediate attention is required. | MariaDB {{$labels.job}} on {{$labels.instance}} is reporting replication errors. | critical | +| **MysqlSlaveReplicationLag** | MySQL Slave replication lag (instance {{ $labels.instance }}) | MySQL replication lag on {{ $labels.instance }}
VALUE = {{ $value }}
LABELS = {{ $labels }} | critical | +| **MysqlTooManyConnections(>80%)** | MySQL too many connections (> 80%) (instance {{ $labels.instance }}) | More than 80% of MySQL connections are in use on {{ $labels.instance }}
VALUE = {{ $value }}
LABELS = {{ $labels }} | warning | +

🔝 Back to Top

+ +--- + +## node-exporter +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **NodeBondingDegraded** | Bonding interface is degraded | Bonding interface {{ $labels.master }} on {{ $labels.instance }} is in degraded state due to one or more slave failures. | warning | +| **NodeCPUHighUsage** | High CPU usage. | CPU usage at {{ $labels.instance }} has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
| info | +| **NodeClockNotSynchronising** | Clock not synchronising. | Clock at {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host. | warning | +| **NodeClockSkewDetected** | Clock skew detected. | Clock at {{ $labels.instance }} is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host. | warning | +| **NodeDiskIOSaturation** | Disk IO queue is high. | Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}.
This symptom might indicate disk saturation.
| warning | +| **NodeFileDescriptorLimit** | Kernel is predicted to exhaust file descriptors limit soon. | File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%. | warning | +| **NodeFileDescriptorLimit** | Kernel is predicted to exhaust file descriptors limit soon. | File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%. | critical | +| **NodeFilesystemAlmostOutOfFiles** | Filesystem has less than 5% inodes left. | Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. | warning | +| **NodeFilesystemAlmostOutOfFiles** | Filesystem has less than 3% inodes left. | Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. | critical | +| **NodeFilesystemAlmostOutOfSpace** | Filesystem has less than 5% space left. | Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. | warning | +| **NodeFilesystemAlmostOutOfSpace** | Filesystem has less than 3% space left. | Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. | critical | +| **NodeFilesystemFilesFillingUp** | Filesystem is predicted to run out of inodes within the next 24 hours. | Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up. | warning | +| **NodeFilesystemFilesFillingUp** | Filesystem is predicted to run out of inodes within the next 4 hours. 
| Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast. | critical | +| **NodeFilesystemSpaceFillingUp** | Filesystem is predicted to run out of space within the next 24 hours. | Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up. | warning | +| **NodeFilesystemSpaceFillingUp** | Filesystem is predicted to run out of space within the next 4 hours. | Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast. | critical | +| **NodeHighNumberConntrackEntriesUsed** | Number of conntrack are getting close to the limit. | {{ $labels.instance }} {{ $value \| humanizePercentage }} of conntrack entries are used. | warning | +| **NodeMemoryHighUtilization** | Host is running out of memory. | Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
| warning | +| **NodeMemoryMajorPagesFaults** | Memory major page faults are occurring at a very high rate. | Memory major page faults are occurring at a very high rate at {{ $labels.instance }}, 500 major page faults per second for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
Please check that there is enough memory available at this instance.
| warning | +| **NodeNetworkReceiveErrs** | Network interface is reporting many receive errors. | {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes. | warning | +| **NodeNetworkTransmitErrs** | Network interface is reporting many transmit errors. | {{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes. | warning | +| **NodeRAIDDegraded** | RAID Array is degraded. | RAID array '{{ $labels.device }}' at {{ $labels.instance }} is in degraded state due to one or more disk failures. Number of spare drives is insufficient to fix issue automatically. | critical | +| **NodeRAIDDiskFailure** | Failed device in RAID array. | At least one device in RAID array at {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap. | warning | +| **NodeSystemSaturation** | System saturated, load per core is very high. | System load per core at {{ $labels.instance }} has been above 2 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
This might indicate resource saturation on this instance and can cause it to become unresponsive.
| warning | +| **NodeSystemdServiceCrashlooping** | Systemd service keeps restarting, possibly crash looping. | Systemd service {{ $labels.name }} has been restarted too many times at {{ $labels.instance }} for the last 15 minutes. Please check if service is crash looping. | warning | +| **NodeSystemdServiceFailed** | Systemd service has entered failed state. | Systemd service {{ $labels.name }} has entered failed state at {{ $labels.instance }} | warning | +| **NodeTextFileCollectorScrapeError** | Node Exporter text file collector failed to scrape. | Node Exporter text file collector on {{ $labels.instance }} failed to scrape. | warning | +

🔝 Back to Top

+ +--- + +## node-network +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **NodeNetworkInterfaceFlapping** | Network interface is often changing its status | Network interface "{{ $labels.device }}" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }} | warning | +

🔝 Back to Top

+ +--- + +## pod-state-alerts +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **HighPodRestartRate** | High pod restart count detected | Pod {{ $labels.pod }} in namespace {{ $labels.namespace }} is restarting frequently, which may indicate network instability. | warning | +| **KubePodNotReadyCritical** | Pod has been in a non-ready state for more than 5 minutes. | Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 5 minutes. | critical | +| **TooManyContainerRestarts** | Container named {{ $labels.container }} in {{ $labels.pod }} in {{ $labels.namespace }} has restarted too many times in a short period and needs to be investigated. | Namespace: {{$labels.namespace}}
Pod name: {{$labels.pod}}
Container name: {{$labels.container}}
| critical | +

🔝 Back to Top

+ +--- + +## prometheus +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **PrometheusBadConfig** | Failed Prometheus configuration reload. | Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to reload its configuration. | critical | +| **PrometheusDuplicateTimestamps** | Prometheus is dropping samples with duplicate timestamps. | Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp. | warning | +| **PrometheusErrorSendingAlertsToAnyAlertmanager** | Prometheus encounters more than 3% errors sending alerts to any Alertmanager. | {{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager. | critical | +| **PrometheusErrorSendingAlertsToSomeAlertmanagers** | More than 1% of alerts sent by Prometheus to a specific Alertmanager were affected by errors. | {{ printf "%.1f" $value }}% of alerts sent by Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}} were affected by errors. | warning | +| **PrometheusHighQueryLoad** | Prometheus is reaching its maximum capacity serving concurrent requests. | Prometheus {{$labels.namespace}}/{{$labels.pod}} query API has less than 20% available capacity in its query engine for the last 15 minutes. | warning | +| **PrometheusKubernetesListWatchFailures** | Requests in Kubernetes SD are failing. | Kubernetes service discovery of Prometheus {{$labels.namespace}}/{{$labels.pod}} is experiencing {{ printf "%.0f" $value }} failures with LIST/WATCH requests to the Kubernetes API in the last 5 minutes. | warning | +| **PrometheusLabelLimitHit** | Prometheus has dropped targets because some scrape configs have exceeded the labels limit. 
| Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit. | warning | +| **PrometheusMissingRuleEvaluations** | Prometheus is missing rule evaluations due to slow rule group evaluation. | Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m. | warning | +| **PrometheusNotConnectedToAlertmanagers** | Prometheus is not connected to any Alertmanagers. | Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers. | warning | +| **PrometheusNotIngestingSamples** | Prometheus is not ingesting samples. | Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples. | warning | +| **PrometheusNotificationQueueRunningFull** | Prometheus alert notification queue predicted to run full in less than 30m. | Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} is running full. | warning | +| **PrometheusOutOfOrderTimestamps** | Prometheus drops samples with out-of-order timestamps. | Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order. | warning | +| **PrometheusRemoteStorageFailures** | Prometheus fails to send samples to remote storage. | Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }} | critical | +| **PrometheusRemoteWriteBehind** | Prometheus remote write is behind. | Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}. | critical | +| **PrometheusRemoteWriteDesiredShards** | Prometheus remote write desired shards calculation wants to run more than configured max shards. 
| Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="kube-prometheus-stack-prometheus",namespace="prometheus"}` $labels.instance \| query \| first \| value }}. | warning | +| **PrometheusRuleFailures** | Prometheus is failing rule evaluations. | Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m. | critical | +| **PrometheusSDRefreshFailure** | Failed Prometheus SD refresh. | Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to refresh SD with mechanism {{$labels.mechanism}}. | warning | +| **PrometheusScrapeBodySizeLimitHit** | Prometheus has dropped some targets that exceeded body size limit. | Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured body_size_limit. | warning | +| **PrometheusScrapeSampleLimitHit** | Prometheus has failed scrapes that have exceeded the configured sample limit. | Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured sample_limit. | warning | +| **PrometheusTSDBCompactionsFailing** | Prometheus has issues compacting blocks. | Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value \| humanize}} compaction failures over the last 3h. | warning | +| **PrometheusTSDBReloadsFailing** | Prometheus has issues reloading blocks from disk. | Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value \| humanize}} reload failures over the last 3h. | warning | +| **PrometheusTargetLimitHit** | Prometheus has dropped targets because some scrape configs have exceeded the targets limit. 
| Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because the number of targets exceeded the configured target_limit. | warning | +| **PrometheusTargetSyncFailure** | Prometheus has failed to sync targets. | {{ printf "%.0f" $value }} targets in Prometheus {{$labels.namespace}}/{{$labels.pod}} have failed to sync because invalid configuration was supplied. | critical | +

🔝 Back to Top

+ +--- + +## prometheus-operator +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **PrometheusOperatorListErrors** | Errors while performing list operations in controller. | Errors while performing List operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace. | warning | +| **PrometheusOperatorNodeLookupErrors** | Errors while reconciling Prometheus. | Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace. | warning | +| **PrometheusOperatorNotReady** | Prometheus operator not ready | Prometheus operator in {{ $labels.namespace }} namespace isn't ready to reconcile {{ $labels.controller }} resources. | warning | +| **PrometheusOperatorReconcileErrors** | Errors while reconciling objects. | {{ $value \| humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace. | warning | +| **PrometheusOperatorRejectedResources** | Resources rejected by Prometheus operator | Prometheus operator in {{ $labels.namespace }} namespace rejected {{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource }} resources. | warning | +| **PrometheusOperatorStatusUpdateErrors** | Errors while updating objects status. | {{ $value \| humanizePercentage }} of status update operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace. | warning | +| **PrometheusOperatorSyncFailed** | Last controller reconciliation failed | Controller {{ $labels.controller }} in {{ $labels.namespace }} namespace fails to reconcile {{ $value }} objects. | warning | +| **PrometheusOperatorWatchErrors** | Errors while performing watch operations in controller. | Errors while performing watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace. | warning | +

🔝 Back to Top

+ +--- + +## rabbitmq +| Alert Name | Summary | Description | Severity | +| :--- | :--- | :--- | :--- | +| **ContainerRestarts** | Investigate why the container got restarted.
Check the logs of the current container: `kubectl -n {{ $labels.namespace }} logs {{ $labels.pod }}`
Check the logs of the previous container: `kubectl -n {{ $labels.namespace }} logs {{ $labels.pod }} --previous`
Check the last state of the container: `kubectl -n {{ $labels.namespace }} get pod {{ $labels.pod }} -o jsonpath='{.status.containerStatuses[].lastState}'`
| Over the last 10 minutes, container `{{ $labels.container }}`
restarted `{{ $value \| printf "%.0f" }}` times in pod `{{ $labels.pod }}` of RabbitMQ cluster
`{{ $labels.rabbitmq_cluster }}` in namespace `{{ $labels.namespace }}`.
| warning | +| **FileDescriptorsNearLimit** | More than 80% of file descriptors are used on the RabbitMQ node.
When this value reaches 100%, new connections will not be accepted and disk write operations may fail.
Client libraries, peer nodes and CLI tools will not be able to connect when the node runs out of available file descriptors.
See https://www.rabbitmq.com/production-checklist.html#resource-limits-file-handle-limit.
| `{{ $value \| humanizePercentage }}` file descriptors of file
descriptor limit are used in RabbitMQ node `{{ $labels.rabbitmq_node }}`,
pod `{{ $labels.pod }}`, RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}`,
namespace `{{ $labels.namespace }}`.
| warning | +| **HighConnectionChurn** | More than 10% of total connections are churning.
This means that client application connections are short-lived instead of long-lived.
Read https://www.rabbitmq.com/connections.html#high-connection-churn to understand why this is an anti-pattern.
| Over the last 5 minutes, `{{ $value \| humanizePercentage }}`
of total connections are closed or opened per second in RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}`
in namespace `{{ $labels.namespace }}`.
| warning | +| **InsufficientEstablishedErlangDistributionLinks** | RabbitMQ clusters have a full mesh topology.
All RabbitMQ nodes connect to all other RabbitMQ nodes in both directions.
The expected number of established Erlang distribution links is therefore `n*(n-1)` where `n` is the number of RabbitMQ nodes in the cluster.
Therefore, the expected number of distribution links is `0` for a 1-node cluster, `6` for a 3-node cluster, and `20` for a 5-node cluster.
This alert reports that the number of established distribution links is less than the expected number.
Some reasons for this alert include failed network links, network partitions, failed clustering (i.e. nodes can't join the cluster).
Check the panels `All distribution links`, `Established distribution links`, `Connecting distributions links`, `Waiting distribution links`, and `distribution links`
of the Grafana dashboard `Erlang-Distribution`.
Check the logs of the RabbitMQ nodes: `kubectl -n {{ $labels.namespace }} logs -l app.kubernetes.io/component=rabbitmq,app.kubernetes.io/name={{ $labels.rabbitmq_cluster }}`
| There are only `{{ $value }}` established Erlang distribution links
in RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}` in namespace `{{ $labels.namespace }}`.
| warning | +| **LowDiskWatermarkPredicted** | Based on the trend of available disk space over the past 24 hours, it's predicted that, in 24 hours from now, a disk alarm will be triggered since the free disk space will drop below the free disk space limit.
This alert is reported for the partition where the RabbitMQ data directory is stored.
When the disk alarm is triggered, all publishing connections across all cluster nodes will be blocked.
See
https://www.rabbitmq.com/alarms.html,
https://www.rabbitmq.com/disk-alarms.html,
https://www.rabbitmq.com/production-checklist.html#resource-limits-disk-space,
https://www.rabbitmq.com/persistence-conf.html,
https://www.rabbitmq.com/connection-blocked.html.
| The predicted free disk space in 24 hours from now is `{{ $value \| humanize1024 }}B`
in RabbitMQ node `{{ $labels.rabbitmq_node }}`, pod `{{ $labels.pod }}`,
RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}`, namespace `{{ $labels.namespace }}`.
| warning | +| **MemoryAlarm** | A RabbitMQ node reached the `vm_memory_high_watermark` threshold.
See https://www.rabbitmq.com/docs/alarms#overview, https://www.rabbitmq.com/docs/memory.
| RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}` memory alarm active. Publishers are blocked.
| warning | +| **NoMajorityOfNodesReady** | No majority of nodes have been ready for the last 5 minutes.
Check the details of the pods:
`kubectl -n {{ $labels.namespace }} describe pods -l app.kubernetes.io/component=rabbitmq,app.kubernetes.io/name={{ $labels.label_app_kubernetes_io_name }}`
| Only `{{ $value }}` replicas are ready in StatefulSet `{{ $labels.statefulset }}`
of RabbitMQ cluster `{{ $labels.label_app_kubernetes_io_name }}` in namespace `{{ $labels.namespace }}`.
| warning | +| **PersistentVolumeMissing** | RabbitMQ needs a PersistentVolume for its data.
However, there is no PersistentVolume bound to the PersistentVolumeClaim.
This means the requested storage could not be provisioned.
Check the status of the PersistentVolumeClaim: `kubectl -n {{ $labels.namespace }} describe pvc {{ $labels.persistentvolumeclaim }}`.
| PersistentVolumeClaim `{{ $labels.persistentvolumeclaim }}` of
RabbitMQ cluster `{{ $labels.label_app_kubernetes_io_name }}` in namespace
`{{ $labels.namespace }}` is not bound.
| critical | +| **QueueHasNoConsumers** | Messages are sitting idle in the queue, without any processing.
This alert is highly application specific (and e.g. doesn't make sense for stream queues).
| Over the last 10 minutes, non-empty queue `{{ $labels.queue }}` with {{ $value }} messages
in virtual host `{{ $labels.vhost }}` didn't have any consumers in
RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}` in namespace `{{ $labels.namespace }}`.
| warning | +| **QueueIsGrowing** | Queue size is steadily growing over time.
| Over the last 10 minutes, queue `{{ $labels.queue }}` in virtual host `{{ $labels.vhost }}`
was growing. 10 minute moving average has grown by {{ $value }}.
This happens in RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}` in namespace `{{ $labels.namespace }}`.
| warning | +| **RabbitmqDiskAlarm** | A RabbitMQ node reached the `disk_free_limit` threshold.
See https://www.rabbitmq.com/docs/alarms#overview, https://www.rabbitmq.com/docs/disk-alarms.
| RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}` disk alarm active. Publishers are blocked.
| warning | +| **RabbitmqFileDescriptorAlarm** | A RabbitMQ node ran out of file descriptors.
See https://www.rabbitmq.com/docs/alarms#file-descriptors.
| RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}` file descriptor alarm active. Publishers are blocked.
| warning | +| **TCPSocketsNearLimit** | More than 80% of TCP sockets are open on the RabbitMQ node.
When this value reaches 100%, new connections will not be accepted.
Client libraries, peer nodes and CLI tools will not be able to connect when the node runs out of available TCP sockets.
See https://www.rabbitmq.com/networking.html.
| `{{ $value \| humanizePercentage }}` TCP sockets of TCP socket
limit are open in RabbitMQ node `{{ $labels.rabbitmq_node }}`, pod `{{ $labels.pod }}`,
RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}`, namespace `{{ $labels.namespace }}`.
| warning | +| **UnroutableMessages** | There are messages published into an exchange which cannot be routed and are either dropped silently, or returned to publishers.
Is your routing topology set up correctly?
Check your application code and bindings between exchanges and queues.
See
https://www.rabbitmq.com/publishers.html#unroutable,
https://www.rabbitmq.com/confirms.html#when-publishes-are-confirmed.
| There were `{{ $value \| printf "%.0f" }}` unroutable messages within the last
5 minutes in RabbitMQ cluster `{{ $labels.rabbitmq_cluster }}` in namespace
`{{ $labels.namespace }}`.
| warning | +

🔝 Back to Top

+ +--- + diff --git a/docs/import-grafana-dashboard.md b/docs/import-grafana-dashboard.md new file mode 100644 index 000000000..6c294c346 --- /dev/null +++ b/docs/import-grafana-dashboard.md @@ -0,0 +1,38 @@ +# Grafana Dashboard Import Script + +This script helps you **import Grafana dashboards** from a local directory that contains JSON files. Each file must contain a valid Grafana dashboard definition. + + +## Prerequisites +- A running [monitoring stack](https://github.com/rackerlabs/genestack/blob/main/docs/monitoring-info.md) +- Dashboards exported as valid [JSON files](https://github.com/rackerlabs/genestack/tree/main/etc/grafana-dashboards) + +## Environment Variables +Set the following environment variables before running the script: + +| Variable | Required | Description | Default | +|-------------------|----------|-------------------------------------------------------|---------------------------------| +| `GRAFANA_PASSWORD`| True | Grafana admin password | None. | +| `GRAFANA_USERNAME`| False | Grafana admin username | `admin` | +| `GRAFANA_URL` | False | URL of your Grafana instance | `http://grafana.grafana.svc.cluster.local:80` | + + +## Usage +```bash +# python import_dashboard.py -h +usage: import_dashboard.py [-h] -d DIR [-ds DATASOURCE] + +Import Grafana dashboards from a local directory. + +options: + -h, --help show this help message and exit + -d DIR, --dir DIR Path to directory containing dashboard JSON files + -ds DATASOURCE, --datasource DATASOURCE + Name of the Prometheus datasource. 
Default: "Prometheus" + +export GRAFANA_USERNAME=admin +export GRAFANA_URL=https://grafana.sjc3.rackspacecloud.com +export GRAFANA_PASSWORD=your_admin_password + +python import_dashboards.py --dir /opt/genestack/etc/grafana-dashboards/ --datasource Prometheus +``` diff --git a/docs/infrastructure-envoy-gateway-api-security.md b/docs/infrastructure-envoy-gateway-api-security.md new file mode 100644 index 000000000..829d792dd --- /dev/null +++ b/docs/infrastructure-envoy-gateway-api-security.md @@ -0,0 +1,127 @@ +# Security Policies + +From [Envoy documentation](https://gateway.envoyproxy.io/docs/concepts/introduction/gateway_api_extensions/security-policy/): + +SecurityPolicy is an Envoy Gateway extension to the Kubernetes Gateway API that allows you to define authentication and authorization requirements for traffic entering your gateway. It acts as a security layer that only properly authenticated and authorized requests are allowed through your backend services. + +In this section we will be implementing [oidc](https://gateway.envoyproxy.io/docs/tasks/security/oidc/) authentication to auth using Azure AD. + +!!! note "You must have deployed Envoy Gateway already and installed the CRDs before this will work" + +## Create the HTTPRoute + +!!! note "The examples used here reference alertmanager. 
You will change the settings as necessary for your application/s" + +``` yaml title="alertmanager-gw-route.yaml" +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + annotations: + name: alertmanager-gateway-route + namespace: prometheus +spec: + hostnames: + - alertmanager.example.com + parentRefs: + - group: gateway.networking.k8s.io + kind: Gateway + name: flex-internal-gateway + namespace: internal + sectionName: am-https + rules: + - backendRefs: + - group: "" + kind: Service + name: kube-prometheus-stack-alertmanager + port: 9093 + weight: 1 + matches: + - path: + type: PathPrefix + value: / +``` + +`kubectl apply -f alertmanager-gw-route.yaml` + +### Check/update your listener + +Make sure you have a listener configured on your gateway for the HTTPRoute you created. As an example, you should have something like the following in your gateway configuration: + +``` yaml + - allowedRoutes: + namespaces: + from: All + hostname: alertmanager.example.com + name: am-https + port: 443 + protocol: HTTPS + tls: + certificateRefs: + - group: "" + kind: Secret + name: alertmanager-envoy-secret + mode: Terminate +``` + + +## Register an OIDC application + +Registering the Azure OIDC application is beyond the scope of this article. You will need to add a redirect url and you will need to know your client and tenant ids as well as your client secret. Once you have all that information, you may proceed to configuring the Kubernetes secret and security policy. + +## Kubernetes secret + +You will need to create a kubernetes secret that contains the client secret for your Azure application. You can either use a yaml file or paste the secret on the command line. 
+ +=== "CLI" + ``` shell + read -s CLIENT_SECRET + read -p "Please enter the application namespace: " APP_NAMESPACE + read -p "Please enter the application name: " APP_NAME + kubectl -n ${APP_NAMESPACE} create secret generic azuread-client-secret-${APP_NAME} --from-literal=client-secret=${CLIENT_SECRET} + ``` + +=== "YAML" + ``` yaml title="azuread-client-secret-APP_NAME.yaml" + apiVersion: v1 + data: + client-secret: + kind: Secret + metadata: + name: azuread-client-secret + namespace: + type: Opaque + ``` + + `kubectl apply -f azuread-client-secret-` + + +## Create the Security Policy + +``` yaml title="alertmanager-sp.yaml" +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: SecurityPolicy +metadata: + annotations: + generation: 1 + name: azuread-oidc-policy + namespace: +spec: + oidc: + clientID: + clientSecret: + group: "" + kind: Secret + name: azuread-client-secret- + logoutPath: //logout + provider: + issuer: https://login.microsoftonline.com//v2.0 + redirectURL: https://alertmanager.example.com//oauth2/callback + targetRefs: + - group: gateway.networking.k8s.io + kind: HTTPRoute + name: +``` + +`kubectl apply -f alertmanager-sp.yaml` + +!!! note "Your redirect URL in the SecurityPolicy must match what you configured in your OIDC application" diff --git a/docs/infrastructure-envoy-gateway-api.md b/docs/infrastructure-envoy-gateway-api.md index 415ff5b5f..20ed138be 100644 --- a/docs/infrastructure-envoy-gateway-api.md +++ b/docs/infrastructure-envoy-gateway-api.md @@ -60,5 +60,5 @@ kubectl -n envoy-gateway get gateways.gateway.networking.k8s.io flex-gateway If you encounter any issues, check the logs of the `envoy-gateway` deployment. 
``` shell -kubectl logs -n envoy-gateway-system deployment/envoy-gateway +kubectl logs -n envoyproxy-gateway-system deployment/envoy-gateway ``` diff --git a/docs/infrastructure-metallb.md b/docs/infrastructure-metallb.md index 9d0131af2..707b5e82b 100644 --- a/docs/infrastructure-metallb.md +++ b/docs/infrastructure-metallb.md @@ -4,6 +4,11 @@ The MetalLb loadbalancer can be setup by editing the following file `metallb-ope your "external" VIP(s) to the loadbalancer so that they can be used within services. These IP addresses are unique and will need to be customized to meet the needs of your environment. +!!! tip + + When L2Advertisement is used, you should use a CIDR that is not overlapping with any local interface CIDR. + This also enables later migration to BGP advertisement. + ## Create the MetalLB namespace ``` shell @@ -37,8 +42,28 @@ Verify the deployment of MetalLB by checking the pods in the `metallb-system` na kubectl --namespace metallb-system get deployment.apps/metallb-controller ``` -Once MetalLB is operatoinal, apply the metallb service manifest. +Once MetalLB is operational, apply the metallb service manifest. ``` shell kubectl apply -f /etc/genestack/manifests/metallb/metallb-openstack-service-lb.yml ``` + +## Re-IP the advertisement pools +In situations where the advertisement pools must be changed, the following disruptive procedure can be used: + +Update existing metallb configuration: + +```shell +kubectl -n metallb-system delete IPAddressPool/primary +kubectl -n metallb-system delete IPAddressPool/gateway-api-external +kubectl apply -f /etc/genestack/manifests/metallb/metallb-openstack-service-lb.yml +``` + +Restart the metallb controller: + +```shell +kubectl rollout restart deployment metallb-controller -n metallb-system +``` + +Once the metallb controller restarts it'll begin to re-IP the external service IP associations which typically +requires DNS entry updates. This change including the DNS refresh (TTL) time will be disruptive. 
diff --git a/docs/infrastructure-namespace.md b/docs/infrastructure-namespace.md index aa3046cbd..31831c66b 100644 --- a/docs/infrastructure-namespace.md +++ b/docs/infrastructure-namespace.md @@ -11,9 +11,9 @@ Then you can create all needed secrets by running the create-secrets.sh command !!! tip "Optional --region param" Note that the `create-secrets.sh` script by default creates a secret - with a default region of RegionOne. This can be overridden with the + with a default region of *RegionOne*. This can be overridden with the `--region` parameter to specify your custom region name in Keystone. - > Usage: ./create-secrets.sh [--region default: RegionOne] + > Usage: ./create-secrets.sh [--region ] ``` shell /opt/genestack/bin/create-secrets.sh diff --git a/docs/infrastructure-ovn-setup.md b/docs/infrastructure-ovn-setup.md index 7229669e7..b66b27357 100644 --- a/docs/infrastructure-ovn-setup.md +++ b/docs/infrastructure-ovn-setup.md @@ -99,11 +99,12 @@ kubectl annotate \ ### Set `ovn.openstack.org/gateway` Define where the gateways nodes will reside. There are many ways to run this, some like every compute node to be a gateway, some like dedicated gateway hardware. Either way you will need at least one gateway node within your environment. +NOTE: In the following example, we will apply the 'ovn.openstack.org/gateway' to dedicated network nodes. You will want to change the node filter to the specific nodes you wish to use as ovn gateway nodes. 
``` shell kubectl annotate \ nodes \ - -l openstack-network-node=enabled \ + $(kubectl get nodes | awk '/network/ {print $1}') \ ovn.openstack.org/gateway='enabled' ``` diff --git a/docs/maridb-backuprestore-from-tempauth.md b/docs/maridb-backuprestore-from-tempauth.md new file mode 100644 index 000000000..6d53c21ce --- /dev/null +++ b/docs/maridb-backuprestore-from-tempauth.md @@ -0,0 +1,185 @@ +# MariaDB Restore Procedures with Swift Tempauth + +This document provides procedures to restore MariaDB backups stored in Rackspace's Swift object storage with tempauth for the production environments: DFW, SJC, and IAD as part of Jira:OSPC-1141. It details two methods: using the Kubernetes Restore CRD with the MariaDB Operator and a manual restore using AWS S3 commands. These procedures ensure recovery from backups in the mariadb-backups container, based on the reference document present. + +## 1. Prerequisites + ### Software: + - Kubernetes CLI (`kubectl`) installed and configured with access to the respective production cluster (DFW, SJC, IAD). + - AWS CLI installed on the overseer node for each production environment (`pip install awscli awscli-plugin-endpoint`). + ### Credentials: + - Kubernetes secret (e.g., `dfw-credentials`, `sjc-credentials`, `iad-credentials`) with `access-key-id` and `secret-access-key` keys, generated via `openstack ec2 credentials create`. + - AWS CLI profiles (e.g., `dfw_admin`, `sjc_admin`, `iad_admin`) configured on the respective overseers. + ### Environment: + - Access to the Kubernetes cluster and overseer node for each production region. + - Network access to the region-specific Swift endpoint. + - MariaDB Operator deployed in each cluster with a `mariadb` resource. + +## 2. Backup/Restore Flow +```mermaid +graph TD + + subgraph Locations + I[DFW] + J[SJC] + K[IAD] + end + + A["Kubernetes Cluster
DFW, SJC, IAD"] --> B[MariaDB Instances] + B -->|Backup Data| C[MariaDB Operator] + C -->|Create Backup| D[Backup CRD] + D -->|Store Backup| E["Swift Object Storage
mariadb-backups"] + E -->|Retrieve Backup| F[Restore CRD] + F -->|Restore Data| C + C -->|Restore to MariaDB| B + E -->|Download Backup| G[Overseer Nodes] + G -->|Execute Restore| H[AWS CLI] + H -->|Restore to MariaDB| B + + I --> A + J --> A + K --> A +``` + +## 3. Restore Using Kubernetes `Restore` CRD +### CRD(Custom Resource Definition) +- The Restore CRD is a Custom Resource Definition, a Kubernetes feature that extends the API to define custom resources for managing restore operations. For detailed information on CRDs, refer to the [Kubernetes Documentation on Custom Resources](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/). + +This method automates the restore process using the MariaDB Operator, applicable to all production regions. + + ### 3.1 Backup and Restore of Specific Databases + Backup Context: Backups are created with the Backup resource, which by default includes all logical databases. To back up specific databases, the databases field can be used (e.g., db1, db2, db3), influencing the content available for restoration. For detailed backup creation, refer to the backup documentation or administrator. + + Restore Configuration: By default, all databases in the backup are restored. To restore a single database, specify the database field in the Restore resource: + + ``` yaml + apiVersion: k8s.mariadb.com/v1alpha1 + kind: Restore + metadata: + name: restore + spec: + mariaDbRef: + name: mariadb + backupRef: + name: backup + database: db1 + ``` + ### 3.2 Procedure + #### 1. 
Configure the Restore CRD: + Create a file named `restore.yaml` with the following content, adjusting the region-specific details: + + ``` yaml + apiVersion: k8s.mariadb.com/v1alpha1 + kind: Restore + metadata: + name: maria-restore + namespace: # Replace with the actual namespace (e.g., default or mariadb) + spec: + mariaDbRef: + name: mariadb # Must match the existing MariaDB resource name + s3: + bucket: mariadb-backups + prefix: cron + endpoint: # See table below + accessKeyIdSecretKeyRef: + name: # e.g., dfw-credentials + key: access-key-id + secretAccessKeySecretKeyRef: + name: # e.g., dfw-credentials + key: secret-access-key + database: # e.g., nova + ``` + + Replace and with the appropriate values for each environment. + Use the following region-specific endpoints: + + | Region | Environment | Endpoint | Profile | Credential Secret | + |--------|-------------|-----------------------------------------------|-----------|-------------------| + | DFW | DFW | https://swift.api.dfw3.rackspacecloud.com | dfw_admin | dfw-credentials | + | SJC | SJC | https://swift.api.sjc3.rackspacecloud.com | sjc_admin | sjc-credentials | + | IAD | IAD | https://swift.api.iad3.rackspacecloud.com | iad_admin | iad-credentials | + + #### 2. Apply the CRD: + Execute the deployment: + ```shell + kubectl apply -f restore.yaml + ``` + #### 3. Monitor the Restore: + Check the status: + ```shell + kubectl describe restore maria-restore -n . + ``` + Here the resource type : restore + resource name : maria-restore + + Monitor logs: + ```shell + kubectl logs -f -n . + ``` + Identify the pod with kubectl get pods + Wait for the status to change to Succeeded. + #### 4. Verify Restore: + Access the mariadb Pod: + ```shell + kubectl exec -it -n -- mysql -u root -p. + ``` + Run a query: SELECT COUNT(*) FROM ; (e.g., nova.instances) to confirm data. + + **Notes:** + Ensure the region-specific credentials secret exists: kubectl get secret -n -o yaml. 
+ + **Note:** This procedure some reference present in + +## 4. Manual Restore Using AWS S3 Commands + This method retrieves the backup from the overseer and restores it manually, applicable to all production regions as a fallback. + + ### Steps: + #### 1. Access the Region-Specific Overseer: + Log in to the overseer node (e.g., ssh user@dfw-prod-overseer-ip or any available method which is allowed to login into DFW Prod in a secured manner). + #### 2. Verify AWS CLI Configuration: + Ensure the region-specific profile is set up (e.g., dfw_admin for DFW): + ```yaml + [profile dfw_admin] + region = dfw + s3 = + endpoint_url = https://swift.api.dfw3.rackspacecloud.com + signature_version = s3v4 + ``` + ```yaml + [dfw_admin] + aws_access_key_id = YOUR_ACCESS_KEY + aws_secret_access_key = YOUR_SECRET_KEY + ``` + Adjust for SJC (sjc_admin), IAD (iad_admin) with their endpoints (see table above). + Test with the command below to list backups: + ```shell + aws --profile _admin s3 ls s3://mariadb-backups/ . + ``` + #### 3. Retrieve the Backup: + List available backups: aws --profile _admin s3 ls s3://mariadb-backups/cron/. + Download a specific backup: + For example: + ```shell + aws --profile dfw_admin s3 cp s3://mariadb-backups/cron/backup.2025-02-04T19:05:57Z.gzip.sql /tmp/backup.2025-02-04T19:05:57Z.gzip.sql + ``` + Note: Replace the file name with the backup you need to download. + #### 4. Restore the Backup: + Access a test MariaDB instance (e.g., via kubectl exec or a local DB): mysql -u user -p < backup.2025-02-04T19:05:57Z.gzip.sql. + #### 5. Single Database Restore: + If the backup contains multiple databases, extract the desired database (e.g., nova) using a tool like sed or mysql filters, then restore: mysql -u user -p nova < nova_backup.sql. + #### 6. Verify: + Check the return code: echo $? (0 indicates success). + Query the database: mysql -u user -p -e "SELECT COUNT(*) FROM ;". 
+ + **Notes:** + Ensure the overseer has network access to the region-specific Swift endpoint. + + **Before applying or executing these commands in production, first test them in a DEV or staging environment.** + +## 5. References + ### https://github.com/mariadb-operator/mariadb-operator/blob/main/docs/BACKUP.md + ### https://docs.rackspacecloud.com/storage-object-store-s3-cli/ + ### https://mariadb.com/docs/server/server-usage/backup-and-restore/backup-and-restore-overview + +## 6. Escalation: + If validation fails, coordinate with the Admin Team or Database team to resolve network or any related configuration issues.\ + **Note: This step might be extended further** diff --git a/docs/monitoring-info.md b/docs/monitoring-info.md index c2b440e96..9a8fa1787 100644 --- a/docs/monitoring-info.md +++ b/docs/monitoring-info.md @@ -68,6 +68,12 @@ View the [kube-state-docs](https://github.com/kubernetes/kube-state-metrics/tree Beyond those two highly important ones installed by default are many more equally important metric exporters that we install as part of Genestack's workflow that we'll go over next. +* ### Kubernetes Event Exporter +Kubernetes clusters are constantly sending events that contain potentially important data that should be captured. +With the [Kubernetes Event Exporter](https://github.com/resmoio/kubernetes-event-exporter) we can capture these events to gain a better view of what our cluster is doing. +This exporter also includes built in alerting mechanisms for things like Slack and MSTeams that can be configured to send messages when specific events are seen. +View the [Kubernetes Event Exporter Install Instructions](prometheus-kube-event-exporter.md) to get this exporter installed. + * ### MariaDB/MySQL Exporter: Genestack uses a couple different database solutions to run OpenStack or just for general storage capabilities, the most prominent of them is MySQL or more specifically within Genestack MariaDB and Galera. 
When installing [MariaDB](infrastructure-mariadb.md) as part of Genestack's workflow it is default to enable metrics which deploys its own service monitor as part of the [mariadb-operator](https://mariadb-operator.github.io/mariadb-operator/latest/) helm charts. @@ -110,11 +116,16 @@ Once we've ran the apply command we will have installed ServiceMonitors for Kube You can view more information about OVN monitoring in the [OVN Monitoring Introduction Docs](ovn-monitoring-introduction.md). * ### Nginx Gateway Monitoring: -Genestack makes use of the Gateway API for its implementation of [Kubernetes Gateway API](https://gateway-api.sigs.k8s.io/). Genestack deploys this as part of its infrastructure, view the [Gateway Deployment Doc](infrastructure-gateway-api.md) for more information. +Genestack can make use of the NGINX Gateway API for its implementation of [Kubernetes Gateway API](https://gateway-api.sigs.k8s.io/). Genestack deploys this as part of its infrastructure, view the [Gateway Deployment Doc](infrastructure-gateway-api.md) for more information. Nginx Gateway does expose important metrics for us to gather but it does not do so via a service. Instead we must make use another Prometheus CRD the [PodMonitor](https://prometheus-operator.dev/docs/getting-started/design/#podmonitor). The install is similar to the above OVN monitoring as you can see in the [Nginx Gateway Exporter Deployment Doc](prometheus-nginx-gateway.md). The primary difference is the need to target and match on a pod that's exposing the metrics rather than a service. You can view more information about the metrics exposed by the Nginx Gateway by viewing the [Nginx Gateway Fabric Docs](https://docs.nginx.com/nginx-gateway-fabric/how-to/monitoring/prometheus/). +* ### Envoy Gateway Monitoring: +Genestack makes use of the Envoy Gateway API for its implementation of [Kubernetes Gateway API](https://gateway-api.sigs.k8s.io/). 
Genestack deploys the Envoy Gateway as part of its infrastructure, view the [Envoy Gateway Deployment Doc](infrastructure-envoy-gateway-api.md) for more information. +Envoy Gateway is a Kubernetes-native API Gateway and reverse proxy control plane. It simplifies deploying and operating Envoy Proxy as a data plane by using the standard Gateway API and its own extensible APIs. For more information about Envoy Gateway in general, view the [Envoy Gateway Documentation](https://gateway.envoyproxy.io/docs/concepts/introduction/). +The Envoy Gateway serves Prometheus metrics by default, and a list of the collected metrics can be found at [Envoy Gateway Exported Metrics](https://gateway.envoyproxy.io/docs/tasks/observability/gateway-exported-metrics/). + * ### OpenStack Metrics: OpenStack Metrics are a bit different compared to the rest of the exporters as there's no single service, pod or deployment that exposes Prometheus metrics for collection. Instead, Genestack uses the [OpenStack Exporter](https://github.com/openstack-exporter/openstack-exporter) to gather the metrics for us. The OpenStack exporter reaches out to all the configured OpenStack services, queries their API for stats and packages them as metrics Prometheus can then process. The OpenStack exporter is configurable and can collect metrics from just about every OpenStack service such as Keystone, Nova, Octavia etc.. @@ -177,7 +188,7 @@ You can manually add additional datasources by following the [add datasource](ht More information about the primary datasources can be found in the [Prometheus datasource](https://grafana.com/docs/grafana/latest/datasources/prometheus/) and [Loki datasource](https://grafana.com/docs/grafana/latest/datasources/loki/) documentation. As things stand now, the Grafana deployment does not deploy dashboards as part of the default deployment instructions.
However, there are dashboards available found in the [etc directory](https://github.com/rackerlabs/genestack/tree/main/etc/grafana-dashboards) of the Genestack repo that can be installed manually by importing them into Grafana. -View the [importing dashboards](https://grafana.com/docs/grafana/latest/dashboards/build-dashboards/import-dashboards/) documentation for more information. +View the [importing dashboards](https://grafana.com/docs/grafana/latest/dashboards/build-dashboards/import-dashboards/) documentation for more information. You can also use the [import-grafana-dashboard.py](https://github.com/rackerlabs/genestack/tree/main/scripts/import-grafana-dashboard.py) script to import them. The dashboards available cover just about every exporter/metric noted here and then some. Some of the dashboards may not be complete or may not provide the desired view. Please feel free to adjust them as needed and submit a PR to [Genestack repo](https://github.com/rackerlabs/genestack) if they may help others!
## Next Steps diff --git a/docs/openstack-cinder-lvmisci.md b/docs/openstack-cinder-lvmisci.md index 864ffc865..946a687cd 100644 --- a/docs/openstack-cinder-lvmisci.md +++ b/docs/openstack-cinder-lvmisci.md @@ -79,7 +79,7 @@ Within the `inventory.yaml` file, ensure you have the following variables for yo openstack_compute_nodes: vars: enable_iscsi: true - storage_network_multipath: false # optional -- enable when running multipath + custom_multipath: false # optional -- enable when running multipath with custom multipath.conf storage_nodes: vars: enable_iscsi: true @@ -111,7 +111,7 @@ ansible-playbook -i inventory.yaml playbooks/deploy-cinder-volumes-reference.yam ``` console ansible-playbook -i /etc/genestack/inventory/inventory.yaml deploy-cinder-volumes-reference.yaml \ - -e "cinder_storage_network_interface=ansible_br_storage_a cinder_storage_network_interface_secondary=ansible_br_storage_b storage_network_multipath=true storage_network_multipath=true cinder_backend_name=lvmdriver-1" \ + -e "cinder_storage_network_interface=ansible_br_storage_a cinder_storage_network_interface_secondary=ansible_br_storage_b storage_network_multipath=true cinder_backend_name=lvmdriver-1" \ --user ubuntu \ --become 'cinder_storage_nodes' ``` @@ -277,7 +277,7 @@ storage: ## 7 Verify Multipath Operations -If multipath is enabled, you check the status of the multipath devices on the storage nodes. +If multipath is enabled on compute nodes, you can verify dual iscsi targets on the storage nodes. ``` bash tgtadm --mode target --op show diff --git a/docs/openstack-cinder-volume-provisioning-specs.md b/docs/openstack-cinder-volume-provisioning-specs.md index d19b148cf..5109be37e 100644 --- a/docs/openstack-cinder-volume-provisioning-specs.md +++ b/docs/openstack-cinder-volume-provisioning-specs.md @@ -10,3 +10,24 @@ These specifications are set in the volume type. 
The following commands constrai root@openstack-node-0:~# kubectl --namespace openstack exec -ti openstack-admin-client -- openstack volume type set --property provisioning:min_vol_size=10 6af6ade2-53ca-4260-8b79-1ba2f208c91d root@openstack-node-0:~# kubectl --namespace openstack exec -ti openstack-admin-client -- openstack volume type set --property provisioning:max_vol_size=2048 6af6ade2-53ca-4260-8b79-1ba2f208c91d ``` + +## Sample Provisioning Script + +``` shell +#!/bin/bash + +openstack volume type create --description 'Capacity with LUKS encryption' --encryption-provider luks --encryption-cipher aes-xts-plain64 --encryption-key-size 256 --encryption-control-location front-end --property volume_backend_name=LVM_iSCSI --property provisioning:max_vol_size='2048' --property provisioning:min_vol_size='100' Capacity +openstack volume type create --description 'Standard with LUKS encryption' --encryption-provider luks --encryption-cipher aes-xts-plain64 --encryption-key-size 256 --encryption-control-location front-end --property volume_backend_name=LVM_iSCSI --property provisioning:max_vol_size='2048' --property provisioning:min_vol_size='10' Standard +openstack volume type create --description 'Performance with LUKS encryption' --encryption-provider luks --encryption-cipher aes-xts-plain64 --encryption-key-size 256 --encryption-control-location front-end --property volume_backend_name=LVM_iSCSI --property provisioning:max_vol_size='2048' --property provisioning:min_vol_size='10' Performance + + +openstack volume qos create --property read_iops_sec_per_gb='1' --property write_iops_sec_per_gb='1' Capacity-Block +openstack volume qos create --property read_iops_sec_per_gb='5' --property write_iops_sec_per_gb='5' Standard-Block +openstack volume qos create --property read_iops_sec_per_gb='10' --property write_iops_sec_per_gb='10' Performance-Block + +openstack volume qos associate Capacity-Block Capacity +openstack volume qos associate Standard-Block Standard +openstack 
volume qos associate Performance-Block Performance + +openstack volume type set --private __DEFAULT__ +``` diff --git a/docs/openstack-glance-images.md b/docs/openstack-glance-images.md index 90c64cf9d..0606a3553 100644 --- a/docs/openstack-glance-images.md +++ b/docs/openstack-glance-images.md @@ -369,11 +369,40 @@ openstack --os-cloud default image create \ SLES15-SP6 ``` -## Get RHEL +## Get Red Hat Enterprise Linux (RHEL) + +### RHEL 9 + +!!! note + + Make sure you download the latest available image from [here](https://access.redhat.com/downloads/content/479/ver=/rhel---9/9.6/x86_64/product-software). We used the `rhel-9.6-x86_64-kvm.qcow2` image. + +``` shell +openstack --os-cloud default image create \ + --progress \ + --disk-format qcow2 \ + --container-format bare \ + --public \ + --file rhel-9.6-x86_64-kvm.qcow2 \ + --property hw_vif_multiqueue_enabled=true \ + --property hw_qemu_guest_agent=yes \ + --property hypervisor_type=kvm \ + --property img_config_drive=optional \ + --property hw_machine_type=q35 \ + --property hw_firmware_type=uefi \ + --property os_require_quiesce=yes \ + --property os_type=linux \ + --property os_admin_user=cloud-user \ + --property os_distro=rhel \ + --property os_version=9.6 \ + RHEL-9.6 +``` + +### RHEL 10 !!! note - Make sure you download the latest available image from [here](https://access.redhat.com/downloads/content/479/ver=/rhel---9/9.4/x86_64/product-software). We used the rhel-9.4-x86_64-kvm.qcow2 image. + Make sure you download the latest available image from [here](https://access.redhat.com/downloads/content/479/ver=/rhel---10/10.0/x86_64/product-software). We used the `rhel-10.0-x86_64-kvm.qcow2` image. 
``` shell openstack --os-cloud default image create \ @@ -381,7 +410,7 @@ openstack --os-cloud default image create \ --disk-format qcow2 \ --container-format bare \ --public \ - --file rhel-9.4-x86_64-kvm.qcow2 \ + --file rhel-10.0-x86_64-kvm.qcow2 \ --property hw_vif_multiqueue_enabled=true \ --property hw_qemu_guest_agent=yes \ --property hypervisor_type=kvm \ @@ -392,8 +421,8 @@ openstack --os-cloud default image create \ --property os_type=linux \ --property os_admin_user=cloud-user \ --property os_distro=rhel \ - --property os_version=9.4 \ - RHEL-9.4 + --property os_version=10.0 \ + RHEL-10.0 ``` ## Get Windows diff --git a/docs/openstack-keystone-federation.md b/docs/openstack-keystone-federation.md index dd14fdb60..c6040982a 100644 --- a/docs/openstack-keystone-federation.md +++ b/docs/openstack-keystone-federation.md @@ -97,14 +97,22 @@ they are not required. You can also use the `shibd` command directly on the host #### Retrieve the SAML2 files -Using the `docker` command and the shibd image, retrieve the SAML2 files from the container and place them in a local directory. +A example *keystone-sp* configuration is provided under `/opt/genestack/etc/keystone-sp` and needs to be copied to the local configuration directory ``` shell -docker run -v /etc/genestack/keystone-sp:/mnt \ - ghcr.io/rackerlabs/keystone-rxt/shibd:latest \ - cp -R /etc/shibboleth /mnt/ +cp -r /opt/genestack/etc/keystone-sp /etc/genestack ``` +??? "Extracting the configuration from the original source" + + Using the `docker` command and the shibd image, retrieve the SAML2 files from the container and place them in a local directory. + + ``` shell + docker run -v /etc/genestack/keystone-sp:/mnt \ + ghcr.io/rackerlabs/genestack-images/shibd:latest \ + cp -R /etc/shibboleth /mnt/ + ``` + #### Update the `shibboleth2.xml` file The files will be created and stored in a kubernetes secret named `keystone-shib-etc` and mounted to `/etc/shibboleth` in the `keystone` pod. 
The files are: @@ -219,10 +227,10 @@ used by the SAML2 identity provider. When using the Rackspace the `shibboleth2.xml` file must be updated to include signing for all requests and responses. - The `shibboleth2.xml` file must be updated to include the following options: + The *shibboleth2.xml* file must be updated to include the following options, commonly the URLs of the entities ``` xml - + digestAlg="http://www.w3.org/2001/04/xmlenc#sha256"/> ``` The following attributes are used by the Rackspace identity provider. Add the following options to the @@ -274,6 +282,81 @@ used by the SAML2 identity provider. ``` + === "Keycloak" + + The following attributes are an example of attributes exported through keycloak. + The attributes name provided by keycloak depend on the backend configuration inside keycloak and how + other identity provider are imported and mapped. Add the following options to the + `attribute-map.xml` to have it compatible with the provided `saml-mapping.json` which will be used + within keystone. + + ``` xml + + + + + + + ``` + + The most simplest form of user mapping with Keycloak is to pass-through attributes and map to existing + Keystone groups inside a domain. Those groups would inherit OpenStack roles such as *admin* or *member* + + + !!! note + By default Keycloak exports group names with a path prefix such as **/** and this behavior can be turned off + with creating a mapping rule for a attribute such **group** and configure the option as shown below. + In addition to disabling **Full Group Path** option, enable the **Single Group Attribute** option. + + Select your realm > Configure > User Federation > Provider (such as LDAP) > Mappers > Add Mapper + + ![Attribute mapping](assets/images/keycloak-group-mapping.png){align=center : style="max-width:296px"} + + + When using the Keycloak the *shibboleth2.xml* file must be updated to include signing for all requests and responses. 
+ + The *shibboleth2.xml* file must be updated to include the following options, commonly the URLs marked with `example.com`. + In this example the username attribute + + ``` xml + + ``` + + The `MetadataProvider` should be made dynamic, where *openstack* designates the created realm inside keycloak. + + ``` xml + + + + ``` + + Additionally the `Handler` for the `MetadataGenerator` must be updated to include signing: + + ``` xml + + ``` + + !!! note Configure Keystone SP inside keycloak + Keystone SP must be introduced to keycloak as Client and entity ID among redirect URLs must be configured + to enable the SAML protocol via *Client type* SAML + + ![Allowing entities](assets/images/keycloak-client-config.png){align=center} + #### Generate the SAML Keys For **N** numbers of years. The keys are used to sign the SAML requests and responses. @@ -323,7 +406,8 @@ kubectl -n openstack create secret generic keystone-shibd-etc \ --from-file=/etc/genestack/keystone-sp/shibboleth/shibd.logger \ --from-file=/etc/genestack/keystone-sp/shibboleth/sp-cert.pem \ --from-file=/etc/genestack/keystone-sp/shibboleth/sp-key.pem \ - --from-file=/etc/genestack/keystone-sp/shibboleth/sslError.html + --from-file=/etc/genestack/keystone-sp/shibboleth/sslError.html \ + --from-file=/etc/genestack/keystone-sp/shibboleth/sso_callback_template.html ``` ### Create the SAML identity provider @@ -343,9 +427,49 @@ You're also welcome to generate your own mapping to suit your needs; however, if !!! 
abstract "Example keystone `saml-mapping.json` file" - ``` json - --8<-- "etc/keystone/saml-mapping.json" - ``` + === "Rackspace" + + ``` json + --8<-- "etc/keystone/saml-mapping.json" + ``` + + === "Keycloak" + + ``` json + [ + { + "local": [ + { + "user": { + "name": "{0}", + "email": "{1}", + "domain": { + "name": "KEYSTONE DOMAIN" + } + }, + "group": { + "name": "{2}", + "domain": { + "name": "KEYSTONE DOMAIN" + } + } + } + ], + "remote": [ + { + "type": "REMOTE_USERNAME" + }, + { + "type": "REMOTE_EMAIL" + }, + { + "type": "REMOTE_GROUP" + } + ] + } + ] + ``` + The example mapping **JSON** file can be found within the genestack repository at `etc/keystone/saml-mapping.json`. diff --git a/docs/openstack-vendordata.md b/docs/openstack-vendordata.md new file mode 100644 index 000000000..2da140ea4 --- /dev/null +++ b/docs/openstack-vendordata.md @@ -0,0 +1,58 @@ +# Openstack Vendordata + +To read more about Openstack Vendordata see [upstream docs](https://docs.openstack.org/nova/latest/admin/vendordata.html) + +## Overview + +It is a feature that provides way to pass vendor-specific data to the instances at boot-time. It can be accessed +with one of the following ways: + +* [Metadata Service](https://docs.openstack.org/nova/latest/admin/metadata-service.html) +* [Config Drives](https://docs.openstack.org/nova/latest/admin/config-drive.html) + +Also, Vendordata sources can be specified with two ways: + +* [StaticJSON](https://docs.openstack.org/nova/latest/admin/vendordata.html#staticjson) +* [DynamicJSON](https://docs.openstack.org/nova/latest/admin/vendordata.html#dynamicjson) + +*StaticJSON* collects data from a JSON file that exits locally and is suitable when data remains same for all instances. +On the other hand *DynamicJSON* can collect data from external REST service and works well when that data does change for +instances. + +## Vendordata in genestack + +Genestack use *Metadata Service* to access Vendordata. 
It has StaticJSON enabled in nova.conf as default provider: + +```yaml +api: + vendordata_providers: ['StaticJSON'] + vendordata_jsonfile_path: /etc/nova/vendor_data.json +``` + +You can override the default configmap `/opt/genestack/base-kustomize/nova/base/static-vendordata-configmap` to pass +static vendor-data against `vendor_data.json` key, which is mounted at `/etc/nova/vendor_data.json` in metadata service +resources. + +For DynamicJSON you need to enable it amongst providers and have to specify dynamic target URL(s) in nova.conf as follows: + +```yaml +api: + vendordata_providers: ['StaticJSON', 'DynamicJSON'] + vendordata_jsonfile_path: /etc/nova/vendor_data.json + vendordata_dynamic_targets: ['target/url1', 'target/url2'] +``` + +A POST request call will be made to these dynamic targets and you can expect the request body contains instance's context +e.g. *instance-id, image-id, hostname etc.* These targets should return a valid JSON in response. + +## Cloud-init and Vendordata + +Cloud-init instructions can be passed-in as string against key - `cloud-init` within Vendordata JSON as follows: + +```json +{ + "cloud-init": "#cloud-config\nruncmd:\n..." +} +``` + +See [cloud-init docs](https://cloudinit.readthedocs.io/en/latest/reference/datasources/openstack.html) for more details. diff --git a/docs/prometheus-envoy-gateway.md b/docs/prometheus-envoy-gateway.md new file mode 100644 index 000000000..cb84fb988 --- /dev/null +++ b/docs/prometheus-envoy-gateway.md @@ -0,0 +1,14 @@ +# Envoy Gateway Monitoring + +Envoy Gateway exposes metrics that can be used to monitor the behavior and health of the Envoy Gateway. + +Following the deployment of the [Envoy Gateway](infrastructure-envoy-gateway-api.md) the metrics will be served and the service monitor will be created. + +If you need to deploy the service monitor independently you may apply the file directly with the following directions. 
+ +## Installation + +``` shell +kubectl apply -f /etc/genestack/kustomize/envoyproxy-gateway/base/envoy-service-monitor.yaml +``` + diff --git a/docs/prometheus-mysql-exporter.md b/docs/prometheus-mysql-exporter.md index 57ccb3e37..3281de942 100644 --- a/docs/prometheus-mysql-exporter.md +++ b/docs/prometheus-mysql-exporter.md @@ -21,7 +21,7 @@ kubectl --namespace openstack \ Then add the config to a secret that'll be used within the container for our shared services ``` shell -kubectl -n openstack create secret generic mariadb-monitor --type Opaque --from-literal=my.cnf="[client.monitoring] +kubectl -n openstack create secret generic mariadb-monitor --type Opaque --from-literal=my.cnf="[client.mariadb-monitor] user=monitoring password=$(kubectl --namespace openstack get secret mariadb-monitoring -o jsonpath='{.data.password}' | base64 -d)" ``` diff --git a/docs/redis-setup-guide.md b/docs/redis-setup-guide.md new file mode 100644 index 000000000..1fdbd421b --- /dev/null +++ b/docs/redis-setup-guide.md @@ -0,0 +1,57 @@ +Redis Cluster Setup with Helm (OT-Container-Kit Redis Operator 0.21.0) +Redis Git hub Repository: https://github.com/OT-CONTAINER-KIT/redis-operator +Overview +Deploys a 6-pod Redis cluster (3 leaders, 3 followers) on an OpenStack Kubernetes cluster using Helm, integrated into Genestack base-helm-configs, distributed across 3 nodes. +Prerequisites + +Kubernetes cluster with kubectl configured. +Helm installed. +Genestack repository cloned: git clone https://github.com/rackerlabs/genestack.git. + +Deployment Steps + +Ensure values.yaml is configured in genestack/base-helm-configs with: +leader.replicas: 3 +follower.replicas: 3 +storageClassName: general +persistenceEnabled: true + + +Create the override file: Save redis-operator-helm-overrides.yaml in /etc/genestack/helm-configs/redis-operator/ with the provided content. 
+Deploy the Redis operator and cluster: +Run: ./bin/install-redis-operator.sh +Optional: Customize CLUSTER_NAME: ./bin/install-redis-operator.sh CLUSTER_NAME=mycluster + + +Verify deployment: kubectl get pods -n redis-systems -o wide + +Basic Testing + +Verify Pods: kubectl get pods -n redis-systems -o wide +Expected: 6 pods across 3 nodes, e.g., redis-cluster-leader-0 on node-1, etc. + + +Cluster Health: Inside a pod (e.g., kubectl exec -it redis-cluster-leader-0 -n redis-systems -- /bin/sh), run redis-cli --cluster check 127.0.0.1:6379 +Expected: cluster_state:ok + + +Read/Write: redis-cli --cluster call 127.0.0.1:6379 SET testkey testvalue and GET testkey +Expected: OK and testvalue + + +Replication: Write to leader, check follower with redis-cli --cluster call 127.0.0.1:6379 GET repltest +Expected: replvalue + + +Persistence: Set persistkey, restart pod, verify with GET persistkey +Expected: persistvalue + + +Logs: kubectl logs redis-cluster-leader-0 -n redis-systems +Expected: No critical errors + + +Customization + +Use redis-operator-helm-overrides.yaml to adjust clusterName, namespace, or enable externalService for external access. +Example: Set externalService.enabled: true and serviceType: LoadBalancer for external connectivity. diff --git a/docs/storage-longhorn.md b/docs/storage-longhorn.md index face94141..2527f67d9 100644 --- a/docs/storage-longhorn.md +++ b/docs/storage-longhorn.md @@ -179,6 +179,24 @@ kubectl apply -f /etc/genestack/manifests/longhorn/longhorn-general-storageclass With the `general` StorageClass in place, you can now create PVCs that reference it to dynamically provision Longhorn volumes with the desired settings. +### General Multi Attach StorageClass + +For the purposes of Genestack, it is recommended that you create the `general-multi-attach` StorageClass to avoid deployment confusion. + +!!! 
example "longhorn-general-multi-attach-storageclass.yaml" + + ``` yaml + --8<-- "manifests/longhorn/longhorn-general-multi-attach-storageclass.yaml" + ``` + +Apply the general-multi-attach storage class manifest to create the StorageClass. + +``` shell +kubectl apply -f /etc/genestack/manifests/longhorn/longhorn-general-multi-attach-storageclass.yaml +``` + +With the `general-multi-attach` StorageClass in place, you can now create PVCs that reference it to dynamically provision Longhorn volumes with the desired settings. + ### (Optional) Create an Encrypted StorageClass If you want to enable data encryption, you can create an encrypted StorageClass. This feature encrypts the data at rest within the Longhorn volumes. Opting for the diff --git a/docs/sync-fernet-keys.md b/docs/sync-fernet-keys.md index d1848ee34..f0d6bdb34 100644 --- a/docs/sync-fernet-keys.md +++ b/docs/sync-fernet-keys.md @@ -42,20 +42,14 @@ Main K8s Cluster | ──> API ──> | Remote K8s Cluster | ## How can we sync keys? - Ensure that each cluster has the correct permissions to read and write Kubernetes Secrets. -- Use tools such as [External Secret](https://external-secrets.io/latest/api/pushsecret/) to sync the keystone-ferent-keys. +- We are using [fernet-sync](https://github.com/rackerlabs/fernet-sync) to sync the keystone-fernet-keys. - Make sure to have service account token by reading the above secret. -## Using PushSecret (external-secrets) to sync secrets +## Set up the fernet-sync deployment -Lets look at how we can setup pushsecrets to sync fernet keys between two or more clusters. -First, install external-secrets operator +Let's look at how we can set up fernet-sync to sync fernet keys between two or more clusters.
-```shell -helm repo add external-secrets https://charts.external-secrets.io -helm install external-secrets external-secrets/external-secrets -n external-secrets --create-namespace -``` - -Now, in order for secrets to sync, we will use a serviceaccount token with access only to a particular secret. +First, in order for secrets to sync, we will use a serviceaccount token with access only to a particular secret. So, in the target cluster create a new service account and give it appropriate permissions ``` apiVersion: v1 @@ -113,64 +107,28 @@ kubectl get secret keystone-sync-external-secret -o yaml -n openstack next, on the source cluster create credentials for the target ``` -apiVersion: v1 -kind: Secret -metadata: - name: target-credentials - namespace: openstack -data: - token: +git clone https://github.com/rackerlabs/fernet-sync.git +cd fernet-sync +vim create-secret.sh + +then make sure to have your cluser and token defined in the format +TOKENS = {"https://cluster1.example.com": "token for cluster1", "https://cluster2.example.com": "token for cluster2"} ``` -next, create a secret store for pushsecret to use +next, create the secret ``` -apiVersion: external-secrets.io/v1beta1 -kind: SecretStore -metadata: - name: target-store - namespace: openstack -spec: - provider: - kubernetes: - remoteNamespace: openstack - server: - url: - caBundle: - auth: - token: - bearerToken: - name: target-credentials - key: token +cd fernet-sync +./create-secret.sh + ``` -Now, you can create a pushsecret to sync (any secret but in our case we have restricted to) keystone-fernet-keys. 
+Now, you can create a deployment to sync the secret -Lets create that pushsecret definition -``` -apiVersion: external-secrets.io/v1alpha1 -kind: PushSecret -metadata: - name: pushsecret-target-store - namespace: openstack -spec: - # Replace existing secrets in provider - updatePolicy: Replace - # Resync interval - refreshInterval: 300s - # SecretStore to push secrets to - secretStoreRefs: - - name: target-store - kind: SecretStore - # Target Secret - selector: - secret: - name: keystone-fernet-keys # Source cluster Secret name - data: - - match: - remoteRef: - remoteKey: keystone-fernet-keys # Target cluster Secret name +```shell +kubectl apply -f deployment.yaml ``` -This will sync keystone-fernet-keys from source to destination and refresh it every 300sec. +This will create a deployment that will listen for any change in the `keystone-fernet-keys` secret and sync it to the +clusters defined in the create-secret.sh script. diff --git a/etc/gateway-api/routes/custom-alertmanager-routes.yaml b/etc/gateway-api/routes/custom-alertmanager-routes.yaml index 7c61de56f..caa3cd5ba 100644 --- a/etc/gateway-api/routes/custom-alertmanager-routes.yaml +++ b/etc/gateway-api/routes/custom-alertmanager-routes.yaml @@ -1,16 +1,17 @@ +--- apiVersion: gateway.networking.k8s.io/v1 kind: HTTPRoute metadata: - name: custom-alertmanger-gateway-route + name: custom-alertmanager-gateway-route namespace: prometheus spec: parentRefs: - - name: flex-gateway - sectionName: http - namespace: nginx-gateway + - name: flex-gateway + sectionName: http + namespace: nginx-gateway hostnames: - - "alertmanager.your.domain.tld" + - "alertmanager.your.domain.tld" rules: - backendRefs: - - name: kube-prometheus-stack-alertmanager - port: 9093 + - name: kube-prometheus-stack-alertmanager + port: 9093 diff --git a/etc/grafana-dashboards/envoy-gateway.json b/etc/grafana-dashboards/envoy-gateway.json new file mode 100644 index 000000000..355c39f56 --- /dev/null +++ b/etc/grafana-dashboards/envoy-gateway.json @@ -0,0
+1,3108 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Envoy Gateway monitoring Dashboard with exported metrics.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [], + "title": "Watching Components", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "How long in seconds a subscribed watchable is handled.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 0, + "y": 1 + }, + "id": 1, + "maxPerRow": 3, + "options": { + "displayMode": "basic", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.0.0", + "repeat": "Runner", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(le) (watchable_subscribe_duration_seconds_bucket{runner=~\"$Runner\", namespace=\"$Namespace\"})", + "format": "heatmap", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{le}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], 
+ "title": "Duration Bucket: $Runner", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 7, + "y": 1 + }, + "id": 24, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value_and_name", + "wideLayout": false + }, + "pluginVersion": "11.0.0", + "repeat": "Runner", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "avg by(runner) (watchable_subscribe_duration_seconds_sum{runner=~\"$Runner\", namespace=\"$Namespace\"})", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Avg", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "max by(runner) (watchable_subscribe_duration_seconds_sum{runner=~\"$Runner\", namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Max", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Duration Status", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Current depth of watchable map.", + 
"fieldConfig": { + "defaults": { + "color": { + "fixedColor": "super-light-blue", + "mode": "shades" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 2, + "x": 10, + "y": 1 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value", + "wideLayout": false + }, + "pluginVersion": "11.0.0", + "repeat": "Runner", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(runner) (watchable_depth{runner=~\"$Runner\", namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Depth", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "light-blue", + "mode": "shades" + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Success" + }, + "properties": [ + { + "id": "displayName", + "value": "Success" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 12, + "y": 1 + }, + "id": 10, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + 
"showPercentChange": false, + "textMode": "value_and_name", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "repeat": "Runner", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(status) (watchable_subscribe_total{runner=\"$Runner\", namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum(watchable_subscribe_total{runner=\"$Runner\", namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Statistics", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 35, + "panels": [], + "title": "Status Updater", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Total number of panics recovered in the system.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 0, + "y": 8 + }, + "id": 25, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value_and_name", + "wideLayout": false + }, + 
"pluginVersion": "11.0.0", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum(watchable_panics_recovered_total{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Recovered Panics", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Recovered Panics", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "How long a status update takes to finish for all Kind.", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "super-light-blue", + "mode": "thresholds" + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 37 + }, + "id": 61, + "options": { + "displayMode": "basic", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(le) (status_update_duration_seconds_bucket{namespace=\"$Namespace\"})", + "format": "heatmap", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{le}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Total Duration Bucket", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "", + "fieldConfig": { + "defaults": { + 
"color": { + "mode": "thresholds" + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 0.2 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 37 + }, + "id": 82, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "status_update_duration_seconds_sum{namespace=\"$Namespace\"}", + "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{kind}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Avg Duration", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 0.1 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 12, + "y": 37 + }, + "id": 83, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "max" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + 
"sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "status_update_duration_seconds_sum{namespace=\"$Namespace\"}", + "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{kind}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Max Duration", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlPu" + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 0.01 + }, + { + "color": "red", + "value": 0.1 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 18, + "y": 37 + }, + "id": 84, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "logmin" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "status_update_duration_seconds_sum{namespace=\"$Namespace\"}", + "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{kind}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Min Duration", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Total number of status 
updates by object kind.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": true, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "left", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "fieldMinMax": false, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 46 + }, + "id": 56, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(kind) (status_update_total{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{kind}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Total", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": true, + "axisColorMode": "text", + "axisLabel": "", + 
"axisPlacement": "left", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 10, + "y": 46 + }, + "id": 57, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(status) (status_update_total{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Status", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 54 + }, + "id": 126, + "panels": [], + "title": "xDS Server", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "super-light-green", + "mode": "shades" + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 10, 
+ "x": 0, + "y": 55 + }, + "id": 127, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value_and_name", + "wideLayout": false + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum(xds_snapshot_create_total{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(status) (xds_snapshot_create_total{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{status}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Snapshot Creation Status", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "orange", + "mode": "shades" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 10, + "y": 55 + }, + "id": 149, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "max(xds_stream_duration_seconds_bucket{namespace=\"$Namespace\", isDeltaStream=\"true\"})", + "format": "heatmap", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Finished Stream", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Maximum duration seconds for finished xDS delta stream connection.", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "red", + "mode": "shades" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 15, + "y": 55 + }, + "id": 150, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "max" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "xds_stream_duration_seconds_sum{namespace=\"$Namespace\", isDeltaStream=\"true\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Max Duration", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Minimum duration seconds for finished xDS delta stream connection.", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "light-green", + "mode": "shades" + }, + 
"mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 15, + "y": 59 + }, + "id": 151, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "min" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "xds_stream_duration_seconds_sum{namespace=\"$Namespace\", isDeltaStream=\"true\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Min Duration", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Total number of xds snapshot cache updates by node id.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic-by-name" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": 
[] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 63 + }, + "id": 152, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(nodeID) (xds_snapshot_update_total{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Update Total", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 5, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 9, + "x": 10, + "y": 63 + }, + "id": 153, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + 
"pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(status) (xds_snapshot_update_total{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{nodeID}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Update Status", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 71 + }, + "id": 156, + "panels": [], + "title": "Infrastructure Manager", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 72 + }, + "id": 199, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(le) (resource_apply_duration_seconds_bucket{namespace=\"$Namespace\"})", + "format": "heatmap", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{le}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Total Apply Duration Bucket", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.3 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 72 + }, + "id": 220, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(kind) (resource_apply_duration_seconds_sum{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Avg Apply Duration", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.3 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 72 + }, + "id": 221, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "max" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + 
"pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(kind) (resource_apply_duration_seconds_sum{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Max Apply Duration", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlPu" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.3 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 72 + }, + "id": 222, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "logmin" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(kind) (resource_apply_duration_seconds_sum{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Min Apply Duration", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Total number of applied resources sumed by kind.", + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 80 + }, + "id": 157, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(kind) (resource_apply_total{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Total Applied Resources", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Total number of applied resources that succeed sumed by kind.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", 
+ "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 80 + }, + "id": 229, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(status) (resource_apply_total{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Applied Resources Status", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Total number of applied resources sumed by infra name.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, 
+ "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 80 + }, + "id": 178, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(name) (resource_apply_total{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Total Applied Infrastructures", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 87 + }, + "id": 223, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + 
"editorMode": "code", + "expr": "sum by(le) (resource_delete_duration_seconds_bucket{namespace=\"$Namespace\"})", + "format": "heatmap", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{le}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Total Delete Duration Bucket", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.1 + }, + { + "color": "red", + "value": 0.3 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 87 + }, + "id": 224, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(kind) (resource_delete_duration_seconds_sum{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Avg Delete Duration", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.1 + }, + { + "color": 
"red", + "value": 0.3 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 87 + }, + "id": 225, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "max" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(kind) (resource_delete_duration_seconds_sum{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Max Delete Duration", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlPu" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.1 + }, + { + "color": "red", + "value": 0.3 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 87 + }, + "id": 226, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "logmin" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(kind) 
(resource_delete_duration_seconds_sum{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Min Delete Duration", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Total number of deleted resources summed by kind.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 95 + }, + "id": 227, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(kind) (resource_delete_total{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false
+ } + ], + "title": "Total Deleted Resources", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Total number of deleted resources that succeed summed by kind.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 95 + }, + "id": 232, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(status) (resource_delete_total{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Deleted Resources Status", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Total number of deleted resources summed
by infra name.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 95 + }, + "id": 228, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(name) (resource_delete_total{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Total Deleted Infrastructures", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 102 + }, + "id": 249, + "panels": [], + "title": "Wasm", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 103 + }, + "id": 250, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "wasm_cache_entries{namespace=\"$Namespace\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Cache Entries", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Total number of Wasm remote fetch cache lookups.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 7, + "x": 4, + "y": 103 + }, + "id": 251, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(hit) (wasm_cache_lookup_total{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + 
"includeNullMetadata": true, + "instant": false, + "legendFormat": "hit={{hit}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Cache Lookups", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Total number of Wasm remote fetches and results.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 7, + "x": 11, + "y": 103 + }, + "id": 252, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(status) (wasm_remote_fetch_total{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{status}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum(wasm_remote_fetch_total{namespace=\"$Namespace\"})", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Cache Remote Fetches", + "type": "stat" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [ + "Control Plane" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": 
"Prometheus", + "value": "PBFA97CFB590B2093" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "selected": false, + "text": "envoy-gateway-system", + "value": "envoy-gateway-system" + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "definition": "label_values(watchable_depth,namespace)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "Namespace", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(watchable_depth,namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "definition": "label_values(watchable_depth,runner)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "Runner", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(watchable_depth,runner)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timeRangeUpdatedDuringEditOrView": false, + "timepicker": {}, + "timezone": "browser", + "title": "Envoy Gateway Global", + "uid": "bdn8lriao7myoa", + "version": 1, + "weekStart": "" +} diff --git a/etc/grafana-dashboards/envoy-resource-monitor.json b/etc/grafana-dashboards/envoy-resource-monitor.json new file mode 100644 index 000000000..1a4353aef --- /dev/null +++ b/etc/grafana-dashboards/envoy-resource-monitor.json @@ -0,0 +1,249 @@ +{ + "description": "Memory and CPU Usage Monitor for Envoy Gateway and Envoy 
Proxy.\n", + "graphTooltip": 1, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "Envoy Gateway", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "scaleDistribution": { + "log": 10, + "type": "log" + }, + "showPoints": "never" + }, + "unit": "s" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 2, + "interval": "1m", + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (namespace) (\n rate(\n container_cpu_usage_seconds_total{\n container=\"envoy-gateway\"\n }\n [$__rate_interval])\n)\n", + "intervalFactor": 2, + "legendFormat": "{{namespace}}\n" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "never" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 1 + }, + "id": 3, + "interval": "1m", + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (namespace) (\n container_memory_working_set_bytes{container=\"envoy-gateway\"}\n)\n", + "intervalFactor": 2, + "legendFormat": "{{namespace}}\n" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 4, + "panels": [ ], + "title": "Envoy Proxy", + "type": "row" + 
}, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "scaleDistribution": { + "log": 10, + "type": "log" + }, + "showPoints": "never" + }, + "unit": "s" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 10 + }, + "id": 5, + "interval": "1m", + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (\n rate(\n container_cpu_usage_seconds_total{\n container=\"envoy\"\n }\n [$__rate_interval])\n)\n", + "intervalFactor": 2, + "legendFormat": "{{pod}}\n" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "never" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 10 + }, + "id": 6, + "interval": "1m", + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum by (pod) (\n container_memory_working_set_bytes{container=\"envoy\"}\n)\n", + "intervalFactor": 2, + "legendFormat": "{{pod}}\n" + } + ], + "title": "Memory Usage", + "type": "timeseries" + } + ], + "schemaVersion": 39, + "templating": { + "list": [ + { + "name": "datasource", + "query": "prometheus", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timezone": "utc", + "title": "Envoy Resources Monitor", + "uid": "f7aeb41676b7865cf31ae49691325f91" +} diff --git a/etc/keystone-sp/shibboleth/attrChecker.html 
b/etc/keystone-sp/shibboleth/attrChecker.html new file mode 100644 index 000000000..a3ddf6ef5 --- /dev/null +++ b/etc/keystone-sp/shibboleth/attrChecker.html @@ -0,0 +1,57 @@ + + + + + + + + Insufficient Information + + + + + +Logo + +

We're sorry, but you cannot access this service at this time.

+ + +

This service requires information about you that your identity provider +() +did not release. To gain access to this service, your identity provider +must release the required information.

+ + +

+

+Please visit + +the support page +this support page + +for further instructions. +
+

+
+
+ + +

Your session was already invalidated before your information could + be examined for completeness.

+
+ +

+You were trying to access the following URL: +

+

+ + +

For more information about this service, including what user information is +required for access, please visit our +information page.

+
+ + + diff --git a/etc/keystone-sp/shibboleth/attribute-map.xml b/etc/keystone-sp/shibboleth/attribute-map.xml new file mode 100644 index 000000000..53da93e44 --- /dev/null +++ b/etc/keystone-sp/shibboleth/attribute-map.xml @@ -0,0 +1,173 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + --> + diff --git a/etc/keystone-sp/shibboleth/attribute-policy.xml b/etc/keystone-sp/shibboleth/attribute-policy.xml new file mode 100644 index 000000000..64274a936 --- /dev/null +++ b/etc/keystone-sp/shibboleth/attribute-policy.xml @@ -0,0 +1,77 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/etc/keystone-sp/shibboleth/bindingTemplate.html b/etc/keystone-sp/shibboleth/bindingTemplate.html new file mode 100644 index 000000000..59a924b67 --- /dev/null +++ b/etc/keystone-sp/shibboleth/bindingTemplate.html @@ -0,0 +1,58 @@ + + + Shibboleth Authentication Request + + + +

Shibboleth Authentication Request

+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
+ + diff --git a/etc/keystone-sp/shibboleth/console.logger b/etc/keystone-sp/shibboleth/console.logger new file mode 100644 index 000000000..dedb731ce --- /dev/null +++ b/etc/keystone-sp/shibboleth/console.logger @@ -0,0 +1,33 @@ +log4j.rootCategory=WARN, console + +# fairly verbose for DEBUG, so generally leave at INFO +log4j.category.XMLTooling.XMLObject=INFO +log4j.category.XMLTooling.XMLObjectBuilder=INFO +log4j.category.XMLTooling.KeyInfoResolver=INFO +log4j.category.Shibboleth.IPRange=INFO +log4j.category.Shibboleth.PropertySet=INFO + +# raise for low-level tracing of SOAP client HTTP/SSL behavior +log4j.category.XMLTooling.libcurl=INFO + +# useful categories to tune independently: +# +# tracing of SAML messages and security policies +#log4j.category.OpenSAML.MessageDecoder=DEBUG +#log4j.category.OpenSAML.MessageEncoder=DEBUG +#log4j.category.OpenSAML.SecurityPolicyRule=DEBUG +# interprocess message remoting +#log4j.category.Shibboleth.Listener=DEBUG +# mapping of requests to applicationId +#log4j.category.Shibboleth.RequestMapper=DEBUG +# high level session cache operations +#log4j.category.Shibboleth.SessionCache=DEBUG +# persistent storage and caching +#log4j.category.XMLTooling.StorageService=DEBUG + +# define the appender + +log4j.appender.console=org.apache.log4j.ConsoleAppender +#log4j.appender.console.layout=org.apache.log4j.BasicLayout +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{%Y-%m-%d %H:%M:%S} %p %c %x: %m%n diff --git a/etc/keystone-sp/shibboleth/discoveryTemplate.html b/etc/keystone-sp/shibboleth/discoveryTemplate.html new file mode 100644 index 000000000..244e1f51e --- /dev/null +++ b/etc/keystone-sp/shibboleth/discoveryTemplate.html @@ -0,0 +1,48 @@ + + + Request for Authentication + + +

Request for Authentication

+ +

This web site requires you to login before proceeding. Please identify + the domain name of your organization:

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

The system was unable to determine how to proceed using the value you supplied.

+
+ + diff --git a/etc/keystone-sp/shibboleth/example-metadata.xml b/etc/keystone-sp/shibboleth/example-metadata.xml new file mode 100644 index 000000000..1b99d15ed --- /dev/null +++ b/etc/keystone-sp/shibboleth/example-metadata.xml @@ -0,0 +1,172 @@ + + + + + + + + + + example.org + + + + Identities 'R' Us + https://idp.example.org/info/ + https://example.org/images/logo.png + https://example.org/images/favico.png + + + + + + + + + MIICkjCCAfugAwIBAgIJAK7VCxPsh8yrMA0GCSqGSIb3DQEBBAUAMDsxCzAJBgNV + BAYTAlVTMRIwEAYDVQQKEwlJbnRlcm5ldDIxGDAWBgNVBAMTD2lkcC5leGFtcGxl + Lm9yZzAeFw0wNTA2MjAxNTUwNDFaFw0zMjExMDUxNTUwNDFaMDsxCzAJBgNVBAYT + AlVTMRIwEAYDVQQKEwlJbnRlcm5ldDIxGDAWBgNVBAMTD2lkcC5leGFtcGxlLm9y + ZzCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA2VnUvWYrNhtRUqIxAuFmV8YP + Jhr+OMKJpc/RaEs2C8mk5N5qO+ysClg2cVfkws3O4Lc15AiNdQ0s3ZijYwJK2EEg + 4vmoTl2RrjP1b3PK2h+VbUuYny9enHwDL+Z4bjP/8nmIKlhUSq4DTGXbwdQiWjCd + lQXvDtvHRwX/TaqtHbcCAwEAAaOBnTCBmjAdBgNVHQ4EFgQUlmI7WqzIDJzcfAyU + v2kmk3p9sbAwawYDVR0jBGQwYoAUlmI7WqzIDJzcfAyUv2kmk3p9sbChP6Q9MDsx + CzAJBgNVBAYTAlVTMRIwEAYDVQQKEwlJbnRlcm5ldDIxGDAWBgNVBAMTD2lkcC5l + eGFtcGxlLm9yZ4IJAK7VCxPsh8yrMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEE + BQADgYEAsatF5gh1ZBF1QuXxchKp2BKVOsK+23y+FqhuOuVi/PTMf+Li84Ih25Al + Jyy3OKc0oprM6tCJaiSooy32KTW6a1xhPm2MwuXzD33SPoKItue/ndp8Bhx/PO9U + w14fpgtAk2x8xD7cpHsZ073JHxEcjEetD8PTtrFdNu6GwIrv6Sk= + + + + + + + + + + + + + urn:mace:shibboleth:1.0:nameIdentifier + urn:oasis:names:tc:SAML:2.0:nameid-format:transient + + + + + + + + + + + + example.org + + + + + + + + MIICkjCCAfugAwIBAgIJAK7VCxPsh8yrMA0GCSqGSIb3DQEBBAUAMDsxCzAJBgNV + BAYTAlVTMRIwEAYDVQQKEwlJbnRlcm5ldDIxGDAWBgNVBAMTD2lkcC5leGFtcGxl + Lm9yZzAeFw0wNTA2MjAxNTUwNDFaFw0zMjExMDUxNTUwNDFaMDsxCzAJBgNVBAYT + AlVTMRIwEAYDVQQKEwlJbnRlcm5ldDIxGDAWBgNVBAMTD2lkcC5leGFtcGxlLm9y + ZzCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA2VnUvWYrNhtRUqIxAuFmV8YP + Jhr+OMKJpc/RaEs2C8mk5N5qO+ysClg2cVfkws3O4Lc15AiNdQ0s3ZijYwJK2EEg + 4vmoTl2RrjP1b3PK2h+VbUuYny9enHwDL+Z4bjP/8nmIKlhUSq4DTGXbwdQiWjCd + 
lQXvDtvHRwX/TaqtHbcCAwEAAaOBnTCBmjAdBgNVHQ4EFgQUlmI7WqzIDJzcfAyU + v2kmk3p9sbAwawYDVR0jBGQwYoAUlmI7WqzIDJzcfAyUv2kmk3p9sbChP6Q9MDsx + CzAJBgNVBAYTAlVTMRIwEAYDVQQKEwlJbnRlcm5ldDIxGDAWBgNVBAMTD2lkcC5l + eGFtcGxlLm9yZ4IJAK7VCxPsh8yrMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEE + BQADgYEAsatF5gh1ZBF1QuXxchKp2BKVOsK+23y+FqhuOuVi/PTMf+Li84Ih25Al + Jyy3OKc0oprM6tCJaiSooy32KTW6a1xhPm2MwuXzD33SPoKItue/ndp8Bhx/PO9U + w14fpgtAk2x8xD7cpHsZ073JHxEcjEetD8PTtrFdNu6GwIrv6Sk= + + + + + + + + + + + urn:mace:shibboleth:1.0:nameIdentifier + urn:oasis:names:tc:SAML:2.0:nameid-format:transient + + + + + + Example Identity Provider + Identities 'R' Us + http://idp.example.org/ + + + Technical Support + support@idp.example.org + + + diff --git a/etc/keystone-sp/shibboleth/example-shibboleth2.xml b/etc/keystone-sp/shibboleth/example-shibboleth2.xml new file mode 100644 index 000000000..71ebebd5a --- /dev/null +++ b/etc/keystone-sp/shibboleth/example-shibboleth2.xml @@ -0,0 +1,299 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/etc/keystone-sp/shibboleth/globalLogout.html b/etc/keystone-sp/shibboleth/globalLogout.html new file mode 100644 index 000000000..86f205044 --- /dev/null +++ b/etc/keystone-sp/shibboleth/globalLogout.html @@ -0,0 +1,29 @@ + + + + + + + + Global Logout + + + + + +Logo + +

Global Logout

+ +

Status of Global Logout:

+ +

If the message above indicates success, you have been logged out of all +the applications and systems that support the logout mechanism.

+ +

Regardless of the outcome, it is strongly advised that you close your browser +to ensure that you complete the logout process.

+ + + diff --git a/etc/keystone-sp/shibboleth/localLogout.html b/etc/keystone-sp/shibboleth/localLogout.html new file mode 100644 index 000000000..75bd3e170 --- /dev/null +++ b/etc/keystone-sp/shibboleth/localLogout.html @@ -0,0 +1,27 @@ + + + + + + + + Local Logout + + + + + +Logo + +

Local Logout

+ +Status of Local Logout: + +

+ +You MUST close your browser to complete the logout process. + + + diff --git a/etc/keystone-sp/shibboleth/metadataError.html b/etc/keystone-sp/shibboleth/metadataError.html new file mode 100644 index 000000000..e0e6a1b05 --- /dev/null +++ b/etc/keystone-sp/shibboleth/metadataError.html @@ -0,0 +1,35 @@ + + + + + + + + Unknown Identity Provider + + + + + +Logo + +

Unknown or Unusable Identity Provider

+ +

The identity provider supplying your login credentials is not authorized +for use with this service or does not support the necessary capabilities.

+ +

To report this problem, please contact the site administrator at +. +

+ +

Please include the following error message in any email:

+

Identity provider lookup failed at ()

+ +

EntityID:

+
+

:

+ + + diff --git a/etc/keystone-sp/shibboleth/native.logger b/etc/keystone-sp/shibboleth/native.logger new file mode 100644 index 000000000..e9a43a571 --- /dev/null +++ b/etc/keystone-sp/shibboleth/native.logger @@ -0,0 +1,30 @@ +# set overall behavior +log4j.rootCategory=WARN, native_log + +# fairly verbose for DEBUG, so generally leave at WARN/INFO +log4j.category.XMLTooling.XMLObject=WARN +log4j.category.XMLTooling.XMLObjectBuilder=WARN +log4j.category.XMLTooling.KeyInfoResolver=WARN +log4j.category.Shibboleth.IPRange=WARN +log4j.category.Shibboleth.PropertySet=WARN + +# useful categories to tune independently: +# +# interprocess message remoting +#log4j.category.Shibboleth.Listener=DEBUG +# mapping of requests to applicationId +#log4j.category.Shibboleth.RequestMapper=DEBUG +# high level session cache operations +#log4j.category.Shibboleth.SessionCache=DEBUG + +# define the appender + +# Change to SyslogAppender for remote syslog, and set host/port +log4j.appender.native_log=org.apache.log4j.LocalSyslogAppender +#log4j.appender.native_log.syslogHost=localhost +#log4j.appender.native_log.portNumber=514 +log4j.appender.native_log.syslogName=shibboleth +# Facility is numeric, 16 is LOCAL0 +log4j.appender.native_log.facility=16 +log4j.appender.native_log.layout=org.apache.log4j.PatternLayout +log4j.appender.native_log.layout.ConversionPattern=%p %c %x: %m%n diff --git a/etc/keystone-sp/shibboleth/partialLogout.html b/etc/keystone-sp/shibboleth/partialLogout.html new file mode 100644 index 000000000..fe24a7c3d --- /dev/null +++ b/etc/keystone-sp/shibboleth/partialLogout.html @@ -0,0 +1,24 @@ + + + + + + + + Partial Logout + + + + + +Logo + +

Partial Logout

+ +

You remain logged into one or more applications accessed during your session. +To complete the logout process, please close/exit your browser completely.

+ + + diff --git a/etc/keystone-sp/shibboleth/postTemplate.html b/etc/keystone-sp/shibboleth/postTemplate.html new file mode 100644 index 000000000..d8c4728d0 --- /dev/null +++ b/etc/keystone-sp/shibboleth/postTemplate.html @@ -0,0 +1,37 @@ + + + Login Completed + + + +

Login Completed

+ + + +
+ + + + +
+ + diff --git a/etc/keystone-sp/shibboleth/protocols.xml b/etc/keystone-sp/shibboleth/protocols.xml new file mode 100644 index 000000000..648bcbc32 --- /dev/null +++ b/etc/keystone-sp/shibboleth/protocols.xml @@ -0,0 +1,57 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/etc/keystone-sp/shibboleth/security-policy.xml b/etc/keystone-sp/shibboleth/security-policy.xml new file mode 100644 index 000000000..f8eaacda9 --- /dev/null +++ b/etc/keystone-sp/shibboleth/security-policy.xml @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/etc/keystone-sp/shibboleth/sessionError.html b/etc/keystone-sp/shibboleth/sessionError.html new file mode 100644 index 000000000..7ccf17be7 --- /dev/null +++ b/etc/keystone-sp/shibboleth/sessionError.html @@ -0,0 +1,45 @@ + + + + + + + + <shibmlp errorType/> + + + + + +Logo + +

+ +

The system encountered an error at

+ +

To report this problem, please contact the site administrator at +. +

+ +

Please include the following message in any email:

+

at ()

+ +

+ + +

Error from identity provider:

+
+ Status:
+ + Sub-Status:
+
+ + Message:
+
+
+
+ + + diff --git a/etc/keystone-sp/shibboleth/shibboleth2.xml b/etc/keystone-sp/shibboleth/shibboleth2.xml new file mode 100644 index 000000000..c10a36c1b --- /dev/null +++ b/etc/keystone-sp/shibboleth/shibboleth2.xml @@ -0,0 +1,73 @@ + + + + + + + + + memcached.openstack.svc.cluster.local:11211 + + + + + memcached.openstack.svc.cluster.local:11211 + + + + + + + + + + + SAML2 + + + SAML2 Local + + + + + + + + + + + + + + + + + + + + diff --git a/etc/keystone-sp/shibboleth/shibd.logger b/etc/keystone-sp/shibboleth/shibd.logger new file mode 100644 index 000000000..39950c591 --- /dev/null +++ b/etc/keystone-sp/shibboleth/shibd.logger @@ -0,0 +1,73 @@ +# set overall behavior +log4j.rootCategory=INFO, shibd_log, warn_log + +# fairly verbose for DEBUG, so generally leave at INFO +log4j.category.XMLTooling.XMLObject=INFO +log4j.category.XMLTooling.XMLObjectBuilder=INFO +log4j.category.XMLTooling.KeyInfoResolver=INFO +log4j.category.Shibboleth.IPRange=INFO +log4j.category.Shibboleth.PropertySet=INFO + +# raise for low-level tracing of SOAP client HTTP/SSL behavior +log4j.category.XMLTooling.libcurl=INFO + +# useful categories to tune independently: +# +# tracing of SAML messages and security policies +#log4j.category.OpenSAML.MessageDecoder=DEBUG +#log4j.category.OpenSAML.MessageEncoder=DEBUG +#log4j.category.OpenSAML.SecurityPolicyRule=DEBUG +#log4j.category.XMLTooling.SOAPClient=DEBUG +# interprocess message remoting +#log4j.category.Shibboleth.Listener=DEBUG +# mapping of requests to applicationId +#log4j.category.Shibboleth.RequestMapper=DEBUG +# high level session cache operations +#log4j.category.Shibboleth.SessionCache=DEBUG +# persistent storage and caching +#log4j.category.XMLTooling.StorageService=DEBUG + +# logs XML being signed or verified if set to DEBUG +log4j.category.XMLTooling.Signature.Debugger=INFO, sig_log +log4j.additivity.XMLTooling.Signature.Debugger=false +log4j.ownAppenders.XMLTooling.Signature.Debugger=true + +# the tran log blocks the "default" 
appender(s) at runtime +# Level should be left at INFO for this category +log4j.category.Shibboleth-TRANSACTION=INFO, tran_log +log4j.additivity.Shibboleth-TRANSACTION=false +log4j.ownAppenders.Shibboleth-TRANSACTION=true + +# uncomment to suppress particular event types +#log4j.category.Shibboleth-TRANSACTION.AuthnRequest=WARN +#log4j.category.Shibboleth-TRANSACTION.Login=WARN +#log4j.category.Shibboleth-TRANSACTION.Logout=WARN + +# define the appenders + +log4j.appender.shibd_log=org.apache.log4j.RollingFileAppender +log4j.appender.shibd_log.fileName=/var/log/shibboleth/shibd.log +log4j.appender.shibd_log.maxFileSize=1000000 +log4j.appender.shibd_log.maxBackupIndex=10 +log4j.appender.shibd_log.layout=org.apache.log4j.PatternLayout +log4j.appender.shibd_log.layout.ConversionPattern=%d{%Y-%m-%d %H:%M:%S} %p %c %x: %m%n + +log4j.appender.warn_log=org.apache.log4j.RollingFileAppender +log4j.appender.warn_log.fileName=/var/log/shibboleth/shibd_warn.log +log4j.appender.warn_log.maxFileSize=1000000 +log4j.appender.warn_log.maxBackupIndex=10 +log4j.appender.warn_log.layout=org.apache.log4j.PatternLayout +log4j.appender.warn_log.layout.ConversionPattern=%d{%Y-%m-%d %H:%M:%S} %p %c %x: %m%n +log4j.appender.warn_log.threshold=WARN + +log4j.appender.tran_log=org.apache.log4j.RollingFileAppender +log4j.appender.tran_log.fileName=/var/log/shibboleth/transaction.log +log4j.appender.tran_log.maxFileSize=1000000 +log4j.appender.tran_log.maxBackupIndex=20 +log4j.appender.tran_log.layout=org.apache.log4j.PatternLayout +log4j.appender.tran_log.layout.ConversionPattern=%d{%Y-%m-%d %H:%M:%S}|%c|%m%n + +log4j.appender.sig_log=org.apache.log4j.FileAppender +log4j.appender.sig_log.fileName=/var/log/shibboleth/signature.log +log4j.appender.sig_log.layout=org.apache.log4j.PatternLayout +log4j.appender.sig_log.layout.ConversionPattern=%m diff --git a/etc/keystone-sp/shibboleth/sslError.html b/etc/keystone-sp/shibboleth/sslError.html new file mode 100644 index 000000000..367366a63 --- 
/dev/null +++ b/etc/keystone-sp/shibboleth/sslError.html @@ -0,0 +1,33 @@ + + + + + + + + POST Failed + + + + + +Logo + +

POST Failed

+ +

+You have attempted to submit information without the protection +of TLS to this site.
+

+ +

+For the protection of your submission and the integrity of the site, +this is not permitted. Please try accessing the server with a +URL starting with https:// and report this problem +to +

+ + + diff --git a/etc/keystone-sp/shibboleth/sso_callback_template.html b/etc/keystone-sp/shibboleth/sso_callback_template.html new file mode 100644 index 000000000..3364d69e5 --- /dev/null +++ b/etc/keystone-sp/shibboleth/sso_callback_template.html @@ -0,0 +1,22 @@ + + + + Keystone WebSSO redirect + + +
+ Please wait... +
+ + +
+ + + diff --git a/manifests/longhorn/longhorn-general-multi-attach-storageclass.yaml b/manifests/longhorn/longhorn-general-multi-attach-storageclass.yaml new file mode 100644 index 000000000..c2dab8863 --- /dev/null +++ b/manifests/longhorn/longhorn-general-multi-attach-storageclass.yaml @@ -0,0 +1,17 @@ +--- +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: general-multi-attach + annotations: + storageclass.kubernetes.io/is-default-class: "false" +provisioner: driver.longhorn.io +allowVolumeExpansion: true +reclaimPolicy: Delete +volumeBindingMode: Immediate +parameters: + numberOfReplicas: "2" # This example uses two replicas, but you can adjust this value as needed + dataLocality: "best-effort" + staleReplicaTimeout: "2880" + fromBackup: "" + fsType: "ext4" diff --git a/manifests/metallb/metallb-openstack-service-lb.yml b/manifests/metallb/metallb-openstack-service-lb.yml index 733e5d20c..d31e010a8 100644 --- a/manifests/metallb/metallb-openstack-service-lb.yml +++ b/manifests/metallb/metallb-openstack-service-lb.yml @@ -8,6 +8,7 @@ spec: addresses: - 10.74.8.99/32 # This is assumed to be the public LB vip address autoAssign: false + avoidBuggyIPs: true --- apiVersion: metallb.io/v1beta1 kind: L2Advertisement @@ -32,6 +33,7 @@ spec: addresses: - 10.234.0.0/24 autoAssign: false + avoidBuggyIPs: true --- apiVersion: metallb.io/v1beta1 kind: L2Advertisement diff --git a/manifests/utils/utils-openstack-client-admin.yaml b/manifests/utils/utils-openstack-client-admin.yaml index 4351d8529..b45d2cf51 100644 --- a/manifests/utils/utils-openstack-client-admin.yaml +++ b/manifests/utils/utils-openstack-client-admin.yaml @@ -7,7 +7,7 @@ spec: restartPolicy: Always containers: - name: "image-ks-service-registration" - image: docker.io/openstackhelm/heat:2023.1-ubuntu_jammy + image: ghcr.io/rackerlabs/genestack-images/heat:2024.1-latest imagePullPolicy: IfNotPresent command: - sleep diff --git a/mkdocs.yml b/mkdocs.yml index c6343fea2..f5169c3b0
100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -193,7 +193,6 @@ nav: - OVN: infrastructure-ovn-setup.md - FluentBit: infrastructure-fluentbit.md - Loki: infrastructure-loki.md - - Sealed Secrets: infrastructure-sealed-secrets.md - OpenStack: - openstack-overview.md - OpenStack Services: @@ -272,15 +271,16 @@ nav: - MariaDB: - Operations: infrastructure-mariadb-ops.md - Gateway API: - - NGINX Gateway: - - Custom Routes: infrastructure-nginx-gateway-api-custom.md - - Rackspace Example Gateway Overview: rackspace-infrastructure-nginx-gateway-api.md - - Creating self-signed CA issuer for Gateway API: infrastructure-nginx-gateway-api-ca-issuer.md + - Custom Routes: infrastructure-nginx-gateway-api-custom.md + - Rackspace Example Gateway Overview: rackspace-infrastructure-nginx-gateway-api.md + - Creating self-signed CA issuer for Gateway API: infrastructure-nginx-gateway-api-ca-issuer.md + - Creating Security Policies: infrastructure-envoy-gateway-api-security.md - Observability: - Observability Overview: observability-info.md - Monitoring Overview: monitoring-info.md - Alerting Overview: alerting-info.md - Logging Overview: genestack-logging.md + - Upgrades: 2024.1-to-2025.1.md - OpenStack: - CLI Access: - Generating Clouds YAML: openstack-clouds.md @@ -295,6 +295,7 @@ nav: - Nova PCI Passthrough: openstack-pci-passthrough.md - Host Aggregates: openstack-host-aggregates.md - Instance Data Recovery: openstack-data-disk-recovery.md + - Vendordata: openstack-vendordata.md - Quota Management: - Quota Management: openstack-quota-managment.md - Images: @@ -346,12 +347,12 @@ nav: - Blog: https://blog.rackspacecloud.com/blog - Regions: - Availability: api-status.md - - SJC3: + - SJC: - "": https://status.api.sjc3.rackspacecloud.com - - "Control Panel": https://skyline.api.sjc3.rackspacecloud.com - - DFW3: + - "Control Panel": 
https://keystone.api.sjc3.rackspacecloud.com/v3/auth/OS-FEDERATION/websso/saml2?origin=https://skyline.api.sjc3.rackspacecloud.com/api/openstack/skyline/api/v1/websso + - DFW: - "": https://status.api.dfw3.rackspacecloud.com - - "Control Panel": https://skyline.api.dfw3.rackspacecloud.com - - IAD3: + - "Control Panel": https://keystone.api.dfw3.rackspacecloud.com/v3/auth/OS-FEDERATION/websso/saml2?origin=https://skyline.api.dfw3.rackspacecloud.com/api/openstack/skyline/api/v1/websso + - IAD: - "": https://status.api.iad3.rackspacecloud.com - - "Control Panel": https://skyline.api.iad3.rackspacecloud.com + - "Control Panel": https://keystone.api.iad3.rackspacecloud.com/v3/auth/OS-FEDERATION/websso/saml2?origin=https://skyline.api.iad3.rackspacecloud.com/api/openstack/skyline/api/v1/websso diff --git a/requirements.txt b/requirements.txt index 7a1538248..53111561e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ ruamel.yaml.clib==0.2.8 kubernetes>=24.2.0 openstacksdk>=1.0.0 python-openstackclient==7.4.0 +dictdiffer==0.9.0 diff --git a/scripts/hyperconverged-lab.sh b/scripts/hyperconverged-lab.sh index fae950748..1b3a30df0 100755 --- a/scripts/hyperconverged-lab.sh +++ b/scripts/hyperconverged-lab.sh @@ -312,8 +312,9 @@ if [ "${HYPERCONVERGED_DEV:-false}" = "true" ]; then echo "HYPERCONVERGED_DEV is true, but we've failed to determine the base genestack directory" exit 1 fi + # NOTE: (brew) we are assuming an Ubunut (apt) based instance here ssh -o ForwardAgent=yes -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -t ${SSH_USERNAME}@${JUMP_HOST_VIP} \ - "timeout 1m bash -c 'while ! 
sudo apt update; do sleep 2; done' && sudo apt install -y rsync git" + "while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do echo 'Waiting for apt locks to be released...'; sleep 5; done && sudo apt-get update && sudo apt install -y rsync git" echo "Copying the development source code to the jump host" rsync -az \ -e "ssh -o ForwardAgent=yes -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" \ @@ -323,10 +324,6 @@ fi ssh -o ForwardAgent=yes -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -t ${SSH_USERNAME}@${JUMP_HOST_VIP} < /dev/null; then - echo "git could not be found, installing..." - sudo apt update && sudo apt install -y git -fi if [ ! -d "/opt/genestack" ]; then sudo git clone --recurse-submodules -j4 https://github.com/rackerlabs/genestack /opt/genestack else @@ -407,7 +404,7 @@ all: openstack_compute_nodes: vars: enable_iscsi: true - storage_network_multipath: false + custom_multipath: false hosts: ${LAB_NAME_PREFIX}-0.${GATEWAY_DOMAIN}: null ${LAB_NAME_PREFIX}-1.${GATEWAY_DOMAIN}: null @@ -924,6 +921,30 @@ pushd /opt/kube-plugins popd EOC +echo "Creating config for setup-openstack.sh" +ssh -o ForwardAgent=yes -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -t ${SSH_USERNAME}@${JUMP_HOST_VIP} < /etc/genestack/openstack-components.yaml < /dev/null; then + cloud-init status --wait + return $? + else + echo "Error: cloud-init command not found." + return 3 + fi +} + +# Function to wait for Apt and DNF locks, then install packages +wait_and_install_packages() { + local sleep_time=5 # Default sleep time between checks (in seconds) + local pkg_manager="" + local apt_packages=("python3-pip" "python3-venv" "python3-dev" "jq" "build-essential") + local dnf_packages=("python3-pip" "python3-venv" "python3-dev" "jq" "build-essential") + + # Check for Apt locks + echo "Checking for Apt locks..." 
+ while sudo fuser /var/lib/dpkg/lock /var/lib/dpkg/lock-frontend >/dev/null 2>&1 || sudo fuser /var/cache/apt/archives/lock >/dev/null 2>&1; do + echo "Apt lock detected. Waiting for it to be released..." + sleep "$sleep_time" + done + + # Check for DNF process (indicating a DNF operation) + echo "Checking for DNF locks..." + while pgrep dnf >/dev/null; do + echo "DNF process detected. Waiting for it to finish..." + sleep "$sleep_time" + done + + echo "No package manager locks or active processes found. Proceeding with installation." + + # Detect package manager + if command -v apt >/dev/null 2>&1; then + pkg_manager="apt" + elif command -v dnf >/dev/null 2>&1; then + pkg_manager="dnf" + else + echo "Error: Neither Apt nor DNF package manager found. Cannot install packages." + return 1 + fi + + # Install packages based on detected manager + if [[ "$pkg_manager" == "apt" ]]; then + echo "Detected Apt. Installing packages: ${apt_packages[@]}" + sudo apt update + sudo apt install -y "${apt_packages[@]}" # -y to auto-confirm installations + elif [[ "$pkg_manager" == "dnf" ]]; then + echo "Detected DNF. Installing packages: ${dnf_packages[@]}" + sudo dnf check-update # Checks for updates, but does not download or install packages + sudo dnf install -y "${dnf_packages[@]}" # -y to auto-confirm installations + fi + + echo "Package installation complete." +} - # Global functions function success { echo -e "\n\n\x1B[32m>> $1\x1B[39m" } diff --git a/yaml-editor/README.txt b/yaml-editor/README.txt new file mode 100644 index 000000000..d9e7f8b43 --- /dev/null +++ b/yaml-editor/README.txt @@ -0,0 +1,7 @@ +ye -- Yaml Editor +This application is a wrapper around vim. It is designed to pull the base helm config and the corresponding +override file, merge them together, and let you edit them in one place. When you save, it +updates the overrides file with the new config, leaving the base file unchanged.
+ +Syntax: ye +Example: ye nova diff --git a/yaml-editor/ye b/yaml-editor/ye index 25da8b22f..68ee7ed5b 100755 --- a/yaml-editor/ye +++ b/yaml-editor/ye @@ -5,23 +5,64 @@ import subprocess import tempfile import yaml import copy +import logging +from datetime import datetime +import argparse + +# Set up logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + handlers=[ + logging.FileHandler( + f"/var/log/ye_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log" + ), + logging.StreamHandler(), + ], +) + +DEFAULT_REPOS = [ + "openstack-helm", + "openstack-helm-infra", + "grafana", + "mariadb-operator", + "metallb", + "prometheus-community", + "longhorn", + "fluent", + "kubeovn", +] def load_yaml_file(filename): + """Load a YAML file and return its contents as a dictionary.""" try: with open(filename, "r") as f: data = yaml.safe_load(f) + logging.info(f"Loaded YAML file: {filename}") return data if data is not None else {} except FileNotFoundError: + logging.warning(f"File not found: {filename}") + return {} + except yaml.YAMLError as e: + logging.error(f"Error parsing YAML file {filename}: {e}") return {} def save_yaml_file(data, filename): - with open(filename, "w") as f: - yaml.safe_dump(data, f, default_flow_style=False, sort_keys=False) + """Save a dictionary to a YAML file.""" + try: + os.makedirs(os.path.dirname(filename), exist_ok=True) + with open(filename, "w") as f: + yaml.safe_dump(data, f, default_flow_style=False, sort_keys=False) + logging.info(f"Saved YAML file: {filename}") + except Exception as e: + logging.error(f"Error saving YAML file {filename}: {e}") + sys.exit(1) def merge_dicts(base, overrides): + """Merge two dictionaries, with overrides taking precedence.""" result = copy.deepcopy(base) for key, override_value in overrides.items(): if ( @@ -36,22 +77,23 @@ def merge_dicts(base, overrides): def launch_editor(initial_content): + """Launch an editor to modify YAML content and return the edited 
content.""" editor = os.environ.get("EDITOR", "vim") with tempfile.NamedTemporaryFile(suffix=".yaml", mode="w+", delete=False) as tf: temp_filename = tf.name tf.write(initial_content) tf.flush() - + logging.info(f"Launching editor: {editor} {temp_filename}") subprocess.call([editor, temp_filename]) - with open(temp_filename, "r") as tf: edited_content = tf.read() - os.unlink(temp_filename) + logging.info("Editor closed, temporary file deleted") return edited_content def compute_patch(base, edited): + """Compute the difference between base and edited data, returning the patch.""" patch = {} for key, edited_value in edited.items(): if key not in base: @@ -60,7 +102,7 @@ def compute_patch(base, edited): base_value = base[key] if isinstance(base_value, dict) and isinstance(edited_value, dict): sub_patch = compute_patch(base_value, edited_value) - if sub_patch: # Only include non-empty patches. + if sub_patch: patch[key] = sub_patch else: if edited_value != base_value: @@ -68,45 +110,112 @@ def compute_patch(base, edited): return patch +def load_helm_defaults(service_name, repos): + """Load default values from the Helm chart for the given service, trying multiple repos.""" + for repo in repos: + try: + chart_ref = f"{repo}/{service_name}" + result = subprocess.run( + ["helm", "show", "values", chart_ref], + capture_output=True, + text=True, + check=True, + ) + defaults_str = result.stdout + data = yaml.safe_load(defaults_str) + logging.info( + f"Loaded Helm defaults for service: {service_name} from repo: {repo}" + ) + return data if data is not None else {} + except subprocess.CalledProcessError: + logging.warning(f"Chart '{chart_ref}' not found in repo '{repo}'") + except yaml.YAMLError as e: + logging.warning(f"Error parsing Helm defaults for {chart_ref}: {e}") + except Exception as e: + logging.warning(f"Error loading Helm chart from {repo}: {e}") + logging.warning( + f"No Helm defaults found for {service_name} in any repo. Proceeding without." 
+ ) + return {} + + def main(): - if len(sys.argv) != 3: - print("Usage: {} ".format(sys.argv[0])) - print( - "ye - (YamlEditor) will launch an editor to edit the values in base_yaml_file. If the override_yaml_file exists, " - "it will read its values in and will overlay the valies on top of base." - "After editing, the edited values will be saved to override_yaml_file and base will remain the same." - ) - sys.exit(1) + """Main function to orchestrate YAML editing and patching.""" + parser = argparse.ArgumentParser(description="YamlEditor for Helm configurations") + parser.add_argument( + "service_name", help="Name of the service (e.g., keystone, grafana)" + ) + parser.add_argument( + "--repos", + default=",".join(DEFAULT_REPOS), + help=f"Comma-separated list of Helm repos (default: {','.join(DEFAULT_REPOS)})", + ) + args = parser.parse_args() + + repos = [r.strip() for r in args.repos.split(",")] + service_name = args.service_name + base_filename = f"/opt/genestack/base-helm-configs/{service_name}/{service_name}-helm-overrides.yaml" + override_filename = ( + f"/etc/genestack/helm-configs/{service_name}/{service_name}-helm-overrides.yaml" + ) - base_filename = sys.argv[1] - override_filename = sys.argv[2] + # Load all configuration sources + helm_defaults = load_helm_defaults(service_name, repos) base_data = load_yaml_file(base_filename) if not base_data: - print(f"Error: Base file '{base_filename}' not found or is empty.") - sys.exit(1) - + logging.warning( + f"Base file '{base_filename}' not found or is empty. Proceeding without base." 
+ ) + base_data = {} previous_overrides = load_yaml_file(override_filename) if not isinstance(previous_overrides, dict): previous_overrides = {} - effective_data = merge_dicts(base_data, previous_overrides) + logging.warning( + f"Override file '{override_filename}' is not a valid dictionary, using empty dict" + ) + + # Merge configurations: Helm defaults -> Base -> Overrides + base_merged = merge_dicts(helm_defaults, base_data) + effective_data = merge_dicts(base_merged, previous_overrides) + + # Launch editor with merged configuration initial_yaml_str = yaml.safe_dump( effective_data, default_flow_style=False, sort_keys=False ) + logging.info("Launching editor with merged configuration") print("Launching editor. Modify values as needed, then save and exit.") edited_yaml_str = launch_editor(initial_yaml_str) + + # Parse edited content try: edited_data = yaml.safe_load(edited_yaml_str) if edited_data is None: edited_data = {} + logging.info("Successfully parsed edited YAML") except yaml.YAMLError as e: - print("Error parsing YAML from editor:", e) + logging.error(f"Error parsing YAML from editor: {e}") sys.exit(1) - patch = compute_patch(base_data, edited_data) + + # Compute patch against merged base (Helm defaults + base config) + patch = compute_patch(base_merged, edited_data) print("Computed patch (overrides):") print(yaml.safe_dump(patch, default_flow_style=False, sort_keys=False)) + + # Save patch to override file save_yaml_file(patch, override_filename) print(f"Overrides saved to '{override_filename}'.") if __name__ == "__main__": + if len(sys.argv) < 2: + print( + f"Usage: {sys.argv[0]} \n" + "ye - (YamlEditor) launches an editor to edit Helm configuration values for the specified service. \n" + "It merges Helm chart defaults from specified or default repos, base overrides from /opt/genestack/base-helm-configs//-helm-overrides.yaml, \n" + "and user overrides from /etc/genestack/helm-configs//-helm-overrides.yaml. 
\n" + "If Helm defaults or base file are unavailable, it proceeds with available data. \n" + "Only changes not in defaults or base are saved to the user override file.\n" + "Created by Jake Briggs Rackspace" + ) + sys.exit(1) main()