diff --git a/.github/workflows/ansible-deploy.yml b/.github/workflows/ansible-deploy.yml new file mode 100644 index 0000000000..8dd016cee1 --- /dev/null +++ b/.github/workflows/ansible-deploy.yml @@ -0,0 +1,86 @@ +name: Ansible Deployment (Lab06) + +on: + push: + branches: [main, master, lab6] + paths: + - "lab6c/ansible/**" + - ".github/workflows/ansible-deploy.yml" + pull_request: + branches: [main, master, lab6] + paths: + - "lab6c/ansible/**" + +concurrency: + group: ansible-deploy-${{ github.ref }} + cancel-in-progress: true + +jobs: + lint: + name: Ansible Lint + runs-on: ubuntu-latest + defaults: + run: + working-directory: lab6c/ansible + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Ansible and ansible-lint + run: | + pip install ansible ansible-lint + ansible-galaxy collection install -r requirements.yml + + - name: Run ansible-lint + run: ansible-lint playbooks/*.yml 2>/dev/null || echo "Lint finished (warnings may appear)" + + deploy: + name: Deploy Application + needs: lint + runs-on: ubuntu-latest + if: github.event_name == 'push' + defaults: + run: + working-directory: lab6c/ansible + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Ansible and collections + run: | + pip install ansible + ansible-galaxy collection install -r requirements.yml + + - name: Setup SSH + run: | + mkdir -p ~/.ssh + echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519 + chmod 600 ~/.ssh/id_ed25519 + ssh-keyscan -H "${{ secrets.VM_HOST }}" >> ~/.ssh/known_hosts 2>/dev/null || true + + - name: Deploy with Ansible + env: + ANSIBLE_VAULT_PASSWORD: ${{ secrets.ANSIBLE_VAULT_PASSWORD }} + run: | + echo "$ANSIBLE_VAULT_PASSWORD" > /tmp/vault_pass + chmod 600 /tmp/vault_pass + ansible-playbook playbooks/deploy.yml \ + --vault-password-file /tmp/vault_pass \ + -e 
ansible_ssh_private_key_file=~/.ssh/id_ed25519 \ + -e ansible_host=${{ secrets.VM_HOST }} \ + -e ansible_user=${{ secrets.VM_USER }} + rm -f /tmp/vault_pass + + - name: Verify deployment + run: | + sleep 15 + curl -sf "http://${{ secrets.VM_HOST }}:5000/health" || echo "Health check failed" + curl -sf "http://${{ secrets.VM_HOST }}:5000/" || echo "Root check failed" diff --git a/.github/workflows/go-ci.yml b/.github/workflows/go-ci.yml new file mode 100644 index 0000000000..e09a65c488 --- /dev/null +++ b/.github/workflows/go-ci.yml @@ -0,0 +1,76 @@ +name: Go CI (Lab03 Bonus) + +on: + push: + branches: [lab03, main, master] + paths: + - "lab3c/app_go/**" + - ".github/workflows/go-ci.yml" + pull_request: + branches: [lab03, main, master] + paths: + - "lab3c/app_go/**" + - ".github/workflows/go-ci.yml" + +concurrency: + group: go-ci-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + test: + name: Lint and Test + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: "1.22" + + - name: golangci-lint + uses: golangci/golangci-lint-action@v6 + with: + working-directory: lab3c/app_go + args: --timeout=5m + + - name: Run tests + working-directory: lab3c/app_go + run: go test ./... 
+ + docker: + name: Build and Push Docker Image + runs-on: ubuntu-latest + needs: test + if: ${{ github.event_name == 'push' }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Set version (CalVer) + run: echo "VERSION=$(date +%Y.%m.%d)" >> $GITHUB_ENV + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: ./lab3c/app_go + file: ./lab3c/app_go/Dockerfile + push: true + tags: | + ${{ secrets.DOCKERHUB_USERNAME }}/devops-info-go:${{ env.VERSION }} + ${{ secrets.DOCKERHUB_USERNAME }}/devops-info-go:latest + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml new file mode 100644 index 0000000000..d61adcda2b --- /dev/null +++ b/.github/workflows/python-ci.yml @@ -0,0 +1,109 @@ +name: Python CI (Lab03) + +on: + push: + branches: [lab3, main, master] + paths: + - "lab3c/app_python/**" + - ".github/workflows/python-ci.yml" + pull_request: + branches: [lab3, main, master] + paths: + - "lab3c/app_python/**" + - ".github/workflows/python-ci.yml" + +concurrency: + group: python-ci-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + test: + name: Lint and Test + runs-on: ubuntu-latest + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} + strategy: + fail-fast: true + matrix: + python-version: ["3.11", "3.12"] + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + cache-dependency-path: | + lab3c/app_python/requirements.txt + lab3c/app_python/requirements-dev.txt + + - name: Install dependencies + working-directory: 
lab3c/app_python + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt -r requirements-dev.txt + + - name: Lint (ruff) + working-directory: lab3c/app_python + run: ruff check . + + - name: Run tests with coverage + working-directory: lab3c/app_python + run: pytest --cov=app --cov-report=xml --cov-report=term + + - name: Upload coverage to Codecov + if: ${{ env.CODECOV_TOKEN != '' }} + uses: codecov/codecov-action@v4 + with: + files: lab3c/app_python/coverage.xml + token: ${{ env.CODECOV_TOKEN }} + + - name: Install Snyk CLI + if: ${{ env.SNYK_TOKEN != '' }} + run: npm install -g snyk + + - name: Snyk scan + if: ${{ env.SNYK_TOKEN != '' }} + working-directory: lab3c/app_python + run: snyk test --file=requirements.txt --package-manager=pip + env: + SNYK_TOKEN: ${{ env.SNYK_TOKEN }} + + docker: + name: Build and Push Docker Image + runs-on: ubuntu-latest + needs: test + if: ${{ github.event_name == 'push' }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Set version (CalVer) + run: echo "VERSION=$(date +%Y.%m.%d)" >> $GITHUB_ENV + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: ./lab3c/app_python + file: ./lab3c/app_python/Dockerfile + push: true + tags: | + ${{ secrets.DOCKERHUB_USERNAME }}/devops-info-python:${{ env.VERSION }} + ${{ secrets.DOCKERHUB_USERNAME }}/devops-info-python:latest + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/terraform-ci.yml b/.github/workflows/terraform-ci.yml new file mode 100644 index 0000000000..42a0c50418 --- /dev/null +++ b/.github/workflows/terraform-ci.yml @@ -0,0 +1,51 @@ +name: Terraform Validate (Lab04) + +on: + push: + branches: [lab04, main, master] + paths: + - "lab4c/terraform/**" + 
- ".github/workflows/terraform-ci.yml" + pull_request: + branches: [lab04, main, master] + paths: + - "lab4c/terraform/**" + - ".github/workflows/terraform-ci.yml" + +jobs: + validate: + name: Format, Validate, Lint + runs-on: ubuntu-latest + defaults: + run: + working-directory: lab4c/terraform + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + with: + terraform_version: "1.9" + terraform_wrapper: false + + - name: Terraform Format Check + run: terraform fmt -check -recursive + + - name: Terraform Init + run: terraform init -backend=false + + - name: Terraform Validate + run: terraform validate + + - name: Setup TFLint + uses: terraform-linters/setup-tflint@v4 + with: + tflint_version: latest + + - name: TFLint Init + run: tflint --init + + - name: TFLint + run: tflint --format compact + continue-on-error: true diff --git a/.gitignore b/.gitignore index 30d74d2584..bd402531fb 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,17 @@ -test \ No newline at end of file +test + +# Ansible +*.retry +.vault_pass +ansible/inventory/*.pyc +__pycache__/ + +# Local lab 5 runtime artifacts +lab5c/ansible/.vault_pass +lab5c/ansible/*.retry + +lab11c/tools/ + +lab12c/tools/ + +.cache/ \ No newline at end of file diff --git a/lab10c/k8s/HELM.md b/lab10c/k8s/HELM.md new file mode 100644 index 0000000000..a0c3a70e2c --- /dev/null +++ b/lab10c/k8s/HELM.md @@ -0,0 +1,176 @@ +# Lab 10 — Helm Package Manager + +All required tasks are completed. Bonus task (library charts) is intentionally not included. + +## 1) Chart Overview + +Chart path: `lab10c/k8s/devops-info` + +Main files: + +- `Chart.yaml` — chart metadata (`apiVersion: v2`, app chart). +- `values.yaml` — default config (replicas, image, service, probes, resources, hook settings). +- `templates/deployment.yaml` — app Deployment template. +- `templates/service.yaml` — Service template. +- `templates/_helpers.tpl` — shared labels/naming helpers. 
+- `templates/hooks-pre-install-job.yaml` — pre-install hook job. +- `templates/hooks-post-install-job.yaml` — post-install hook job. +- `values-dev.yaml` / `values-prod.yaml` — environment overrides. + +## 2) Configuration Guide + +Important values: + +- `replicaCount` — pod count. +- `image.repository`, `image.tag`, `image.pullPolicy` — container image settings. +- `service.type`, `service.port`, `service.targetPort`, `service.nodePort` — service exposure. +- `resources.requests/limits` — CPU and memory control. +- `livenessProbe.*`, `readinessProbe.*` — health checks (kept enabled). +- `hooks.*` — pre/post install hook behavior. + +Environment files: + +- `values-dev.yaml`: 1 replica, smaller resources, NodePort, `RELEASE_ID=dev`. +- `values-prod.yaml`: 3 replicas, stronger resources, LoadBalancer-ready, `RELEASE_ID=prod`. + +Example commands: + +```bash +helm install devops-dev lab10c/k8s/devops-info -f lab10c/k8s/devops-info/values-dev.yaml +helm upgrade devops-dev lab10c/k8s/devops-info -f lab10c/k8s/devops-info/values-prod.yaml +``` + +## 3) Hook Implementation + +Implemented hooks: + +- **pre-install** job (`weight: -5`) — runs before resource install. +- **post-install** job (`weight: 5`) — runs after install. + +Annotations used: + +- `"helm.sh/hook": pre-install` / `post-install` +- `"helm.sh/hook-weight": ...` +- `"helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation` + +Why: + +- pre-install: quick validation step before main resources. +- post-install: smoke-check style task after release is up. + +Execution order: + +- lower weight runs first (`-5` before `5`). + +Deletion behavior: + +- successful jobs are cleaned automatically (`hook-succeeded`). 
+ +## 4) Installation Evidence + +Helm fundamentals: + +```text +helm version -> v4.0.0 +kubectl cluster-info (kind-lab10) -> control plane reachable +helm show chart prometheus-community/prometheus -> chart metadata displayed +``` + +Release state: + +```text +helm list +NAME NAMESPACE REVISION STATUS CHART APP VERSION +devops-dev default 2 deployed devops-info-0.1.0 1.0.0 +``` + +Kubernetes resources: + +```text +kubectl get deploy,svc,pods -l app.kubernetes.io/instance=devops-dev +deployment/devops-dev-devops-info READY 3/3 +service/devops-dev-devops-info TYPE LoadBalancer +pods 3/3 Running +``` + +Hook execution evidence: + +```text +kubectl get events ... includes: +- SuccessfulCreate job/devops-dev-devops-info-pre-install +- Completed job/devops-dev-devops-info-pre-install +- SuccessfulCreate job/devops-dev-devops-info-post-install +- Completed job/devops-dev-devops-info-post-install +``` + +Hook cleanup evidence: + +```text +kubectl get jobs -l app.kubernetes.io/instance=devops-dev +No resources found +``` + +## 5) Operations + +Install: + +```bash +helm install devops-dev lab10c/k8s/devops-info -f lab10c/k8s/devops-info/values-dev.yaml --wait +``` + +Upgrade: + +```bash +helm upgrade devops-dev lab10c/k8s/devops-info -f lab10c/k8s/devops-info/values-prod.yaml --wait +``` + +Rollback: + +```bash +helm history devops-dev +helm rollback devops-dev 1 --wait +``` + +Uninstall: + +```bash +helm uninstall devops-dev +``` + +## 6) Testing & Validation + +Lint: + +```text +helm lint lab10c/k8s/devops-info -> 0 chart(s) failed +``` + +Template render: + +```text +helm template devops-dev lab10c/k8s/devops-info -f values-dev.yaml +Rendered Deployment, Service, and hook Jobs with expected values. +``` + +Dry run: + +```text +helm install --dry-run --debug devops-dev ... -f values-dev.yaml +Rendered hooks and final manifests correctly. 
+``` + +App accessibility check: + +```bash +kubectl port-forward service/devops-dev-devops-info 8084:80 +curl http://127.0.0.1:8084/health +``` + +```text +{"status":"healthy", ...} +``` + +## Short value statement + +Helm turns static Kubernetes YAML into reusable packages. +It makes deployments consistent, configurable per environment, and easier to upgrade/rollback. diff --git a/lab10c/k8s/devops-info/.helmignore b/lab10c/k8s/devops-info/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/lab10c/k8s/devops-info/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/lab10c/k8s/devops-info/Chart.yaml b/lab10c/k8s/devops-info/Chart.yaml new file mode 100644 index 0000000000..ebdc6bf296 --- /dev/null +++ b/lab10c/k8s/devops-info/Chart.yaml @@ -0,0 +1,12 @@ +apiVersion: v2 +name: devops-info +description: Helm chart for DevOps Info Python service +type: application +version: 0.1.0 +appVersion: "1.0.0" +keywords: + - python + - fastapi + - devops +maintainers: + - name: Phoenix diff --git a/lab10c/k8s/devops-info/templates/_helpers.tpl b/lab10c/k8s/devops-info/templates/_helpers.tpl new file mode 100644 index 0000000000..c4fa59af21 --- /dev/null +++ b/lab10c/k8s/devops-info/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "devops-info.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 
+If release name contains chart name it will be used as a full name. +*/}} +{{- define "devops-info.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "devops-info.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "devops-info.labels" -}} +helm.sh/chart: {{ include "devops-info.chart" . }} +{{ include "devops-info.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "devops-info.selectorLabels" -}} +app.kubernetes.io/name: {{ include "devops-info.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "devops-info.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "devops-info.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/lab10c/k8s/devops-info/templates/deployment.yaml b/lab10c/k8s/devops-info/templates/deployment.yaml new file mode 100644 index 0000000000..0612f29771 --- /dev/null +++ b/lab10c/k8s/devops-info/templates/deployment.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "devops-info.fullname" . }} + labels: + {{- include "devops-info.labels" . 
| nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + selector: + matchLabels: + {{- include "devops-info.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "devops-info.labels" . | nindent 8 }} + spec: + containers: + - name: app + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.service.targetPort }} + protocol: TCP + env: + - name: HOST + value: {{ .Values.env.HOST | quote }} + - name: PORT + value: {{ .Values.env.PORT | quote }} + - name: RELEASE_ID + value: {{ .Values.env.RELEASE_ID | quote }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + livenessProbe: + httpGet: + path: {{ .Values.livenessProbe.path }} + port: http + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + readinessProbe: + httpGet: + path: {{ .Values.readinessProbe.path }} + port: http + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + resources: + {{- toYaml .Values.resources | nindent 12 }} diff --git a/lab10c/k8s/devops-info/templates/hooks-post-install-job.yaml b/lab10c/k8s/devops-info/templates/hooks-post-install-job.yaml new file mode 100644 index 0000000000..223784d7fc --- /dev/null +++ b/lab10c/k8s/devops-info/templates/hooks-post-install-job.yaml @@ -0,0 +1,26 @@ +{{- if .Values.hooks.postInstall.enabled }} +apiVersion: batch/v1 +kind: Job +metadata: + name: "{{ include "devops-info.fullname" . 
}}-post-install" + labels: + {{- include "devops-info.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": post-install + "helm.sh/hook-weight": "{{ .Values.hooks.postInstall.weight }}" + "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation +spec: + template: + metadata: + labels: + {{- include "devops-info.selectorLabels" . | nindent 8 }} + spec: + restartPolicy: Never + containers: + - name: post-install-check + image: {{ .Values.hooks.image | quote }} + command: + - sh + - -c + - {{ printf "echo %s" .Values.hooks.postInstall.message | quote }} +{{- end }} diff --git a/lab10c/k8s/devops-info/templates/hooks-pre-install-job.yaml b/lab10c/k8s/devops-info/templates/hooks-pre-install-job.yaml new file mode 100644 index 0000000000..c6544522e9 --- /dev/null +++ b/lab10c/k8s/devops-info/templates/hooks-pre-install-job.yaml @@ -0,0 +1,26 @@ +{{- if .Values.hooks.preInstall.enabled }} +apiVersion: batch/v1 +kind: Job +metadata: + name: "{{ include "devops-info.fullname" . }}-pre-install" + labels: + {{- include "devops-info.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": pre-install + "helm.sh/hook-weight": "{{ .Values.hooks.preInstall.weight }}" + "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation +spec: + template: + metadata: + labels: + {{- include "devops-info.selectorLabels" . | nindent 8 }} + spec: + restartPolicy: Never + containers: + - name: pre-install-check + image: {{ .Values.hooks.image | quote }} + command: + - sh + - -c + - {{ printf "echo %s" .Values.hooks.preInstall.message | quote }} +{{- end }} diff --git a/lab10c/k8s/devops-info/templates/service.yaml b/lab10c/k8s/devops-info/templates/service.yaml new file mode 100644 index 0000000000..bf4f3f22ad --- /dev/null +++ b/lab10c/k8s/devops-info/templates/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "devops-info.fullname" . }} + labels: + {{- include "devops-info.labels" . 
| nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: {{ .Values.service.targetPort }} + protocol: TCP + name: http + {{- if eq .Values.service.type "NodePort" }} + nodePort: {{ .Values.service.nodePort }} + {{- end }} + selector: + {{- include "devops-info.selectorLabels" . | nindent 4 }} diff --git a/lab10c/k8s/devops-info/values-dev.yaml b/lab10c/k8s/devops-info/values-dev.yaml new file mode 100644 index 0000000000..995e472abd --- /dev/null +++ b/lab10c/k8s/devops-info/values-dev.yaml @@ -0,0 +1,26 @@ +replicaCount: 1 + +image: + tag: "lab9" + +service: + type: NodePort + +env: + RELEASE_ID: "dev" + +resources: + requests: + cpu: "50m" + memory: "64Mi" + limits: + cpu: "150m" + memory: "128Mi" + +livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 10 + +readinessProbe: + initialDelaySeconds: 3 + periodSeconds: 5 diff --git a/lab10c/k8s/devops-info/values-prod.yaml b/lab10c/k8s/devops-info/values-prod.yaml new file mode 100644 index 0000000000..63347ee5b6 --- /dev/null +++ b/lab10c/k8s/devops-info/values-prod.yaml @@ -0,0 +1,27 @@ +replicaCount: 3 + +image: + tag: "lab9" + +service: + type: LoadBalancer + nodePort: null + +env: + RELEASE_ID: "prod" + +resources: + requests: + cpu: "150m" + memory: "192Mi" + limits: + cpu: "500m" + memory: "512Mi" + +livenessProbe: + initialDelaySeconds: 20 + periodSeconds: 10 + +readinessProbe: + initialDelaySeconds: 8 + periodSeconds: 5 diff --git a/lab10c/k8s/devops-info/values.yaml b/lab10c/k8s/devops-info/values.yaml new file mode 100644 index 0000000000..56feece3fb --- /dev/null +++ b/lab10c/k8s/devops-info/values.yaml @@ -0,0 +1,59 @@ +replicaCount: 3 + +nameOverride: "" +fullnameOverride: "" + +image: + repository: tsixphoenix/devops-info-python + tag: "lab9" + pullPolicy: IfNotPresent + +service: + type: NodePort + port: 80 + targetPort: 5000 + nodePort: 30080 + +env: + HOST: "0.0.0.0" + PORT: "5000" + RELEASE_ID: "v2" + +resources: + requests: + cpu: 
"100m" + memory: "128Mi" + limits: + cpu: "300m" + memory: "256Mi" + +livenessProbe: + path: /health + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 2 + failureThreshold: 3 + +readinessProbe: + path: /health + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + failureThreshold: 3 + +securityContext: + runAsNonRoot: true + runAsUser: 10001 + allowPrivilegeEscalation: false + readOnlyRootFilesystem: false + +hooks: + image: busybox:1.36 + preInstall: + enabled: true + weight: -5 + message: "pre-install check passed" + postInstall: + enabled: true + weight: 5 + message: "post-install smoke test passed" diff --git a/lab11c/k8s/RUNBOOK.md b/lab11c/k8s/RUNBOOK.md new file mode 100644 index 0000000000..752902bc3a --- /dev/null +++ b/lab11c/k8s/RUNBOOK.md @@ -0,0 +1,92 @@ +# Local Kubernetes runbook (labs 11 & 12) + +What you need: Docker, `kubectl`, `helm` 3+, `git`, and [kind](https://kind.sigs.k8s.io/docs/user/quick-start/). Paths below assume repo root `DevOps-CC` and PowerShell on Windows (adjust paths for bash). + +## 1. Cluster + +```powershell +kind create cluster --name lab11 --wait 5m +kubectl config use-context kind-lab11 +``` + +## 2. Images + +```powershell +docker pull tsixphoenix/devops-info-python:lab9 +docker build -t tsixphoenix/devops-info-python:lab12 .\lab12c\app_python +kind load docker-image tsixphoenix/devops-info-python:lab9 --name lab11 +kind load docker-image tsixphoenix/devops-info-python:lab12 --name lab11 +``` + +Use `IfNotPresent` / registry pull in real use; for kind-only images add `--set image.pullPolicy=Never` on helm install. + +## 3. 
Vault (lab 11) — install from Git if Helm repo fails + +```powershell +git clone --depth 1 --branch v0.29.1 https://github.com/hashicorp/vault-helm.git .cache\vault-helm +helm upgrade --install vault .cache\vault-helm -n vault --create-namespace ` + --set server.dev.enabled=true --set injector.enabled=true --wait --timeout 5m +``` + +Configure (run in order; ignore “already enabled” errors where noted): + +```powershell +kubectl exec -n vault vault-0 -- vault secrets enable -path=secret kv-v2 +kubectl exec -n vault vault-0 -- vault kv put secret/devops-info/config username="vault-demo-user" password="vault-demo-password" api_key="vault-demo-api-key" +kubectl exec -n vault vault-0 -- sh -c "vault auth enable kubernetes 2>/dev/null; true" +kubectl exec -n vault vault-0 -- sh -c "vault write auth/kubernetes/config kubernetes_host=https://kubernetes.default.svc:443 kubernetes_ca_cert=@/var/run/secrets/kubernetes.io/serviceaccount/ca.crt token_reviewer_jwt=@/var/run/secrets/kubernetes.io/serviceaccount/token issuer=https://kubernetes.default.svc.cluster.local" +"path `"secret/data/devops-info/*`" { capabilities = [`"read`"] }" | kubectl exec -i -n vault vault-0 -- vault policy write devops-info-read - +kubectl exec -n vault vault-0 -- vault write auth/kubernetes/role/devops-info bound_service_account_names=app11-devops-info bound_service_account_namespaces=default policies=devops-info-read ttl=1h +``` + +## 4. Lab 11 app (Helm) + +```powershell +helm upgrade --install app11 .\lab11c\k8s\devops-info -f .\lab11c\k8s\devops-info\values-dev.yaml --set image.pullPolicy=Never --wait --timeout 5m +``` + +Check: `kubectl get pods` — pod should be `2/2` if Vault injector is enabled in values. + +## 5. Imperative Secret (task 1) + +```powershell +kubectl create secret generic app-credentials --from-literal=username=demo-user --from-literal=password=demo-pass +kubectl get secret app-credentials -o yaml +``` + +## 6. 
Lab 12 app (Helm) + +`values-dev.yaml` uses NodePort **30081** so it does not collide with lab 11 on **30080**. + +```powershell +helm upgrade --install app12 .\lab12c\k8s\devops-info -f .\lab12c\k8s\devops-info\values-dev.yaml --set image.pullPolicy=Never --wait --timeout 5m +``` + +## 7. Quick checks + +```powershell +kubectl get configmap,pvc +kubectl exec deploy/app12-devops-info -c app -- cat /config/config.json +kubectl exec deploy/app12-devops-info -c app -- printenv | findstr APP_ +kubectl exec deploy/app11-devops-info -c app -- ls /vault/secrets +``` + +**Persistence:** bump counter with HTTP calls to `/`, read `/data/visits`, delete the app12 pod, wait for reschedule, read `/data/visits` again — value should match. + +## 8. Tests (no cluster) + +```powershell +cd lab12c\app_python +pip install -r requirements.txt pytest httpx +pytest -q +``` + +## 9. Cleanup + +```powershell +helm uninstall app12 app11 -n default +helm uninstall vault -n vault +kind delete cluster --name lab11 +``` + +The `.cache/` folder with `vault-helm` is gitignored; delete it if you want a clean tree. diff --git a/lab11c/k8s/SECRETS.md b/lab11c/k8s/SECRETS.md new file mode 100644 index 0000000000..e6e266a35b --- /dev/null +++ b/lab11c/k8s/SECRETS.md @@ -0,0 +1,158 @@ +# Lab 11 — secrets & Vault + +Chart is under `lab11c/k8s/devops-info` (v0.2.0). Lab 10 hooks stay off in the default `values.yaml` so they don’t get in the way. + +## kubectl secret + +Create: + +```bash +kubectl create secret generic app-credentials \ + --from-literal=username=demo-user \ + --from-literal=password=demo-pass +``` + +Inspect: + +```bash +kubectl get secret app-credentials -o yaml +``` + +The `data.*` fields are base64 — that’s encoding for the API, not encryption. 
Decode in PowerShell (username `demo-user` → `ZGVtby11c2Vy`): + +```powershell +[Text.Encoding]::UTF8.GetString([Convert]::FromBase64String("ZGVtby11c2Vy")) +``` + +Without [etcd encryption at rest](https://kubernetes.io/docs/tasks/administer-cluster/encrypt-data/), secrets in etcd are only as safe as the cluster + RBAC. Anyone who can read the Secret object sees the values. + +## Helm secret + +The chart has `templates/secrets.yaml` (when `helmSecret.enabled` is true), values in `values.yaml`, and the deployment uses `envFrom` + `secretRef`. The defaults in the repo are dummy creds (`lab11-helm-demo-user` / `lab11-helm-demo-password`) — fine for classwork; use `--set` or something proper in real use. + +Install: + +```bash +helm upgrade --install app11 lab11c/k8s/devops-info \ + -f lab11c/k8s/devops-info/values-prod.yaml +``` + +Override without editing files: + +```bash +helm upgrade --install app11 lab11c/k8s/devops-info \ + -f lab11c/k8s/devops-info/values-prod.yaml \ + --set helmSecret.username=myuser \ + --set helmSecret.password=mypass +``` + +`kubectl describe pod` only shows the Secret name under env-from, not the cleartext. Inside the container you’ll see them in `printenv` — ok for debugging, just don’t paste real passwords into the repo or chat. Same story if `helm get values` picked up `--set` args. + +## Resources + +CPU/memory live in `values.yaml`, with overrides in `values-dev.yaml` / `values-prod.yaml`. Requests = what scheduling assumes; limits = hard cap (CPU gets throttled, memory can OOM). + +## Vault + +The HashiCorp Helm repo returned 403 from my network, so I installed from source: + +```bash +git clone --depth 1 --branch v0.29.1 https://github.com/hashicorp/vault-helm.git vault-helm +helm install vault ./vault-helm -n vault --create-namespace \ + --set server.dev.enabled=true \ + --set injector.enabled=true +``` + +Check: + +```bash +kubectl get pods -n vault +``` + +Rest is inside `vault-0`. 
Enable KV v2 on `secret` if it isn’t there yet: + +```bash +kubectl exec -n vault vault-0 -- vault secrets enable -path=secret kv-v2 +``` + +(If it already exists you’ll get an error — ignore.) + +Stuff I used for the app path: + +```bash +kubectl exec -n vault vault-0 -- vault kv put secret/devops-info/config \ + username="vault-demo-user" \ + password="vault-demo-password" \ + api_key="vault-demo-api-key" +``` + +Wire up Kubernetes auth: + +```bash +kubectl exec -n vault vault-0 -- sh -c 'vault auth enable kubernetes 2>/dev/null || true; vault write auth/kubernetes/config \ + kubernetes_host="https://kubernetes.default.svc:443" \ + kubernetes_ca_cert=@/var/run/secrets/kubernetes.io/serviceaccount/ca.crt \ + token_reviewer_jwt=@/var/run/secrets/kubernetes.io/serviceaccount/token \ + issuer="https://kubernetes.default.svc.cluster.local"' +``` + +Policy + role for release `app11` (ServiceAccount `app11-devops-info`): + +```bash +kubectl exec -i -n vault vault-0 -- vault policy write devops-info-read - <<'EOF' +path "secret/data/devops-info/*" { + capabilities = ["read"] +} +EOF + +kubectl exec -n vault vault-0 -- vault write auth/kubernetes/role/devops-info \ + bound_service_account_names=app11-devops-info \ + bound_service_account_namespaces=default \ + policies=devops-info-read \ + ttl=1h +``` + +Turn on the injector in `values-dev.yaml` / `values-prod.yaml` and you get the usual annotations (`vault.hashicorp.com/agent-inject`, `role`, `agent-inject-secret-vaultconfig`, service URL). Pod goes to 2/2 with the agent sidecar. Injected file landed at `/vault/secrets/vaultconfig` for me — I only checked with `ls`/`cat`, didn’t commit contents. + +Rough idea: mutating webhook adds the agent, it logs into Vault with Kubernetes auth, writes files into the volume. + +## Takeaway + +Built-in Secrets are the easy path; etcd encryption + RBAC still matter, rotation is on you. Vault adds policy/audit/rotation story but it’s another moving part. 
Dev-mode Vault from the lab is not production material. + +--- + +## Evidence (captured on kind v1.31, 2026-04-11) + +**Imperative Secret (YAML fragment):** + +```yaml +data: + password: ZGVtby1wYXNz + username: ZGVtby11c2Vy +kind: Secret +metadata: + name: app-credentials + namespace: default +type: Opaque +``` + +**Vault pods:** + +```text +NAME READY STATUS RESTARTS AGE +vault-0 1/1 Running 0 ... +vault-agent-injector-75f9d67594-xxxxx 1/1 Running 0 ... +``` + +**Helm release pod (injector on):** `app11-devops-info-...` shows `2/2` — app container + `vault-agent`. `kubectl describe pod` lists `Environment Variables from: app11-devops-info-secret` (values only in the container env, not in describe). + +**Env check (demo strings from chart values + Vault file):** variables `username` and `password` present; injected file at `/vault/secrets/vaultconfig` starts with KV-style text (contains `username`, `password`, `api_key` from Vault path `secret/data/devops-info/config`). + +**Policy:** + +```text +path "secret/data/devops-info/*" { capabilities = ["read"] } +``` + +Full local runbook: see `RUNBOOK.md` in this folder. diff --git a/lab11c/k8s/devops-info/.helmignore b/lab11c/k8s/devops-info/.helmignore new file mode 100644 index 0000000000..414bb6e8a6 --- /dev/null +++ b/lab11c/k8s/devops-info/.helmignore @@ -0,0 +1,18 @@ +# Patterns to ignore when building packages. 
+.DS_Store +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +*.swp +*.bak +*.tmp +*.orig +*~ +.project +.idea/ +*.tmproj +.vscode/ diff --git a/lab11c/k8s/devops-info/Chart.yaml b/lab11c/k8s/devops-info/Chart.yaml new file mode 100644 index 0000000000..881f6c1942 --- /dev/null +++ b/lab11c/k8s/devops-info/Chart.yaml @@ -0,0 +1,12 @@ +apiVersion: v2 +name: devops-info +description: Helm chart for DevOps Info Python service (Lab 11 secrets) +type: application +version: 0.2.0 +appVersion: "1.0.0" +keywords: + - python + - fastapi + - devops +maintainers: + - name: Phoenix diff --git a/lab11c/k8s/devops-info/templates/_helpers.tpl b/lab11c/k8s/devops-info/templates/_helpers.tpl new file mode 100644 index 0000000000..c8fb478ff4 --- /dev/null +++ b/lab11c/k8s/devops-info/templates/_helpers.tpl @@ -0,0 +1,46 @@ +{{- define "devops-info.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{- define "devops-info.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{- define "devops-info.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{- define "devops-info.labels" -}} +helm.sh/chart: {{ include "devops-info.chart" . }} +{{ include "devops-info.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{- define "devops-info.selectorLabels" -}} +app.kubernetes.io/name: {{ include "devops-info.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{- define "devops-info.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "devops-info.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{- define "devops-info.helmSecretName" -}} +{{ include "devops-info.fullname" . }}-secret +{{- end }} diff --git a/lab11c/k8s/devops-info/templates/deployment.yaml b/lab11c/k8s/devops-info/templates/deployment.yaml new file mode 100644 index 0000000000..51b43b3d7e --- /dev/null +++ b/lab11c/k8s/devops-info/templates/deployment.yaml @@ -0,0 +1,70 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "devops-info.fullname" . }} + labels: + {{- include "devops-info.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + selector: + matchLabels: + {{- include "devops-info.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "devops-info.labels" . | nindent 8 }} + {{- if .Values.vault.injector.enabled }} + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: {{ .Values.vault.role | quote }} + vault.hashicorp.com/agent-inject-secret-vaultconfig: {{ .Values.vault.secretPath | quote }} + vault.hashicorp.com/service: {{ .Values.vault.serviceAddr | quote }} + vault.hashicorp.com/tls-skip-verify: "true" + {{- end }} + spec: + serviceAccountName: {{ include "devops-info.serviceAccountName" . 
}} + containers: + - name: app + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.service.targetPort }} + protocol: TCP + env: + - name: HOST + value: {{ .Values.env.HOST | quote }} + - name: PORT + value: {{ .Values.env.PORT | quote }} + - name: RELEASE_ID + value: {{ .Values.env.RELEASE_ID | quote }} + {{- if .Values.helmSecret.enabled }} + envFrom: + - secretRef: + name: {{ include "devops-info.helmSecretName" . }} + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + livenessProbe: + httpGet: + path: {{ .Values.livenessProbe.path }} + port: http + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + readinessProbe: + httpGet: + path: {{ .Values.readinessProbe.path }} + port: http + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + resources: + {{- toYaml .Values.resources | nindent 12 }} diff --git a/lab11c/k8s/devops-info/templates/secrets.yaml b/lab11c/k8s/devops-info/templates/secrets.yaml new file mode 100644 index 0000000000..86ff891444 --- /dev/null +++ b/lab11c/k8s/devops-info/templates/secrets.yaml @@ -0,0 +1,12 @@ +{{- if .Values.helmSecret.enabled }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "devops-info.helmSecretName" . }} + labels: + {{- include "devops-info.labels" . 
| nindent 4 }} +type: Opaque +stringData: + username: {{ .Values.helmSecret.username | quote }} + password: {{ .Values.helmSecret.password | quote }} +{{- end }} diff --git a/lab11c/k8s/devops-info/templates/service.yaml b/lab11c/k8s/devops-info/templates/service.yaml new file mode 100644 index 0000000000..40097f15b3 --- /dev/null +++ b/lab11c/k8s/devops-info/templates/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "devops-info.fullname" . }} + labels: + {{- include "devops-info.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: {{ .Values.service.targetPort }} + protocol: TCP + name: http + {{- if and (eq .Values.service.type "NodePort") .Values.service.nodePort }} + nodePort: {{ .Values.service.nodePort }} + {{- end }} + selector: + {{- include "devops-info.selectorLabels" . | nindent 4 }} diff --git a/lab11c/k8s/devops-info/templates/serviceaccount.yaml b/lab11c/k8s/devops-info/templates/serviceaccount.yaml new file mode 100644 index 0000000000..9e91578eba --- /dev/null +++ b/lab11c/k8s/devops-info/templates/serviceaccount.yaml @@ -0,0 +1,8 @@ +{{- if .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "devops-info.serviceAccountName" . }} + labels: + {{- include "devops-info.labels" . 
| nindent 4 }} +{{- end }} diff --git a/lab11c/k8s/devops-info/values-dev.yaml b/lab11c/k8s/devops-info/values-dev.yaml new file mode 100644 index 0000000000..edc230037b --- /dev/null +++ b/lab11c/k8s/devops-info/values-dev.yaml @@ -0,0 +1,20 @@ +replicaCount: 1 + +service: + type: NodePort + nodePort: 30080 + +env: + RELEASE_ID: "dev" + +resources: + requests: + cpu: "50m" + memory: "64Mi" + limits: + cpu: "200m" + memory: "128Mi" + +vault: + injector: + enabled: true diff --git a/lab11c/k8s/devops-info/values-prod.yaml b/lab11c/k8s/devops-info/values-prod.yaml new file mode 100644 index 0000000000..38348ad27e --- /dev/null +++ b/lab11c/k8s/devops-info/values-prod.yaml @@ -0,0 +1,20 @@ +replicaCount: 3 + +service: + type: LoadBalancer + nodePort: null + +env: + RELEASE_ID: "prod" + +resources: + requests: + cpu: "150m" + memory: "192Mi" + limits: + cpu: "500m" + memory: "512Mi" + +vault: + injector: + enabled: true diff --git a/lab11c/k8s/devops-info/values.yaml b/lab11c/k8s/devops-info/values.yaml new file mode 100644 index 0000000000..a2af76779a --- /dev/null +++ b/lab11c/k8s/devops-info/values.yaml @@ -0,0 +1,74 @@ +replicaCount: 1 + +nameOverride: "" +fullnameOverride: "" + +serviceAccount: + create: true + name: "" + +image: + repository: tsixphoenix/devops-info-python + tag: "lab9" + pullPolicy: IfNotPresent + +service: + type: NodePort + port: 80 + targetPort: 5000 + nodePort: 30080 + +env: + HOST: "0.0.0.0" + PORT: "5000" + RELEASE_ID: "lab11" + +# Helm-managed Secret — lab defaults only; use helm --set or a secret manager in production +helmSecret: + enabled: true + username: "lab11-helm-demo-user" + password: "lab11-helm-demo-password" + +resources: + requests: + cpu: "100m" + memory: "128Mi" + limits: + cpu: "300m" + memory: "256Mi" + +livenessProbe: + path: /health + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 2 + failureThreshold: 3 + +readinessProbe: + path: /health + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + 
failureThreshold: 3
+
+securityContext:
+  runAsNonRoot: true
+  runAsUser: 10001
+  allowPrivilegeEscalation: false
+  readOnlyRootFilesystem: false
+
+# Vault Agent Injector (sidecar) — enable after Vault is configured
+vault:
+  injector:
+    enabled: false
+  role: "devops-info"
+  secretPath: "secret/data/devops-info/config"
+  secretFileName: "vaultconfig"  # informational only — the injected file name is fixed by the agent-inject-secret-vaultconfig annotation suffix in templates/deployment.yaml
+  serviceAddr: "http://vault.vault.svc:8200"
+
+hooks:
+  image: busybox:1.36
+  preInstall:
+    enabled: false
+  postInstall:
+    enabled: false
diff --git a/lab12c/app_python/.gitignore b/lab12c/app_python/.gitignore
new file mode 100644
index 0000000000..9f5419b034
--- /dev/null
+++ b/lab12c/app_python/.gitignore
@@ -0,0 +1,5 @@
+data/
+__pycache__/
+.pytest_cache/
+.venv/
+*.pyc
diff --git a/lab12c/app_python/Dockerfile b/lab12c/app_python/Dockerfile
new file mode 100644
index 0000000000..50528b8254
--- /dev/null
+++ b/lab12c/app_python/Dockerfile
@@ -0,0 +1,23 @@
+FROM python:3.13-slim
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+
+WORKDIR /app
+
+RUN useradd -m -u 10001 appuser \
+    && mkdir -p /data \
+    && chown appuser:appuser /data
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY --chown=appuser:appuser app.py .
+
+USER appuser
+
+ENV VISITS_FILE=/data/visits
+
+EXPOSE 5000
+
+CMD ["python", "app.py"]
diff --git a/lab12c/app_python/README.md b/lab12c/app_python/README.md
new file mode 100644
index 0000000000..bcbbe00e2f
--- /dev/null
+++ b/lab12c/app_python/README.md
@@ -0,0 +1,22 @@
+# DevOps Info (lab 12)
+
+FastAPI app with a visit counter in a file (`VISITS_FILE`, default `/data/visits`). `/` increments, `/visits` just reads.
+
+## Run with Docker Compose
+
+```bash
+docker compose up --build
+```
+
+Hit `http://127.0.0.1:5000/` a bunch, then `/visits`. Restart the stack — counter should pick up where it left off (`./data` is mounted to `/data`).
+ +## Tests + +```bash +pip install -r requirements.txt pytest httpx +pytest -q +``` + +## Kubernetes image + +Build and push something like `tsixphoenix/devops-info-python:lab12` and point `image.repository` / `image.tag` in `lab12c/k8s/devops-info/values.yaml` at the same thing. diff --git a/lab12c/app_python/app.py b/lab12c/app_python/app.py new file mode 100644 index 0000000000..c3f9c442b7 --- /dev/null +++ b/lab12c/app_python/app.py @@ -0,0 +1,332 @@ +""" +DevOps Info Service +FastAPI application module. +""" + +from __future__ import annotations + +import json +import logging +import os +import platform +import socket +import tempfile +import threading +import time +from datetime import datetime, timezone +from pathlib import Path + +import uvicorn +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse, Response +from prometheus_client import ( + CONTENT_TYPE_LATEST, + CollectorRegistry, + Counter, + Gauge, + Histogram, + generate_latest, +) +from starlette.exceptions import HTTPException as StarletteHTTPException + +_prometheus_registry = CollectorRegistry() + +http_requests_total = Counter( + "http_requests_total", + "Total HTTP requests", + ["method", "endpoint", "status"], + registry=_prometheus_registry, +) +http_request_duration_seconds = Histogram( + "http_request_duration_seconds", + "HTTP request duration in seconds", + ["method", "endpoint"], + buckets=(0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0), + registry=_prometheus_registry, +) +http_requests_in_progress = Gauge( + "http_requests_in_progress", + "HTTP requests currently being processed", + registry=_prometheus_registry, +) +devops_info_endpoint_calls = Counter( + "devops_info_endpoint_calls", + "Endpoint calls for DevOps info service", + ["endpoint"], + registry=_prometheus_registry, +) +devops_info_system_collection_seconds = Histogram( + "devops_info_system_collection_seconds", + "System info collection time in seconds", + buckets=(0.001, 0.005, 0.01, 
0.025, 0.05, 0.1), + registry=_prometheus_registry, +) + +HOST = os.getenv("HOST", "0.0.0.0") +PORT = int(os.getenv("PORT", "5000")) +DEBUG = os.getenv("DEBUG", "False").lower() == "true" +VISITS_FILE = os.getenv("VISITS_FILE", "/data/visits") + +SERVICE_NAME = "devops-info-service" +SERVICE_VERSION = "1.0.0" +SERVICE_DESCRIPTION = "DevOps course info service" +SERVICE_FRAMEWORK = "FastAPI" + +START_TIME = datetime.now(timezone.utc) + +logger = logging.getLogger("devops-info-service") +logger.setLevel(logging.INFO) + +handler = logging.StreamHandler() +handler.setLevel(logging.INFO) +logger.handlers = [handler] + +app = FastAPI( + title="DevOps Info Service", + version=SERVICE_VERSION, + description=SERVICE_DESCRIPTION, +) + +_visits_lock = threading.Lock() + + +def _read_count_unlocked() -> int: + path = Path(VISITS_FILE) + if not path.is_file(): + return 0 + try: + raw = path.read_text(encoding="utf-8").strip() + return max(0, int(raw)) + except (ValueError, OSError): + return 0 + + +def _write_count_atomic(n: int) -> None: + path = Path(VISITS_FILE) + path.parent.mkdir(parents=True, exist_ok=True) + fd, tmp_path = tempfile.mkstemp(dir=str(path.parent), text=True) + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + f.write(str(n)) + os.replace(tmp_path, path) + except Exception: + try: + os.unlink(tmp_path) + except OSError: + pass + raise + + +def increment_visits() -> int: + with _visits_lock: + n = _read_count_unlocked() + 1 + _write_count_atomic(n) + return n + + +def get_visits() -> int: + with _visits_lock: + return _read_count_unlocked() + + +def _format_uptime(seconds: int) -> str: + hours = seconds // 3600 + minutes = (seconds % 3600) // 60 + hour_label = "hour" if hours == 1 else "hours" + minute_label = "minute" if minutes == 1 else "minutes" + return f"{hours} {hour_label}, {minutes} {minute_label}" + + +def get_uptime() -> dict[str, int | str]: + delta = datetime.now(timezone.utc) - START_TIME + seconds = int(delta.total_seconds()) + return { 
+ "seconds": seconds, + "human": _format_uptime(seconds), + } + + +def get_system_info() -> dict[str, str | int]: + return { + "hostname": socket.gethostname(), + "platform": platform.system(), + "platform_version": platform.release(), + "architecture": platform.machine(), + "cpu_count": os.cpu_count() or 0, + "python_version": platform.python_version(), + } + + +def isoformat_utc(dt: datetime) -> str: + return dt.astimezone(timezone.utc).isoformat().replace("+00:00", "Z") + + +def _normalize_endpoint(path: str) -> str: + if path in ("/", "/health", "/metrics", "/visits"): + return path + return "other" + + +@app.middleware("http") +async def observability_middleware(request: Request, call_next): + start_perf = time.perf_counter() + endpoint = _normalize_endpoint(request.url.path) + http_requests_in_progress.inc() + req_ts = datetime.now(timezone.utc) + logger.info( + json.dumps( + { + "timestamp": isoformat_utc(req_ts), + "level": "INFO", + "service": SERVICE_NAME, + "event": "request", + "method": request.method, + "path": request.url.path, + "client_ip": request.client.host if request.client else "unknown", + "user_agent": request.headers.get("user-agent", "unknown"), + } + ) + ) + try: + response = await call_next(request) + status = str(response.status_code) + http_requests_total.labels( + method=request.method, endpoint=endpoint, status=status + ).inc() + http_request_duration_seconds.labels( + method=request.method, endpoint=endpoint + ).observe(time.perf_counter() - start_perf) + logger.info( + json.dumps( + { + "timestamp": isoformat_utc(datetime.now(timezone.utc)), + "level": "INFO", + "service": SERVICE_NAME, + "event": "response", + "method": request.method, + "path": request.url.path, + "status": response.status_code, + "client_ip": request.client.host if request.client else "unknown", + "user_agent": request.headers.get("user-agent", "unknown"), + } + ) + ) + return response + finally: + http_requests_in_progress.dec() + + 
+@app.exception_handler(StarletteHTTPException) +async def http_exception_handler(request: Request, exc: StarletteHTTPException): + if exc.status_code == 404: + return JSONResponse( + status_code=404, + content={ + "error": "Not Found", + "message": "Endpoint does not exist", + }, + ) + return JSONResponse( + status_code=exc.status_code, + content={"error": exc.detail}, + ) + + +@app.exception_handler(Exception) +async def unhandled_exception_handler(request: Request, exc: Exception): + logger.error( + json.dumps( + { + "timestamp": isoformat_utc(datetime.now(timezone.utc)), + "level": "ERROR", + "service": SERVICE_NAME, + "event": "exception", + "method": request.method, + "path": request.url.path, + "error": str(exc), + } + ) + ) + return JSONResponse( + status_code=500, + content={ + "error": "Internal Server Error", + "message": "An unexpected error occurred", + }, + ) + + +@app.get("/metrics") +async def metrics(): + return Response( + content=generate_latest(_prometheus_registry), + media_type=CONTENT_TYPE_LATEST, + ) + + +@app.get("/visits") +async def visits(): + devops_info_endpoint_calls.labels(endpoint="/visits").inc() + total = get_visits() + return {"visits": total, "file": VISITS_FILE} + + +@app.get("/") +async def root(request: Request): + devops_info_endpoint_calls.labels(endpoint="/").inc() + visit_total = increment_visits() + t0 = time.perf_counter() + sys_info = get_system_info() + devops_info_system_collection_seconds.observe(time.perf_counter() - t0) + uptime = get_uptime() + now = datetime.now(timezone.utc) + + response = { + "service": { + "name": SERVICE_NAME, + "version": SERVICE_VERSION, + "description": SERVICE_DESCRIPTION, + "framework": SERVICE_FRAMEWORK, + }, + "system": sys_info, + "runtime": { + "uptime_seconds": uptime["seconds"], + "uptime_human": uptime["human"], + "current_time": isoformat_utc(now), + "timezone": "UTC", + }, + "visits": { + "total": visit_total, + "file": VISITS_FILE, + }, + "request": { + "client_ip": 
request.client.host if request.client else "unknown", + "user_agent": request.headers.get("user-agent", "unknown"), + "method": request.method, + "path": request.url.path, + }, + "endpoints": [ + {"path": "/", "method": "GET", "description": "Service information"}, + {"path": "/visits", "method": "GET", "description": "Visit counter (read-only)"}, + {"path": "/health", "method": "GET", "description": "Health check"}, + {"path": "/metrics", "method": "GET", "description": "Prometheus metrics"}, + ], + } + + return response + + +@app.get("/health") +async def health(): + devops_info_endpoint_calls.labels(endpoint="/health").inc() + uptime = get_uptime() + return { + "status": "healthy", + "timestamp": isoformat_utc(datetime.now(timezone.utc)), + "uptime_seconds": uptime["seconds"], + } + + +if __name__ == "__main__": + logger.info("Starting DevOps Info Service on %s:%s", HOST, PORT) + uvicorn.run("app:app", host=HOST, port=PORT, reload=DEBUG, log_level="info") diff --git a/lab12c/app_python/docker-compose.yml b/lab12c/app_python/docker-compose.yml new file mode 100644 index 0000000000..9138918fd3 --- /dev/null +++ b/lab12c/app_python/docker-compose.yml @@ -0,0 +1,9 @@ +services: + devops-info: + build: . 
+ ports: + - "5000:5000" + environment: + VISITS_FILE: /data/visits + volumes: + - ./data:/data diff --git a/lab12c/app_python/requirements.txt b/lab12c/app_python/requirements.txt new file mode 100644 index 0000000000..d663691063 --- /dev/null +++ b/lab12c/app_python/requirements.txt @@ -0,0 +1,3 @@ +fastapi==0.128.6 +uvicorn[standard]==0.32.0 +prometheus-client==0.23.1 diff --git a/lab12c/app_python/tests/__init__.py b/lab12c/app_python/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lab12c/app_python/tests/test_app.py b/lab12c/app_python/tests/test_app.py new file mode 100644 index 0000000000..b6ecc0f9bf --- /dev/null +++ b/lab12c/app_python/tests/test_app.py @@ -0,0 +1,106 @@ +from datetime import datetime, timezone + +import pytest +from fastapi.testclient import TestClient + +from app import _format_uptime, app, get_system_info, get_uptime, isoformat_utc + + +@pytest.fixture +def client(tmp_path, monkeypatch): + vf = tmp_path / "visits" + monkeypatch.setenv("VISITS_FILE", str(vf)) + # Reload app module so VISITS_FILE is picked up + import importlib + + import app as app_module + + importlib.reload(app_module) + from app import app as reloaded + + with TestClient(reloaded) as c: + yield c + + +def test_root_increments_visits(client): + r1 = client.get("/") + assert r1.status_code == 200 + assert r1.json()["visits"]["total"] == 1 + + r2 = client.get("/") + assert r2.json()["visits"]["total"] == 2 + + +def test_visits_read_only(client): + client.get("/") + client.get("/") + r = client.get("/visits") + assert r.status_code == 200 + assert r.json()["visits"] == 2 + + +def test_root_endpoint_structure(client): + response = client.get("/") + assert response.status_code == 200 + + data = response.json() + assert "service" in data + assert "system" in data + assert "runtime" in data + assert "request" in data + assert "endpoints" in data + assert "visits" in data + assert data["visits"]["total"] >= 1 + + service = data["service"] + 
assert service["name"] == "devops-info-service" + assert service["framework"] == "FastAPI" + + system = data["system"] + for key in [ + "hostname", + "platform", + "platform_version", + "architecture", + "cpu_count", + "python_version", + ]: + assert key in system + + runtime = data["runtime"] + assert isinstance(runtime["uptime_seconds"], int) + assert isinstance(runtime["uptime_human"], str) + assert runtime["timezone"] == "UTC" + + +def test_health_endpoint_structure(client): + response = client.get("/health") + assert response.status_code == 200 + data = response.json() + assert data["status"] == "healthy" + assert isinstance(data["uptime_seconds"], int) + assert "timestamp" in data + + +def test_not_found_returns_json(client): + response = client.get("/does-not-exist") + assert response.status_code == 404 + data = response.json() + assert data["error"] == "Not Found" + + +def test_helpers_are_consistent(): + system = get_system_info() + assert system["hostname"] + assert system["platform"] + assert system["python_version"] + + uptime = get_uptime() + assert uptime["seconds"] >= 0 + assert "hours" in uptime["human"] or "hour" in uptime["human"] + + +def test_format_and_iso_helpers(): + assert _format_uptime(3660) == "1 hour, 1 minute" + test_dt = datetime(2024, 1, 1, tzinfo=timezone.utc) + assert isoformat_utc(test_dt) == "2024-01-01T00:00:00Z" diff --git a/lab12c/k8s/CONFIGMAPS.md b/lab12c/k8s/CONFIGMAPS.md new file mode 100644 index 0000000000..22bca71a50 --- /dev/null +++ b/lab12c/k8s/CONFIGMAPS.md @@ -0,0 +1,138 @@ +# Lab 12 — ConfigMaps & persistence + +App: `lab12c/app_python`. Chart: `lab12c/k8s/devops-info` (v0.3.0). + +## App + +Visit counter lives in a file (`VISITS_FILE`, default `/data/visits`). Each `GET /` bumps it under a lock; writes go through a temp file + `os.replace`. `GET /visits` is read-only. The JSON from `/` includes `visits.total` and the file path. 
+
+Docker Compose maps `./data` to `/data` — bounce the stack a few times and the number should stick.
+
+Tests: run `pytest` in `lab12c/app_python`; they point `VISITS_FILE` at a temp path.
+
+## ConfigMaps
+
+`files/config.json` ships with the chart. `templates/configmap.yaml` builds two objects: one embeds that JSON via `.Files.Get`, the other exposes `APP_NAME`, `APP_ENV`, `LOG_LEVEL` from values.
+
+Deployment mounts the file ConfigMap at `/config` → `/config/config.json`. The env ConfigMap hooks in with `envFrom` when `config.injectEnv` is true.
+
+Snippet from `helm template app12 lab12c/k8s/devops-info -f lab12c/k8s/devops-info/values-dev.yaml` — two separate ConfigMaps, rendered as two YAML documents separated by `---`:
+
+```yaml
+data:
+  config.json: |-
+    {
+      "appName": "devops-info-service",
+      "environment": "development",
+      "features": {
+        "metricsEnabled": true,
+        "verboseLogging": false
+      }
+    }
+---
+data:
+  APP_NAME: "devops-info-service"
+  APP_ENV: "development"
+  LOG_LEVEL: "info"
+```
+
+Sanity checks (release name `app12`):
+
+```bash
+kubectl get configmap -l app.kubernetes.io/instance=app12
+kubectl exec deploy/app12-devops-info -c app -- cat /config/config.json
+kubectl exec deploy/app12-devops-info -c app -- printenv | grep APP_
+```
+
+On a Windows host, use `findstr APP_` instead of `grep` if you run `kubectl exec` from PowerShell and pipe outside the container.
+
+## PVC
+
+`templates/pvc.yaml` requests `ReadWriteOnce` storage, size from `persistence.size`. If `persistence.storageClass` is empty, the cluster default StorageClass applies (kind/minikube usually give you one).
+
+Deployment uses either that PVC or `emptyDir` when persistence is off. Mount is `/data`, app uses `VISITS_FILE=/data/visits`.
+
+RWO = one pod at a time on that volume the normal way, so `values-prod.yaml` keeps a single replica. Scaling out with a file counter on RWO doesn’t fly without RWX or moving state somewhere else.
+ +```yaml +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Mi +``` + +## Cluster install notes + +Build/push `tsixphoenix/devops-info-python:lab12`, or load into kind: + +```bash +kind load docker-image tsixphoenix/devops-info-python:lab12 --name lab11 +``` + +Then: + +```bash +helm upgrade --install app12 lab12c/k8s/devops-info -f lab12c/k8s/devops-info/values-dev.yaml --set image.pullPolicy=Never +``` + +If **lab 11** is installed on the same cluster, both charts default to NodePort `30080` — Kubernetes will reject the second Service. `values-dev.yaml` for lab 12 sets `nodePort: 30081` so app11 can keep `30080`. + +## ConfigMap vs Secret + +ConfigMap = non-sensitive config (flags, log level, JSON metadata). Secret = passwords, keys, certs. Even Secrets are base64 in the API, not magic crypto — if it’s sensitive, treat it as sensitive. For heavy stuff see lab 11 / Vault. + +--- + +## Evidence (captured on kind v1.31, 2026-04-11) + +**Unit tests:** + +```text +....... [100%] +7 passed in 0.39s +``` + +**Helm lint (chart ok):** + +```text +==> Linting lab12c/k8s/devops-info +[INFO] Chart.yaml: icon is recommended +1 chart(s) linted, 0 chart(s) failed +``` + +**ConfigMaps + PVC:** + +```text +NAME DATA AGE +configmap/app12-devops-info-env 3 ... +configmap/app12-devops-info-file 1 ... + +NAME STATUS CAPACITY ACCESS MODES STORAGECLASS AGE +persistentvolumeclaim/app12-devops-info-data Bound 100Mi RWO standard ... +``` + +**File inside pod (`/config/config.json`):** + +```json +{ + "appName": "devops-info-service", + "environment": "development", + "features": { + "metricsEnabled": true, + "verboseLogging": false + } +} +``` + +**Env from ConfigMap:** + +```text +APP_ENV=development +APP_NAME=devops-info-service +LOG_LEVEL=info +``` + +**Persistence (same PVC after pod delete):** after three hits to `/`, `/data/visits` contained `3`. Deleted the pod; new pod still showed `3` in `/data/visits`. 
+ +Step-by-step for Vault + both charts: `lab11c/k8s/RUNBOOK.md`. diff --git a/lab12c/k8s/devops-info/.helmignore b/lab12c/k8s/devops-info/.helmignore new file mode 100644 index 0000000000..414bb6e8a6 --- /dev/null +++ b/lab12c/k8s/devops-info/.helmignore @@ -0,0 +1,18 @@ +# Patterns to ignore when building packages. +.DS_Store +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +*.swp +*.bak +*.tmp +*.orig +*~ +.project +.idea/ +*.tmproj +.vscode/ diff --git a/lab12c/k8s/devops-info/Chart.yaml b/lab12c/k8s/devops-info/Chart.yaml new file mode 100644 index 0000000000..8d0388757b --- /dev/null +++ b/lab12c/k8s/devops-info/Chart.yaml @@ -0,0 +1,12 @@ +apiVersion: v2 +name: devops-info +description: Helm chart for DevOps Info Python service (Lab 12 ConfigMaps & PVC) +type: application +version: 0.3.0 +appVersion: "1.0.0" +keywords: + - python + - fastapi + - devops +maintainers: + - name: Phoenix diff --git a/lab12c/k8s/devops-info/files/config.json b/lab12c/k8s/devops-info/files/config.json new file mode 100644 index 0000000000..ef5fa6219d --- /dev/null +++ b/lab12c/k8s/devops-info/files/config.json @@ -0,0 +1,8 @@ +{ + "appName": "devops-info-service", + "environment": "development", + "features": { + "metricsEnabled": true, + "verboseLogging": false + } +} diff --git a/lab12c/k8s/devops-info/templates/_helpers.tpl b/lab12c/k8s/devops-info/templates/_helpers.tpl new file mode 100644 index 0000000000..c8fb478ff4 --- /dev/null +++ b/lab12c/k8s/devops-info/templates/_helpers.tpl @@ -0,0 +1,46 @@ +{{- define "devops-info.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{- define "devops-info.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name 
$name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{- define "devops-info.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{- define "devops-info.labels" -}} +helm.sh/chart: {{ include "devops-info.chart" . }} +{{ include "devops-info.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{- define "devops-info.selectorLabels" -}} +app.kubernetes.io/name: {{ include "devops-info.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{- define "devops-info.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "devops-info.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{- define "devops-info.helmSecretName" -}} +{{ include "devops-info.fullname" . }}-secret +{{- end }} diff --git a/lab12c/k8s/devops-info/templates/analysis-template.yaml b/lab12c/k8s/devops-info/templates/analysis-template.yaml new file mode 100644 index 0000000000..d36bd8c363 --- /dev/null +++ b/lab12c/k8s/devops-info/templates/analysis-template.yaml @@ -0,0 +1,19 @@ +{{- if and .Values.rollouts.enabled .Values.rollouts.analysis.enabled }} +apiVersion: argoproj.io/v1alpha1 +kind: AnalysisTemplate +metadata: + name: {{ include "devops-info.fullname" . }}-success-rate + labels: + {{- include "devops-info.labels" . | nindent 4 }} +spec: + metrics: + - name: health-check + interval: {{ .Values.rollouts.analysis.interval }} + count: {{ .Values.rollouts.analysis.count }} + failureLimit: {{ .Values.rollouts.analysis.failureLimit }} + successCondition: result == "healthy" + provider: + web: + url: http://{{ include "devops-info.fullname" . 
}}.{{ .Release.Namespace }}.svc.cluster.local/health + jsonPath: "{$.status}" +{{- end }} diff --git a/lab12c/k8s/devops-info/templates/configmap.yaml b/lab12c/k8s/devops-info/templates/configmap.yaml new file mode 100644 index 0000000000..bf47386382 --- /dev/null +++ b/lab12c/k8s/devops-info/templates/configmap.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "devops-info.fullname" . }}-file + labels: + {{- include "devops-info.labels" . | nindent 4 }} +data: + config.json: |- +{{ .Files.Get "files/config.json" | indent 4 }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "devops-info.fullname" . }}-env + labels: + {{- include "devops-info.labels" . | nindent 4 }} +data: + APP_NAME: {{ .Values.config.appName | quote }} + APP_ENV: {{ .Values.config.environment | quote }} + LOG_LEVEL: {{ .Values.config.logLevel | quote }} diff --git a/lab12c/k8s/devops-info/templates/deployment.yaml b/lab12c/k8s/devops-info/templates/deployment.yaml new file mode 100644 index 0000000000..ce6240990f --- /dev/null +++ b/lab12c/k8s/devops-info/templates/deployment.yaml @@ -0,0 +1,97 @@ +{{- if not .Values.rollouts.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "devops-info.fullname" . }} + labels: + {{- include "devops-info.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + selector: + matchLabels: + {{- include "devops-info.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "devops-info.labels" . 
| nindent 8 }} + {{- if .Values.vault.injector.enabled }} + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: {{ .Values.vault.role | quote }} + vault.hashicorp.com/agent-inject-secret-vaultconfig: {{ .Values.vault.secretPath | quote }} + vault.hashicorp.com/service: {{ .Values.vault.serviceAddr | quote }} + vault.hashicorp.com/tls-skip-verify: "true" + {{- end }} + spec: + serviceAccountName: {{ include "devops-info.serviceAccountName" . }} + volumes: + - name: app-config + configMap: + name: {{ include "devops-info.fullname" . }}-file + {{- if .Values.persistence.enabled }} + - name: data + persistentVolumeClaim: + claimName: {{ include "devops-info.fullname" . }}-data + {{- else }} + - name: data + emptyDir: {} + {{- end }} + containers: + - name: app + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.service.targetPort }} + protocol: TCP + env: + - name: HOST + value: {{ .Values.env.HOST | quote }} + - name: PORT + value: {{ .Values.env.PORT | quote }} + - name: RELEASE_ID + value: {{ .Values.env.RELEASE_ID | quote }} + - name: VISITS_FILE + value: {{ .Values.visits.file | quote }} + {{- if or .Values.helmSecret.enabled .Values.config.injectEnv }} + envFrom: + {{- if .Values.helmSecret.enabled }} + - secretRef: + name: {{ include "devops-info.helmSecretName" . }} + {{- end }} + {{- if .Values.config.injectEnv }} + - configMapRef: + name: {{ include "devops-info.fullname" . 
}}-env + {{- end }} + {{- end }} + volumeMounts: + - name: app-config + mountPath: /config + - name: data + mountPath: /data + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + livenessProbe: + httpGet: + path: {{ .Values.livenessProbe.path }} + port: http + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + readinessProbe: + httpGet: + path: {{ .Values.readinessProbe.path }} + port: http + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + resources: + {{- toYaml .Values.resources | nindent 12 }} +{{- end }} diff --git a/lab12c/k8s/devops-info/templates/pvc.yaml b/lab12c/k8s/devops-info/templates/pvc.yaml new file mode 100644 index 0000000000..72ae9abc7e --- /dev/null +++ b/lab12c/k8s/devops-info/templates/pvc.yaml @@ -0,0 +1,17 @@ +{{- if .Values.persistence.enabled }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "devops-info.fullname" . }}-data + labels: + {{- include "devops-info.labels" . 
| nindent 4 }} +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.persistence.size }} + {{- if .Values.persistence.storageClass }} + storageClassName: {{ .Values.persistence.storageClass | quote }} + {{- end }} +{{- end }} diff --git a/lab12c/k8s/devops-info/templates/rollout.yaml b/lab12c/k8s/devops-info/templates/rollout.yaml new file mode 100644 index 0000000000..fd0727e132 --- /dev/null +++ b/lab12c/k8s/devops-info/templates/rollout.yaml @@ -0,0 +1,107 @@ +{{- if .Values.rollouts.enabled }} +apiVersion: argoproj.io/v1alpha1 +kind: Rollout +metadata: + name: {{ include "devops-info.fullname" . }} + labels: + {{- include "devops-info.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "devops-info.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "devops-info.labels" . | nindent 8 }} + {{- if .Values.vault.injector.enabled }} + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: {{ .Values.vault.role | quote }} + vault.hashicorp.com/agent-inject-secret-vaultconfig: {{ .Values.vault.secretPath | quote }} + vault.hashicorp.com/service: {{ .Values.vault.serviceAddr | quote }} + vault.hashicorp.com/tls-skip-verify: "true" + {{- end }} + spec: + serviceAccountName: {{ include "devops-info.serviceAccountName" . }} + volumes: + - name: app-config + configMap: + name: {{ include "devops-info.fullname" . }}-file + {{- if .Values.persistence.enabled }} + - name: data + persistentVolumeClaim: + claimName: {{ include "devops-info.fullname" . 
}}-data + {{- else }} + - name: data + emptyDir: {} + {{- end }} + containers: + - name: app + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.service.targetPort }} + protocol: TCP + env: + - name: HOST + value: {{ .Values.env.HOST | quote }} + - name: PORT + value: {{ .Values.env.PORT | quote }} + - name: RELEASE_ID + value: {{ .Values.env.RELEASE_ID | quote }} + - name: VISITS_FILE + value: {{ .Values.visits.file | quote }} + {{- if or .Values.helmSecret.enabled .Values.config.injectEnv }} + envFrom: + {{- if .Values.helmSecret.enabled }} + - secretRef: + name: {{ include "devops-info.helmSecretName" . }} + {{- end }} + {{- if .Values.config.injectEnv }} + - configMapRef: + name: {{ include "devops-info.fullname" . }}-env + {{- end }} + {{- end }} + volumeMounts: + - name: app-config + mountPath: /config + - name: data + mountPath: /data + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + livenessProbe: + httpGet: + path: {{ .Values.livenessProbe.path }} + port: http + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + readinessProbe: + httpGet: + path: {{ .Values.readinessProbe.path }} + port: http + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + strategy: + {{- if eq .Values.rollouts.strategy "blueGreen" }} + blueGreen: + activeService: {{ include "devops-info.fullname" . }} + previewService: {{ include "devops-info.fullname" . 
}}-preview + autoPromotionEnabled: {{ .Values.rollouts.blueGreen.autoPromotionEnabled }} + {{- if .Values.rollouts.blueGreen.autoPromotionSeconds }} + autoPromotionSeconds: {{ .Values.rollouts.blueGreen.autoPromotionSeconds }} + {{- end }} + scaleDownDelaySeconds: {{ .Values.rollouts.blueGreen.scaleDownDelaySeconds }} + {{- else }} + canary: + steps: + {{- toYaml .Values.rollouts.canary.steps | nindent 8 }} + {{- end }} +{{- end }} diff --git a/lab12c/k8s/devops-info/templates/secrets.yaml b/lab12c/k8s/devops-info/templates/secrets.yaml new file mode 100644 index 0000000000..86ff891444 --- /dev/null +++ b/lab12c/k8s/devops-info/templates/secrets.yaml @@ -0,0 +1,12 @@ +{{- if .Values.helmSecret.enabled }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "devops-info.helmSecretName" . }} + labels: + {{- include "devops-info.labels" . | nindent 4 }} +type: Opaque +stringData: + username: {{ .Values.helmSecret.username | quote }} + password: {{ .Values.helmSecret.password | quote }} +{{- end }} diff --git a/lab12c/k8s/devops-info/templates/service-preview.yaml b/lab12c/k8s/devops-info/templates/service-preview.yaml new file mode 100644 index 0000000000..d880b331d8 --- /dev/null +++ b/lab12c/k8s/devops-info/templates/service-preview.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.rollouts.enabled (eq .Values.rollouts.strategy "blueGreen") }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "devops-info.fullname" . }}-preview + labels: + {{- include "devops-info.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: {{ .Values.service.targetPort }} + protocol: TCP + name: http + selector: + {{- include "devops-info.selectorLabels" . 
| nindent 4 }} +{{- end }} diff --git a/lab12c/k8s/devops-info/templates/service.yaml b/lab12c/k8s/devops-info/templates/service.yaml new file mode 100644 index 0000000000..40097f15b3 --- /dev/null +++ b/lab12c/k8s/devops-info/templates/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "devops-info.fullname" . }} + labels: + {{- include "devops-info.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: {{ .Values.service.targetPort }} + protocol: TCP + name: http + {{- if and (eq .Values.service.type "NodePort") .Values.service.nodePort }} + nodePort: {{ .Values.service.nodePort }} + {{- end }} + selector: + {{- include "devops-info.selectorLabels" . | nindent 4 }} diff --git a/lab12c/k8s/devops-info/templates/serviceaccount.yaml b/lab12c/k8s/devops-info/templates/serviceaccount.yaml new file mode 100644 index 0000000000..9e91578eba --- /dev/null +++ b/lab12c/k8s/devops-info/templates/serviceaccount.yaml @@ -0,0 +1,8 @@ +{{- if .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "devops-info.serviceAccountName" . }} + labels: + {{- include "devops-info.labels" . | nindent 4 }} +{{- end }} diff --git a/lab12c/k8s/devops-info/values-bluegreen.yaml b/lab12c/k8s/devops-info/values-bluegreen.yaml new file mode 100644 index 0000000000..a551df57dd --- /dev/null +++ b/lab12c/k8s/devops-info/values-bluegreen.yaml @@ -0,0 +1,7 @@ +rollouts: + enabled: true + strategy: blueGreen + blueGreen: + autoPromotionEnabled: false + autoPromotionSeconds: null + scaleDownDelaySeconds: 30 diff --git a/lab12c/k8s/devops-info/values-dev.yaml b/lab12c/k8s/devops-info/values-dev.yaml new file mode 100644 index 0000000000..8e24124da5 --- /dev/null +++ b/lab12c/k8s/devops-info/values-dev.yaml @@ -0,0 +1,28 @@ +replicaCount: 1 + +service: + type: NodePort + # Use 30081 if another chart (e.g. 
lab11) already uses 30080 on the same cluster + nodePort: 30081 + +env: + RELEASE_ID: "dev" + +config: + environment: "development" + +resources: + requests: + cpu: "50m" + memory: "64Mi" + limits: + cpu: "200m" + memory: "128Mi" + +vault: + injector: + enabled: false + +rollouts: + enabled: true + strategy: canary diff --git a/lab12c/k8s/devops-info/values-prod.yaml b/lab12c/k8s/devops-info/values-prod.yaml new file mode 100644 index 0000000000..5deb43560b --- /dev/null +++ b/lab12c/k8s/devops-info/values-prod.yaml @@ -0,0 +1,32 @@ +# ReadWriteOnce PVC: keep a single replica or use shared storage (RWX) instead. +replicaCount: 1 + +service: + type: LoadBalancer + nodePort: null + +env: + RELEASE_ID: "prod" + +config: + environment: "production" + +resources: + requests: + cpu: "150m" + memory: "192Mi" + limits: + cpu: "500m" + memory: "512Mi" + +vault: + injector: + enabled: false + +rollouts: + enabled: true + strategy: blueGreen + blueGreen: + autoPromotionEnabled: false + autoPromotionSeconds: null + scaleDownDelaySeconds: 30 diff --git a/lab12c/k8s/devops-info/values.yaml b/lab12c/k8s/devops-info/values.yaml new file mode 100644 index 0000000000..d463589652 --- /dev/null +++ b/lab12c/k8s/devops-info/values.yaml @@ -0,0 +1,113 @@ +replicaCount: 1 + +nameOverride: "" +fullnameOverride: "" + +serviceAccount: + create: true + name: "" + +image: + repository: tsixphoenix/devops-info-python + tag: "lab12" + pullPolicy: IfNotPresent + +service: + type: NodePort + port: 80 + targetPort: 5000 + nodePort: 30080 + +env: + HOST: "0.0.0.0" + PORT: "5000" + RELEASE_ID: "lab12" + +visits: + file: "/data/visits" + +config: + appName: "devops-info-service" + environment: "development" + logLevel: "info" + injectEnv: true + +helmSecret: + enabled: false + username: "lab12-helm-demo-user" + password: "lab12-helm-demo-password" + +persistence: + enabled: true + size: 100Mi + storageClass: "" + +resources: + requests: + cpu: "100m" + memory: "128Mi" + limits: + cpu: "300m" + 
memory: "256Mi" + +livenessProbe: + path: /health + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 2 + failureThreshold: 3 + +readinessProbe: + path: /health + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + failureThreshold: 3 + +securityContext: + runAsNonRoot: true + runAsUser: 10001 + allowPrivilegeEscalation: false + readOnlyRootFilesystem: false + +vault: + injector: + enabled: false + role: "devops-info" + secretPath: "secret/data/devops-info/config" + secretFileName: "vault-config" + serviceAddr: "http://vault.vault.svc:8200" + +hooks: + image: busybox:1.36 + preInstall: + enabled: false + postInstall: + enabled: false + +rollouts: + enabled: true + strategy: canary + canary: + steps: + - setWeight: 20 + - pause: {} + - setWeight: 40 + - pause: + duration: 30s + - setWeight: 60 + - pause: + duration: 30s + - setWeight: 80 + - pause: + duration: 30s + - setWeight: 100 + blueGreen: + autoPromotionEnabled: false + autoPromotionSeconds: null + scaleDownDelaySeconds: 30 + analysis: + enabled: true + interval: 10s + count: 3 + failureLimit: 1 diff --git a/lab13c/docs/1.jpg b/lab13c/docs/1.jpg new file mode 100644 index 0000000000..f6df5383df Binary files /dev/null and b/lab13c/docs/1.jpg differ diff --git a/lab13c/docs/2.jpg b/lab13c/docs/2.jpg new file mode 100644 index 0000000000..c693e272be Binary files /dev/null and b/lab13c/docs/2.jpg differ diff --git a/lab13c/docs/3.jpg b/lab13c/docs/3.jpg new file mode 100644 index 0000000000..0ef73985f8 Binary files /dev/null and b/lab13c/docs/3.jpg differ diff --git a/lab13c/k8s/ARGOCD.md b/lab13c/k8s/ARGOCD.md new file mode 100644 index 0000000000..a1a3091acf --- /dev/null +++ b/lab13c/k8s/ARGOCD.md @@ -0,0 +1,140 @@ +# Lab 13: Argo CD + +No ApplicationSet bonus in this repo. + +Helm chart lives at `lab12c/k8s/devops-info` in [DevOps-CC](https://github.com/TsixPhoenix/DevOps-CC). Argo `Application` YAML is under `lab13c/k8s/argocd/`. 
+ +Applications use `targetRevision: lab12` because that branch already has the chart on GitHub. After you merge or push chart updates elsewhere, change `targetRevision` in all three manifests to match. + +## Install (Helm) + +```powershell +helm repo add argo https://argoproj.github.io/argo-helm +helm repo update +kubectl create namespace argocd +helm upgrade --install argocd argo/argo-cd -n argocd --version 7.7.16 ` + --set configs.params.server.insecure=true ` + --set server.extraArgs="{--insecure}" ` + --wait --timeout 10m +``` + +Check pods: `kubectl get pods -n argocd` (everything should be Running or Completed). + +## UI + +Insecure setup for kind only. Forward port 80 on the Service: + +```powershell +kubectl port-forward svc/argocd-server -n argocd 8080:80 +``` + +Browser: `http://localhost:8080`. User `admin`. Password: + +```powershell +$pw = kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" +[Text.Encoding]::UTF8.GetString([Convert]::FromBase64String($pw)) +``` + +## CLI (Windows) + +Grab `argocd-windows-amd64.exe` from [releases](https://github.com/argoproj/argo-cd/releases). With another forward, e.g. 
`18080:80`: + +```powershell +kubectl port-forward svc/argocd-server -n argocd 18080:80 +argocd login localhost:18080 --username admin --password "" --plaintext +argocd app list --plaintext --server localhost:18080 +``` + +## Application files + +| File | Namespace | Values | Sync | +|------|-----------|--------|------| +| `application.yaml` | default | `values.yaml` | manual | +| `application-dev.yaml` | dev | `values-dev.yaml` | automated, prune, selfHeal | +| `application-prod.yaml` | prod | `values-prod.yaml` | manual | + +```powershell +kubectl apply -f lab13c/k8s/argocd/application.yaml +kubectl apply -f lab13c/k8s/argocd/application-dev.yaml +kubectl apply -f lab13c/k8s/argocd/application-prod.yaml +``` + +Sync the two manual apps after CLI login: + +```text +argocd app sync devops-info --plaintext --server localhost:18080 +argocd app sync devops-info-prod --plaintext --server localhost:18080 +``` + +`devops-info-dev` syncs on its own. + +Image `tsixphoenix/devops-info-python:lab12` is often missing on Docker Hub. Build and load into kind: + +```powershell +docker build -t tsixphoenix/devops-info-python:lab12 .\lab12c\app_python +kind load docker-image tsixphoenix/devops-info-python:lab12 --name lab11 +``` + +## Environments + +dev: one replica, smaller resources, NodePort 30081, `RELEASE_ID=dev`, auto-sync + selfHeal. + +prod: one replica, PVC RWO, bigger limits, Service type LoadBalancer in values (on kind external IP stays pending; pod still runs), `RELEASE_ID=prod`, manual sync. + +default (single app from `application.yaml`): `values.yaml`, NodePort 30080, manual sync. + +Prod stays manual so someone clicks Sync after reviewing the diff. + +## Self-heal and drift (dev) + +Scaling `devops-info-dev` to 5 replicas was reverted back to 1 in about 20 seconds with selfHeal on. + +Deleting a pod in dev gets you a new pod from the ReplicaSet. That is normal Kubernetes behavior, not Argo CD fixing Git drift. + +Changing `values-dev.yaml` in Git (e.g. 
`replicaCount`), committing, and pushing to `lab12` makes the dev app go OutOfSync within a few minutes (default poll ~3 min), then it auto-syncs. Prod shows OutOfSync until you sync it manually. + +Extra labels on the Deployment may stick until the next comparison; replica count is the clean demo. `argocd app diff devops-info-dev` shows drift. + +Kubernetes keeps replica counts for a Deployment. Argo CD reapplies the full desired state from Git on sync/selfHeal. Poll interval is configurable; default is on the order of a few minutes. + +## Run output (kind, Apr 2026) + +Applications: + +```text +NAME SYNC STATUS HEALTH STATUS +devops-info Synced Healthy +devops-info-dev Synced Healthy +devops-info-prod Synced Progressing +``` + +On kind, prod can sit at Progressing when the Service is LoadBalancer with no external IP; workloads were still Running. + +dev: + +```text +deployment.apps/devops-info-dev 1/1 +service/devops-info-dev NodePort 80:30081/TCP +``` + +prod: + +```text +deployment.apps/devops-info-prod 1/1 +service/devops-info-prod LoadBalancer 80:32607/TCP EXTERNAL-IP pending +``` + +default: + +```text +deployment.apps/devops-info 1/1 +service/devops-info NodePort 80:30080/TCP +``` + +Scale test: `kubectl scale deployment devops-info-dev -n dev --replicas=5` then back to 1 replica. + +## 8. UI screenshots + +All screenshots in the folder docs/ + +--- diff --git a/lab13c/k8s/ROLLOUTS.md b/lab13c/k8s/ROLLOUTS.md new file mode 100644 index 0000000000..6d496d6730 --- /dev/null +++ b/lab13c/k8s/ROLLOUTS.md @@ -0,0 +1,221 @@ +# Lab 14: Argo Rollouts Progressive Delivery + +Helm chart source for this lab: `lab12c/k8s/devops-info`. + +This lab replaces a classic `Deployment` with Argo `Rollout` and adds: +- canary traffic shifting with pauses and manual promotion; +- blue-green release flow with preview service; +- automated health analysis for rollback decisions. 
+ +## 1) Argo Rollouts setup + +### Install controller + +```powershell +kubectl create namespace argo-rollouts +kubectl apply -n argo-rollouts -f https://github.com/argoproj/argo-rollouts/releases/latest/download/install.yaml +kubectl get pods -n argo-rollouts +``` + +Expected: controller pod is `Running`. + +### Install kubectl plugin (Windows PowerShell) + +```powershell +$version = (Invoke-RestMethod https://api.github.com/repos/argoproj/argo-rollouts/releases/latest).tag_name +Invoke-WebRequest -Uri "https://github.com/argoproj/argo-rollouts/releases/download/$version/kubectl-argo-rollouts-windows-amd64.exe" -OutFile "$env:USERPROFILE\kubectl-argo-rollouts.exe" +[Environment]::SetEnvironmentVariable("Path", $env:Path + ";$env:USERPROFILE", [EnvironmentVariableTarget]::User) +``` + +Restart terminal and verify: + +```powershell +kubectl argo rollouts version +``` + +### Install dashboard + +```powershell +kubectl apply -n argo-rollouts -f https://github.com/argoproj/argo-rollouts/releases/latest/download/dashboard-install.yaml +kubectl port-forward svc/argo-rollouts-dashboard -n argo-rollouts 3100:3100 +``` + +Open: `http://localhost:3100`. + +## 2) Rollout vs Deployment (key differences) + +- `kind` changes from `Deployment` to `Rollout`. +- `spec.strategy` supports advanced modes (`canary`, `blueGreen`), not only rolling update knobs. +- Rollouts can pause, require manual promotion, and run metric checks (`AnalysisTemplate`). +- Rollouts support explicit abort/retry flow for safer rollback handling. + +## 3) Canary deployment implementation + +### What was changed + +- Added template `templates/rollout.yaml` (enabled by `rollouts.enabled`). +- Kept legacy `templates/deployment.yaml` behind guard `if not .Values.rollouts.enabled`. +- Default strategy in `values.yaml` is `canary`. 
+- Canary progression: + - 20% -> manual pause + - 40% -> pause 30s + - 60% -> pause 30s + - 80% -> pause 30s + - 100% + +### Deploy canary + +```powershell +helm upgrade --install devops-info .\lab12c\k8s\devops-info -n default --create-namespace +kubectl argo rollouts get rollout devops-info -n default -w +``` + +Trigger new revision: + +```powershell +helm upgrade --install devops-info .\lab12c\k8s\devops-info -n default ` + --set image.tag=lab14-canary-1 ` + --set env.RELEASE_ID=lab14-canary-1 +``` + +Promote first manual pause: + +```powershell +kubectl argo rollouts promote devops-info -n default +``` + +Abort rollout (rollback test): + +```powershell +kubectl argo rollouts abort devops-info -n default +kubectl argo rollouts get rollout devops-info -n default +``` + +Retry aborted rollout: + +```powershell +kubectl argo rollouts retry rollout devops-info -n default +``` + +## 4) Blue-green deployment implementation + +### What was changed + +- Added preview service template: `templates/service-preview.yaml`. +- Added blue-green values profile: `values-bluegreen.yaml`. +- `values-prod.yaml` also uses `rollouts.strategy=blueGreen`. 
+- Blue-green config uses: + - `activeService: ` + - `previewService: -preview` + - `autoPromotionEnabled: false` (manual cutover) + +### Deploy blue-green + +```powershell +helm upgrade --install devops-info-bg .\lab12c\k8s\devops-info -n default ` + -f .\lab12c\k8s\devops-info\values-bluegreen.yaml ` + --set env.RELEASE_ID=lab14-bg-blue +``` + +Trigger green revision: + +```powershell +helm upgrade --install devops-info-bg .\lab12c\k8s\devops-info -n default ` + -f .\lab12c\k8s\devops-info\values-bluegreen.yaml ` + --set image.tag=lab14-bg-green ` + --set env.RELEASE_ID=lab14-bg-green +``` + +Port-forward active and preview: + +```powershell +kubectl port-forward svc/devops-info-bg -n default 8080:80 +kubectl port-forward svc/devops-info-bg-preview -n default 8081:80 +``` + +Promote preview to active: + +```powershell +kubectl argo rollouts promote devops-info-bg -n default +``` + +Instant rollback: + +```powershell +kubectl argo rollouts undo devops-info-bg -n default +``` + +## 5) Bonus: automated analysis + +### What was changed + +- Added `templates/analysis-template.yaml`. +- Analysis is controlled by `rollouts.analysis.*` values and enabled by default. +- Canary steps can include analysis gate via values. +- Health check uses `/health` and expects JSON `{"status":"healthy"}`. + +Example canary with analysis gate: + +```yaml +rollouts: + canary: + steps: + - setWeight: 20 + - analysis: + templates: + - templateName: devops-info-success-rate + - pause: {} + - setWeight: 100 +``` + +Watch analysis runs: + +```powershell +kubectl get analysisrun -n default +kubectl describe analysisrun -n default +``` + +If analysis fails (failure limit reached), rollout is automatically aborted and traffic stays on stable ReplicaSet. + +## 6) Strategy comparison and recommendations + +### Canary + +- Best for risky changes, gradual verification on real traffic. +- Lower blast radius due to percentage-based rollout. +- Slower release and rollback compared to hard switching. 
+ +Use when: +- new feature logic can fail partially; +- you need progressive exposure and close monitoring. + +### Blue-green + +- Best for fast cutover and fast rollback. +- Easy A/B validation via separate preview service. +- Requires extra resources because both versions run together. + +Use when: +- release must switch instantly; +- preview acceptance tests are mandatory before go-live. + +## 7) Useful CLI commands + +```powershell +kubectl argo rollouts list rollouts -A +kubectl argo rollouts get rollout -n -w +kubectl argo rollouts promote -n +kubectl argo rollouts abort -n +kubectl argo rollouts retry rollout -n +kubectl argo rollouts undo -n +kubectl argo rollouts dashboard +``` + +## 8) Screenshots checklist (dashboard) + +Add screenshots to `lab13c/docs/`: +- canary rollout at 20% paused; +- canary promoted to 40/60/80; +- aborted canary rollback; +- blue-green preview and active before promotion; +- blue-green after promotion. diff --git a/lab13c/k8s/RUNBOOK.md b/lab13c/k8s/RUNBOOK.md new file mode 100644 index 0000000000..a315986c0e --- /dev/null +++ b/lab13c/k8s/RUNBOOK.md @@ -0,0 +1,72 @@ +# Lab 13: kind + Argo CD (copy-paste) + +Repo root is `DevOps-CC`. Cluster name below is `lab11`; change `--name` if yours differs. 
+ +## Cluster and image + +```powershell +kind create cluster --name lab11 --wait 5m +docker build -t tsixphoenix/devops-info-python:lab12 .\lab12c\app_python +kind load docker-image tsixphoenix/devops-info-python:lab12 --name lab11 +``` + +## Argo CD + +```powershell +helm repo add argo https://argoproj.github.io/argo-helm +helm repo update +kubectl create namespace argocd +helm upgrade --install argocd argo/argo-cd -n argocd --version 7.7.16 ` + --set configs.params.server.insecure=true ` + --set server.extraArgs="{--insecure}" ` + --wait --timeout 10m +``` + +## Applications + +```powershell +kubectl apply -f .\lab13c\k8s\argocd\application.yaml +kubectl apply -f .\lab13c\k8s\argocd\application-dev.yaml +kubectl apply -f .\lab13c\k8s\argocd\application-prod.yaml +``` + +`devops-info-dev` syncs alone. For the other two use the UI Sync button or CLI. + +## CLI sync for manual apps + +```powershell +Invoke-WebRequest -Uri "https://github.com/argoproj/argo-cd/releases/download/v2.13.3/argocd-windows-amd64.exe" -OutFile "$env:TEMP\argocd.exe" +$pwB64 = kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" +$pw = [Text.Encoding]::UTF8.GetString([Convert]::FromBase64String($pwB64)) +Start-Process kubectl -ArgumentList "port-forward","svc/argocd-server","-n","argocd","18080:80" -WindowStyle Hidden +Start-Sleep 5 +& "$env:TEMP\argocd.exe" login localhost:18080 --username admin --password $pw --plaintext +& "$env:TEMP\argocd.exe" app sync devops-info --plaintext --server localhost:18080 +& "$env:TEMP\argocd.exe" app sync devops-info-prod --plaintext --server localhost:18080 +``` + +## Quick checks + +```powershell +kubectl get applications -n argocd +kubectl get pods -n dev +kubectl get pods -n prod +kubectl get pods -n default +``` + +## Self-heal check (dev) + +```powershell +kubectl scale deployment devops-info-dev -n dev --replicas=5 +Start-Sleep 30 +kubectl get deploy -n dev devops-info-dev +``` + +Expect 1 replica again. 
+ +## Tear down + +```powershell +helm uninstall argocd -n argocd +kind delete cluster --name lab11 +``` diff --git a/lab13c/k8s/argocd/application-dev.yaml b/lab13c/k8s/argocd/application-dev.yaml new file mode 100644 index 0000000000..2d1f3470d0 --- /dev/null +++ b/lab13c/k8s/argocd/application-dev.yaml @@ -0,0 +1,24 @@ +# Lab 13 Task 3 — dev: auto-sync + selfHeal + prune +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: devops-info-dev + namespace: argocd +spec: + project: default + source: + repoURL: https://github.com/TsixPhoenix/DevOps-CC.git + targetRevision: lab12 + path: lab12c/k8s/devops-info + helm: + valueFiles: + - values-dev.yaml + destination: + server: https://kubernetes.default.svc + namespace: dev + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/lab13c/k8s/argocd/application-prod.yaml b/lab13c/k8s/argocd/application-prod.yaml new file mode 100644 index 0000000000..8b08118b6b --- /dev/null +++ b/lab13c/k8s/argocd/application-prod.yaml @@ -0,0 +1,21 @@ +# Lab 13 Task 3 — prod: manual sync only (no automated block) +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: devops-info-prod + namespace: argocd +spec: + project: default + source: + repoURL: https://github.com/TsixPhoenix/DevOps-CC.git + targetRevision: lab12 + path: lab12c/k8s/devops-info + helm: + valueFiles: + - values-prod.yaml + destination: + server: https://kubernetes.default.svc + namespace: prod + syncPolicy: + syncOptions: + - CreateNamespace=true diff --git a/lab13c/k8s/argocd/application.yaml b/lab13c/k8s/argocd/application.yaml new file mode 100644 index 0000000000..cf56216046 --- /dev/null +++ b/lab13c/k8s/argocd/application.yaml @@ -0,0 +1,21 @@ +# Lab 13 Task 2 — single Application, manual sync, default namespace +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: devops-info + namespace: argocd +spec: + project: default + source: + repoURL: 
https://github.com/TsixPhoenix/DevOps-CC.git + targetRevision: lab12 + path: lab12c/k8s/devops-info + helm: + valueFiles: + - values.yaml + destination: + server: https://kubernetes.default.svc + namespace: default + syncPolicy: + syncOptions: + - CreateNamespace=true diff --git a/lab14c/docs/1.jpg b/lab14c/docs/1.jpg new file mode 100644 index 0000000000..5ff4d754a7 Binary files /dev/null and b/lab14c/docs/1.jpg differ diff --git a/lab14c/docs/2.jpg b/lab14c/docs/2.jpg new file mode 100644 index 0000000000..5c920fe2aa Binary files /dev/null and b/lab14c/docs/2.jpg differ diff --git a/lab14c/docs/3.jpg b/lab14c/docs/3.jpg new file mode 100644 index 0000000000..96fe8f4cb2 Binary files /dev/null and b/lab14c/docs/3.jpg differ diff --git a/lab14c/docs/4.jpg b/lab14c/docs/4.jpg new file mode 100644 index 0000000000..665a5d3797 Binary files /dev/null and b/lab14c/docs/4.jpg differ diff --git a/lab14c/docs/5.jpg b/lab14c/docs/5.jpg new file mode 100644 index 0000000000..6cc01c77e2 Binary files /dev/null and b/lab14c/docs/5.jpg differ diff --git a/lab14c/docs/6.jpg b/lab14c/docs/6.jpg new file mode 100644 index 0000000000..7c20a9c31c Binary files /dev/null and b/lab14c/docs/6.jpg differ diff --git a/lab14c/docs/7.jpg b/lab14c/docs/7.jpg new file mode 100644 index 0000000000..6e6be4cb3b Binary files /dev/null and b/lab14c/docs/7.jpg differ diff --git a/lab2c/app_go/.dockerignore b/lab2c/app_go/.dockerignore new file mode 100644 index 0000000000..55a3b7cb13 --- /dev/null +++ b/lab2c/app_go/.dockerignore @@ -0,0 +1,7 @@ +*.exe +*.log +.git/ +.gitignore +.idea/ +.vscode/ +docs/ diff --git a/lab2c/app_go/Dockerfile b/lab2c/app_go/Dockerfile new file mode 100644 index 0000000000..534bac98be --- /dev/null +++ b/lab2c/app_go/Dockerfile @@ -0,0 +1,21 @@ +FROM golang:1.22 AS builder + +WORKDIR /src + +COPY go.mod ./ +RUN go mod download + +COPY main.go ./ +RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o devops-info + +FROM gcr.io/distroless/static-debian12:nonroot + +WORKDIR 
/app +COPY --from=builder /src/devops-info /app/devops-info + +ENV HOST=0.0.0.0 \ + PORT=5000 + +EXPOSE 5000 + +ENTRYPOINT ["/app/devops-info"] diff --git a/lab2c/app_go/README.md b/lab2c/app_go/README.md new file mode 100644 index 0000000000..36e81eb856 --- /dev/null +++ b/lab2c/app_go/README.md @@ -0,0 +1,41 @@ +# DevOps Info Service (Go) + +## Overview +Compiled-language version of the DevOps info service. It exposes the same two endpoints as the Python app and keeps the JSON response structure consistent. + +## Prerequisites +- Go 1.22+ installed + +## Build and Run +Run directly: +```bash +go run main.go +``` + +Build a binary: +```bash +go build -o devops-info +./devops-info +``` + +Windows build/run: +```bash +go build -o devops-info.exe +.\devops-info.exe +``` + +Custom config examples: +```bash +PORT=8080 go run main.go +HOST=127.0.0.1 PORT=3000 go run main.go +``` + +## API Endpoints +- `GET /` - Service and system information +- `GET /health` - Health check + +## Configuration +| Variable | Default | Description | +| --- | --- | --- | +| `HOST` | `0.0.0.0` | Bind address for the server | +| `PORT` | `5000` | Port to listen on | diff --git a/lab2c/app_go/docs/LAB02.md b/lab2c/app_go/docs/LAB02.md new file mode 100644 index 0000000000..71a016acd1 --- /dev/null +++ b/lab2c/app_go/docs/LAB02.md @@ -0,0 +1,131 @@ +# LAB02 - Docker Containerization (Go, Multi-Stage) + +## Multi-Stage Build Strategy +I used a two-stage Dockerfile: +1. **Builder stage** (`golang:1.22`) to compile the binary. +2. **Runtime stage** (`distroless/static-debian12:nonroot`) to run only the binary. + +This keeps the final image small and removes the Go toolchain from production. 
+ +Dockerfile snippet: +```dockerfile +FROM golang:1.22 AS builder +WORKDIR /src +COPY go.mod ./ +RUN go mod download +COPY main.go ./ +RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o devops-info + +FROM gcr.io/distroless/static-debian12:nonroot +COPY --from=builder /src/devops-info /app/devops-info +ENTRYPOINT ["/app/devops-info"] +``` + + +Image size output: +```text +tsixphoenix/devops-info-go latest 7fc572b1d863 4 minutes ago 17.7MB +``` + +## Build and Run Evidence +Build output: +```text +docker build -t tsixphoenix/devops-info-go:latest . +[+] Building 35.3s (16/16) FINISHED docker:desktop-linux + => [internal] load build definition from Dockerfile 0.0s + => => transferring dockerfile: 396B 0.0s + => [internal] load metadata for gcr.io/distroless/static-debian12:nonroot 1.8s + => [internal] load metadata for docker.io/library/golang:1.22 2.4s + => [auth] library/golang:pull token for registry-1.docker.io 0.0s + => [internal] load .dockerignore 0.0s + => => transferring context: 91B 0.0s + => [builder 1/6] FROM docker.io/library/golang:1.22@sha256:1cf6c45ba39db9fd6db16922041d074a63c935556a05c5ccb62d181034df7f02 22.6s + => => resolve docker.io/library/golang:1.22@sha256:1cf6c45ba39db9fd6db16922041d074a63c935556a05c5ccb62d181034df7f02 0.0s + => => sha256:1451027d3c0ee892b96310c034788bbe22b30b8ea2d075edbd09acfeaaaa439f 126B / 126B 0.4s + => => sha256:afa154b433c7f72db064d19e1bcfa84ee196ad29120328f6bdb2c5fbd7b8eeac 69.36MB / 69.36MB 8.8s + => => sha256:3b7f19923e1501f025b9459750b20f5df37af452482f75b91205f345d1c0e1b5 92.33MB / 92.33MB 10.0s + => => sha256:35af2a7690f2b43e7237d1fae8e3f2350dfb25f3249e9cf65121866f9c56c772 64.39MB / 64.39MB 8.1s + => => sha256:32b550be6cb62359a0f3a96bc0dc289f8b45d097eaad275887f163c6780b4108 24.06MB / 24.06MB 3.8s + => => sha256:a492eee5e55976c7d3feecce4c564aaf6f14fb07fdc5019d06f4154eddc93fde 48.48MB / 48.48MB 5.2s + => => extracting sha256:a492eee5e55976c7d3feecce4c564aaf6f14fb07fdc5019d06f4154eddc93fde 2.3s + => => extracting 
sha256:32b550be6cb62359a0f3a96bc0dc289f8b45d097eaad275887f163c6780b4108 0.8s + => => extracting sha256:35af2a7690f2b43e7237d1fae8e3f2350dfb25f3249e9cf65121866f9c56c772 2.5s + => => extracting sha256:3b7f19923e1501f025b9459750b20f5df37af452482f75b91205f345d1c0e1b5 2.0s + => => extracting sha256:afa154b433c7f72db064d19e1bcfa84ee196ad29120328f6bdb2c5fbd7b8eeac 5.1s + => => extracting sha256:1451027d3c0ee892b96310c034788bbe22b30b8ea2d075edbd09acfeaaaa439f 0.0s + => => extracting sha256:4f4fb700ef54461cfa02571ae0db9a0dc1e0cdb5577484a6d75e68dc38e8acc1 0.0s + => [internal] load build context 0.1s + => => transferring context: 6.51kB 0.0s + => [stage-1 1/3] FROM gcr.io/distroless/static-debian12:nonroot@sha256:cba10d7abd3e203428e86f5b2d7fd5eb7d8987c387864ae4996cf97191b33764 2.9s + => => resolve gcr.io/distroless/static-debian12:nonroot@sha256:cba10d7abd3e203428e86f5b2d7fd5eb7d8987c387864ae4996cf97191b33764 0.0s + => => sha256:069d1e267530c2e681fbd4d481553b4d05f98082b18fafac86e7f12996dddd0b 131.91kB / 131.91kB 0.6s + => => sha256:dcaa5a89b0ccda4b283e16d0b4d0891cd93d5fe05c6798f7806781a6a2d84354 314B / 314B 0.4s + => => sha256:4aa0ea1413d37a58615488592a0b827ea4b2e48fa5a77cf707d0e35f025e613f 385B / 385B 0.4s + => => sha256:dd64bf2dd177757451a98fcdc999a339c35dee5d9872d8f4dc69c8f3c4dd0112 80B / 80B 0.4s + => => sha256:52630fc75a18675c530ed9eba5f55eca09b03e91bd5bc15307918bbc1a7e7296 162B / 162B 0.3s + => => sha256:3214acf345c0cc6bbdb56b698a41ccdefc624a09d6beb0d38b5de0b2303ecaf4 123B / 123B 0.3s + => => sha256:7c12895b777bcaa8ccae0605b4de635b68fc32d60fa08f421dc3818bf55ee212 188B / 188B 0.3s + => => sha256:2780920e5dbfbe103d03a583ed75345306e572ec5a48cb10361f046767d9f29a 67B / 67B 0.3s + => => sha256:62de241dac5fe19d5f8f4defe034289006ddaa0f2cca735db4718fe2a23e504e 31.24kB / 31.24kB 0.6s + => => sha256:017886f7e1764618ffad6fbd503c42a60076c63adc16355cac80f0f311cae4c9 544.07kB / 544.07kB 0.7s + => => sha256:bfb59b82a9b65e47d485e53b3e815bca3b3e21a095bd0cb88ced9ac0b48062bf 13.36kB / 
13.36kB 0.6s + => => sha256:fab8c4b3fa32236a59c44cc504a69b18788d5c17c045691c2d682267ae8cf468 104.22kB / 104.22kB 0.6s + => => extracting sha256:fab8c4b3fa32236a59c44cc504a69b18788d5c17c045691c2d682267ae8cf468 0.1s + => => extracting sha256:bfb59b82a9b65e47d485e53b3e815bca3b3e21a095bd0cb88ced9ac0b48062bf 0.1s + => => extracting sha256:017886f7e1764618ffad6fbd503c42a60076c63adc16355cac80f0f311cae4c9 0.5s + => => extracting sha256:62de241dac5fe19d5f8f4defe034289006ddaa0f2cca735db4718fe2a23e504e 0.1s + => => extracting sha256:2780920e5dbfbe103d03a583ed75345306e572ec5a48cb10361f046767d9f29a 0.0s + => => extracting sha256:7c12895b777bcaa8ccae0605b4de635b68fc32d60fa08f421dc3818bf55ee212 0.0s + => => extracting sha256:3214acf345c0cc6bbdb56b698a41ccdefc624a09d6beb0d38b5de0b2303ecaf4 0.1s + => => extracting sha256:52630fc75a18675c530ed9eba5f55eca09b03e91bd5bc15307918bbc1a7e7296 0.1s + => => extracting sha256:dd64bf2dd177757451a98fcdc999a339c35dee5d9872d8f4dc69c8f3c4dd0112 0.0s + => => extracting sha256:4aa0ea1413d37a58615488592a0b827ea4b2e48fa5a77cf707d0e35f025e613f 0.0s + => => extracting sha256:dcaa5a89b0ccda4b283e16d0b4d0891cd93d5fe05c6798f7806781a6a2d84354 0.0s + => => extracting sha256:069d1e267530c2e681fbd4d481553b4d05f98082b18fafac86e7f12996dddd0b 0.0s + => [stage-1 2/3] WORKDIR /app 0.1s + => [builder 2/6] WORKDIR /src 0.5s + => [builder 3/6] COPY go.mod ./ 0.1s + => [builder 4/6] RUN go mod download 0.5s + => [builder 5/6] COPY main.go ./ 0.1s + => [builder 6/6] RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o devops-info 8.1s + => [stage-1 3/3] COPY --from=builder /src/devops-info /app/devops-info 0.1s + => exporting to image 0.6s + => => exporting layers 0.4s + => => exporting manifest sha256:39177489cedb41b9d9f566a8be5d09c8ffe938f98b590aa0ebb987f1cf38d7a6 0.0s + => => exporting config sha256:d86ea6d9a836253c87a0ac2232aa6f03cdc8198146f9acdba1f3d31c617bca82 0.0s + => => exporting attestation manifest 
sha256:79e9867f53966cbf5943864985b72aeed88ea8a8349789577aee72d45045e5af 0.0s + => => exporting manifest list sha256:7fc572b1d86304a2634962e06610c7cf4295c4a466b6e52aed34f93550555008 0.0s + => => naming to docker.io/tsixphoenix/devops-info-go:latest 0.0s + => => unpacking to docker.io/tsixphoenix/devops-info-go:latest 0.1s + +``` + +Run output: +```text +docker run --rm -p 5000:5000 --name devops-info-go tsixphoenix/devops-info-go:latest +2026/01/29 12:37:42 Starting DevOps Info Service on 0.0.0.0:5000 +``` + +Endpoint checks: +```text +curl http://localhost:5000/ +{"service":{"name":"devops-info-service","version":"1.0.0","description":"DevOps course info service","framework":"Go net/http"},"system":{"hostname":"50a30efde177","platform":"linux","platform_version":"Distroless","architecture":"amd64","cpu_count":12,"python_version":"go1.22.12"},"runtime":{"uptime_seconds":79,"uptime_human":"0 hours, 1 minute","current_time":"2026-01-29T12:39:02Z","timezone":"UTC"},"request":{"client_ip":"172.17.0.1","user_agent":"curl/8.16.0","method":"GET","path":"/"},"endpoints":[{"path":"/","method":"GET","description":"Service information"},{"path":"/health","method":"GET","description":"Health check"}]} + +curl http://localhost:5000/health +{"status":"healthy","timestamp":"2026-01-29T12:39:31Z","uptime_seconds":108} + +2026/01/29 12:39:02 Request: GET / +2026/01/29 12:39:02 Response: GET / -> 200 (418.191µs) +2026/01/29 12:39:31 Request: GET /health +2026/01/29 12:39:31 Response: GET /health -> 200 (114.664µs) +``` + +## Technical Analysis +- The builder stage contains the full Go toolchain; the runtime stage does not. +- If I shipped the builder stage, the image would be much larger and include tools that should not be in production. +- A static binary lets me use a minimal base image. +- The final image runs as a non-root user, which reduces risk. + +## Challenges and Solutions +- I made sure the binary was static (CGO disabled) so it works in a minimal runtime image. 
+- Distroless images do not include a shell, so debugging is done in the builder stage, not in the runtime image. diff --git a/lab2c/app_go/go.mod b/lab2c/app_go/go.mod new file mode 100644 index 0000000000..7a7fcedd1c --- /dev/null +++ b/lab2c/app_go/go.mod @@ -0,0 +1,3 @@ +module devops-info-service + +go 1.22 diff --git a/lab2c/app_go/main.go b/lab2c/app_go/main.go new file mode 100644 index 0000000000..2abcd3938a --- /dev/null +++ b/lab2c/app_go/main.go @@ -0,0 +1,257 @@ +package main + +import ( + "encoding/json" + "fmt" + "log" + "net" + "net/http" + "os" + "runtime" + "strings" + "time" +) + +type Service struct { + Name string `json:"name"` + Version string `json:"version"` + Description string `json:"description"` + Framework string `json:"framework"` +} + +type System struct { + Hostname string `json:"hostname"` + Platform string `json:"platform"` + PlatformVersion string `json:"platform_version"` + Architecture string `json:"architecture"` + CPUCount int `json:"cpu_count"` + PythonVersion string `json:"python_version"` +} + +type Runtime struct { + UptimeSeconds int `json:"uptime_seconds"` + UptimeHuman string `json:"uptime_human"` + CurrentTime string `json:"current_time"` + Timezone string `json:"timezone"` +} + +type RequestInfo struct { + ClientIP string `json:"client_ip"` + UserAgent string `json:"user_agent"` + Method string `json:"method"` + Path string `json:"path"` +} + +type Endpoint struct { + Path string `json:"path"` + Method string `json:"method"` + Description string `json:"description"` +} + +type Response struct { + Service Service `json:"service"` + System System `json:"system"` + Runtime Runtime `json:"runtime"` + Request RequestInfo `json:"request"` + Endpoints []Endpoint `json:"endpoints"` +} + +var startTime = time.Now().UTC() + +func main() { + host := getenv("HOST", "0.0.0.0") + port := getenv("PORT", "5000") + addr := net.JoinHostPort(host, port) + + mux := http.NewServeMux() + mux.HandleFunc("/", rootHandler) + 
mux.HandleFunc("/health", healthHandler) + + handler := recoverMiddleware(loggingMiddleware(mux)) + + server := &http.Server{ + Addr: addr, + Handler: handler, + ReadHeaderTimeout: 5 * time.Second, + } + + log.Printf("Starting DevOps Info Service on %s", addr) + if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed { + log.Fatalf("server error: %v", err) + } +} + +func rootHandler(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/" { + writeNotFound(w) + return + } + if r.Method != http.MethodGet { + writeMethodNotAllowed(w) + return + } + + uptimeSeconds, uptimeHuman := getUptime() + now := time.Now().UTC() + + hostname, _ := os.Hostname() + response := Response{ + Service: Service{ + Name: "devops-info-service", + Version: "1.0.0", + Description: "DevOps course info service", + Framework: "Go net/http", + }, + System: System{ + Hostname: hostname, + Platform: runtime.GOOS, + PlatformVersion: getPlatformVersion(), + Architecture: runtime.GOARCH, + CPUCount: runtime.NumCPU(), + PythonVersion: runtime.Version(), + }, + Runtime: Runtime{ + UptimeSeconds: uptimeSeconds, + UptimeHuman: uptimeHuman, + CurrentTime: now.Format(time.RFC3339), + Timezone: "UTC", + }, + Request: RequestInfo{ + ClientIP: getClientIP(r), + UserAgent: r.UserAgent(), + Method: r.Method, + Path: r.URL.Path, + }, + Endpoints: []Endpoint{ + {Path: "/", Method: "GET", Description: "Service information"}, + {Path: "/health", Method: "GET", Description: "Health check"}, + }, + } + + writeJSON(w, http.StatusOK, response) +} + +func healthHandler(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/health" { + writeNotFound(w) + return + } + if r.Method != http.MethodGet { + writeMethodNotAllowed(w) + return + } + + uptimeSeconds, _ := getUptime() + payload := map[string]any{ + "status": "healthy", + "timestamp": time.Now().UTC().Format(time.RFC3339), + "uptime_seconds": uptimeSeconds, + } + + writeJSON(w, http.StatusOK, payload) +} + +func getUptime() (int, 
string) { + seconds := int(time.Since(startTime).Seconds()) + hours := seconds / 3600 + minutes := (seconds % 3600) / 60 + hourLabel := "hours" + if hours == 1 { + hourLabel = "hour" + } + minuteLabel := "minutes" + if minutes == 1 { + minuteLabel = "minute" + } + return seconds, fmt.Sprintf("%d %s, %d %s", hours, hourLabel, minutes, minuteLabel) +} + +func getClientIP(r *http.Request) string { + if forwarded := r.Header.Get("X-Forwarded-For"); forwarded != "" { + parts := strings.Split(forwarded, ",") + return strings.TrimSpace(parts[0]) + } + host, _, err := net.SplitHostPort(r.RemoteAddr) + if err == nil { + return host + } + return r.RemoteAddr +} + +func getPlatformVersion() string { + if value := os.Getenv("OS"); value != "" { + return value + } + if data, err := os.ReadFile("/etc/os-release"); err == nil { + for _, line := range strings.Split(string(data), "\n") { + if strings.HasPrefix(line, "PRETTY_NAME=") { + return strings.Trim(strings.TrimPrefix(line, "PRETTY_NAME="), "\"") + } + } + } + return "unknown" +} + +func writeJSON(w http.ResponseWriter, status int, payload any) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + if err := json.NewEncoder(w).Encode(payload); err != nil { + log.Printf("json encode error: %v", err) + } +} + +func writeNotFound(w http.ResponseWriter) { + writeJSON(w, http.StatusNotFound, map[string]string{ + "error": "Not Found", + "message": "Endpoint does not exist", + }) +} + +func writeMethodNotAllowed(w http.ResponseWriter) { + writeJSON(w, http.StatusMethodNotAllowed, map[string]string{ + "error": "Method Not Allowed", + "message": "Only GET is supported for this endpoint", + }) +} + +type statusRecorder struct { + http.ResponseWriter + status int +} + +func (recorder *statusRecorder) WriteHeader(code int) { + recorder.status = code + recorder.ResponseWriter.WriteHeader(code) +} + +func loggingMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r 
*http.Request) { + recorder := &statusRecorder{ResponseWriter: w, status: http.StatusOK} + start := time.Now() + log.Printf("Request: %s %s", r.Method, r.URL.Path) + next.ServeHTTP(recorder, r) + log.Printf("Response: %s %s -> %d (%s)", r.Method, r.URL.Path, recorder.status, time.Since(start)) + }) +} + +func recoverMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + defer func() { + if err := recover(); err != nil { + log.Printf("panic recovered: %v", err) + writeJSON(w, http.StatusInternalServerError, map[string]string{ + "error": "Internal Server Error", + "message": "An unexpected error occurred", + }) + } + }() + next.ServeHTTP(w, r) + }) +} + +func getenv(key, fallback string) string { + if value := os.Getenv(key); value != "" { + return value + } + return fallback +} diff --git a/lab2c/app_python/.dockerignore b/lab2c/app_python/.dockerignore new file mode 100644 index 0000000000..b7738de7b8 --- /dev/null +++ b/lab2c/app_python/.dockerignore @@ -0,0 +1,12 @@ +__pycache__/ +*.py[cod] +*.log +venv/ +.venv/ +.env +.git/ +.gitignore +.idea/ +.vscode/ +docs/ +tests/ diff --git a/lab2c/app_python/.gitignore b/lab2c/app_python/.gitignore new file mode 100644 index 0000000000..8052e93c8b --- /dev/null +++ b/lab2c/app_python/.gitignore @@ -0,0 +1,14 @@ +__pycache__/ +*.py[cod] +*.log +venv/ +.venv/ +.env + +# IDE +.idea/ +.vscode/ + +# OS +.DS_Store +Thumbs.db diff --git a/lab2c/app_python/Dockerfile b/lab2c/app_python/Dockerfile new file mode 100644 index 0000000000..76219e6c10 --- /dev/null +++ b/lab2c/app_python/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.13-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +RUN useradd -m -u 10001 appuser + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY --chown=appuser:appuser app.py . 
+ +USER appuser + +EXPOSE 5000 + +CMD ["python", "app.py"] diff --git a/lab2c/app_python/README.md b/lab2c/app_python/README.md new file mode 100644 index 0000000000..742a7439f4 --- /dev/null +++ b/lab2c/app_python/README.md @@ -0,0 +1,72 @@ +# DevOps Info Service (FastAPI) + +## Overview +Small service returning system info about the machine it runs on, plus a health check. + +## Prerequisites +- Python 3.11+ +- pip +- (Optional) venv tool + +## Installation +### Windows +```bash +python -m venv venv +.\venv\Scripts\Activate.ps1 +pip install -r requirements.txt +``` + +### macOS/Linux +```bash +python -m venv venv +source venv/bin/activate +pip install -r requirements.txt +``` + +## Running the Application +```bash +python app.py +``` + +Custom config examples: +```bash +PORT=8080 python app.py +HOST=127.0.0.1 PORT=3000 python app.py +``` + +FastAPI docs: +- `http://localhost:5000/docs` + +## API Endpoints +- `GET /` - Service and system information +- `GET /health` - Health check + +## Configuration +| Variable | Default | Description | +| --- | --- | --- | +| `HOST` | `0.0.0.0` | Bind address for the server | +| `PORT` | `5000` | Port to listen on | +| `DEBUG` | `False` | Enable auto-reload | + +## Docker +Command patterns (replace the placeholders with your values): + +**Build locally** +```bash +docker build -t <username>/<repo>:<tag> . +``` + +**Run container** +```bash +docker run --rm -p <host_port>:5000 --name <container_name> <username>/<repo>:<tag> +``` + +**Pull from Docker Hub** +```bash +docker pull <username>/<repo>:<tag> +``` + +Optional env overrides: +```bash +docker run --rm -e PORT=5000 -e HOST=0.0.0.0 -p <host_port>:5000 <username>/<repo>:<tag> +``` diff --git a/lab2c/app_python/app.py b/lab2c/app_python/app.py new file mode 100644 index 0000000000..8935b94091 --- /dev/null +++ b/lab2c/app_python/app.py @@ -0,0 +1,158 @@ +""" +DevOps Info Service +FastAPI application module. 
+""" + +from __future__ import annotations + +import logging +import os +import platform +import socket +from datetime import datetime, timezone + +import uvicorn +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse +from starlette.exceptions import HTTPException as StarletteHTTPException + +# Config +HOST = os.getenv("HOST", "0.0.0.0") +PORT = int(os.getenv("PORT", "5000")) +DEBUG = os.getenv("DEBUG", "False").lower() == "true" + +SERVICE_NAME = "devops-info-service" +SERVICE_VERSION = "1.0.0" +SERVICE_DESCRIPTION = "DevOps course info service" +SERVICE_FRAMEWORK = "FastAPI" + +START_TIME = datetime.now(timezone.utc) + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger("devops-info-service") + +app = FastAPI( + title="DevOps Info Service", + version=SERVICE_VERSION, + description=SERVICE_DESCRIPTION, +) + + +def _format_uptime(seconds: int) -> str: + hours = seconds // 3600 + minutes = (seconds % 3600) // 60 + hour_label = "hour" if hours == 1 else "hours" + minute_label = "minute" if minutes == 1 else "minutes" + return f"{hours} {hour_label}, {minutes} {minute_label}" + + +def get_uptime() -> dict[str, int | str]: + delta = datetime.now(timezone.utc) - START_TIME + seconds = int(delta.total_seconds()) + return { + "seconds": seconds, + "human": _format_uptime(seconds), + } + + +def get_system_info() -> dict[str, str | int]: + return { + "hostname": socket.gethostname(), + "platform": platform.system(), + "platform_version": platform.release(), + "architecture": platform.machine(), + "cpu_count": os.cpu_count() or 0, + "python_version": platform.python_version(), + } + + +def isoformat_utc(dt: datetime) -> str: + return dt.astimezone(timezone.utc).isoformat().replace("+00:00", "Z") + + +@app.middleware("http") +async def log_requests(request: Request, call_next): + logger.info("Request: %s %s", request.method, request.url.path) + response = await 
call_next(request) + logger.info("Response: %s %s -> %s", request.method, request.url.path, response.status_code) + return response + + +@app.exception_handler(StarletteHTTPException) +async def http_exception_handler(request: Request, exc: StarletteHTTPException): + if exc.status_code == 404: + return JSONResponse( + status_code=404, + content={ + "error": "Not Found", + "message": "Endpoint does not exist", + }, + ) + return JSONResponse( + status_code=exc.status_code, + content={"error": exc.detail}, + ) + + +@app.exception_handler(Exception) +async def unhandled_exception_handler(request: Request, exc: Exception): + logger.exception("Unhandled error: %s", exc) + return JSONResponse( + status_code=500, + content={ + "error": "Internal Server Error", + "message": "An unexpected error occurred", + }, + ) + + +@app.get("/") +async def root(request: Request): + uptime = get_uptime() + now = datetime.now(timezone.utc) + + response = { + "service": { + "name": SERVICE_NAME, + "version": SERVICE_VERSION, + "description": SERVICE_DESCRIPTION, + "framework": SERVICE_FRAMEWORK, + }, + "system": get_system_info(), + "runtime": { + "uptime_seconds": uptime["seconds"], + "uptime_human": uptime["human"], + "current_time": isoformat_utc(now), + "timezone": "UTC", + }, + "request": { + "client_ip": request.client.host if request.client else "unknown", + "user_agent": request.headers.get("user-agent", "unknown"), + "method": request.method, + "path": request.url.path, + }, + "endpoints": [ + {"path": "/", "method": "GET", "description": "Service information"}, + {"path": "/health", "method": "GET", "description": "Health check"}, + ], + } + + return response + + +@app.get("/health") +async def health(): + uptime = get_uptime() + return { + "status": "healthy", + "timestamp": isoformat_utc(datetime.now(timezone.utc)), + "uptime_seconds": uptime["seconds"], + } + + +if __name__ == "__main__": + logger.info("Starting DevOps Info Service on %s:%s", HOST, PORT) + 
uvicorn.run("app:app", host=HOST, port=PORT, reload=DEBUG, log_level="info") diff --git a/lab2c/app_python/docs/LAB02.md b/lab2c/app_python/docs/LAB02.md new file mode 100644 index 0000000000..dd91a49278 --- /dev/null +++ b/lab2c/app_python/docs/LAB02.md @@ -0,0 +1,111 @@ +# LAB02 - Docker Containerization (Python) + +## Docker Best Practices Applied +- **Pinned base image**: `python:3.13-slim` keeps the image small and reproducible. +- **Non-root user**: the container runs as `appuser`, so the service does not run as root. +- **Layer caching**: dependencies are installed before copying the app so rebuilds are faster. +- **Minimal copy**: only `requirements.txt` and `app.py` are copied into the image. +- **.dockerignore**: excluded tests, docs, and virtualenvs to keep the build context small. + +Dockerfile snippet: +```dockerfile +FROM python:3.13-slim +WORKDIR /app +RUN useradd -m -u 10001 appuser +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt +COPY --chown=appuser:appuser app.py . +USER appuser +``` + +## Image Information and Decisions +- **Base image choice**: `python:3.13-slim` is a good balance of size and compatibility. +- **Final image size**: `` +- **Layer structure**: dependencies are installed in their own layer to benefit from caching. +- **Optimization choices**: small base image, no extra build tools, only required files copied. + +Image size output: +```text +tsixphoenix/devops-info-python beta 04eec5e16beb 5 minutes ago 228MB +``` + +## Build and Run Process +Build output: +```text +docker build -t tsixphoenix/devops-info-python:beta . 
+[+] Building 16.7s (11/11) FINISHED docker:desktop-linux + => [internal] load build definition from Dockerfile 0.0s + => => transferring dockerfile: 332B 0.0s + => [internal] load metadata for docker.io/library/python:3.13-slim 2.3s + => [internal] load .dockerignore 0.0s + => => transferring context: 133B 0.0s + => [1/6] FROM docker.io/library/python:3.13-slim@sha256:51e1a0a317fdb6e170dc791bbeae63fac5272c82f43958ef74a34e170c6f8b18 2.4s + => => resolve docker.io/library/python:3.13-slim@sha256:51e1a0a317fdb6e170dc791bbeae63fac5272c82f43958ef74a34e170c6f8b18 0.0s + => => sha256:8843ea38a07e15ac1b99c72108fbb492f737032986cc0b65ed351f84e5521879 1.29MB / 1.29MB 0.5s + => => sha256:36b6de65fd8d6bd36071ea9efa7d078ebdc11ecc23d2426ec9c3e9f092ae824d 249B / 249B 0.6s + => => sha256:0bee50492702eb5d822fbcbac8f545a25f5fe173ec8030f57691aefcc283bbc9 11.79MB / 11.79MB 1.5s + => => extracting sha256:8843ea38a07e15ac1b99c72108fbb492f737032986cc0b65ed351f84e5521879 0.3s + => => extracting sha256:0bee50492702eb5d822fbcbac8f545a25f5fe173ec8030f57691aefcc283bbc9 0.8s + => => extracting sha256:36b6de65fd8d6bd36071ea9efa7d078ebdc11ecc23d2426ec9c3e9f092ae824d 0.0s + => [internal] load build context 0.0s + => => transferring context: 4.60kB 0.0s + => [2/6] WORKDIR /app 0.1s + => [3/6] RUN useradd -m -u 10001 appuser 0.6s + => [4/6] COPY requirements.txt . 0.0s + => [5/6] RUN pip install --no-cache-dir -r requirements.txt 8.8s + => [6/6] COPY --chown=appuser:appuser app.py . 
0.1s + => exporting to image 2.1s + => => exporting layers 1.4s + => => exporting manifest sha256:89257312508e9a26af1f7400253d9556816a0fc9230a414836bcedb8a4881c86 0.0s + => => exporting config sha256:a7d85cde725e6fdfb1dfbccbb9daadb4138561a5698ac01f5f6e2780b62994f3 0.0s + => => exporting attestation manifest sha256:82c962563c14aaa47813d2f1b62afb9806c83dbb0519256fd9954a50ea14fd3f 0.0s + => => exporting manifest list sha256:04eec5e16beb90a39cdac694238e9c6301410b6fa987d7b7788c03287ed57da0 0.0s + => => naming to docker.io/tsixphoenix/devops-info-python:beta 0.0s + => => unpacking to docker.io/tsixphoenix/devops-info-python:beta +``` + +Run output (container start): +```text +docker run --rm -p 5000:5000 --name devops-info tsixphoenix/devops-info-python:beta +2026-01-29 12:23:57,799 - INFO - Starting DevOps Info Service on 0.0.0.0:5000 +INFO: Started server process [1] +INFO: Waiting for application startup. +INFO: Application startup complete. +INFO: Uvicorn running on http://0.0.0.0:5000 (Press CTRL+C to quit) +``` + +Endpoint checks: +```text +curl http://localhost:5000/ +{"service":{"name":"devops-info-service","version":"1.0.0","description":"DevOps course info service","framework":"FastAPI"},"system":{"hostname":"d65d9dfde3f9","platform":"Linux","platform_version":"6.6.87.2-microsoft-standard-WSL2","architecture":"x86_64","cpu_count":12,"python_version":"3.13.11"},"runtime":{"uptime_seconds":98,"uptime_human":"0 hours, 1 minute","current_time":"2026-01-29T12:25:35.964833Z","timezone":"UTC"},"request":{"client_ip":"172.17.0.1","user_agent":"curl/8.16.0","method":"GET","path":"/"},"endpoints":[{"path":"/","method":"GET","description":"Service information"},{"path":"/health","method":"GET","description":"Health check"}]} + +curl http://localhost:5000/health +{"status":"healthy","timestamp":"2026-01-29T12:25:56.660917Z","uptime_seconds":118} + +2026-01-29 12:25:35,964 - INFO - Request: GET / +2026-01-29 12:25:35,965 - INFO - Response: GET / -> 200 +INFO: 
172.17.0.1:54462 - "GET / HTTP/1.1" 200 OK +2026-01-29 12:25:56,659 - INFO - Request: GET /health +2026-01-29 12:25:56,661 - INFO - Response: GET /health -> 200 +INFO: 172.17.0.1:57328 - "GET /health HTTP/1.1" 200 OK +``` + +Docker Hub repository URL: +``` +https://hub.docker.com/repository/docker/tsixphoenix/devops-info-python/general +``` + +Tagging strategy: +``` +version tag +``` + +## Technical Analysis +- The Dockerfile copies `requirements.txt` first so dependency layers are cached between builds. +- If I copied the whole project before installing dependencies, every code change would bust the cache. +- Running as a non-root user reduces risk if a container is compromised. +- `.dockerignore` keeps the build context small, which speeds up the build and reduces image size. + +## Challenges and Solutions +- I verified the app binds to `0.0.0.0` so it is reachable from outside the container. +- I double-checked that only the needed files are copied into the image to avoid bloating it. 
diff --git a/lab2c/app_python/requirements.txt b/lab2c/app_python/requirements.txt new file mode 100644 index 0000000000..792449289f --- /dev/null +++ b/lab2c/app_python/requirements.txt @@ -0,0 +1,2 @@ +fastapi==0.115.0 +uvicorn[standard]==0.32.0 diff --git a/lab2c/app_python/tests/__init__.py b/lab2c/app_python/tests/__init__.py new file mode 100644 index 0000000000..792d600548 --- /dev/null +++ b/lab2c/app_python/tests/__init__.py @@ -0,0 +1 @@ +# diff --git a/lab3c/app_go/.dockerignore b/lab3c/app_go/.dockerignore new file mode 100644 index 0000000000..55a3b7cb13 --- /dev/null +++ b/lab3c/app_go/.dockerignore @@ -0,0 +1,7 @@ +*.exe +*.log +.git/ +.gitignore +.idea/ +.vscode/ +docs/ diff --git a/lab3c/app_go/Dockerfile b/lab3c/app_go/Dockerfile new file mode 100644 index 0000000000..534bac98be --- /dev/null +++ b/lab3c/app_go/Dockerfile @@ -0,0 +1,21 @@ +FROM golang:1.22 AS builder + +WORKDIR /src + +COPY go.mod ./ +RUN go mod download + +COPY main.go ./ +RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o devops-info + +FROM gcr.io/distroless/static-debian12:nonroot + +WORKDIR /app +COPY --from=builder /src/devops-info /app/devops-info + +ENV HOST=0.0.0.0 \ + PORT=5000 + +EXPOSE 5000 + +ENTRYPOINT ["/app/devops-info"] diff --git a/lab3c/app_go/README.md b/lab3c/app_go/README.md new file mode 100644 index 0000000000..36e81eb856 --- /dev/null +++ b/lab3c/app_go/README.md @@ -0,0 +1,41 @@ +# DevOps Info Service (Go) + +## Overview +Compiled-language version of the DevOps info service. It exposes the same two endpoints as the Python app and keeps the JSON response structure consistent. 
+ +## Prerequisites +- Go 1.22+ installed + +## Build and Run +Run directly: +```bash +go run main.go +``` + +Build a binary: +```bash +go build -o devops-info +./devops-info +``` + +Windows build/run: +```bash +go build -o devops-info.exe +.\devops-info.exe +``` + +Custom config examples: +```bash +PORT=8080 go run main.go +HOST=127.0.0.1 PORT=3000 go run main.go +``` + +## API Endpoints +- `GET /` - Service and system information +- `GET /health` - Health check + +## Configuration +| Variable | Default | Description | +| --- | --- | --- | +| `HOST` | `0.0.0.0` | Bind address for the server | +| `PORT` | `5000` | Port to listen on | diff --git a/lab3c/app_go/docs/LAB03.md b/lab3c/app_go/docs/LAB03.md new file mode 100644 index 0000000000..2ae68b20de --- /dev/null +++ b/lab3c/app_go/docs/LAB03.md @@ -0,0 +1,16 @@ +# LAB03 - CI/CD (Go Bonus) + +## Multi-App CI Summary +I added a separate workflow for the Go app with its own path filters. This keeps Python and Go CI independent and avoids running jobs that are not needed. + +## Path Filters +- Go workflow runs only when `lab3c/app_go/**` or its workflow file changes. +- Python workflow runs only when `lab3c/app_python/**` or its workflow file changes. + +## Workflow Evidence +- **Go workflow run:** +- **Docker image on Docker Hub:** + +## Notes +- Go CI uses `go test` and a basic lint step. +- Docker builds use the same CalVer tag scheme as Python. 
diff --git a/lab3c/app_go/go.mod b/lab3c/app_go/go.mod new file mode 100644 index 0000000000..7a7fcedd1c --- /dev/null +++ b/lab3c/app_go/go.mod @@ -0,0 +1,3 @@ +module devops-info-service + +go 1.22 diff --git a/lab3c/app_go/main.go b/lab3c/app_go/main.go new file mode 100644 index 0000000000..2abcd3938a --- /dev/null +++ b/lab3c/app_go/main.go @@ -0,0 +1,257 @@ +package main + +import ( + "encoding/json" + "fmt" + "log" + "net" + "net/http" + "os" + "runtime" + "strings" + "time" +) + +type Service struct { + Name string `json:"name"` + Version string `json:"version"` + Description string `json:"description"` + Framework string `json:"framework"` +} + +type System struct { + Hostname string `json:"hostname"` + Platform string `json:"platform"` + PlatformVersion string `json:"platform_version"` + Architecture string `json:"architecture"` + CPUCount int `json:"cpu_count"` + PythonVersion string `json:"python_version"` +} + +type Runtime struct { + UptimeSeconds int `json:"uptime_seconds"` + UptimeHuman string `json:"uptime_human"` + CurrentTime string `json:"current_time"` + Timezone string `json:"timezone"` +} + +type RequestInfo struct { + ClientIP string `json:"client_ip"` + UserAgent string `json:"user_agent"` + Method string `json:"method"` + Path string `json:"path"` +} + +type Endpoint struct { + Path string `json:"path"` + Method string `json:"method"` + Description string `json:"description"` +} + +type Response struct { + Service Service `json:"service"` + System System `json:"system"` + Runtime Runtime `json:"runtime"` + Request RequestInfo `json:"request"` + Endpoints []Endpoint `json:"endpoints"` +} + +var startTime = time.Now().UTC() + +func main() { + host := getenv("HOST", "0.0.0.0") + port := getenv("PORT", "5000") + addr := net.JoinHostPort(host, port) + + mux := http.NewServeMux() + mux.HandleFunc("/", rootHandler) + mux.HandleFunc("/health", healthHandler) + + handler := recoverMiddleware(loggingMiddleware(mux)) + + server := &http.Server{ + 
Addr: addr, + Handler: handler, + ReadHeaderTimeout: 5 * time.Second, + } + + log.Printf("Starting DevOps Info Service on %s", addr) + if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed { + log.Fatalf("server error: %v", err) + } +} + +func rootHandler(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/" { + writeNotFound(w) + return + } + if r.Method != http.MethodGet { + writeMethodNotAllowed(w) + return + } + + uptimeSeconds, uptimeHuman := getUptime() + now := time.Now().UTC() + + hostname, _ := os.Hostname() + response := Response{ + Service: Service{ + Name: "devops-info-service", + Version: "1.0.0", + Description: "DevOps course info service", + Framework: "Go net/http", + }, + System: System{ + Hostname: hostname, + Platform: runtime.GOOS, + PlatformVersion: getPlatformVersion(), + Architecture: runtime.GOARCH, + CPUCount: runtime.NumCPU(), + PythonVersion: runtime.Version(), + }, + Runtime: Runtime{ + UptimeSeconds: uptimeSeconds, + UptimeHuman: uptimeHuman, + CurrentTime: now.Format(time.RFC3339), + Timezone: "UTC", + }, + Request: RequestInfo{ + ClientIP: getClientIP(r), + UserAgent: r.UserAgent(), + Method: r.Method, + Path: r.URL.Path, + }, + Endpoints: []Endpoint{ + {Path: "/", Method: "GET", Description: "Service information"}, + {Path: "/health", Method: "GET", Description: "Health check"}, + }, + } + + writeJSON(w, http.StatusOK, response) +} + +func healthHandler(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/health" { + writeNotFound(w) + return + } + if r.Method != http.MethodGet { + writeMethodNotAllowed(w) + return + } + + uptimeSeconds, _ := getUptime() + payload := map[string]any{ + "status": "healthy", + "timestamp": time.Now().UTC().Format(time.RFC3339), + "uptime_seconds": uptimeSeconds, + } + + writeJSON(w, http.StatusOK, payload) +} + +func getUptime() (int, string) { + seconds := int(time.Since(startTime).Seconds()) + hours := seconds / 3600 + minutes := (seconds % 3600) / 60 + 
hourLabel := "hours" + if hours == 1 { + hourLabel = "hour" + } + minuteLabel := "minutes" + if minutes == 1 { + minuteLabel = "minute" + } + return seconds, fmt.Sprintf("%d %s, %d %s", hours, hourLabel, minutes, minuteLabel) +} + +func getClientIP(r *http.Request) string { + if forwarded := r.Header.Get("X-Forwarded-For"); forwarded != "" { + parts := strings.Split(forwarded, ",") + return strings.TrimSpace(parts[0]) + } + host, _, err := net.SplitHostPort(r.RemoteAddr) + if err == nil { + return host + } + return r.RemoteAddr +} + +func getPlatformVersion() string { + if value := os.Getenv("OS"); value != "" { + return value + } + if data, err := os.ReadFile("/etc/os-release"); err == nil { + for _, line := range strings.Split(string(data), "\n") { + if strings.HasPrefix(line, "PRETTY_NAME=") { + return strings.Trim(strings.TrimPrefix(line, "PRETTY_NAME="), "\"") + } + } + } + return "unknown" +} + +func writeJSON(w http.ResponseWriter, status int, payload any) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + if err := json.NewEncoder(w).Encode(payload); err != nil { + log.Printf("json encode error: %v", err) + } +} + +func writeNotFound(w http.ResponseWriter) { + writeJSON(w, http.StatusNotFound, map[string]string{ + "error": "Not Found", + "message": "Endpoint does not exist", + }) +} + +func writeMethodNotAllowed(w http.ResponseWriter) { + writeJSON(w, http.StatusMethodNotAllowed, map[string]string{ + "error": "Method Not Allowed", + "message": "Only GET is supported for this endpoint", + }) +} + +type statusRecorder struct { + http.ResponseWriter + status int +} + +func (recorder *statusRecorder) WriteHeader(code int) { + recorder.status = code + recorder.ResponseWriter.WriteHeader(code) +} + +func loggingMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + recorder := &statusRecorder{ResponseWriter: w, status: http.StatusOK} + start := time.Now() + 
log.Printf("Request: %s %s", r.Method, r.URL.Path) + next.ServeHTTP(recorder, r) + log.Printf("Response: %s %s -> %d (%s)", r.Method, r.URL.Path, recorder.status, time.Since(start)) + }) +} + +func recoverMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + defer func() { + if err := recover(); err != nil { + log.Printf("panic recovered: %v", err) + writeJSON(w, http.StatusInternalServerError, map[string]string{ + "error": "Internal Server Error", + "message": "An unexpected error occurred", + }) + } + }() + next.ServeHTTP(w, r) + }) +} + +func getenv(key, fallback string) string { + if value := os.Getenv(key); value != "" { + return value + } + return fallback +} diff --git a/lab3c/app_go/main_test.go b/lab3c/app_go/main_test.go new file mode 100644 index 0000000000..b8ba60fefa --- /dev/null +++ b/lab3c/app_go/main_test.go @@ -0,0 +1,54 @@ +package main + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" +) + +func TestRootHandlerOK(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/", nil) + rec := httptest.NewRecorder() + + rootHandler(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", rec.Code) + } + + var payload map[string]any + if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil { + t.Fatalf("invalid json: %v", err) + } + + if _, ok := payload["service"]; !ok { + t.Fatal("missing service section") + } + if _, ok := payload["system"]; !ok { + t.Fatal("missing system section") + } + if _, ok := payload["runtime"]; !ok { + t.Fatal("missing runtime section") + } +} + +func TestHealthHandlerOK(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/health", nil) + rec := httptest.NewRecorder() + + healthHandler(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", rec.Code) + } + + var payload map[string]any + if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil { + 
t.Fatalf("invalid json: %v", err) + } + + if payload["status"] != "healthy" { + t.Fatalf("unexpected status: %v", payload["status"]) + } +} diff --git a/lab3c/app_python/.dockerignore b/lab3c/app_python/.dockerignore new file mode 100644 index 0000000000..b7738de7b8 --- /dev/null +++ b/lab3c/app_python/.dockerignore @@ -0,0 +1,12 @@ +__pycache__/ +*.py[cod] +*.log +venv/ +.venv/ +.env +.git/ +.gitignore +.idea/ +.vscode/ +docs/ +tests/ diff --git a/lab3c/app_python/.gitignore b/lab3c/app_python/.gitignore new file mode 100644 index 0000000000..8052e93c8b --- /dev/null +++ b/lab3c/app_python/.gitignore @@ -0,0 +1,14 @@ +__pycache__/ +*.py[cod] +*.log +venv/ +.venv/ +.env + +# IDE +.idea/ +.vscode/ + +# OS +.DS_Store +Thumbs.db diff --git a/lab3c/app_python/Dockerfile b/lab3c/app_python/Dockerfile new file mode 100644 index 0000000000..76219e6c10 --- /dev/null +++ b/lab3c/app_python/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.13-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +RUN useradd -m -u 10001 appuser + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY --chown=appuser:appuser app.py . + +USER appuser + +EXPOSE 5000 + +CMD ["python", "app.py"] diff --git a/lab3c/app_python/README.md b/lab3c/app_python/README.md new file mode 100644 index 0000000000..e12a3ea6bc --- /dev/null +++ b/lab3c/app_python/README.md @@ -0,0 +1,78 @@ +# DevOps Info Service (FastAPI) + +[![Python CI](https://github.com/TsixPhoenix/DevOps-CC/actions/workflows/python-ci.yml/badge.svg?branch=lab03)](https://github.com/TsixPhoenix/DevOps-CC/actions/workflows/python-ci.yml) + + +## Overview +Small service returning system info about the machine it runs on, plus a health check. 
+
+## Prerequisites
+- Python 3.11+
+- pip
+- (Optional) venv tool
+
+## Installation
+```bash
+python -m venv venv
+.\venv\Scripts\Activate.ps1
+pip install -r requirements.txt -r requirements-dev.txt
+```
+
+## Running the Application
+```bash
+python app.py
+```
+
+Custom config examples:
+```bash
+PORT=8080 python app.py
+HOST=127.0.0.1 PORT=3000 python app.py
+```
+
+FastAPI docs:
+- `http://localhost:5000/docs`
+
+## Tests
+Run locally:
+```bash
+pytest
+```
+
+Run with coverage:
+```bash
+pytest --cov=app --cov-report=term
+```
+
+## API Endpoints
+- `GET /` - Service and system information
+- `GET /health` - Health check
+
+## Configuration
+| Variable | Default | Description |
+| --- | --- | --- |
+| `HOST` | `0.0.0.0` | Bind address for the server |
+| `PORT` | `5000` | Port to listen on |
+| `DEBUG` | `False` | Enable auto-reload |
+
+## Docker
+Command patterns (replace the UPPERCASE placeholders with your values):
+
+**Build locally**
+```bash
+docker build -t USER/IMAGE:TAG .
+```
+
+**Run container**
+```bash
+docker run --rm -p HOST_PORT:5000 --name CONTAINER_NAME USER/IMAGE:TAG
+```
+
+**Pull from Docker Hub**
+```bash
+docker pull USER/IMAGE:TAG
+```
+
+Optional env overrides:
+```bash
+docker run --rm -e PORT=5000 -e HOST=0.0.0.0 -p HOST_PORT:5000 USER/IMAGE:TAG
+```
diff --git a/lab3c/app_python/app.py b/lab3c/app_python/app.py
new file mode 100644
index 0000000000..763238476c
--- /dev/null
+++ b/lab3c/app_python/app.py
@@ -0,0 +1,287 @@
+"""
+DevOps Info Service
+FastAPI application module.
+""" + +from __future__ import annotations + +import json +import logging +import os +import platform +import socket +import time +from datetime import datetime, timezone + +import uvicorn +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse, Response +from prometheus_client import ( + CONTENT_TYPE_LATEST, + CollectorRegistry, + Counter, + Gauge, + Histogram, + generate_latest, +) +from starlette.exceptions import HTTPException as StarletteHTTPException + +# Use a custom registry so we don't clash with the default one (avoids "Duplicated timeseries" when the app module is loaded more than once). +_prometheus_registry = CollectorRegistry() + +# Prometheus metrics (RED: Rate, Errors, Duration) +http_requests_total = Counter( + "http_requests_total", + "Total HTTP requests", + ["method", "endpoint", "status"], + registry=_prometheus_registry, +) +http_request_duration_seconds = Histogram( + "http_request_duration_seconds", + "HTTP request duration in seconds", + ["method", "endpoint"], + buckets=(0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0), + registry=_prometheus_registry, +) +http_requests_in_progress = Gauge( + "http_requests_in_progress", + "HTTP requests currently being processed", + registry=_prometheus_registry, +) +# Application-specific metrics +devops_info_endpoint_calls = Counter( + "devops_info_endpoint_calls", + "Endpoint calls for DevOps info service", + ["endpoint"], + registry=_prometheus_registry, +) +devops_info_system_collection_seconds = Histogram( + "devops_info_system_collection_seconds", + "System info collection time in seconds", + buckets=(0.001, 0.005, 0.01, 0.025, 0.05, 0.1), + registry=_prometheus_registry, +) + +# Config +HOST = os.getenv("HOST", "0.0.0.0") +PORT = int(os.getenv("PORT", "5000")) +DEBUG = os.getenv("DEBUG", "False").lower() == "true" + +SERVICE_NAME = "devops-info-service" +SERVICE_VERSION = "1.0.0" +SERVICE_DESCRIPTION = "DevOps course info service" +SERVICE_FRAMEWORK = "FastAPI" + 
+START_TIME = datetime.now(timezone.utc) + +logger = logging.getLogger("devops-info-service") +logger.setLevel(logging.INFO) + +handler = logging.StreamHandler() +handler.setLevel(logging.INFO) +logger.handlers = [handler] + +app = FastAPI( + title="DevOps Info Service", + version=SERVICE_VERSION, + description=SERVICE_DESCRIPTION, +) + + +def _format_uptime(seconds: int) -> str: + hours = seconds // 3600 + minutes = (seconds % 3600) // 60 + hour_label = "hour" if hours == 1 else "hours" + minute_label = "minute" if minutes == 1 else "minutes" + return f"{hours} {hour_label}, {minutes} {minute_label}" + + +def get_uptime() -> dict[str, int | str]: + delta = datetime.now(timezone.utc) - START_TIME + seconds = int(delta.total_seconds()) + return { + "seconds": seconds, + "human": _format_uptime(seconds), + } + + +def get_system_info() -> dict[str, str | int]: + return { + "hostname": socket.gethostname(), + "platform": platform.system(), + "platform_version": platform.release(), + "architecture": platform.machine(), + "cpu_count": os.cpu_count() or 0, + "python_version": platform.python_version(), + } + + +def isoformat_utc(dt: datetime) -> str: + return dt.astimezone(timezone.utc).isoformat().replace("+00:00", "Z") + + +def _normalize_endpoint(path: str) -> str: + """Normalize path for metric labels to keep cardinality low.""" + if path in ("/", "/health", "/metrics"): + return path + return "other" + + +@app.middleware("http") +async def log_requests(request: Request, call_next): + start_time = time.perf_counter() + endpoint = _normalize_endpoint(request.url.path) + http_requests_in_progress.inc() + try: + response = await call_next(request) + status = str(response.status_code) + http_requests_total.labels( + method=request.method, endpoint=endpoint, status=status + ).inc() + http_request_duration_seconds.labels( + method=request.method, endpoint=endpoint + ).observe(time.perf_counter() - start_time) + return response + finally: + http_requests_in_progress.dec() + 
+ +@app.middleware("http") +async def log_requests(request: Request, call_next): + start_time = datetime.now(timezone.utc) + logger.info( + json.dumps( + { + "timestamp": isoformat_utc(start_time), + "level": "INFO", + "service": SERVICE_NAME, + "event": "request", + "method": request.method, + "path": request.url.path, + "client_ip": request.client.host if request.client else "unknown", + "user_agent": request.headers.get("user-agent", "unknown"), + } + ) + ) + response = await call_next(request) + end_time = datetime.now(timezone.utc) + logger.info( + json.dumps( + { + "timestamp": isoformat_utc(end_time), + "level": "INFO", + "service": SERVICE_NAME, + "event": "response", + "method": request.method, + "path": request.url.path, + "status": response.status_code, + "client_ip": request.client.host if request.client else "unknown", + "user_agent": request.headers.get("user-agent", "unknown"), + } + ) + ) + return response + + +@app.exception_handler(StarletteHTTPException) +async def http_exception_handler(request: Request, exc: StarletteHTTPException): + if exc.status_code == 404: + return JSONResponse( + status_code=404, + content={ + "error": "Not Found", + "message": "Endpoint does not exist", + }, + ) + return JSONResponse( + status_code=exc.status_code, + content={"error": exc.detail}, + ) + + +@app.exception_handler(Exception) +async def unhandled_exception_handler(request: Request, exc: Exception): + logger.error( + json.dumps( + { + "timestamp": isoformat_utc(datetime.now(timezone.utc)), + "level": "ERROR", + "service": SERVICE_NAME, + "event": "exception", + "method": request.method, + "path": request.url.path, + "error": str(exc), + } + ) + ) + return JSONResponse( + status_code=500, + content={ + "error": "Internal Server Error", + "message": "An unexpected error occurred", + }, + ) + + +@app.get("/metrics") +async def metrics(): + """Prometheus metrics endpoint.""" + return Response( + content=generate_latest(_prometheus_registry), + 
media_type=CONTENT_TYPE_LATEST, + ) + + +@app.get("/") +async def root(request: Request): + devops_info_endpoint_calls.labels(endpoint="/").inc() + t0 = time.perf_counter() + sys_info = get_system_info() + devops_info_system_collection_seconds.observe(time.perf_counter() - t0) + uptime = get_uptime() + now = datetime.now(timezone.utc) + + response = { + "service": { + "name": SERVICE_NAME, + "version": SERVICE_VERSION, + "description": SERVICE_DESCRIPTION, + "framework": SERVICE_FRAMEWORK, + }, + "system": sys_info, + "runtime": { + "uptime_seconds": uptime["seconds"], + "uptime_human": uptime["human"], + "current_time": isoformat_utc(now), + "timezone": "UTC", + }, + "request": { + "client_ip": request.client.host if request.client else "unknown", + "user_agent": request.headers.get("user-agent", "unknown"), + "method": request.method, + "path": request.url.path, + }, + "endpoints": [ + {"path": "/", "method": "GET", "description": "Service information"}, + {"path": "/health", "method": "GET", "description": "Health check"}, + {"path": "/metrics", "method": "GET", "description": "Prometheus metrics"}, + ], + } + + return response + + +@app.get("/health") +async def health(): + devops_info_endpoint_calls.labels(endpoint="/health").inc() + uptime = get_uptime() + return { + "status": "healthy", + "timestamp": isoformat_utc(datetime.now(timezone.utc)), + "uptime_seconds": uptime["seconds"], + } + + +if __name__ == "__main__": + logger.info("Starting DevOps Info Service on %s:%s", HOST, PORT) + uvicorn.run("app:app", host=HOST, port=PORT, reload=DEBUG, log_level="info") diff --git a/lab3c/app_python/docs/LAB03.md b/lab3c/app_python/docs/LAB03.md new file mode 100644 index 0000000000..530353eaba --- /dev/null +++ b/lab3c/app_python/docs/LAB03.md @@ -0,0 +1,66 @@ +# LAB03 - CI/CD (Python) + +## 1. Overview +**Testing framework:** I used `pytest`. The syntax is clean, fixtures are easy to work with, and it is the default choice in most Python projects I see. 
+ +**What tests cover:** The tests hit `GET /`, `GET /health`, a 404 case, and helper functions like uptime formatting. I focused on structure and types instead of exact machine values. + +**Workflow triggers:** CI runs on push and pull requests to `lab03`, `main`, or `master`, but only when `lab3c/app_python/**` or the workflow file changes. + +**Versioning strategy:** I chose CalVer (YYYY.MM.DD). It is simple, and this service is released continuously rather than as a library. + +## 2. Workflow Evidence +Add real links and outputs after you run CI: +- **Successful workflow run:** https://github.com/TsixPhoenix/DevOps-CC/actions/runs/21865003310/job/63103839665 +- **Tests passing locally:** +pytest +============================================================================================ test session starts ============================================================================================ +platform win32 -- Python 3.12.2, pytest-9.0.2, pluggy-1.6.0 +rootdir: C:\Users\Phoenix\PycharmProjects\DevOps\DevOps-CC\lab3c\app_python +configfile: pyproject.toml +plugins: anyio-4.12.1, cov-7.0.0 +collected 5 items + +tests\test_app.py ..... [100%] + +============================================================================================= 5 passed in 0.36s ============================================================================================= +- **Docker image on Docker Hub:** https://hub.docker.com/repository/docker/tsixphoenix/devops-info-python/general +- **Status badge:** Works, shows green check + +## 3. Best Practices Implemented +- **Dependency caching:** `actions/setup-python` caches pip packages to speed up installs. +- **Job separation:** tests run in one job, Docker build/push depends on test success. +- **Conditional push:** Docker images only push on `push` events (not on PRs). +- **Concurrency:** newer runs cancel older runs for the same branch. +- **Path filters:** CI runs only when the Python app changes (monorepo friendly). 
+- **Snyk scanning:** dependency scan runs in CI. + + +Snyk result: +``` +Run snyk test --file=requirements.txt --package-manager=pip + +Testing /home/runner/work/DevOps-CC/DevOps-CC/lab3c/app_python... + +Organization: tsixphoenix +Package manager: pip +Target file: requirements.txt +Project name: app_python +Open source: no +Project path: /home/runner/work/DevOps-CC/DevOps-CC/lab3c/app_python +Licenses: enabled + +✔ Tested 13 dependencies for known issues, no vulnerable paths found. +``` + +## 4. Key Decisions +**Versioning Strategy:** CalVer fits a small service that ships frequently. It is easy to read and does not require manual version bumps. + +**Docker Tags:** The workflow publishes `YYYY.MM.DD` and `latest` tags for the same image. + +**Workflow Triggers:** I used path filters to avoid running Python CI when only Go code changes. + +**Test Coverage:** Core endpoints and helper functions are tested. I did not try to cover every logging line. + +## 5. Challenges +- Everything was clear, because of experience of setting up CI/CD in my company workspace. 
diff --git a/lab3c/app_python/pyproject.toml b/lab3c/app_python/pyproject.toml new file mode 100644 index 0000000000..efb9a85312 --- /dev/null +++ b/lab3c/app_python/pyproject.toml @@ -0,0 +1,3 @@ +[tool.ruff] +select = ["E", "F"] +ignore = ["E501"] diff --git a/lab3c/app_python/requirements-dev.txt b/lab3c/app_python/requirements-dev.txt new file mode 100644 index 0000000000..c6610506e3 --- /dev/null +++ b/lab3c/app_python/requirements-dev.txt @@ -0,0 +1,5 @@ +pytest +pytest-cov +requests +ruff +httpx diff --git a/lab3c/app_python/requirements.txt b/lab3c/app_python/requirements.txt new file mode 100644 index 0000000000..d663691063 --- /dev/null +++ b/lab3c/app_python/requirements.txt @@ -0,0 +1,3 @@ +fastapi==0.128.6 +uvicorn[standard]==0.32.0 +prometheus-client==0.23.1 diff --git a/lab3c/app_python/tests/__init__.py b/lab3c/app_python/tests/__init__.py new file mode 100644 index 0000000000..792d600548 --- /dev/null +++ b/lab3c/app_python/tests/__init__.py @@ -0,0 +1 @@ +# diff --git a/lab3c/app_python/tests/test_app.py b/lab3c/app_python/tests/test_app.py new file mode 100644 index 0000000000..ff942f197e --- /dev/null +++ b/lab3c/app_python/tests/test_app.py @@ -0,0 +1,66 @@ +from datetime import datetime, timezone + +from fastapi.testclient import TestClient + +from app import _format_uptime, app, get_system_info, get_uptime, isoformat_utc + + +client = TestClient(app) + + +def test_root_endpoint_structure(): + response = client.get("/") + assert response.status_code == 200 + + data = response.json() + assert "service" in data + assert "system" in data + assert "runtime" in data + assert "request" in data + assert "endpoints" in data + + service = data["service"] + assert service["name"] == "devops-info-service" + assert service["framework"] == "FastAPI" + + system = data["system"] + for key in ["hostname", "platform", "platform_version", "architecture", "cpu_count", "python_version"]: + assert key in system + + runtime = data["runtime"] + assert 
isinstance(runtime["uptime_seconds"], int) + assert isinstance(runtime["uptime_human"], str) + assert runtime["timezone"] == "UTC" + + +def test_health_endpoint_structure(): + response = client.get("/health") + assert response.status_code == 200 + data = response.json() + assert data["status"] == "healthy" + assert isinstance(data["uptime_seconds"], int) + assert "timestamp" in data + + +def test_not_found_returns_json(): + response = client.get("/does-not-exist") + assert response.status_code == 404 + data = response.json() + assert data["error"] == "Not Found" + + +def test_helpers_are_consistent(): + system = get_system_info() + assert system["hostname"] + assert system["platform"] + assert system["python_version"] + + uptime = get_uptime() + assert uptime["seconds"] >= 0 + assert "hours" in uptime["human"] or "hour" in uptime["human"] + + +def test_format_and_iso_helpers(): + assert _format_uptime(3660) == "1 hour, 1 minute" + test_dt = datetime(2024, 1, 1, tzinfo=timezone.utc) + assert isoformat_utc(test_dt) == "2024-01-01T00:00:00Z" diff --git a/lab4c/docs/LAB04.md b/lab4c/docs/LAB04.md new file mode 100644 index 0000000000..5af0ddc8e7 --- /dev/null +++ b/lab4c/docs/LAB04.md @@ -0,0 +1,190 @@ +# LAB04 — Infrastructure as Code (Terraform & Pulumi) + +## 1. Cloud Provider & Infrastructure + +- **Cloud provider:** Yandex Cloud. +- **Rationale:** Used Yandex Cloud for this lab because of the free tier +- **Instance:** standard-v3, 2 cores 20%, 1 GB RAM, 10 GB disk. +- **Zone:** ru-central1-a. +- **Estimated cost:** Effectively $0 with the free tier for this kind of usage. +- **Resources created:** + - 1× VPC network + - 1× subnet + - 1× security group (SSH 22, HTTP 80, 5000) + - 1× compute instance (Ubuntu 22.04) + - Public IP + +## 2. 
Terraform Implementation + +- **Terraform version:** Terraform v1.14.5 +- **Project structure:** `terraform/` — main.tf (provider, Ubuntu image data source, VPC, subnet, security group, instance), variables.tf, outputs.tf, terraform.tfvars (gitignored). Auth via service account key path in tfvars +- **Key decisions:** Variables for folder_id, zone, SSH key path, and SSH CIDR so the same code works across environments. Data source for the latest Ubuntu 22.04 LTS image. Security group restricts SSH to our IP only; HTTP and 5000 are open for the app. +- **Challenges:** Getting auth right at first; I ended up putting the key file path in terraform.tfvars). Also hit the VPC network quota once and had to extend it. + +**Terminal output:** + +- `terraform init`: + ``` +terraform init +Initializing the backend... +Initializing provider plugins... +- Finding yandex-cloud/yandex versions matching "~> 0.100"... +- Installing yandex-cloud/yandex v0.187.0... +- Installed yandex-cloud/yandex v0.187.0 + ``` +- `terraform plan`: + ``` +terraform plan +data.yandex_compute_image.ubuntu: Reading... +data.yandex_compute_image.ubuntu: Read complete after 0s [id=***********] + +Terraform used the selected providers to generate the following execution plan. Resource actions are indicated with the +following symbols: + + create + +Terraform will perform the following actions: + + # yandex_compute_instance.lab4 will be created + + resource "yandex_compute_instance" "lab4" { + ``` +- `terraform apply`: + ``` +terraform apply +data.yandex_compute_image.ubuntu: Reading... +data.yandex_compute_image.ubuntu: Read complete after 0s [id=***********] + +Terraform used the selected providers to generate the following execution plan. 
Resource actions are indicated with the +following symbols: + + create + +Terraform will perform the following actions: + + # yandex_compute_instance.lab4 will be created + + resource "yandex_compute_instance" "lab4" { + ``` +- `SSH to VM`: + ``` +The programs included with the Ubuntu system are free software; +the exact distribution terms for each program are described in the +individual files in /usr/share/doc/*/copyright. + +Ubuntu comes with ABSOLUTELY NO WARRANTY, to the extent permitted by +applicable law. + +To run a command as administrator (user "root"), use "sudo ". +See "man sudo_root" for details. + +ubuntu@fhm24d5clqr3oh7b101s:~$ + ``` + +## 3. Pulumi Implementation + +- **Pulumi version and language:** Pulumi v3.222.0, Python 3.x. +- **How it differs from Terraform:** Same logical resources (VPC, subnet, security group, VM), but defined in Python. You get normal Python (loops, functions, types) and the same state/plan/apply workflow. +- **Advantages:** Felt easier. Outputs are straightforward. +- **Challenges:** Initial setup took a bit: venv, `setuptools<82` for `pkg_resources`, and provider auth. 
+ +**Terminal output:** + +- `pulumi preview`: + ``` + pulumi preview +Previewing update (dev) + +View in Browser (Ctrl+O): + + Type Name Plan Info + + pulumi:pulumi:Stack lab4c-vm-dev create 2 messages + + ├─ yandex:index:VpcNetwork lab4c-network create + + ├─ yandex:index:VpcSubnet lab4c-subnet create + + ├─ yandex:index:VpcSecurityGroup lab4c-vm-sg create + + └─ yandex:index:ComputeInstance lab4c-vm create +Diagnostics: + pulumi:pulumi:Stack (lab4c-vm-dev): + import pkg_resources + +Outputs: + public_ip : [unknown] + ssh_command: [unknown] + +Resources: + + 5 to create + ``` +- `pulumi up`: + ``` + pulumi up +Previewing update (dev) + +View in Browser (Ctrl+O): + Type Name Plan Info + pulumi:pulumi:Stack lab4c-vm-dev 2 messages + + ├─ yandex:index:VpcNetwork lab4c-network create + + ├─ yandex:index:VpcSubnet lab4c-subnet create + + ├─ yandex:index:VpcSecurityGroup lab4c-vm-sg create + + └─ yandex:index:ComputeInstance lab4c-vm create +Diagnostics: + pulumi:pulumi:Stack (lab4c-vm-dev): + import pkg_resources + + [Pulumi Neo] Would you like help with these diagnostics? + +Outputs: + + public_ip : [unknown] + + ssh_command: [unknown] + +Resources: + + 4 to create + 1 unchanged + +Do you want to perform this update? yes +Updating (dev) + ``` +- SSH to VM: + ``` + The programs included with the Ubuntu system are free software; +the exact distribution terms for each program are described in the +individual files in /usr/share/doc/*/copyright. + +Ubuntu comes with ABSOLUTELY NO WARRANTY, to the extent permitted by +applicable law. + +To run a command as administrator (user "root"), use "sudo ". +See "man sudo_root" for details. + +ubuntu@fhm8nea1kubnsde4ooqn:~$ + ``` + +## 4. Terraform vs Pulumi Comparison + +- **Ease of learning:** Terraform is easier if you only care about “describe resources in a file and apply.” HCL is small and focused. Pulumi is easier if you already know Python and want to use normal code; +- **Code readability:** Both are readable. 
Terraform is very declarative: you see resources and attributes. Pulumi looks like normal code, so you can structure it with variables and functions. +- **Debugging:** With Terraform, you rely on plan/apply messages and sometimes `terraform state`. With Pulumi, you get Python stack traces and can add prints or a debugger; the program runs in your environment, which helps. +- **Documentation:** all services are well documented +- **Use case:** I’d pick Terraform when the team is standardizing on it, when you want maximum portability (HCL, big ecosystem), or when you’re mostly gluing provider resources. I’d pick Pulumi when the team is code-first, when you want to share logic with the rest of your app (same language, tests, refactors), or when you need loops, conditionals, or abstractions that are clumsy in HCL. + +## 5. Lab 5 Preparation & Cleanup + +**VM for Lab 5:** + +- **Keeping VM for Lab 5?** No. +- **Plan for Lab 5:** Will recreate a cloud VM when needed + +**Cleanup status:** +``` +terraform destroy +Destroy complete! Resources: 4 destroyed. 
+``` +and +``` +pulumi destroy +Previewing destroy (dev) + +View in Browser (Ctrl+O): + + Type Name Plan + - pulumi:pulumi:Stack lab4c-vm-dev delete + - ├─ yandex:index:ComputeInstance lab4c-vm delete + - ├─ yandex:index:VpcSubnet lab4c-subnet delete + - ├─ yandex:index:VpcSecurityGroup lab4c-vm-sg delete + - └─ yandex:index:VpcNetwork lab4c-network delete +``` diff --git a/lab4c/pulumi/.gitignore b/lab4c/pulumi/.gitignore new file mode 100644 index 0000000000..25c1585ba2 --- /dev/null +++ b/lab4c/pulumi/.gitignore @@ -0,0 +1,19 @@ +# Pulumi +Pulumi.*.yaml +!Pulumi.yaml +.pulumi/ + +# Python +__pycache__/ +*.py[cod] +venv/ +.venv/ +*.egg-info/ + +# IDE +.idea/ +.vscode/ + +# OS +.DS_Store +Thumbs.db diff --git a/lab4c/pulumi/Pulumi.yaml b/lab4c/pulumi/Pulumi.yaml new file mode 100644 index 0000000000..a3e2d9124e --- /dev/null +++ b/lab4c/pulumi/Pulumi.yaml @@ -0,0 +1,4 @@ +name: lab4c-vm +runtime: python +description: Lab 4 - VM on Yandex cloud + diff --git a/lab4c/pulumi/README.md b/lab4c/pulumi/README.md new file mode 100644 index 0000000000..5c263e239d --- /dev/null +++ b/lab4c/pulumi/README.md @@ -0,0 +1,50 @@ +# Lab 4 — Pulumi (Yandex Cloud) + +Same infrastructure as the Terraform stack: one VM, VPC, subnet, security group (SSH, HTTP, 5000). + +## Prerequisites + +- Pulumi CLI 3.x +- Python 3.9+ +- Yandex Cloud account (same auth as for Terraform: `YANDEX_TOKEN` or service account key) + +## Config + + +```bash +pulumi config set folder_id your-yandex-folder-id +pulumi config set ssh_cidr "YOUR_IP/32" +pulumi config set ssh_public_key "$(cat %USERPROFILE%\.ssh\id_rsa.pub)" +``` + +```powershell +pulumi config set ssh_public_key "$(Get-Content $env:USERPROFILE\.ssh\id_rsa.pub -Raw)" +``` + +Optional: `pulumi config set zone ru-central1-a` + +## Setup + +1. Log in to Pulumi: `pulumi login` +2. Create stack: `pulumi stack init dev` +3. 
Install deps and run: + + ```powershell + python -m venv venv + .\venv\Scripts\Activate.ps1 + pip install -r requirements.txt + pulumi preview + pulumi up + ``` + +4. SSH to VM: + + ```powershell + ssh ubuntu@$(pulumi stack output public_ip) + ``` + +## Cleanup + +```bash +pulumi destroy +``` diff --git a/lab4c/pulumi/__main__.py b/lab4c/pulumi/__main__.py new file mode 100644 index 0000000000..54482de1c3 --- /dev/null +++ b/lab4c/pulumi/__main__.py @@ -0,0 +1,102 @@ +"""Lab 4 - Create VM on Yandex Cloud (same as Terraform).""" +import os +import pulumi + +config = pulumi.Config() +key_file = config.get("yandex_service_account_key_file") +if key_file: + os.environ["YANDEX_SERVICE_ACCOUNT_KEY_FILE"] = key_file + +import pulumi_yandex as yandex + +folder_id = config.require("folder_id") +zone = config.get("zone") or "ru-central1-a" +ssh_public_key = config.require("ssh_public_key") +ssh_cidr = config.require("ssh_cidr") + +# Ubuntu 22.04 LTS +image = yandex.get_compute_image(family="ubuntu-2204-lts") + +network = yandex.VpcNetwork( + "lab4c-network", + name="lab4c-network", + folder_id=folder_id, +) + +subnet = yandex.VpcSubnet( + "lab4c-subnet", + name="lab4c-subnet", + network_id=network.id, + zone=zone, + folder_id=folder_id, + v4_cidr_blocks=["10.0.1.0/24"], +) + +sg = yandex.VpcSecurityGroup( + "lab4c-vm-sg", + name="lab4c-vm-sg", + network_id=network.id, + folder_id=folder_id, + description="Allow SSH, HTTP, and port 5000 for Lab 4", + ingresses=[ + yandex.VpcSecurityGroupIngressArgs( + description="SSH", + port=22, + protocol="TCP", + v4_cidr_blocks=[ssh_cidr], + ), + yandex.VpcSecurityGroupIngressArgs( + description="HTTP", + port=80, + protocol="TCP", + v4_cidr_blocks=["0.0.0.0/0"], + ), + yandex.VpcSecurityGroupIngressArgs( + description="App 5000", + port=5000, + protocol="TCP", + v4_cidr_blocks=["0.0.0.0/0"], + ), + ], + egresses=[ + yandex.VpcSecurityGroupEgressArgs( + description="Any", + protocol="ANY", + v4_cidr_blocks=["0.0.0.0/0"], + ), + ], +) + 
+vm = yandex.ComputeInstance( + "lab4c-vm", + name="lab4c-vm", + zone=zone, + folder_id=folder_id, + platform_id="standard-v3", + resources=yandex.ComputeInstanceResourcesArgs( + cores=2, + memory=1, + core_fraction=20, + ), + boot_disk=yandex.ComputeInstanceBootDiskArgs( + initialize_params=yandex.ComputeInstanceBootDiskInitializeParamsArgs( + image_id=image.id, + size=10, + type="network-hdd", + ), + ), + network_interfaces=[ + yandex.ComputeInstanceNetworkInterfaceArgs( + subnet_id=subnet.id, + nat=True, + security_group_ids=[sg.id], + ), + ], + metadata={ + "ssh-keys": f"ubuntu:{ssh_public_key}", + }, + labels={"lab": "lab04"}, +) + +pulumi.export("public_ip", vm.network_interfaces[0].nat_ip_address) +pulumi.export("ssh_command", pulumi.Output.concat("ssh ubuntu@", vm.network_interfaces[0].nat_ip_address)) diff --git a/lab4c/pulumi/requirements.txt b/lab4c/pulumi/requirements.txt new file mode 100644 index 0000000000..c2955fc5a6 --- /dev/null +++ b/lab4c/pulumi/requirements.txt @@ -0,0 +1,3 @@ +pulumi>=3.0.0 +pulumi-yandex>=0.13.0 +setuptools>=65.0.0,<82 diff --git a/lab4c/terraform/.gitignore b/lab4c/terraform/.gitignore new file mode 100644 index 0000000000..df419991b7 --- /dev/null +++ b/lab4c/terraform/.gitignore @@ -0,0 +1,21 @@ +# Terraform +*.tfstate +*.tfstate.* +.terraform/ +.terraform.lock.hcl +terraform.tfvars +*.tfvars +*.tfvars.json +crash.log +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Credentials +*.pem +*.key +*.json +!package.json +credentials +.env diff --git a/lab4c/terraform/.tflint.hcl b/lab4c/terraform/.tflint.hcl new file mode 100644 index 0000000000..75d15f14aa --- /dev/null +++ b/lab4c/terraform/.tflint.hcl @@ -0,0 +1,3 @@ +plugin "terraform" { + enabled = true +} diff --git a/lab4c/terraform/README.md b/lab4c/terraform/README.md new file mode 100644 index 0000000000..aee3cb8444 --- /dev/null +++ b/lab4c/terraform/README.md @@ -0,0 +1,59 @@ +# Lab 4 — Terraform (Yandex Cloud) + +Creates one VM (free tier: 2 
cores 20%, 1 GB RAM, 10 GB disk), VPC, subnet, security group (SSH, HTTP, 5000), and outputs public IP. + +## Prerequisites + +- Terraform 1.9+ +- Yandex Cloud account +- SSH key pair on your machine (e.g. `ssh-keygen`); you will use the **public** key path in Terraform + +## Authentication + +Use one of these (do not commit secrets): + +1. **OAuth token (quick):** + `set YANDEX_TOKEN=your_oauth_token` (cmd) or `$env:YANDEX_TOKEN = "..."` (PowerShell) + +2. **Service account key file:** + Create a service account in Yandex Cloud Console, create an authorized key (JSON), then: + `set YANDEX_SERVICE_ACCOUNT_KEY_FILE=C:\path\to\key.json` + or in `terraform.tfvars`: `yandex_token` (prefer env vars). + +3. **Folder ID:** + In Console: Cloud → folder → copy ID. Set in `terraform.tfvars` as `yandex_folder_id`. + +## Setup + +1. Copy and edit variables: + - **Windows:** `copy terraform.tfvars.example terraform.tfvars` + - **Linux/macOS:** `cp terraform.tfvars.example terraform.tfvars` + Edit: + - `yandex_folder_id` — your folder ID + - `yandex_zone` — e.g. `ru-central1-a` + - `ssh_public_key_path` — full path to your `.pub` file (e.g. `C:\Users\You\.ssh\id_rsa.pub` or `%USERPROFILE%\.ssh\id_rsa.pub`) + - `ssh_cidr` — your IP/32 (e.g. from https://ifconfig.me) + +2. Initialize and apply: + ```bash + terraform init + terraform plan + terraform apply + ``` + +3. 
SSH to VM (no `-i` needed if you use the same key as the one in metadata): + - **PowerShell:** `ssh ubuntu@$(terraform output -raw public_ip)` + - Or: `ssh -i C:\path\to\your_private_key ubuntu@` + +## Cleanup + +```bash +terraform destroy +``` + +## Files + +- `main.tf` — provider, network, subnet, security group, instance +- `variables.tf` — folder_id, zone, ssh_public_key_path, ssh_cidr +- `outputs.tf` — public_ip, ssh_command +- `terraform.tfvars` — your values (gitignored) diff --git a/lab4c/terraform/main.tf b/lab4c/terraform/main.tf new file mode 100644 index 0000000000..5152f76f06 --- /dev/null +++ b/lab4c/terraform/main.tf @@ -0,0 +1,101 @@ +terraform { + required_version = ">= 1.9" + required_providers { + yandex = { + source = "yandex-cloud/yandex" + version = "~> 0.100" + } + } +} + +provider "yandex" { + zone = var.yandex_zone + folder_id = var.yandex_folder_id + service_account_key_file = var.yandex_service_account_key_file +} + +# Ubuntu 22.04 +data "yandex_compute_image" "ubuntu" { + family = "ubuntu-2204-lts" +} + +resource "yandex_vpc_network" "lab4" { + name = "lab4c-network" +} + +resource "yandex_vpc_subnet" "lab4" { + name = "lab4c-subnet" + network_id = yandex_vpc_network.lab4.id + zone = var.yandex_zone + v4_cidr_blocks = ["10.0.1.0/24"] +} + +resource "yandex_vpc_security_group" "lab4" { + name = "lab4c-vm-sg" + network_id = yandex_vpc_network.lab4.id + description = "Allow SSH, HTTP, and port 5000 for Lab 4" + + ingress { + description = "SSH" + port = 22 + protocol = "TCP" + v4_cidr_blocks = [var.ssh_cidr] + } + + ingress { + description = "HTTP" + port = 80 + protocol = "TCP" + v4_cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + description = "App 5000" + port = 5000 + protocol = "TCP" + v4_cidr_blocks = ["0.0.0.0/0"] + } + + egress { + description = "Any" + from_port = 0 + to_port = 65535 + protocol = "ANY" + v4_cidr_blocks = ["0.0.0.0/0"] + } +} + +resource "yandex_compute_instance" "lab4" { + name = "lab4c-vm" + platform_id = 
"standard-v3" + zone = var.yandex_zone + folder_id = var.yandex_folder_id + + resources { + cores = 2 + memory = 1 + core_fraction = 20 + } + + boot_disk { + initialize_params { + image_id = data.yandex_compute_image.ubuntu.id + size = 10 + type = "network-hdd" + } + } + + network_interface { + subnet_id = yandex_vpc_subnet.lab4.id + nat = true + security_group_ids = [yandex_vpc_security_group.lab4.id] + } + + metadata = { + ssh-keys = "ubuntu:${file(var.ssh_public_key_path)}" + } + + labels = { + lab = "lab04" + } +} diff --git a/lab4c/terraform/outputs.tf b/lab4c/terraform/outputs.tf new file mode 100644 index 0000000000..2821ecd1bc --- /dev/null +++ b/lab4c/terraform/outputs.tf @@ -0,0 +1,9 @@ +output "public_ip" { + description = "Public IP of the VM" + value = yandex_compute_instance.lab4.network_interface[0].nat_ip_address +} + +output "ssh_command" { + description = "Example SSH command" + value = "ssh ubuntu@${yandex_compute_instance.lab4.network_interface[0].nat_ip_address}" +} diff --git a/lab4c/terraform/terraform.tfvars.example b/lab4c/terraform/terraform.tfvars.example new file mode 100644 index 0000000000..f36767e4d3 --- /dev/null +++ b/lab4c/terraform/terraform.tfvars.example @@ -0,0 +1,5 @@ +yandex_folder_id = +yandex_zone = +yandex_service_account_key_file = +ssh_public_key_path = +ssh_cidr = "1.2.3.4/32" diff --git a/lab4c/terraform/variables.tf b/lab4c/terraform/variables.tf new file mode 100644 index 0000000000..0d121ae84c --- /dev/null +++ b/lab4c/terraform/variables.tf @@ -0,0 +1,26 @@ +variable "yandex_folder_id" { + description = "Yandex Cloud folder ID" + type = string +} + +variable "yandex_zone" { + description = "Yandex Cloud zone" + type = string + default = "ru-central1-a" +} + +variable "ssh_public_key_path" { + description = "Path to your SSH public key file" + type = string +} + +variable "ssh_cidr" { + description = "CIDR allowed for SSH" + type = string +} + +variable "yandex_service_account_key_file" { + description = "Path to 
Yandex service account JSON key"
+ type = string
+ default = null
+}
diff --git a/lab5c/README.md b/lab5c/README.md
new file mode 100644
index 0000000000..b59d1ce2cc
--- /dev/null
+++ b/lab5c/README.md
@@ -0,0 +1,38 @@
+# Lab 05 Completion (`lab5c`)
+
+
+## Structure
+
+- `ansible/ansible.cfg` - project configuration
+- `ansible/inventory/hosts.ini` - static inventory template
+- `ansible/roles/common` - base system setup role
+- `ansible/roles/docker` - Docker installation role
+- `ansible/roles/app_deploy` - app deployment role
+- `ansible/playbooks/provision.yml` - provisioning playbook
+- `ansible/playbooks/deploy.yml` - deployment playbook
+- `ansible/playbooks/site.yml` - full provision + deploy flow
+- `ansible/group_vars/all.yml.example` - vault variable template
+- `ansible/docs/LAB05.md` - documentation template with analysis
+
+## Control-Node Setup (WSL)
+
+```bash
+sudo apt update
+sudo apt install -y ansible
+ansible-galaxy collection install -r requirements.yml
+```
+
+Bonus dynamic-inventory collection:
+
+```bash
+ansible-galaxy collection install -r requirements-bonus.yml
+```
+
+## Typical Run Order
+
+```bash
+ansible all -m ping
+ansible-playbook playbooks/provision.yml
+ansible-playbook playbooks/provision.yml  # second run: verifies idempotency (expect changed=0)
+ansible-playbook playbooks/deploy.yml --ask-vault-pass
+```
\ No newline at end of file
diff --git a/lab5c/ansible/.gitignore b/lab5c/ansible/.gitignore
new file mode 100644
index 0000000000..b487bb7be0
--- /dev/null
+++ b/lab5c/ansible/.gitignore
@@ -0,0 +1,4 @@
+.vault_pass
+*.retry
+inventory/*.pyc
+__pycache__/
diff --git a/lab5c/ansible/ansible.cfg b/lab5c/ansible/ansible.cfg
new file mode 100644
index 0000000000..2fb9889628
--- /dev/null
+++ b/lab5c/ansible/ansible.cfg
@@ -0,0 +1,13 @@
+[defaults]
+inventory = inventory/hosts.ini
+roles_path = roles
+host_key_checking = False
+remote_user = ubuntu
+retry_files_enabled = False
+interpreter_python = auto_silent
+timeout = 30
+
+[privilege_escalation]
+become = True
+become_method = 
sudo +become_user = root diff --git a/lab5c/ansible/docs/LAB05.md b/lab5c/ansible/docs/LAB05.md new file mode 100644 index 0000000000..7db4ecb2c6 --- /dev/null +++ b/lab5c/ansible/docs/LAB05.md @@ -0,0 +1,180 @@ +# LAB05 — Ansible Fundamentals (Role-Based) + +## 1. Architecture Overview + +- **Ansible version used:** Ansible Core 2.17.8. +- **Control node:** Windows 10 + Docker Desktop (Ansible executed in container). +- **Target VM:** Ubuntu 22.04/24.04 VM from Lab 4 (cloud), connected via SSH. +- **Role structure:** Three roles are used: + - `common` - baseline OS preparation + - `docker` - Docker engine installation and service setup + - `app_deploy` - Dockerized app deployment and health verification +- **Why roles instead of one large playbook:** Roles isolate responsibilities, keep playbooks clean, and make each part reusable. + +## 2. Roles Documentation + +### Role: `common` + +- **Purpose:** Prepare the system with required base packages and timezone. +- **Variables (defaults):** + - `common_packages` - essential packages list (`curl`, `git`, `python3-pip`, etc.) + - `common_timezone` - default `UTC` +- **Handlers:** None. +- **Dependencies:** `community.general` collection (for timezone module). + +### Role: `docker` + +- **Purpose:** Install Docker from the official Docker APT repository and prepare runtime access. +- **Variables (defaults):** + - `docker_arch_map`, `docker_arch` + - `docker_packages` (`docker-ce`, `docker-ce-cli`, `containerd.io`, plugins) + - `docker_python_package` (`python3-docker`) + - `docker_user` (user added to `docker` group) +- **Handlers:** + - `restart docker` - restarts Docker service when package changes require it. +- **Dependencies:** Uses Ansible built-in modules; installs `python3-docker` on target host for Docker-related modules. + +### Role: `app_deploy` + +- **Purpose:** Authenticate to Docker Hub, pull image, replace container, and verify app health. 
+- **Variables (defaults):** + - `app_name`, `docker_image`, `docker_image_tag` + - `app_port`, `app_container_name` + - `app_restart_policy`, `app_env` + - `app_health_path`, `app_wait_timeout` + - Vaulted vars: `dockerhub_username`, `dockerhub_password` +- **Handlers:** + - `restart app container` - restarts running container when deployment task triggers notify. +- **Dependencies:** `community.docker` collection. + +## 3. Idempotency Demonstration + +### First run (`playbooks/provision.yml`) + +```text +PLAY [Provision web servers] +... +TASK [common : Update apt cache] changed +TASK [common : Install common packages] changed +TASK [common : Configure timezone] changed +TASK [docker : Download Docker official GPG key] changed +TASK [docker : Add Docker APT repository] changed +TASK [docker : Update apt cache after Docker repo changes] changed +TASK [docker : Install Docker engine and CLI packages] changed +TASK [docker : Add target user to docker group] changed +RUNNING HANDLER [docker : restart docker] changed +... +PLAY RECAP +lab5-vm : ok=14 changed=9 unreachable=0 failed=0 skipped=0 +``` + +### Second run (`playbooks/provision.yml`) + +```text +PLAY [Provision web servers] +... +TASK [common : Update apt cache] ok +TASK [common : Install common packages] ok +TASK [common : Configure timezone] ok +TASK [docker : Download Docker official GPG key] ok +TASK [docker : Add Docker APT repository] ok +TASK [docker : Install Docker engine and CLI packages] ok +TASK [docker : Add target user to docker group] ok +TASK [docker : Update apt cache after Docker repo changes] skipping +... +PLAY RECAP +lab5-vm : ok=12 changed=0 unreachable=0 failed=0 skipped=1 +``` + +### Analysis + +- On the first run, resources are created/configured to match desired state (packages, repo, Docker service, group membership). +- On the second run, Ansible modules compare desired and current state and skip unnecessary changes, proving idempotent behavior. 
+- Idempotency is achieved by stateful modules (`apt`, `service`, `user`, `docker_container`) instead of ad-hoc shell commands. + +## 4. Ansible Vault Usage + +- Credentials are stored in `group_vars/all.yml` encrypted via Ansible Vault. +- Vault password is entered interactively (`--ask-vault-pass`) or provided via local password file that is ignored by Git. +- Tasks containing credentials use `no_log: true` to prevent secret leakage in logs. + +### Encrypted file proof + +```text +$ANSIBLE_VAULT;1.1;AES256 +64383638346636396532383762376239633430663933613638326235653962353634323766343664 +3436646365333032316364663736356565616462353663310a303061333835663866303562323132 +65356163313437653263333138366561633533646662336634393333313737336439326132323666 +``` + +### Why Vault is important + +- Secrets can be committed safely only in encrypted form. +- Team members can share infrastructure code without exposing credentials. +- It reduces accidental secret leakage in repo history and CI logs. + +## 5. Deployment Verification + +### Deployment run (`playbooks/deploy.yml`) + +```text +PLAY [Deploy application] +... +TASK [app_deploy : Log in to Docker Hub] changed +TASK [app_deploy : Pull application image] changed +TASK [app_deploy : Run application container] changed +TASK [app_deploy : Wait for app port to be ready] ok +TASK [app_deploy : Verify health endpoint] ok +RUNNING HANDLER [app_deploy : restart app container] changed +... 
+PLAY RECAP +lab5-vm : ok=8 changed=4 unreachable=0 failed=0 skipped=2 +``` + +### Container status + +```text +lab5-vm | CHANGED | rc=0 >> +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +e45f2bb4472d tsixphoenix/devops-info-python:latest "python app.py" 58 seconds ago Up 49 seconds 0.0.0.0:5000->5000/tcp devops-info-python +``` + +### Health check + +```text +curl http://89.169.158.161:5000/health +{"status":"healthy","timestamp":"2026-02-24T11:09:07.680263Z","uptime_seconds":14} + +curl http://89.169.158.161:5000/ +{"service":{"name":"devops-info-service","version":"1.0.0","description":"DevOps course info service","framework":"FastAPI"},"system":{"hostname":"e45f2bb4472d","platform":"Linux","platform_version":"5.15.0-170-generic","architecture":"x86_64","cpu_count":2,"python_version":"3.13.12"},"runtime":{"uptime_seconds":16,"uptime_human":"0 hours, 0 minutes","current_time":"2026-02-24T11:09:09.533021Z","timezone":"UTC"},"request":{"client_ip":"188.130.155.186","user_agent":"curl/8.16.0","method":"GET","path":"/"},"endpoints":[{"path":"/","method":"GET","description":"Service information"},{"path":"/health","method":"GET","description":"Health check"}]} +``` + +### Handler execution + +- Docker role handler: executed when Docker package changes require service restart. +- App deploy handler: executes only when container deployment task reports changes. + +## 6. Key Decisions + +- **Why use roles instead of plain playbooks?** + Roles separate concerns and keep top-level playbooks minimal. This reduces complexity and improves readability as automation grows. + +- **How do roles improve reusability?** + Roles encapsulate tasks + defaults + handlers. The same role can be reused across environments by changing only inventory and variables. + +- **What makes a task idempotent?** + Idempotent tasks declare target state (for example, `state: present`, `state: started`) and change only when current state differs. 
+ +- **How do handlers improve efficiency?** + Handlers run only when notified by changed tasks, so expensive operations (like restarts) are not executed on every run. + +- **Why is Ansible Vault necessary?** + It allows secure storage of credentials in versioned infrastructure code without exposing plaintext secrets. + +## 7. Challenges + +- Initial control-node setup on Windows (Ansible-in-Docker + mounted SSH key permissions). +- Correctly configuring Docker repository and architecture mapping. +- Verifying no secret values appear in output logs. + +--- \ No newline at end of file diff --git a/lab5c/ansible/group_vars/all.yml b/lab5c/ansible/group_vars/all.yml new file mode 100644 index 0000000000..5f415c4704 --- /dev/null +++ b/lab5c/ansible/group_vars/all.yml @@ -0,0 +1,23 @@ +$ANSIBLE_VAULT;1.1;AES256 +64383638346636396532383762376239633430663933613638326235653962353634323766343664 +3436646365333032316364663736356565616462353663310a303061333835663866303562323132 +65356163313437653263333138366561633533646662336634393333313737336439326132323666 +3862636235616563310a373334663339636438663966653834356330663464633263613633326130 +34336536353233343036643965633262613162366332373436636537616131353730303334666438 +34326635656361623062326333666333393430316566383132656436643535623363346663333366 +39376364653165376138383561623036373133616130366661313764383837396432303631336565 +36636631383963623537333836303430313431373335653534333064393033373861636332316339 +36383730633662396633336664633138643935363637383934326331366366653139333462656161 +37646535653066616161663836336561396264326336313935643163323164346634316634363036 +64383130616332323630303561313566373461376531643732366334616562616431386364643561 +35383362633536326434376639363531346362336666393334636337316262303763326333343762 +30373635633762623431333335663232616335666332353665326263636362323934393135336435 +65323534333033616538373964386336663637633935366137356363383135336238393637336430 
+61363661366261653634383934393430336361376166666261303935356337343234306330303462 +37326236393832376461653865356265393463326362333635653532633161326235336566316436 +34373436313533636333306437393966656536396435326666356536373763356132613263613038 +39353530393937363161656264663436313934373832623262633865363538313434303661633362 +36653233643231323066343639666630303632393333323966633437633762306535643436616131 +39383433393430303536343565303362616431666137613234663330336438323937356265666438 +38396130356666333032613834326637353230343235303031303363386137323736643466333963 +3065646533393438336638646163633461373432356339353831 diff --git a/lab5c/ansible/group_vars/all.yml.example b/lab5c/ansible/group_vars/all.yml.example new file mode 100644 index 0000000000..aa703de75b --- /dev/null +++ b/lab5c/ansible/group_vars/all.yml.example @@ -0,0 +1,19 @@ +--- +# Copy this file to group_vars/all.yml and encrypt it with: +# ansible-vault encrypt group_vars/all.yml + +# Docker Hub credentials +dockerhub_username: "DOCKERHUB_USERNAME" +dockerhub_password: "DOCKERHUB_ACCESS_TOKEN" + +# Application config +app_name: "devops-info-python" +docker_image: "{{ dockerhub_username }}/{{ app_name }}" +docker_image_tag: "latest" + +app_port: 5000 +app_container_name: "{{ app_name }}" +app_restart_policy: "unless-stopped" +app_env: {} +app_health_path: "/health" +app_wait_timeout: 120 diff --git a/lab5c/ansible/inventory/hosts.ini b/lab5c/ansible/inventory/hosts.ini new file mode 100644 index 0000000000..3bb6bcd055 --- /dev/null +++ b/lab5c/ansible/inventory/hosts.ini @@ -0,0 +1,5 @@ +[webservers] +lab5-vm ansible_host=89.169.158.161 ansible_user=ubuntu ansible_ssh_private_key_file=/ssh/id_ed25519 + +[webservers:vars] +ansible_python_interpreter=/usr/bin/python3 diff --git a/lab5c/ansible/playbooks/deploy.yml b/lab5c/ansible/playbooks/deploy.yml new file mode 100644 index 0000000000..532fb1e207 --- /dev/null +++ b/lab5c/ansible/playbooks/deploy.yml @@ -0,0 +1,10 @@ +--- +- name: Deploy 
application + hosts: webservers + become: true + + vars_files: + - ../group_vars/all.yml + + roles: + - app_deploy diff --git a/lab5c/ansible/playbooks/provision.yml b/lab5c/ansible/playbooks/provision.yml new file mode 100644 index 0000000000..7cc2e6678d --- /dev/null +++ b/lab5c/ansible/playbooks/provision.yml @@ -0,0 +1,8 @@ +--- +- name: Provision web servers + hosts: webservers + become: true + + roles: + - common + - docker diff --git a/lab5c/ansible/playbooks/site.yml b/lab5c/ansible/playbooks/site.yml new file mode 100644 index 0000000000..5621849987 --- /dev/null +++ b/lab5c/ansible/playbooks/site.yml @@ -0,0 +1,12 @@ +--- +- name: Provision and deploy application + hosts: webservers + become: true + + vars_files: + - ../group_vars/all.yml + + roles: + - common + - docker + - app_deploy diff --git a/lab5c/ansible/requirements.yml b/lab5c/ansible/requirements.yml new file mode 100644 index 0000000000..b869f415df --- /dev/null +++ b/lab5c/ansible/requirements.yml @@ -0,0 +1,4 @@ +--- +collections: + - name: community.docker + - name: community.general diff --git a/lab5c/ansible/roles/app_deploy/defaults/main.yml b/lab5c/ansible/roles/app_deploy/defaults/main.yml new file mode 100644 index 0000000000..7d7997a13a --- /dev/null +++ b/lab5c/ansible/roles/app_deploy/defaults/main.yml @@ -0,0 +1,11 @@ +--- +app_name: "devops-info-python" +docker_image: "{{ dockerhub_username }}/{{ app_name }}" +docker_image_tag: "latest" + +app_port: 5000 +app_container_name: "{{ app_name }}" +app_restart_policy: "unless-stopped" +app_env: {} +app_health_path: "/health" +app_wait_timeout: 120 diff --git a/lab5c/ansible/roles/app_deploy/handlers/main.yml b/lab5c/ansible/roles/app_deploy/handlers/main.yml new file mode 100644 index 0000000000..1fc3fba48b --- /dev/null +++ b/lab5c/ansible/roles/app_deploy/handlers/main.yml @@ -0,0 +1,6 @@ +--- +- name: restart app container + community.docker.docker_container: + name: "{{ app_container_name }}" + state: started + restart: true diff 
--git a/lab5c/ansible/roles/app_deploy/tasks/main.yml b/lab5c/ansible/roles/app_deploy/tasks/main.yml new file mode 100644 index 0000000000..9567528545 --- /dev/null +++ b/lab5c/ansible/roles/app_deploy/tasks/main.yml @@ -0,0 +1,62 @@ +--- +- name: Log in to Docker Hub + community.docker.docker_login: + registry_url: https://index.docker.io/v1/ + username: "{{ dockerhub_username }}" + password: "{{ dockerhub_password }}" + no_log: true + +- name: Pull application image + community.docker.docker_image: + name: "{{ docker_image }}" + tag: "{{ docker_image_tag }}" + source: pull + +- name: Read current container info + community.docker.docker_container_info: + name: "{{ app_container_name }}" + register: current_app_container + +- name: Stop existing app container + community.docker.docker_container: + name: "{{ app_container_name }}" + state: stopped + when: + - current_app_container.exists | default(false) + - current_app_container.container.State.Status | default("") == "running" + +- name: Remove old app container + community.docker.docker_container: + name: "{{ app_container_name }}" + state: absent + when: current_app_container.exists | default(false) + +- name: Run application container + community.docker.docker_container: + name: "{{ app_container_name }}" + image: "{{ docker_image }}:{{ docker_image_tag }}" + state: started + restart_policy: "{{ app_restart_policy }}" + published_ports: + - "{{ app_port }}:5000" + env: "{{ app_env }}" + recreate: true + notify: restart app container + +- name: Wait for app port to be ready + ansible.builtin.wait_for: + host: "127.0.0.1" + port: "{{ app_port | int }}" + delay: 2 + timeout: "{{ app_wait_timeout }}" + +- name: Verify health endpoint + ansible.builtin.uri: + url: "http://127.0.0.1:{{ app_port }}{{ app_health_path }}" + method: GET + status_code: 200 + return_content: true + register: app_health_result + retries: 5 + delay: 3 + until: app_health_result.status == 200 diff --git 
a/lab5c/ansible/roles/common/defaults/main.yml b/lab5c/ansible/roles/common/defaults/main.yml new file mode 100644 index 0000000000..49e2e4526d --- /dev/null +++ b/lab5c/ansible/roles/common/defaults/main.yml @@ -0,0 +1,13 @@ +--- +common_packages: + - apt-transport-https + - ca-certificates + - curl + - git + - gnupg + - htop + - lsb-release + - python3-pip + - vim + +common_timezone: "UTC" diff --git a/lab5c/ansible/roles/common/tasks/main.yml b/lab5c/ansible/roles/common/tasks/main.yml new file mode 100644 index 0000000000..e9085097f4 --- /dev/null +++ b/lab5c/ansible/roles/common/tasks/main.yml @@ -0,0 +1,15 @@ +--- +- name: Update apt cache + ansible.builtin.apt: + update_cache: true + cache_valid_time: 3600 + +- name: Install common packages + ansible.builtin.apt: + name: "{{ common_packages }}" + state: present + +- name: Configure timezone + community.general.timezone: + name: "{{ common_timezone }}" + when: common_timezone | default("") | length > 0 diff --git a/lab5c/ansible/roles/docker/defaults/main.yml b/lab5c/ansible/roles/docker/defaults/main.yml new file mode 100644 index 0000000000..546f4a7af0 --- /dev/null +++ b/lab5c/ansible/roles/docker/defaults/main.yml @@ -0,0 +1,16 @@ +--- +docker_arch_map: + x86_64: amd64 + aarch64: arm64 + +docker_arch: "{{ docker_arch_map.get(ansible_architecture, 'amd64') }}" + +docker_packages: + - docker-ce + - docker-ce-cli + - containerd.io + - docker-buildx-plugin + - docker-compose-plugin + +docker_python_package: python3-docker +docker_user: "{{ ansible_user | default('ubuntu') }}" diff --git a/lab5c/ansible/roles/docker/handlers/main.yml b/lab5c/ansible/roles/docker/handlers/main.yml new file mode 100644 index 0000000000..1a5058da5e --- /dev/null +++ b/lab5c/ansible/roles/docker/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: restart docker + ansible.builtin.service: + name: docker + state: restarted diff --git a/lab5c/ansible/roles/docker/tasks/main.yml b/lab5c/ansible/roles/docker/tasks/main.yml new file mode 
100644 index 0000000000..bc99133596 --- /dev/null +++ b/lab5c/ansible/roles/docker/tasks/main.yml @@ -0,0 +1,56 @@ +--- +- name: Install APT dependencies for Docker repository + ansible.builtin.apt: + name: + - ca-certificates + - curl + - gnupg + state: present + update_cache: true + +- name: Ensure Docker keyring directory exists + ansible.builtin.file: + path: /etc/apt/keyrings + state: directory + mode: "0755" + +- name: Download Docker official GPG key + ansible.builtin.get_url: + url: https://download.docker.com/linux/ubuntu/gpg + dest: /etc/apt/keyrings/docker.asc + mode: "0644" + +- name: Add Docker APT repository + ansible.builtin.apt_repository: + repo: "deb [arch={{ docker_arch }} signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable" + filename: docker + state: present + register: docker_repo + +- name: Update apt cache after Docker repo changes + ansible.builtin.apt: + update_cache: true + when: docker_repo is changed + +- name: Install Docker engine and CLI packages + ansible.builtin.apt: + name: "{{ docker_packages + [docker_python_package] }}" + state: present + notify: restart docker + +- name: Ensure docker group exists + ansible.builtin.group: + name: docker + state: present + +- name: Add target user to docker group + ansible.builtin.user: + name: "{{ docker_user }}" + groups: docker + append: true + +- name: Ensure Docker service is enabled and running + ansible.builtin.service: + name: docker + state: started + enabled: true diff --git a/lab6c/ansible/.gitignore b/lab6c/ansible/.gitignore new file mode 100644 index 0000000000..b487bb7be0 --- /dev/null +++ b/lab6c/ansible/.gitignore @@ -0,0 +1,4 @@ +.vault_pass +*.retry +inventory/*.pyc +__pycache__/ diff --git a/lab6c/ansible/ansible.cfg b/lab6c/ansible/ansible.cfg new file mode 100644 index 0000000000..2fb9889628 --- /dev/null +++ b/lab6c/ansible/ansible.cfg @@ -0,0 +1,13 @@ +[defaults] +inventory = inventory/hosts.ini 
+roles_path = roles +host_key_checking = False +remote_user = ubuntu +retry_files_enabled = False +interpreter_python = auto_silent +timeout = 30 + +[privilege_escalation] +become = True +become_method = sudo +become_user = root diff --git a/lab6c/ansible/docs/LAB05.md b/lab6c/ansible/docs/LAB05.md new file mode 100644 index 0000000000..7db4ecb2c6 --- /dev/null +++ b/lab6c/ansible/docs/LAB05.md @@ -0,0 +1,180 @@ +# LAB05 — Ansible Fundamentals (Role-Based) + +## 1. Architecture Overview + +- **Ansible version used:** Ansible Core 2.17.8. +- **Control node:** Windows 10 + Docker Desktop (Ansible executed in container). +- **Target VM:** Ubuntu 22.04/24.04 VM from Lab 4 (cloud), connected via SSH. +- **Role structure:** Three roles are used: + - `common` - baseline OS preparation + - `docker` - Docker engine installation and service setup + - `app_deploy` - Dockerized app deployment and health verification +- **Why roles instead of one large playbook:** Roles isolate responsibilities, keep playbooks clean, and make each part reusable. + +## 2. Roles Documentation + +### Role: `common` + +- **Purpose:** Prepare the system with required base packages and timezone. +- **Variables (defaults):** + - `common_packages` - essential packages list (`curl`, `git`, `python3-pip`, etc.) + - `common_timezone` - default `UTC` +- **Handlers:** None. +- **Dependencies:** `community.general` collection (for timezone module). + +### Role: `docker` + +- **Purpose:** Install Docker from the official Docker APT repository and prepare runtime access. +- **Variables (defaults):** + - `docker_arch_map`, `docker_arch` + - `docker_packages` (`docker-ce`, `docker-ce-cli`, `containerd.io`, plugins) + - `docker_python_package` (`python3-docker`) + - `docker_user` (user added to `docker` group) +- **Handlers:** + - `restart docker` - restarts Docker service when package changes require it. 
+- **Dependencies:** Uses Ansible built-in modules; installs `python3-docker` on target host for Docker-related modules. + +### Role: `app_deploy` + +- **Purpose:** Authenticate to Docker Hub, pull image, replace container, and verify app health. +- **Variables (defaults):** + - `app_name`, `docker_image`, `docker_image_tag` + - `app_port`, `app_container_name` + - `app_restart_policy`, `app_env` + - `app_health_path`, `app_wait_timeout` + - Vaulted vars: `dockerhub_username`, `dockerhub_password` +- **Handlers:** + - `restart app container` - restarts running container when deployment task triggers notify. +- **Dependencies:** `community.docker` collection. + +## 3. Idempotency Demonstration + +### First run (`playbooks/provision.yml`) + +```text +PLAY [Provision web servers] +... +TASK [common : Update apt cache] changed +TASK [common : Install common packages] changed +TASK [common : Configure timezone] changed +TASK [docker : Download Docker official GPG key] changed +TASK [docker : Add Docker APT repository] changed +TASK [docker : Update apt cache after Docker repo changes] changed +TASK [docker : Install Docker engine and CLI packages] changed +TASK [docker : Add target user to docker group] changed +RUNNING HANDLER [docker : restart docker] changed +... +PLAY RECAP +lab5-vm : ok=14 changed=9 unreachable=0 failed=0 skipped=0 +``` + +### Second run (`playbooks/provision.yml`) + +```text +PLAY [Provision web servers] +... +TASK [common : Update apt cache] ok +TASK [common : Install common packages] ok +TASK [common : Configure timezone] ok +TASK [docker : Download Docker official GPG key] ok +TASK [docker : Add Docker APT repository] ok +TASK [docker : Install Docker engine and CLI packages] ok +TASK [docker : Add target user to docker group] ok +TASK [docker : Update apt cache after Docker repo changes] skipping +... 
+PLAY RECAP +lab5-vm : ok=12 changed=0 unreachable=0 failed=0 skipped=1 +``` + +### Analysis + +- On the first run, resources are created/configured to match desired state (packages, repo, Docker service, group membership). +- On the second run, Ansible modules compare desired and current state and skip unnecessary changes, proving idempotent behavior. +- Idempotency is achieved by stateful modules (`apt`, `service`, `user`, `docker_container`) instead of ad-hoc shell commands. + +## 4. Ansible Vault Usage + +- Credentials are stored in `group_vars/all.yml` encrypted via Ansible Vault. +- Vault password is entered interactively (`--ask-vault-pass`) or provided via local password file that is ignored by Git. +- Tasks containing credentials use `no_log: true` to prevent secret leakage in logs. + +### Encrypted file proof + +```text +$ANSIBLE_VAULT;1.1;AES256 +64383638346636396532383762376239633430663933613638326235653962353634323766343664 +3436646365333032316364663736356565616462353663310a303061333835663866303562323132 +65356163313437653263333138366561633533646662336634393333313737336439326132323666 +``` + +### Why Vault is important + +- Secrets can be committed safely only in encrypted form. +- Team members can share infrastructure code without exposing credentials. +- It reduces accidental secret leakage in repo history and CI logs. + +## 5. Deployment Verification + +### Deployment run (`playbooks/deploy.yml`) + +```text +PLAY [Deploy application] +... +TASK [app_deploy : Log in to Docker Hub] changed +TASK [app_deploy : Pull application image] changed +TASK [app_deploy : Run application container] changed +TASK [app_deploy : Wait for app port to be ready] ok +TASK [app_deploy : Verify health endpoint] ok +RUNNING HANDLER [app_deploy : restart app container] changed +... 
+PLAY RECAP +lab5-vm : ok=8 changed=4 unreachable=0 failed=0 skipped=2 +``` + +### Container status + +```text +lab5-vm | CHANGED | rc=0 >> +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +e45f2bb4472d tsixphoenix/devops-info-python:latest "python app.py" 58 seconds ago Up 49 seconds 0.0.0.0:5000->5000/tcp devops-info-python +``` + +### Health check + +```text +curl http://89.169.158.161:5000/health +{"status":"healthy","timestamp":"2026-02-24T11:09:07.680263Z","uptime_seconds":14} + +curl http://89.169.158.161:5000/ +{"service":{"name":"devops-info-service","version":"1.0.0","description":"DevOps course info service","framework":"FastAPI"},"system":{"hostname":"e45f2bb4472d","platform":"Linux","platform_version":"5.15.0-170-generic","architecture":"x86_64","cpu_count":2,"python_version":"3.13.12"},"runtime":{"uptime_seconds":16,"uptime_human":"0 hours, 0 minutes","current_time":"2026-02-24T11:09:09.533021Z","timezone":"UTC"},"request":{"client_ip":"188.130.155.186","user_agent":"curl/8.16.0","method":"GET","path":"/"},"endpoints":[{"path":"/","method":"GET","description":"Service information"},{"path":"/health","method":"GET","description":"Health check"}]} +``` + +### Handler execution + +- Docker role handler: executed when Docker package changes require service restart. +- App deploy handler: executes only when container deployment task reports changes. + +## 6. Key Decisions + +- **Why use roles instead of plain playbooks?** + Roles separate concerns and keep top-level playbooks minimal. This reduces complexity and improves readability as automation grows. + +- **How do roles improve reusability?** + Roles encapsulate tasks + defaults + handlers. The same role can be reused across environments by changing only inventory and variables. + +- **What makes a task idempotent?** + Idempotent tasks declare target state (for example, `state: present`, `state: started`) and change only when current state differs. 
+ +- **How do handlers improve efficiency?** + Handlers run only when notified by changed tasks, so expensive operations (like restarts) are not executed on every run. + +- **Why is Ansible Vault necessary?** + It allows secure storage of credentials in versioned infrastructure code without exposing plaintext secrets. + +## 7. Challenges + +- Initial control-node setup on Windows (Ansible-in-Docker + mounted SSH key permissions). +- Correctly configuring Docker repository and architecture mapping. +- Verifying no secret values appear in output logs. + +--- \ No newline at end of file diff --git a/lab6c/ansible/docs/LAB06.md b/lab6c/ansible/docs/LAB06.md new file mode 100644 index 0000000000..1f7ff3799f --- /dev/null +++ b/lab6c/ansible/docs/LAB06.md @@ -0,0 +1,164 @@ +# Lab 6 — Advanced Ansible & CI/CD + +## 1. Overview + +This lab extends Lab 5 with: +- **Blocks and tags** in common and docker roles +- **Docker Compose** for app deployment (replacing docker run) +- **Wipe logic** (variable + tag) for clean removal +- **GitHub Actions** workflow for automated deployment + +## 2. Blocks & Tags + +### Common Role +- **packages** block: apt update + install, with rescue (retry apt on failure), always (log completion) +- **users** block: ensure sudo group +- **common** tag: entire role + +### Docker Role +- **docker_install** block: repo setup, package install; rescue (wait 10s, retry); always (ensure service enabled) +- **docker_config** block: docker group, add user +- **docker** tag: entire role + +### Web App Role +- **app_deploy**, **compose** tags: deployment tasks +- **web_app_wipe** tag: wipe tasks only + +### Execution Examples +```bash +ansible-playbook playbooks/provision.yml --tags "docker" +ansible-playbook playbooks/provision.yml --skip-tags "common" +ansible-playbook playbooks/provision.yml --tags "packages" +ansible-playbook playbooks/provision.yml --list-tags +``` + +## 3. 
Docker Compose Migration + +- **Template:** `roles/web_app/templates/docker-compose.yml.j2` +- **Project dir:** `/opt/{{ app_name }}` +- **Role dependency:** `web_app` depends on `docker` (meta/main.yml) +- **Module:** `community.docker.docker_compose_v2` with `state: present`, `pull: always` + +## 4. Wipe Logic + +- **Variable:** `web_app_wipe: false` (default) +- **Tag:** `web_app_wipe` +- **Tasks:** `roles/web_app/tasks/wipe.yml` — compose down, remove file, remove dir + +### Scenarios +1. **Normal deploy:** `ansible-playbook playbooks/deploy.yml` — wipe skipped +2. **Wipe only:** `ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" --tags web_app_wipe` +3. **Clean reinstall:** `ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true"` +4. **Safety:** `--tags web_app_wipe` without variable — wipe skipped (when blocks it) + +## 5. CI/CD Integration + +- **Workflow:** `.github/workflows/ansible-deploy.yml` +- **Triggers:** push to `lab6c/ansible/**` +- **Jobs:** lint (ansible-lint), deploy (playbook + verify) +- **Secrets required:** `ANSIBLE_VAULT_PASSWORD`, `SSH_PRIVATE_KEY`, `VM_HOST`, `VM_USER` + +## 6. 
Testing Results + +### 6.1 Provision with tags +```bash +ansible-playbook playbooks/provision.yml --tags "docker" +``` +``` +PLAY RECAP ********************************************************************* +lab5-vm : ok=9 changed=0 unreachable=0 failed=0 skipped=1 rescued=0 ignored=0 +``` + +### 6.2 List of tags +```bash +ansible-playbook playbooks/provision.yml --list-tags +``` +``` +playbook: playbooks/provision.yml + play #1 (webservers): Provision web servers TAGS: [] + TASK TAGS: [common, docker, docker_config, docker_install, packages, users] +``` + +### 6.3 Deploy +```bash +ansible-playbook playbooks/deploy.yml +``` +``` +PLAY RECAP ********************************************************************* +lab5-vm : ok=16 changed=2 unreachable=0 failed=0 skipped=5 rescued=0 ignored=0 +``` + +### 6.4 Wipe-only +```bash +ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" --tags web_app_wipe +``` +``` +TASK [web_app : Log wipe completion] ******************************************* +ok: [lab5-vm] => {"msg": "Application devops-info-python wiped successfully"} +PLAY RECAP ********************************************************************* +lab5-vm : ok=6 changed=3 unreachable=0 failed=0 skipped=0 +``` + +### 6.5 Clean reinstall +```bash +ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" +``` +``` +PLAY RECAP ********************************************************************* +lab5-vm : ok=20 changed=3 unreachable=0 failed=0 skipped=1 rescued=0 ignored=1 +``` + +### 6.6 Health check +```bash +curl http://62.84.127.190:5000/health +``` +```json +{"status":"healthy","timestamp":"2026-03-05T12:17:53.667273Z","uptime_seconds":60} +``` + +### 6.7 Idempotency (2nd deploy run) +```bash +ansible-playbook playbooks/deploy.yml +``` +Second run: `changed=0` (all `ok`, no changes). 
+
+### 6.8 Scenario 4a — safety (`--tags web_app_wipe` without variable)
+```bash
+ansible-playbook playbooks/deploy.yml --tags web_app_wipe
+```
+Wipe tasks skipped (when blocks: `web_app_wipe` false by default). Deploy runs normally.
+
+### 6.9 GitHub Actions
+Add the four required secrets (see section 5) and push a change under `lab6c/ansible/**`. A screenshot of a successful workflow run is stored in `docs/workflow.jpg`.
+
+
+## 7. Challenges & Solutions
+
+- **Template `to_native` filter:** Ansible 2.16+ does not provide `to_native` in Jinja2 — replaced with `to_json`.
+- **dpkg lock:** On a new VM, `unattended-upgrades` holds the apt lock; re-running `provision` after the updates complete succeeds.
+- **Wipe on empty directory:** `docker_compose_v2 state: absent` fails if directory was already removed. Added `compose_dir_stat` check before `compose down`.
+
+## 8. Research Answers
+
+### Task 1 — Blocks & Tags
+- **If rescue also fails?** Play will fail with error; can add `ignore_errors` or nested rescue.
+- **Nested blocks?** Yes, a block can contain another block.
+- **Tag inheritance?** Tags on block apply to all tasks inside.
+
+### Task 2 — Docker Compose
+- **restart: always vs unless-stopped?** `unless-stopped` does not restart container after manual stop.
+- **Compose networks vs bridge?** Compose creates named networks; bridge is the default network.
+- **Vault in template?** Yes, Vault variables are available when templating.
+
+### Task 3 — Wipe Logic
+- **Variable + tag?** Double safety: variable prevents accidental wipe; tag enables selective execution.
+- **never tag vs our approach?** `never` disables task by tag; our approach requires both tag and variable.
+- **Wipe before deploy?** Enables clean reinstall: wipe → deploy in one run.
+- **Clean reinstall vs rolling update?** Reinstall = full replacement; rolling = phased update without downtime.
+- **Extending wipe?** Can add `docker image prune` and `docker volume rm` to wipe.yml.
+
+### Task 4 — CI/CD
+- **SSH keys in Secrets?** Use short-lived keys; regular rotation; restrict scope. 
+- **Staging → production?** Separate inventory/playbooks, approval before prod, or environment protection. +- **Rollbacks?** Add tag/version to image, keep previous config, workflow for rollback. +- **Self-hosted vs GitHub-hosted?** Self-hosted gives direct network/VMs access; fewer SSH key exposure risks. diff --git a/lab6c/ansible/docs/workflow.jpg b/lab6c/ansible/docs/workflow.jpg new file mode 100644 index 0000000000..bd8ac1af34 Binary files /dev/null and b/lab6c/ansible/docs/workflow.jpg differ diff --git a/lab6c/ansible/group_vars/all.yml b/lab6c/ansible/group_vars/all.yml new file mode 100644 index 0000000000..e12b23ba74 --- /dev/null +++ b/lab6c/ansible/group_vars/all.yml @@ -0,0 +1,33 @@ +$ANSIBLE_VAULT;1.1;AES256 +61633462363861316436623662633839646361313433333434316236643637393039656137333630 +3638333365396330616461636436633866333531633930630a383464376530653839613930383661 +31313332303966653138366435366666353838316232633836393534646261333437396530386164 +6138376139346638330a613032643265376438343935356238613235343434356638653963316365 +65626665313563383639323930346438363239386565626434656337383430343732333962396335 +61613535636538626165313333613464633935343361353163353366333966653665383832373838 +61356436323939613636616635333836346330323531623037353736366462393336356633616132 +39343936353837316132373338616633353834333065303536326439366530666539336561303036 +64383632313331613932363934313264646464336635343535363631333031653664326530653038 +39356366366237396366386637313939306637373438366162303962386536633936626130633433 +63393363613863333965316537383439623732303862386238306637343136396634626639666335 +31363333356239303631306432656265306135643238656366346635663637666465303761653162 +66326363643065343062646634366636636166336136363862616630363030636533623861626132 +63633232373638623331323231323366326236316331663961656236666237393361653533343435 +38323333363938363237613432353362396330333961326332383634396333663336613665396637 
+37383366643764363438616536323463303634396139363837343335366662653835373630303131 +66393063373339323630393238306638663335623232303239613831353932376465663834663736 +37396661323332303061633538393035356531623065396634633433623862666538356538343761 +61653630346237656663356462343366653163326261663138303132373932323863663566393932 +34653132623136633734663664356631663839363533353631373162306339653938636161633939 +62306639396634646366316662306533663337376262666333653431383562656138643264376437 +66376630326437353766613733333434333861613830303366663133363733656561393666613364 +65656636316663666438323635343062626138393963303837393536353466626161353531313733 +37373866363435303436346339393566656431326233333336343636303638313632363466653963 +32353963356431383433396461653034353963346462653066356662396462383432363231633739 +64646563306534323565396263623236356130643234313239393232366633333034383466653438 +37313138363764306561343364393838353963373464633864356666376536383131626638333332 +35333538306161633465663966663464643032343665393438366538623666346263333839393532 +61393132313662346266346234393766616532356638663432626236363238303063666135626663 +35346434346632653164646530323833656433386465313037653231336365363739336661636163 +33346463303439383837376363343430333161396431653538313466323563343964363238333132 +35303738346436393766 diff --git a/lab6c/ansible/group_vars/all.yml.example b/lab6c/ansible/group_vars/all.yml.example new file mode 100644 index 0000000000..335be5e489 --- /dev/null +++ b/lab6c/ansible/group_vars/all.yml.example @@ -0,0 +1,22 @@ +--- +# Copy to group_vars/all.yml and encrypt: ansible-vault encrypt group_vars/all.yml + +# Docker Hub (required for deploy) +dockerhub_username: "YOUR_DOCKERHUB_USERNAME" +dockerhub_password: "YOUR_DOCKERHUB_ACCESS_TOKEN" + +# Application config (Lab 6 Docker Compose) +app_name: "devops-info-python" +docker_image: "{{ dockerhub_username }}/{{ app_name }}" +docker_image_tag: "latest" + +app_port: 5000 +app_internal_port: 5000 
+app_container_name: "{{ app_name }}" +app_restart_policy: "unless-stopped" +app_env: {} +app_health_path: "/health" +app_wait_timeout: 120 + +# Docker Compose project directory on target VM +compose_project_dir: "/opt/{{ app_name }}" diff --git a/lab6c/ansible/inventory/hosts.ini b/lab6c/ansible/inventory/hosts.ini new file mode 100644 index 0000000000..b0c44a8fd4 --- /dev/null +++ b/lab6c/ansible/inventory/hosts.ini @@ -0,0 +1,5 @@ +[webservers] +lab5-vm ansible_host=62.84.127.190 ansible_user=ubuntu ansible_ssh_private_key_file=/ssh/id_ed25519 + +[webservers:vars] +ansible_python_interpreter=/usr/bin/python3 diff --git a/lab6c/ansible/playbooks/deploy.yml b/lab6c/ansible/playbooks/deploy.yml new file mode 100644 index 0000000000..f3923b77bb --- /dev/null +++ b/lab6c/ansible/playbooks/deploy.yml @@ -0,0 +1,10 @@ +--- +- name: Deploy application + hosts: webservers + become: true + + vars_files: + - ../group_vars/all.yml + + roles: + - web_app diff --git a/lab6c/ansible/playbooks/provision.yml b/lab6c/ansible/playbooks/provision.yml new file mode 100644 index 0000000000..7cc2e6678d --- /dev/null +++ b/lab6c/ansible/playbooks/provision.yml @@ -0,0 +1,8 @@ +--- +- name: Provision web servers + hosts: webservers + become: true + + roles: + - common + - docker diff --git a/lab6c/ansible/playbooks/site.yml b/lab6c/ansible/playbooks/site.yml new file mode 100644 index 0000000000..f90334eff7 --- /dev/null +++ b/lab6c/ansible/playbooks/site.yml @@ -0,0 +1,12 @@ +--- +- name: Provision and deploy application + hosts: webservers + become: true + + vars_files: + - ../group_vars/all.yml + + roles: + - common + - docker + - web_app diff --git a/lab6c/ansible/requirements.yml b/lab6c/ansible/requirements.yml new file mode 100644 index 0000000000..b869f415df --- /dev/null +++ b/lab6c/ansible/requirements.yml @@ -0,0 +1,4 @@ +--- +collections: + - name: community.docker + - name: community.general diff --git a/lab6c/ansible/roles/common/defaults/main.yml 
b/lab6c/ansible/roles/common/defaults/main.yml new file mode 100644 index 0000000000..49e2e4526d --- /dev/null +++ b/lab6c/ansible/roles/common/defaults/main.yml @@ -0,0 +1,13 @@ +--- +common_packages: + - apt-transport-https + - ca-certificates + - curl + - git + - gnupg + - htop + - lsb-release + - python3-pip + - vim + +common_timezone: "UTC" diff --git a/lab6c/ansible/roles/common/tasks/main.yml b/lab6c/ansible/roles/common/tasks/main.yml new file mode 100644 index 0000000000..bb93353eee --- /dev/null +++ b/lab6c/ansible/roles/common/tasks/main.yml @@ -0,0 +1,58 @@ +--- +# Common role: baseline system setup +# Tags: packages, users, common + +- name: Install packages + block: + - name: Update apt cache + ansible.builtin.apt: + update_cache: true + cache_valid_time: 3600 + tags: + - packages + - common + + - name: Install common packages + ansible.builtin.apt: + name: "{{ common_packages }}" + state: present + tags: + - packages + - common + + rescue: + - name: Retry apt update on failure + ansible.builtin.apt: + update_cache: true + ignore_errors: true + + - name: Re-run package install after cache fix + ansible.builtin.apt: + name: "{{ common_packages }}" + state: present + + always: + - name: Log common role completion + ansible.builtin.copy: + content: "common role completed at {{ ansible_date_time.iso8601 }}\n" + dest: /tmp/ansible_common_completed + mode: "0644" + tags: + - common + +- name: User and group setup + block: + - name: Ensure sudo group exists + ansible.builtin.group: + name: sudo + state: present + tags: + - users + - common + +- name: Configure timezone + community.general.timezone: + name: "{{ common_timezone }}" + when: common_timezone | default("") | length > 0 + tags: + - common diff --git a/lab6c/ansible/roles/docker/defaults/main.yml b/lab6c/ansible/roles/docker/defaults/main.yml new file mode 100644 index 0000000000..546f4a7af0 --- /dev/null +++ b/lab6c/ansible/roles/docker/defaults/main.yml @@ -0,0 +1,16 @@ +--- +docker_arch_map: + 
x86_64: amd64 + aarch64: arm64 + +docker_arch: "{{ docker_arch_map.get(ansible_architecture, 'amd64') }}" + +docker_packages: + - docker-ce + - docker-ce-cli + - containerd.io + - docker-buildx-plugin + - docker-compose-plugin + +docker_python_package: python3-docker +docker_user: "{{ ansible_user | default('ubuntu') }}" diff --git a/lab6c/ansible/roles/docker/handlers/main.yml b/lab6c/ansible/roles/docker/handlers/main.yml new file mode 100644 index 0000000000..1a5058da5e --- /dev/null +++ b/lab6c/ansible/roles/docker/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: restart docker + ansible.builtin.service: + name: docker + state: restarted diff --git a/lab6c/ansible/roles/docker/tasks/main.yml b/lab6c/ansible/roles/docker/tasks/main.yml new file mode 100644 index 0000000000..990a1dc49c --- /dev/null +++ b/lab6c/ansible/roles/docker/tasks/main.yml @@ -0,0 +1,105 @@ +--- +# Docker role: install Docker engine from official repo +# Tags: docker_install, docker_config, docker + +- name: Docker installation + block: + - name: Install APT dependencies for Docker repository + ansible.builtin.apt: + name: + - ca-certificates + - curl + - gnupg + state: present + update_cache: true + tags: + - docker_install + - docker + + - name: Ensure Docker keyring directory exists + ansible.builtin.file: + path: /etc/apt/keyrings + state: directory + mode: "0755" + tags: + - docker_install + - docker + + - name: Download Docker official GPG key + ansible.builtin.get_url: + url: https://download.docker.com/linux/ubuntu/gpg + dest: /etc/apt/keyrings/docker.asc + mode: "0644" + tags: + - docker_install + - docker + + - name: Add Docker APT repository + ansible.builtin.apt_repository: + repo: "deb [arch={{ docker_arch }} signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable" + filename: docker + state: present + register: docker_repo + tags: + - docker_install + - docker + + - name: Update apt cache after Docker repo 
changes + ansible.builtin.apt: + update_cache: true + when: docker_repo is changed + tags: + - docker_install + - docker + + - name: Install Docker engine and CLI packages + ansible.builtin.apt: + name: "{{ docker_packages + [docker_python_package] }}" + state: present + notify: restart docker + tags: + - docker_install + - docker + + rescue: + - name: Wait before retry after GPG/repo failure + ansible.builtin.pause: + seconds: 10 + prompt: "Retrying Docker repo setup..." + + - name: Retry apt update + ansible.builtin.apt: + update_cache: true + + always: + - name: Ensure Docker service is enabled and running + ansible.builtin.service: + name: docker + state: started + enabled: true + tags: + - docker + - docker_config + +- name: Docker configuration + block: + - name: Ensure docker group exists + ansible.builtin.group: + name: docker + state: present + tags: + - docker_config + - docker + + - name: Add target user to docker group + ansible.builtin.user: + name: "{{ docker_user }}" + groups: docker + append: true + tags: + - docker_config + - docker + + tags: + - docker_config + - docker diff --git a/lab6c/ansible/roles/web_app/defaults/main.yml b/lab6c/ansible/roles/web_app/defaults/main.yml new file mode 100644 index 0000000000..0e150a57fe --- /dev/null +++ b/lab6c/ansible/roles/web_app/defaults/main.yml @@ -0,0 +1,19 @@ +--- +# Application config +app_name: "devops-info-python" +docker_image: "{{ dockerhub_username }}/{{ app_name }}" +docker_image_tag: "latest" + +app_port: 5000 +app_internal_port: 5000 +app_container_name: "{{ app_name }}" +app_restart_policy: "unless-stopped" +app_env: {} +app_health_path: "/health" +app_wait_timeout: 120 + +# Docker Compose +compose_project_dir: "/opt/{{ app_name }}" + +# Wipe logic: set to true to remove app completely. Also requires --tags web_app_wipe for wipe-only. 
+web_app_wipe: false diff --git a/lab6c/ansible/roles/web_app/handlers/main.yml b/lab6c/ansible/roles/web_app/handlers/main.yml new file mode 100644 index 0000000000..f8bfe8ed4f --- /dev/null +++ b/lab6c/ansible/roles/web_app/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: restart app container + community.docker.docker_compose_v2: + project_src: "{{ compose_project_dir }}" + state: present diff --git a/lab6c/ansible/roles/web_app/meta/main.yml b/lab6c/ansible/roles/web_app/meta/main.yml new file mode 100644 index 0000000000..a5177c0f80 --- /dev/null +++ b/lab6c/ansible/roles/web_app/meta/main.yml @@ -0,0 +1,4 @@ +--- +# web_app depends on docker - Docker must be installed before deploying containers +dependencies: + - role: docker diff --git a/lab6c/ansible/roles/web_app/tasks/main.yml b/lab6c/ansible/roles/web_app/tasks/main.yml new file mode 100644 index 0000000000..063676e3cf --- /dev/null +++ b/lab6c/ansible/roles/web_app/tasks/main.yml @@ -0,0 +1,84 @@ +--- +# web_app role: deploy containerized app with Docker Compose +# Tags: app_deploy, compose, web_app +# Dependency: docker role (installed automatically via meta/main.yml) + +# Wipe logic runs first when explicitly requested +- name: Include wipe tasks + include_tasks: wipe.yml + tags: + - web_app_wipe + +# Deployment block +- name: Deploy application with Docker Compose + block: + - name: Log in to Docker Hub + community.docker.docker_login: + registry_url: https://index.docker.io/v1/ + username: "{{ dockerhub_username }}" + password: "{{ dockerhub_password }}" + no_log: true + tags: + - app_deploy + - compose + + - name: Create application directory + ansible.builtin.file: + path: "{{ compose_project_dir }}" + state: directory + mode: "0755" + tags: + - app_deploy + - compose + + - name: Template docker-compose.yml + ansible.builtin.template: + src: docker-compose.yml.j2 + dest: "{{ compose_project_dir }}/docker-compose.yml" + mode: "0644" + tags: + - app_deploy + - compose + + - name: Deploy with Docker 
Compose (up) + community.docker.docker_compose_v2: + project_src: "{{ compose_project_dir }}" + state: present + pull: always + tags: + - app_deploy + - compose + + - name: Wait for app port to be ready + ansible.builtin.wait_for: + host: "127.0.0.1" + port: "{{ app_port | int }}" + delay: 2 + timeout: "{{ app_wait_timeout }}" + tags: + - app_deploy + - compose + + - name: Verify health endpoint + ansible.builtin.uri: + url: "http://127.0.0.1:{{ app_port }}{{ app_health_path }}" + method: GET + status_code: 200 + return_content: true + register: app_health_result + retries: 5 + delay: 3 + until: app_health_result.status == 200 + tags: + - app_deploy + - compose + + rescue: + - name: Log deployment failure + ansible.builtin.debug: + msg: "Deployment failed, check logs above" + + tags: + - app_deploy + - compose + - web_app diff --git a/lab6c/ansible/roles/web_app/tasks/wipe.yml b/lab6c/ansible/roles/web_app/tasks/wipe.yml new file mode 100644 index 0000000000..fbeb835e4f --- /dev/null +++ b/lab6c/ansible/roles/web_app/tasks/wipe.yml @@ -0,0 +1,37 @@ +--- +# Wipe web application: stops containers, removes compose file and app directory +# Runs only when web_app_wipe=true AND tag web_app_wipe specified +# Usage: ansible-playbook deploy.yml -e "web_app_wipe=true" --tags web_app_wipe + +- name: Check if app directory exists + ansible.builtin.stat: + path: "{{ compose_project_dir }}" + register: compose_dir_stat + +- name: Wipe web application + block: + - name: Stop and remove containers with Docker Compose + community.docker.docker_compose_v2: + project_src: "{{ compose_project_dir }}" + state: absent + when: compose_dir_stat.stat.isdir | default(false) + + - name: Remove docker-compose file + ansible.builtin.file: + path: "{{ compose_project_dir }}/docker-compose.yml" + state: absent + ignore_errors: true + + - name: Remove application directory + ansible.builtin.file: + path: "{{ compose_project_dir }}" + state: absent + ignore_errors: true + + - name: Log wipe 
completion + ansible.builtin.debug: + msg: "Application {{ app_name }} wiped successfully" + + when: web_app_wipe | default(false) | bool + tags: + - web_app_wipe diff --git a/lab6c/ansible/roles/web_app/templates/docker-compose.yml.j2 b/lab6c/ansible/roles/web_app/templates/docker-compose.yml.j2 new file mode 100644 index 0000000000..7285e18cf1 --- /dev/null +++ b/lab6c/ansible/roles/web_app/templates/docker-compose.yml.j2 @@ -0,0 +1,15 @@ +--- +# Generated by Ansible - do not edit manually +# Template: roles/web_app/templates/docker-compose.yml.j2 + +services: + {{ app_name }}: + image: {{ docker_image }}:{{ docker_image_tag }} + container_name: {{ app_container_name }} + ports: + - "{{ app_port }}:{{ app_internal_port }}" + environment: {{ app_env | default({}) | to_json }} + restart: {{ app_restart_policy }} + {% if app_extra_hosts is defined and app_extra_hosts %} + extra_hosts: {{ app_extra_hosts | to_json }} + {% endif %} diff --git a/lab7c/docker-compose.yml b/lab7c/docker-compose.yml new file mode 100644 index 0000000000..4ad8163ebd --- /dev/null +++ b/lab7c/docker-compose.yml @@ -0,0 +1,109 @@ +version: "3.8" + +networks: + logging: + name: logging + +volumes: + loki-data: + grafana-data: + +services: + loki: + image: grafana/loki:3.0.0 + command: -config.file=/etc/loki/config.yml + ports: + - "3100:3100" + volumes: + - ./loki/config.yml:/etc/loki/config.yml:ro + - loki-data:/loki + networks: + - logging + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:3100/ready || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s + deploy: + resources: + limits: + cpus: "1.0" + memory: 1G + reservations: + cpus: "0.5" + memory: 512M + + promtail: + image: grafana/promtail:3.0.0 + command: -config.file=/etc/promtail/config.yml + ports: + - "9080:9080" + volumes: + - ./promtail/config.yml:/etc/promtail/config.yml:ro + - /var/log:/var/log:ro + - /var/lib/docker/containers:/var/lib/docker/containers:ro + - 
/var/run/docker.sock:/var/run/docker.sock:ro + networks: + - logging + depends_on: + - loki + deploy: + resources: + limits: + cpus: "0.5" + memory: 512M + reservations: + cpus: "0.25" + memory: 256M + + grafana: + image: grafana/grafana:12.3.1 + ports: + - "3000:3000" + environment: + GF_AUTH_ANONYMOUS_ENABLED: "false" + GF_SECURITY_ADMIN_USER: "admin" + GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:-admin}" + volumes: + - grafana-data:/var/lib/grafana + networks: + - logging + depends_on: + - loki + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:3000/api/health || exit 1"] + interval: 15s + timeout: 5s + retries: 5 + start_period: 20s + deploy: + resources: + limits: + cpus: "1.0" + memory: 1G + reservations: + cpus: "0.5" + memory: 512M + + app-python: + image: tsixphoenix/devops-info-python:latest + ports: + - "8000:5000" + networks: + - logging + labels: + logging: "promtail" + app: "devops-python" + environment: + HOST: "0.0.0.0" + PORT: "5000" + deploy: + resources: + limits: + cpus: "0.5" + memory: 512M + reservations: + cpus: "0.25" + memory: 256M + diff --git a/lab7c/docs/LAB07.md b/lab7c/docs/LAB07.md new file mode 100644 index 0000000000..f898ad53f1 --- /dev/null +++ b/lab7c/docs/LAB07.md @@ -0,0 +1,197 @@ +# Lab 7 — Observability & Logging with Loki Stack + +## 1. Architecture + +- **Loki**: log storage and query engine (TSDB on filesystem, 7‑day retention). +- **Promtail**: collects container logs from Docker and ships them to Loki. +- **Grafana**: visualizes logs and dashboards using LogQL. +- **App (FastAPI)**: `devops-info-service` container, logging JSON to stdout. +- All services run in `lab7c/docker-compose.yml` on a shared `logging` network. + +## 2. 
Setup Guide
+
+### 2.1 Stack deployment
+
+```bash
+cd lab7c
+docker compose up -d
+docker compose ps
+```
+
+Services:
+- `loki` on `3100`
+- `promtail` on `9080`
+- `grafana` on `3000`
+- `app-python` on `8000` (mapped to container 5000)
+
+### 2.2 Verification
+
+```bash
+# Loki readiness
+curl http://localhost:3100/ready
+
+# Promtail targets
+curl http://localhost:9080/targets
+
+# Open Grafana (local)
+http://localhost:3000
+```
+
+In Grafana:
+1. **Connections → Data sources → Add data source → Loki**
+2. URL: `http://loki:3100`
+3. **Save & Test** → “Data source connected”
+4. Go to **Explore**, choose **Loki**, run `{job="docker"}`.
+
+## 3. Configuration
+
+### 3.1 Docker Compose (`lab7c/docker-compose.yml`)
+
+- Defines network `logging` and volumes `loki-data`, `grafana-data`.
+- **Loki**:
+  - Image `grafana/loki:3.0.0`
+  - Mounts `./loki/config.yml` to `/etc/loki/config.yml`
+  - Persists data in `loki-data:/loki`
+  - Health check on `/ready`
+  - Resource limits and reservations set.
+- **Promtail**:
+  - Image `grafana/promtail:3.0.0`
+  - Mounts `./promtail/config.yml`
+  - Mounts `/var/lib/docker/containers` and `/var/run/docker.sock` read‑only.
+- **Grafana**:
+  - Image `grafana/grafana:12.3.1`
+  - Port `3000:3000`
+  - Admin user/password via env (for dev: `admin` / `${GRAFANA_ADMIN_PASSWORD:-admin}`).
+  - Health check on `/api/health`, resource limits.
+- **app-python**:
+  - Image `tsixphoenix/devops-info-python:latest`
+  - Port `8000:5000`
+  - Labels `logging="promtail"`, `app="devops-python"` for Promtail/Loki labels.
+
+### 3.2 Loki (`lab7c/loki/config.yml`)
+
+- `auth_enabled: false` for local testing.
+- `server.http_listen_port: 3100`.
+- `common`:
+  - `path_prefix: /loki`
+  - filesystem storage for chunks and rules.
+  - in‑memory ring for a single instance.
+- `schema_config`:
+  - `store: tsdb`, `object_store: filesystem`, `schema: v13`, daily index.
+- `storage_config`:
+  - `tsdb_shipper` index in `/loki/index` with cache. 
+ - filesystem chunks in `/loki/chunks`. +- `limits_config.retention_period: 168h` (7 days). +- `compactor`: + - cleans up old logs with `retention_enabled: true`. + +### 3.3 Promtail (`lab7c/promtail/config.yml`) + +- `server.http_listen_port: 9080`. +- `positions` stored in `/tmp/positions.yaml`. +- `clients` send to `http://loki:3100/loki/api/v1/push`. +- `scrape_configs` for **Docker**: + - `docker_sd_configs` on `unix:///var/run/docker.sock`. + - `relabel_configs`: + - `container` label from `__meta_docker_container_name`. + - `app` label from container label `app`. + - `logging` label from container label `logging`. + +## 4. Application Logging (JSON) + +In `lab3c/app_python/app.py`: +- Switched to **JSON log lines** using the standard `logging` module. +- HTTP middleware logs: + - `timestamp`, `level`, `service`, `method`, `path`, `status`, `client_ip`, `user_agent`. +- Logs are written to stdout and collected by Docker, then by Promtail. + +Example JSON log line: +```json +{ + "timestamp": "2026-03-05T12:20:00Z", + "level": "INFO", + "service": "devops-info-service", + "method": "GET", + "path": "/health", + "status": 200, + "client_ip": "127.0.0.1", + "user_agent": "curl/8.6.0", + "message": "request" +} +``` + +Screenshots are stored in `lab7c/docs/`. +## 5. Dashboard & LogQL + +### 5.1 Explore queries + +In Grafana Explore (Loki data source): + +- All logs for Python app: +```logql +{app="devops-python"} +``` + +- Only error logs: +```logql +{app="devops-python"} |= "ERROR" +``` + +- Parse JSON and filter by method: +```logql +{app="devops-python"} | json | method="GET" +``` + +### 5.2 Dashboard panels + +Dashboard panels created (LogQL examples): + +1. **Logs Table** (all apps): + ```logql + {app=~"devops-.*"} + ``` +2. **Request Rate** (time series): + ```logql + sum by (app) (rate({app=~"devops-.*"}[1m])) + ``` +3. **Error Logs**: + ```logql + {app=~"devops-.*"} | json | level="ERROR" + ``` +4. 
**Log Level Distribution**: + ```logql + sum by (level) (count_over_time({app=~"devops-.*"} | json [5m])) + ``` + +## 6. Production Configuration + +- **Resource limits**: all services have `deploy.resources` limits and reservations. +- **Grafana security**: + - Anonymous access disabled (`GF_AUTH_ANONYMOUS_ENABLED=false`). + - Admin credentials configured via environment variables / `.env`. +- **Health checks**: + - Loki: `/ready` endpoint. + - Grafana: `/api/health` endpoint. +- **Retention**: + - Loki configured for 7 days (`retention_period: 168h`) with compactor cleanup. + +## 7. Testing + +1. Start stack: `docker compose up -d`. +2. Generate logs: + ```bash + for i in {1..20}; do curl http://localhost:8000/; done + for i in {1..20}; do curl http://localhost:8000/health; done + ``` +3. In Grafana Explore, run: + - `{app="devops-python"}` + - `{app="devops-python"} | json | method="GET"` + - `{app="devops-python"} | json | level="ERROR"` +4. Check dashboard panels render data. + +## 8. Challenges + +- **Docker TSDB configuration**: required reading Loki 3.0 docs to use `tsdb` with filesystem correctly. +- **Docker discovery**: Promtail needed correct Docker SD and relabeling to get `app` and `container` labels. +- **JSON logging**: changing logging format without breaking existing behavior and keeping logs parseable in Loki. 
+ diff --git a/lab7c/docs/dashboard.jpg b/lab7c/docs/dashboard.jpg new file mode 100644 index 0000000000..05843cfab4 Binary files /dev/null and b/lab7c/docs/dashboard.jpg differ diff --git a/lab7c/docs/query.jpg b/lab7c/docs/query.jpg new file mode 100644 index 0000000000..ec2146e77c Binary files /dev/null and b/lab7c/docs/query.jpg differ diff --git a/lab7c/loki/config.yml b/lab7c/loki/config.yml new file mode 100644 index 0000000000..6a9219da04 --- /dev/null +++ b/lab7c/loki/config.yml @@ -0,0 +1,43 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + +common: + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + instance_addr: 127.0.0.1 + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2024-01-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +storage_config: + tsdb_shipper: + active_index_directory: /loki/index + cache_location: /loki/cache + cache_ttl: 24h + filesystem: + directory: /loki/chunks + +limits_config: + retention_period: 168h + +compactor: + working_directory: /loki/compactor + retention_enabled: true + delete_request_store: filesystem + diff --git a/lab7c/promtail/config.yml b/lab7c/promtail/config.yml new file mode 100644 index 0000000000..eb99e73798 --- /dev/null +++ b/lab7c/promtail/config.yml @@ -0,0 +1,29 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + + relabel_configs: + # Container name label (without leading slash) + - source_labels: [__meta_docker_container_name] + target_label: container + regex: "/(.*)" + replacement: "$1" + + # Propagate container labels as Loki labels + - source_labels: [__meta_docker_container_label_app] + target_label: 
app + - source_labels: [__meta_docker_container_label_logging] + target_label: logging + diff --git a/lab8c/docker-compose.yml b/lab8c/docker-compose.yml new file mode 100644 index 0000000000..129dc3058e --- /dev/null +++ b/lab8c/docker-compose.yml @@ -0,0 +1,142 @@ +networks: + logging: + name: logging + +volumes: + loki-data: + grafana-data: + prometheus-data: + +services: + loki: + image: grafana/loki:3.0.0 + command: -config.file=/etc/loki/config.yml + ports: + - "3100:3100" + volumes: + - ./loki/config.yml:/etc/loki/config.yml:ro + - loki-data:/loki + networks: + - logging + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:3100/ready || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s + deploy: + resources: + limits: + cpus: "1.0" + memory: 1G + reservations: + cpus: "0.5" + memory: 512M + + promtail: + image: grafana/promtail:3.0.0 + command: -config.file=/etc/promtail/config.yml + ports: + - "9080:9080" + volumes: + - ./promtail/config.yml:/etc/promtail/config.yml:ro + - /var/log:/var/log:ro + - /var/lib/docker/containers:/var/lib/docker/containers:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + networks: + - logging + depends_on: + - loki + deploy: + resources: + limits: + cpus: "0.5" + memory: 512M + reservations: + cpus: "0.25" + memory: 256M + + prometheus: + image: prom/prometheus:v3.9.0 + command: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.path=/prometheus" + - "--storage.tsdb.retention.time=15d" + - "--storage.tsdb.retention.size=10GB" + - "--web.enable-lifecycle" + ports: + - "9090:9090" + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-data:/prometheus + networks: + - logging + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:9090/-/healthy || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + deploy: + resources: + limits: + cpus: "1.0" + memory: 1G + reservations: + cpus: "0.5" + memory: 512M + 
+ grafana: + image: grafana/grafana:12.3.1 + ports: + - "3000:3000" + environment: + GF_AUTH_ANONYMOUS_ENABLED: "false" + GF_SECURITY_ADMIN_USER: "admin" + GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:-admin}" + volumes: + - grafana-data:/var/lib/grafana + networks: + - logging + depends_on: + - loki + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:3000/api/health || exit 1"] + interval: 15s + timeout: 5s + retries: 5 + start_period: 20s + deploy: + resources: + limits: + cpus: "0.5" + memory: 512M + reservations: + cpus: "0.25" + memory: 256M + + app-python: + image: tsixphoenix/devops-info-python:latest + ports: + - "8000:5000" + networks: + - logging + labels: + logging: "promtail" + app: "devops-python" + environment: + HOST: "0.0.0.0" + PORT: "5000" + healthcheck: + # python:3.13-slim has no curl; use Python + test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:5000/health')"] + interval: 10s + timeout: 5s + retries: 5 + deploy: + resources: + limits: + cpus: "0.5" + memory: 256M + reservations: + cpus: "0.25" + memory: 128M diff --git a/lab8c/docs/LAB08.md b/lab8c/docs/LAB08.md new file mode 100644 index 0000000000..5fab199c43 --- /dev/null +++ b/lab8c/docs/LAB08.md @@ -0,0 +1,93 @@ +# Lab 8 — Metrics & Monitoring with Prometheus + +## 1. Architecture + +The app exposes metrics at `/metrics`. Prometheus scrapes that endpoint every 15 seconds, stores the time series, and Grafana queries Prometheus to draw dashboards. + +Rough flow: + +- **App** → exposes `/metrics` in Prometheus text format +- **Prometheus** → scrapes app, Loki, Grafana, and itself on the same Docker network (`logging`) +- **Grafana** → uses Prometheus as a data source (`http://prometheus:9090`) and shows RED metrics (rate, errors, duration) plus app health + +So: app and other services are scraped by Prometheus; Grafana only talks to Prometheus (and Loki for logs from Lab 7 if you use that stack too). + +## 2. 
Application instrumentation + +The Python app lives in `lab3c/app_python`. I added `prometheus-client==0.23.1` to `requirements.txt` and wired up metrics in `app.py`. + +**What’s exposed:** + +- **http_requests_total** (counter) — total requests with labels `method`, `endpoint`, `status`. Used for request rate and error rate. +- **http_request_duration_seconds** (histogram) — request duration with `method` and `endpoint`. Used for latency percentiles (e.g. p95). +- **http_requests_in_progress** (gauge) — how many requests are in flight right now. +- **devops_info_endpoint_calls** (counter) — per-endpoint usage (e.g. `/`, `/health`). +- **devops_info_system_collection_seconds** (histogram) — how long it takes to gather system info on the root endpoint. + +Paths are normalized to `/`, `/health`, `/metrics`, or `other` so we don’t blow up cardinality. A middleware records the start time, bumps the in-progress gauge, runs the handler, then records duration and status and decrements the gauge. + +The `/metrics` route just returns `generate_latest()` with the right content type so Prometheus can scrape it. + +## 3. Prometheus configuration + +Config is in `lab8c/prometheus/prometheus.yml`. + +- Global scrape interval: 15s. +- Four jobs: **prometheus** (self), **app** (`app-python:5000`, path `/metrics`), **loki** (`loki:3100`), **grafana** (`grafana:3000`). + +Retention (15d, 10GB) is set on the command line in `docker-compose.yml`, not in this file. + +## 4. Dashboard + +The custom dashboard is in `lab8c/docs/grafana-app-dashboard.json`. It has seven panels: + +1. **Request rate** — `sum(rate(http_requests_total[5m])) by (endpoint)` (requests per second per endpoint). +2. **Error rate (5xx)** — `sum(rate(http_requests_total{status=~"5.."}[5m]))`. +3. **Request duration p95** — `histogram_quantile(0.95, ...)` over the duration histogram. +4. **Active requests** — `http_requests_in_progress`. +5. 
**Status code distribution** — `sum by (status) (rate(http_requests_total[5m]))` (pie chart). +6. **Uptime (app)** — `up{job="app"}` (1 = up, 0 = down). +7. **Request duration heatmap** — `rate(http_request_duration_seconds_bucket[5m])`. + +When you import the JSON in Grafana, it will ask for a Prometheus data source; pick the one you added (URL `http://prometheus:9090`). + +## 5. PromQL examples + +- `rate(http_requests_total[5m])` — request rate over the last 5 minutes. +- `sum(rate(http_requests_total[5m])) by (endpoint)` — same, broken down by endpoint. +- `sum(rate(http_requests_total{status=~"5.."}[5m]))` — 5xx error rate (RED: errors). +- `histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))` — 95th percentile latency in seconds (RED: duration). +- `up{job="app"}` — 1 if the app target is up, 0 if down. +- `http_requests_in_progress` — current number of requests being processed. + +## 6. Production-style setup + +In `lab8c/docker-compose.yml`: + +- **Health checks**: Prometheus uses `wget` on `/-/healthy`; the app uses a Python `urllib` check on `/health` (the slim image has no `curl`). Loki and Grafana keep their existing checks. + +- **Resource limits**: Prometheus 1 CPU / 1G; Loki 1 CPU / 1G; Grafana 0.5 CPU / 512M; app 0.5 CPU / 256M. +- **Retention**: 15 days and 10GB via Prometheus command-line flags. +- **Volumes**: `prometheus-data`, `loki-data`, `grafana-data` so data survives restarts. + +## 7. Testing + +- Run the app locally from `lab3c/app_python`, then hit `http://localhost:8000/metrics` — you should see the usual Prometheus text output. +- Run the stack: `cd lab8c && docker compose up -d`. Open http://localhost:9090/targets and check that all targets (prometheus, app, loki, grafana) are UP. Run a few queries in the Prometheus UI (e.g. `up`, `rate(http_requests_total[5m])` after some traffic). +- In Grafana, add the Prometheus data source and import the dashboard from `lab8c/docs/grafana-app-dashboard.json`.
Generate some traffic to the app and confirm the panels show data. + +Screenshots to put in `lab8c/docs/`: + +- `metrics-endpoint.jpg` — browser or terminal output of `/metrics`. +- `prometheus-targets.jpg` — Targets page with all UP. +- `prometheus-query.jpg` — e.g. result of `up` or `rate(http_requests_total[5m])`. +- `grafana-dashboard.jpg` — the custom dashboard with live data. + +## 8. Challenges and fixes + +- **Middleware order**: Metrics need the response status and duration, so the metrics middleware runs the handler first and then records counter/histogram/gauge. The logging middleware is separate and doesn’t affect the numbers. +- **Cardinality**: We only use a few endpoint labels (`/`, `/health`, `/metrics`, `other`) so we don’t get thousands of series from random paths. +- **Docker**: Prometheus config is mounted at `/etc/prometheus/prometheus.yml`. All scrape targets use service names on the `logging` network (`app-python:5000`, `loki:3100`, `grafana:3000`). + +## 9. Metrics vs logs (Lab 7) + +Logs (Loki) answer “what happened” — individual requests, errors, stack traces. Metrics (Prometheus) answer “how much” and “how often” — rates, percentiles, counts. You need both: use metrics for dashboards and alerts, and when something spikes, dig into the logs for context. 
diff --git a/lab8c/docs/grafana-app-dashboard.json b/lab8c/docs/grafana-app-dashboard.json new file mode 100644 index 0000000000..e9e2cb5c54 --- /dev/null +++ b/lab8c/docs/grafana-app-dashboard.json @@ -0,0 +1,161 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { "unit": "reqps", "min": 0 }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }, + "id": 1, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "expr": "sum(rate(http_requests_total[5m])) by (endpoint)", + "legendFormat": "{{endpoint}}", + "refId": "A" + } + ], + "title": "Request Rate", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { "unit": "reqps", "min": 0, "color": { "mode": "palette-classic" } }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }, + "id": 2, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "expr": "sum(rate(http_requests_total{status=~\"5..\"}[5m]))", + "legendFormat": "5xx errors/s", + "refId": "A" + } + ], + "title": "Error Rate (5xx)", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { "unit": "s", "min": 0 }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 }, + "id": 3, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le, endpoint))", + "legendFormat": "p95 {{endpoint}}", + "refId": "A" + } + ], + "title": "Request Duration p95", + "type": "timeseries" + }, + { + "datasource": { "type": 
"prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { "unit": "short", "min": 0 }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 }, + "id": 4, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "expr": "http_requests_in_progress", + "legendFormat": "in progress", + "refId": "A" + } + ], + "title": "Active Requests", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { "unit": "short", "min": 0 }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 }, + "id": 5, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "expr": "sum by (status) (rate(http_requests_total[5m]))", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Status Code Distribution", + "type": "piechart" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { "unit": "short", "min": 0, "max": 1 }, + "overrides": [ + { "matcher": { "id": "byValue", "options": "0" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }, + { "matcher": { "id": "byValue", "options": "1" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] } + ] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 }, + "id": 6, + "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto" }, + "targets": [ + { + "expr": "up{job=\"app\"}", + "legendFormat": "app", + "refId": "A" + } + ], + "title": "Uptime (app)", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { "unit": "s", "min": 0 }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 24 }, + "id": 7, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + 
{ + "expr": "rate(http_request_duration_seconds_bucket[5m])", + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Request Duration Heatmap", + "type": "heatmap" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "style": "dark", + "tags": ["devops", "app-metrics"], + "templating": { "list": [] }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "browser", + "title": "DevOps Info Service - Metrics", + "uid": "devops-app-metrics", + "version": 1, + "weekStart": "" +} diff --git a/lab8c/docs/grafana-dashboard.jpg b/lab8c/docs/grafana-dashboard.jpg new file mode 100644 index 0000000000..b30001e68e Binary files /dev/null and b/lab8c/docs/grafana-dashboard.jpg differ diff --git a/lab8c/docs/metrics-endpoint.jpg b/lab8c/docs/metrics-endpoint.jpg new file mode 100644 index 0000000000..0a5421dd13 Binary files /dev/null and b/lab8c/docs/metrics-endpoint.jpg differ diff --git a/lab8c/docs/prometheus-query.jpg b/lab8c/docs/prometheus-query.jpg new file mode 100644 index 0000000000..226cc594be Binary files /dev/null and b/lab8c/docs/prometheus-query.jpg differ diff --git a/lab8c/docs/prometheus-targets.jpg b/lab8c/docs/prometheus-targets.jpg new file mode 100644 index 0000000000..019cdc5bd1 Binary files /dev/null and b/lab8c/docs/prometheus-targets.jpg differ diff --git a/lab8c/loki/config.yml b/lab8c/loki/config.yml new file mode 100644 index 0000000000..a2d65e6e87 --- /dev/null +++ b/lab8c/loki/config.yml @@ -0,0 +1,42 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + +common: + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + instance_addr: 127.0.0.1 + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2024-01-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +storage_config: + tsdb_shipper: + active_index_directory: /loki/index + cache_location: /loki/cache + 
cache_ttl: 24h + filesystem: + directory: /loki/chunks + +limits_config: + retention_period: 168h + +compactor: + working_directory: /loki/compactor + retention_enabled: true + delete_request_store: filesystem diff --git a/lab8c/prometheus/prometheus.yml b/lab8c/prometheus/prometheus.yml new file mode 100644 index 0000000000..80f4e88f91 --- /dev/null +++ b/lab8c/prometheus/prometheus.yml @@ -0,0 +1,23 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: ["localhost:9090"] + + - job_name: "app" + static_configs: + - targets: ["app-python:5000"] + metrics_path: "/metrics" + + - job_name: "loki" + static_configs: + - targets: ["loki:3100"] + metrics_path: "/metrics" + + - job_name: "grafana" + static_configs: + - targets: ["grafana:3000"] + metrics_path: "/metrics" diff --git a/lab8c/promtail/config.yml b/lab8c/promtail/config.yml new file mode 100644 index 0000000000..395ff7b797 --- /dev/null +++ b/lab8c/promtail/config.yml @@ -0,0 +1,26 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + + relabel_configs: + - source_labels: [__meta_docker_container_name] + target_label: container + regex: "/(.*)" + replacement: "$1" + + - source_labels: [__meta_docker_container_label_app] + target_label: app + - source_labels: [__meta_docker_container_label_logging] + target_label: logging diff --git a/lab9c/k8s/README.md b/lab9c/k8s/README.md new file mode 100644 index 0000000000..723ba24ece --- /dev/null +++ b/lab9c/k8s/README.md @@ -0,0 +1,168 @@ +# Lab 9 — Kubernetes Fundamentals + +This lab is complete for all **required** tasks (bonus not included). 
+ +## 1) Architecture Overview + +I used **kind** (Kubernetes in Docker) because it is quick to run locally on Windows and good for repeatable tests. + +- Deployment: `devops-info-python` (3 replicas) +- Service: `devops-info-python-service` (`NodePort`, `80 -> 5000`, nodePort `30080`) +- Update strategy: RollingUpdate (`maxSurge: 1`, `maxUnavailable: 0`) +- Resources per pod: + - requests: `100m` CPU, `128Mi` memory + - limits: `300m` CPU, `256Mi` memory + +Traffic path used for local verification: +`kubectl port-forward` -> Service -> Pods. + +## 2) Manifest Files + +### `deployment.yml` + +Contains: + +- 3 replicas +- labels/selectors +- image `tsixphoenix/devops-info-python:lab9` +- readiness + liveness probes on `/health` +- resource requests/limits +- non-root security context +- rolling update strategy + +### `service.yml` + +Contains: + +- `type: NodePort` +- selector `app: devops-info-python` +- `port: 80`, `targetPort: 5000`, `nodePort: 30080` + +## 3) Deployment Evidence + +### Cluster setup + +```bash +kubectl cluster-info --context kind-lab9 +kubectl get nodes -o wide +``` + +```text +Kubernetes control plane is running at https://127.0.0.1:... 
+lab9-control-plane Ready control-plane v1.32.2 +``` + +### Deployed resources + +```bash +kubectl get all +kubectl get pods,svc -o wide +kubectl describe deployment devops-info-python +``` + +Observed: + +- deployment `devops-info-python` is `3/3 READY` +- service `devops-info-python-service` is `NodePort 80:30080/TCP` +- probes and rolling strategy are visible in `describe` + +### App is reachable + +```bash +kubectl port-forward service/devops-info-python-service 8080:80 +curl http://127.0.0.1:8080/health +curl http://127.0.0.1:8080/ +``` + +Example health response: + +```text +{"status":"healthy","timestamp":"...","uptime_seconds":...} +``` + +## 4) Operations Performed + +### Deploy + +```bash +kubectl apply -f lab9c/k8s/deployment.yml -f lab9c/k8s/service.yml +kubectl rollout status deployment/devops-info-python +``` + +### Scale to 5 replicas + +```bash +kubectl scale deployment/devops-info-python --replicas=5 +kubectl rollout status deployment/devops-info-python +kubectl get deployment/devops-info-python +``` + +Result: `READY 5/5, AVAILABLE 5`. + +### Rolling update + +Updated `RELEASE_ID` in deployment and applied again: + +```bash +kubectl apply -f lab9c/k8s/deployment.yml +kubectl rollout status deployment/devops-info-python +kubectl rollout history deployment/devops-info-python +``` + +Result: rollout completed successfully, revision history updated. + +### Zero-downtime check during update + +I called `/health` repeatedly during rollout. All responses were HTTP 200. + +### Rollback + +```bash +kubectl rollout undo deployment/devops-info-python +kubectl rollout status deployment/devops-info-python +kubectl rollout history deployment/devops-info-python +``` + +Result: rollback completed and previous revision was restored. + +### Service verification + +```bash +kubectl describe service devops-info-python-service +kubectl get endpoints devops-info-python-service +``` + +Result: service endpoints matched running pod IPs on port 5000. 
+ +## 5) Production Considerations + +- Readiness probe keeps not-ready pods out of traffic. +- Liveness probe restarts broken pods. +- Requests/limits prevent noisy-neighbor issues and help scheduling. +- For real production, I would add: + - namespace isolation + network policies + - HPA + - ConfigMaps/Secrets + - PodDisruptionBudget + - Ingress with TLS +- Observability plan: + - metrics in Prometheus + - logs in Loki/Grafana + - alerts for 5xx rate, restarts, and pod availability + +## 6) Challenges & Solutions + +### No local cluster available initially + +- `kubectl` existed, but no running cluster. +- Fixed by creating a local `kind` cluster (`kind-lab9`). + +### First app rollout failed (CrashLoopBackOff) + +- Cause: old image/tag mismatch. +- Fix: built fresh image `tsixphoenix/devops-info-python:lab9`, loaded it into kind, and used that tag in deployment. + +### NodePort access from host in kind setup + +- Direct node IP access was unreliable in this environment. +- Used `kubectl port-forward` for stable local verification. 
diff --git a/lab9c/k8s/deployment.yml b/lab9c/k8s/deployment.yml new file mode 100644 index 0000000000..48e23cab7a --- /dev/null +++ b/lab9c/k8s/deployment.yml @@ -0,0 +1,67 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: devops-info-python + labels: + app: devops-info-python + app.kubernetes.io/name: devops-info-python + app.kubernetes.io/component: backend +spec: + replicas: 3 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + selector: + matchLabels: + app: devops-info-python + template: + metadata: + labels: + app: devops-info-python + app.kubernetes.io/name: devops-info-python + app.kubernetes.io/component: backend + spec: + containers: + - name: app + image: tsixphoenix/devops-info-python:lab9 + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 5000 + env: + - name: HOST + value: "0.0.0.0" + - name: PORT + value: "5000" + - name: RELEASE_ID + value: "v2" + resources: + requests: + cpu: "100m" + memory: "128Mi" + limits: + cpu: "300m" + memory: "256Mi" + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + failureThreshold: 3 + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 2 + failureThreshold: 3 + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 10001 diff --git a/lab9c/k8s/service.yml b/lab9c/k8s/service.yml new file mode 100644 index 0000000000..9a264cd259 --- /dev/null +++ b/lab9c/k8s/service.yml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + name: devops-info-python-service + labels: + app: devops-info-python + app.kubernetes.io/name: devops-info-python +spec: + type: NodePort + selector: + app: devops-info-python + ports: + - name: http + protocol: TCP + port: 80 + targetPort: 5000 + nodePort: 30080