Commit 4e2e7ff

workflows: Add vLLM workflow for LLM inference and production deployment
Add support for deploying and testing the vLLM inference engine and the
vLLM Production Stack. The workflow enables automated testing of vLLM both
as a single-node inference server and through the production stack's
cluster-wide orchestration capabilities, including routing, scaling, and
distributed caching. We start off with CPU support for both.

For the production stack two replicas are requested, so two engines, each
requiring 16 GiB of memory. Given other requirements we ask for at least
64 GiB of RAM for the production stack vLLM CPU test. To get the
production stack up and running you just use:

  make defconfig-vllm-production-stack-cpu KDEVOPS_HOSTS_PREFIX="demo"
  make
  make bringup
  make vllm AV=2

At this point you end up with two replicas serving through the vLLM
production stack router.

vLLM is a high-performance inference engine for large language models,
optimized for throughput and memory efficiency through PagedAttention and
continuous batching. The vLLM Production Stack builds on top of this
engine to provide cluster-wide serving with intelligent request routing,
distributed KV cache sharing via LMCache, unified observability, and
autoscaling across multiple model replicas.

The implementation supports three deployment methods: simple Docker
containers for development, Kubernetes with the official Production Stack
Helm chart (https://github.com/vllm-project/production-stack) for cluster
deployments, and bare metal with systemd for direct hardware access. Each
method shares common configuration through Kconfig while maintaining
deployment-specific optimizations.

Testing can be performed with either CPU-only or GPU-accelerated
inference. CPU testing uses openeuler/vllm-cpu images to validate the
vLLM API and the production stack's orchestration layer without requiring
GPU hardware, making it suitable for CI/CD pipelines and development
workflows.
This enables testing of the router's routing algorithms (round-robin,
session affinity, prefix-aware), service discovery, load balancing, and
API compatibility. GPU testing validates full production scenarios
including LMCache distributed cache sharing, tensor parallelism, and
autoscaling behavior.

The workflow integrates Docker registry mirror support with automatic
detection via 9P mounts. When /mirror/docker is available, the system
automatically configures the Docker daemon's registry-mirrors for
transparent pull-through caching, reducing deployment time without
requiring manual configuration. The detection uses the libvirt gateway IP
to ensure proper routing from containers and minikube pods.

Image configuration follows Docker's native registry-mirrors pattern
rather than rewriting image names. This preserves the original repository
paths like 'openeuler/vllm-cpu:latest' and
'ghcr.io/vllm-project/production-stack/router:latest' while still
benefiting from mirror caching when available.

Status monitoring is provided through:

  make vllm-status
  make vllm-status-simplified

which parse deployment state and present it with context-aware guidance
about next steps. The vllm-quick-test target provides rapid smoke testing
across all configured nodes with timing measurements and proper exit codes
for CI integration. To test an LLM query:

  make vllm-quick-test

We provide basic documentation to help clarify the distinction between
vLLM (the inference engine) and the Production Stack (the orchestration
layer). For more details refer to the official release announcement at:

  https://blog.lmcache.ai/2025-01-21-stack-release/

The long term plan is to scale with mocked engines, and then also real
GPU support, both bare metal and in the cloud, leveraging kdevops's
cloud-agnostic power for any workflow.
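The registry-mirrors pattern described above amounts to a small Docker
daemon config entry on the guest. A minimal sketch of the idea (the
gateway IP and port below are illustrative assumptions, not values taken
from this commit; the actual address is detected at deploy time from the
libvirt gateway):

```json
{
  "registry-mirrors": ["http://192.168.122.1:5000"]
}
```

With an entry like this in /etc/docker/daemon.json, Docker Hub pulls such
as 'openeuler/vllm-cpu:latest' keep their original repository path; the
daemon simply tries the mirror first and falls back to the upstream
registry when the mirror is unavailable.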
Here's an example quick test:

mcgrof@beefy-server /xfs1/mcgrof/vllm/kdevops (git::vllm-v2)$ make vllm-quick-test
========================================
vLLM Quick Test
========================================
Prompt: "kdevops is"
Max tokens: 30
Nodes to test: 1

Testing Baseline node: lpc-vllm
----------------------------------------
Node IP: 192.168.122.170
Starting kubectl port-forward...
Sending request: "kdevops is"
✓ Success! Duration: 15.747292458s

Full response: "kdevops is easily a higher level doctor than your list.
really it depends on as on what doc is what 15 less ifmay its just
personal preferences."

Full JSON response:
{
  "id": "cmpl-2f031a35c5364d3aaf2b9f0007d46ae5",
  "object": "text_completion",
  "created": 1759424719,
  "model": "facebook/opt-125m",
  "choices": [
    {
      "index": 0,
      "text": " easily a higher level doctor than your list.\nreally it depends on as on what doc is what 15 less ifmay its just personal preferences.\n",
      "logprobs": null,
      "finish_reason": "length",
      "stop_reason": null,
      "prompt_logprobs": null
    }
  ],
  "usage": {
    "prompt_tokens": 5,
    "total_tokens": 35,
    "completion_tokens": 30,
    "prompt_tokens_details": null
  },
  "kv_transfer_params": null
}
========================================
All tests passed!
========================================

Then for a synthetic benchmark:

  make vllm-benchmark

You should end up with results in workflows/vllm/results/html/

I have put demo results of a synthetic run and also a real workload on a
virtual machine with 64 vCPUs and 64 GiB of DRAM here:

  https://github.com/mcgrof/demo-vllm-benchmark

Generated-by: Claude AI
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
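The quick-test JSON above follows the OpenAI-compatible completions schema
that vLLM serves. As a minimal sketch of consuming such a response (the
payload here is an abridged copy of the capture above, trimmed for
brevity), extracting the generated text and token usage looks like:

```python
import json

# Abridged copy of the /v1/completions response captured by
# `make vllm-quick-test` above (some fields trimmed for brevity).
raw = """
{
  "id": "cmpl-2f031a35c5364d3aaf2b9f0007d46ae5",
  "object": "text_completion",
  "model": "facebook/opt-125m",
  "choices": [
    {
      "index": 0,
      "text": " easily a higher level doctor than your list.",
      "finish_reason": "length"
    }
  ],
  "usage": {"prompt_tokens": 5, "total_tokens": 35, "completion_tokens": 30}
}
"""

resp = json.loads(raw)
completion = resp["choices"][0]["text"]       # the generated continuation
finish = resp["choices"][0]["finish_reason"]  # "length" means max_tokens was hit
tokens = resp["usage"]["completion_tokens"]   # tokens actually generated
print(f"{resp['model']}: {tokens} tokens, finish_reason={finish}")
```

A finish_reason of "length" confirms the engine stopped because the
requested 30-token budget was exhausted, not because the model emitted a
stop token.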
1 parent 343fbdf commit 4e2e7ff

40 files changed: +5076 / -2 lines

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -91,6 +91,7 @@ playbooks/roles/linux-mirror/linux-mirror-systemd/mirrors.yaml
 workflows/selftests/results/
 workflows/minio/results/
+workflows/vllm/results/
 workflows/linux/refs/default/Kconfig.linus
 workflows/linux/refs/default/Kconfig.next

PROMPTS.md

Lines changed: 31 additions & 0 deletions
@@ -5,6 +5,37 @@ and example commits and their outcomes, and notes by users of the AI agent
 grading. It is also instructive for humans to learn how to use generative
 AI to easily extend kdevops for their own needs.
 
+## Adding new AI/ML workflows
+
+### Adding vLLM Production Stack workflow
+
+**Prompt:**
+I have placed in ../production-stack/ the https://github.com/vllm-project/production-stack.git
+project. Familiarize yourself with it and then add support for as a new
+I workflow, other than Milvus AI on kdevops.
+
+**AI:** Claude Code
+**Commit:** TBD
+**Result:** Tough
+**Grading:** 50%
+
+**Notes:**
+
+Adding just vllm was fairly trivial. However the production stack project
+lacked any clear documentation about what docker container image could be
+used for CPU support, and all docker container images had one or another
+obscure issue.
+
+So while getting the vllm and the production stack generally supported was
+fairly trivial, the lack of proper docs made it hard to figure out exactly
+what to do.
+
+Fortunately the implementation correctly identified the need for Kubernetes
+orchestration, included support for various deployment options (Minikube vs
+existing clusters), and integrated monitoring with Prometheus/Grafana. The
+workflow supports A/B testing, multiple routing algorithms, and performance
+benchmarking capabilities.
+
 ## Extending existing Linux kernel selftests
 
 Below are a set of example prompts / result commits of extending existing

README.md

Lines changed: 24 additions & 2 deletions
@@ -285,10 +285,30 @@ For detailed documentation and demo results, see the
 
 ### AI workflow
 
-kdevops now supports AI/ML system benchmarking, starting with vector databases
-like Milvus. Similar to fstests, you can quickly set up and benchmark AI
+kdevops now supports AI/ML system benchmarking, including vector databases
+and LLM serving infrastructure. Similar to fstests, you can quickly set up and benchmark AI
 infrastructure with just a few commands:
 
+#### vLLM Production Stack
+Deploy and benchmark large language models using the vLLM Production Stack:
+
+```bash
+make defconfig-vllm
+make bringup
+make vllm
+make vllm-benchmark
+```
+
+The vLLM workflow provides:
+- **Production LLM Deployment**: Kubernetes-based vLLM serving with Helm
+- **Request Routing**: Multiple algorithms (round-robin, session affinity, prefix-aware)
+- **Observability**: Integrated Prometheus and Grafana monitoring
+- **Performance Features**: Prefix caching, chunked prefill, KV cache offloading
+- **A/B Testing**: Compare different model configurations
+
+#### Milvus Vector Database
+Benchmark vector database performance for AI applications:
+
 ```bash
 make defconfig-ai-milvus-docker
 make bringup
@@ -303,6 +323,7 @@ The AI workflow supports:
 - **Demo Results**: View actual benchmark HTML reports and performance visualizations
 
 For details and demo results, see:
+- [kdevops vLLM workflow documentation](workflows/vllm/)
 - [kdevops AI workflow documentation](docs/ai/README.md)
 - [Milvus performance demo results](docs/ai/vector-databases/milvus.md#demo-results)
 
@@ -358,6 +379,7 @@ want to just use the kernel that comes with your Linux distribution.
 * [kdevops selftests docs](docs/selftests.md)
 * [kdevops reboot-limit docs](docs/reboot-limit.md)
 * [kdevops AI workflow docs](docs/ai/README.md)
+* [kdevops vLLM workflow docs](workflows/vllm/)
 
 # kdevops general documentation

defconfigs/vllm

Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
+# vLLM configuration with Latest Docker deployment
+CONFIG_KDEVOPS_FIRST_RUN=n
+CONFIG_LIBVIRT=y
+CONFIG_LIBVIRT_VCPUS=8
+CONFIG_LIBVIRT_MEM_32G=y
+
+# Workflow configuration
+CONFIG_WORKFLOWS=y
+CONFIG_WORKFLOWS_TESTS=y
+CONFIG_WORKFLOWS_LINUX_TESTS=y
+CONFIG_WORKFLOWS_DEDICATED_WORKFLOW=y
+CONFIG_KDEVOPS_WORKFLOW_DEDICATE_VLLM=y
+
+# vLLM specific configuration
+CONFIG_VLLM_LATEST_DOCKER=y
+CONFIG_VLLM_K8S_MINIKUBE=y
+CONFIG_VLLM_HELM_RELEASE_NAME="vllm"
+CONFIG_VLLM_HELM_NAMESPACE="vllm-system"
+CONFIG_VLLM_MODEL_URL="facebook/opt-125m"
+CONFIG_VLLM_MODEL_NAME="opt-125m"
+CONFIG_VLLM_REPLICA_COUNT=1
+CONFIG_VLLM_USE_CPU_INFERENCE=y
+CONFIG_VLLM_REQUEST_CPU=8
+CONFIG_VLLM_REQUEST_MEMORY="32Gi"
+CONFIG_VLLM_REQUEST_GPU=0
+CONFIG_VLLM_MAX_MODEL_LEN=2048
+CONFIG_VLLM_DTYPE="float32"
+CONFIG_VLLM_TENSOR_PARALLEL_SIZE=1
+CONFIG_VLLM_ROUTER_ENABLED=y
+CONFIG_VLLM_ROUTER_ROUND_ROBIN=y
+CONFIG_VLLM_OBSERVABILITY_ENABLED=y
+CONFIG_VLLM_GRAFANA_PORT=3000
+CONFIG_VLLM_PROMETHEUS_PORT=9090
+CONFIG_VLLM_API_PORT=8000
+CONFIG_VLLM_API_KEY=""
+CONFIG_VLLM_HF_TOKEN=""
+CONFIG_VLLM_BENCHMARK_ENABLED=y
+CONFIG_VLLM_BENCHMARK_DURATION=60
+CONFIG_VLLM_BENCHMARK_CONCURRENT_USERS=10
+CONFIG_VLLM_BENCHMARK_RESULTS_DIR="/data/vllm-benchmark"
Lines changed: 45 additions & 0 deletions

@@ -0,0 +1,45 @@
+# vLLM Production Stack configuration with official Helm chart
+CONFIG_KDEVOPS_FIRST_RUN=n
+CONFIG_LIBVIRT=y
+CONFIG_LIBVIRT_VCPUS=64
+CONFIG_LIBVIRT_MEM_64G=y
+
+# Workflow configuration
+CONFIG_WORKFLOWS=y
+CONFIG_WORKFLOWS_TESTS=y
+CONFIG_WORKFLOWS_LINUX_TESTS=y
+CONFIG_WORKFLOWS_DEDICATED_WORKFLOW=y
+CONFIG_KDEVOPS_WORKFLOW_DEDICATE_VLLM=y
+
+# vLLM Production Stack specific configuration
+CONFIG_VLLM_PRODUCTION_STACK=y
+CONFIG_VLLM_K8S_MINIKUBE=y
+CONFIG_VLLM_VERSION_LATEST=y
+CONFIG_VLLM_HELM_RELEASE_NAME="vllm-prod"
+CONFIG_VLLM_HELM_NAMESPACE="vllm-system"
+CONFIG_VLLM_PROD_STACK_REPO="https://vllm-project.github.io/production-stack"
+CONFIG_VLLM_PROD_STACK_CHART_VERSION="latest"
+CONFIG_VLLM_PROD_STACK_ROUTER_IMAGE="ghcr.io/vllm-project/production-stack/router"
+CONFIG_VLLM_PROD_STACK_ROUTER_TAG="latest"
+CONFIG_VLLM_PROD_STACK_ENABLE_MONITORING=y
+CONFIG_VLLM_PROD_STACK_ENABLE_AUTOSCALING=n
+CONFIG_VLLM_MODEL_URL="facebook/opt-125m"
+CONFIG_VLLM_MODEL_NAME="opt-125m"
+CONFIG_VLLM_REPLICA_COUNT=2
+CONFIG_VLLM_USE_CPU_INFERENCE=y
+CONFIG_VLLM_REQUEST_CPU=8
+CONFIG_VLLM_REQUEST_MEMORY="20Gi"
+CONFIG_VLLM_REQUEST_GPU=0
+CONFIG_VLLM_MAX_MODEL_LEN=2048
+CONFIG_VLLM_DTYPE="float32"
+CONFIG_VLLM_TENSOR_PARALLEL_SIZE=1
+CONFIG_VLLM_ROUTER_ENABLED=y
+CONFIG_VLLM_ROUTER_ROUND_ROBIN=y
+CONFIG_VLLM_OBSERVABILITY_ENABLED=y
+CONFIG_VLLM_GRAFANA_PORT=3000
+CONFIG_VLLM_PROMETHEUS_PORT=9090
+CONFIG_VLLM_API_PORT=8000
+CONFIG_VLLM_BENCHMARK_ENABLED=y
+CONFIG_VLLM_BENCHMARK_DURATION=60
+CONFIG_VLLM_BENCHMARK_CONCURRENT_USERS=10
+CONFIG_VLLM_BENCHMARK_RESULTS_DIR="/data/vllm-benchmark"

defconfigs/vllm-quick-test

Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
+# vLLM Production Stack quick test configuration (CI/demo)
+CONFIG_KDEVOPS_FIRST_RUN=n
+CONFIG_LIBVIRT=y
+CONFIG_LIBVIRT_VCPUS=4
+CONFIG_LIBVIRT_MEM_16G=y
+
+# Workflow configuration
+CONFIG_WORKFLOWS=y
+CONFIG_WORKFLOWS_TESTS=y
+CONFIG_WORKFLOWS_LINUX_TESTS=y
+CONFIG_WORKFLOWS_DEDICATED_WORKFLOW=y
+CONFIG_KDEVOPS_WORKFLOW_DEDICATE_VLLM=y
+
+# vLLM specific configuration - Quick test mode
+CONFIG_VLLM_PRODUCTION_STACK=y
+CONFIG_VLLM_K8S_MINIKUBE=y
+CONFIG_VLLM_HELM_RELEASE_NAME="vllm"
+CONFIG_VLLM_HELM_NAMESPACE="vllm-system"
+CONFIG_VLLM_MODEL_URL="facebook/opt-125m"
+CONFIG_VLLM_MODEL_NAME="opt-125m"
+CONFIG_VLLM_REPLICA_COUNT=1
+CONFIG_VLLM_REQUEST_CPU=2
+CONFIG_VLLM_REQUEST_MEMORY="8Gi"
+CONFIG_VLLM_REQUEST_GPU=0
+CONFIG_VLLM_GPU_TYPE=""
+CONFIG_VLLM_MAX_MODEL_LEN=512
+CONFIG_VLLM_DTYPE="auto"
+CONFIG_VLLM_GPU_MEMORY_UTILIZATION="0.9"
+CONFIG_VLLM_TENSOR_PARALLEL_SIZE=1
+CONFIG_VLLM_ROUTER_ENABLED=y
+CONFIG_VLLM_ROUTER_ROUND_ROBIN=y
+CONFIG_VLLM_OBSERVABILITY_ENABLED=y
+CONFIG_VLLM_GRAFANA_PORT=3000
+CONFIG_VLLM_PROMETHEUS_PORT=9090
+CONFIG_VLLM_API_PORT=8000
+CONFIG_VLLM_API_KEY=""
+CONFIG_VLLM_HF_TOKEN=""
+CONFIG_VLLM_QUICK_TEST=y
+CONFIG_VLLM_BENCHMARK_ENABLED=y
+CONFIG_VLLM_BENCHMARK_DURATION=30
+CONFIG_VLLM_BENCHMARK_CONCURRENT_USERS=5
+CONFIG_VLLM_BENCHMARK_RESULTS_DIR="/data/vllm-benchmark"

kconfigs/Kconfig.libvirt

Lines changed: 3 additions & 0 deletions
@@ -335,6 +335,7 @@ config LIBVIRT_LARGE_CPU
 
 choice
 	prompt "Guest vCPUs"
+	default LIBVIRT_VCPUS_64 if KDEVOPS_WORKFLOW_DEDICATE_VLLM
 	default LIBVIRT_VCPUS_8
 
 config LIBVIRT_VCPUS_2
@@ -408,6 +409,7 @@ config LIBVIRT_VCPUS_COUNT
 
 choice
 	prompt "How much GiB memory to use per guest"
+	default LIBVIRT_MEM_64G if KDEVOPS_WORKFLOW_DEDICATE_VLLM
 	default LIBVIRT_MEM_4G
 
 config LIBVIRT_MEM_2G
@@ -478,6 +480,7 @@ config LIBVIRT_MEM_MB
 config LIBVIRT_IMAGE_SIZE
 	string "VM image size"
 	output yaml
+	default "100G" if KDEVOPS_WORKFLOW_DEDICATE_VLLM
 	default "20G"
 	depends on GUESTFS
 	help

kconfigs/workflows/Kconfig

Lines changed: 28 additions & 0 deletions
@@ -233,6 +233,14 @@ config KDEVOPS_WORKFLOW_DEDICATE_AI
 	  This will dedicate your configuration to running only the
 	  AI workflow for vector database performance testing.
 
+config KDEVOPS_WORKFLOW_DEDICATE_VLLM
+	bool "vllm"
+	select KDEVOPS_WORKFLOW_ENABLE_VLLM
+	help
+	  This will dedicate your configuration to running only the
+	  vLLM Production Stack workflow for deploying and benchmarking
+	  large language models with Kubernetes.
+
 config KDEVOPS_WORKFLOW_DEDICATE_MINIO
 	bool "minio"
 	select KDEVOPS_WORKFLOW_ENABLE_MINIO
@@ -265,6 +273,7 @@ config KDEVOPS_WORKFLOW_NAME
 	default "mmtests" if KDEVOPS_WORKFLOW_DEDICATE_MMTESTS
 	default "fio-tests" if KDEVOPS_WORKFLOW_DEDICATE_FIO_TESTS
 	default "ai" if KDEVOPS_WORKFLOW_DEDICATE_AI
+	default "vllm" if KDEVOPS_WORKFLOW_DEDICATE_VLLM
 	default "minio" if KDEVOPS_WORKFLOW_DEDICATE_MINIO
 	default "build-linux" if KDEVOPS_WORKFLOW_DEDICATE_BUILD_LINUX
 
@@ -395,6 +404,14 @@ config KDEVOPS_WORKFLOW_NOT_DEDICATED_ENABLE_AI
 	  Select this option if you want to provision AI benchmarks on a
 	  single target node for by-hand testing.
 
+config KDEVOPS_WORKFLOW_NOT_DEDICATED_ENABLE_VLLM
+	bool "vllm"
+	select KDEVOPS_WORKFLOW_ENABLE_VLLM
+	depends on LIBVIRT || TERRAFORM_PRIVATE_NET
+	help
+	  Select this option if you want to provision vLLM Production Stack
+	  on a single target node for by-hand testing and development.
+
 endif # !WORKFLOWS_DEDICATED_WORKFLOW
 
 config KDEVOPS_WORKFLOW_ENABLE_FSTESTS
@@ -530,6 +547,17 @@ source "workflows/ai/Kconfig"
 endmenu
 endif # KDEVOPS_WORKFLOW_ENABLE_AI
 
+config KDEVOPS_WORKFLOW_ENABLE_VLLM
+	bool
+	output yaml
+	default y if KDEVOPS_WORKFLOW_NOT_DEDICATED_ENABLE_VLLM || KDEVOPS_WORKFLOW_DEDICATE_VLLM
+
+if KDEVOPS_WORKFLOW_ENABLE_VLLM
+menu "Configure and run vLLM Production Stack"
+source "workflows/vllm/Kconfig"
+endmenu
+endif # KDEVOPS_WORKFLOW_ENABLE_VLLM
+
 config KDEVOPS_WORKFLOW_ENABLE_MINIO
 	bool
 	output yaml

playbooks/roles/gen_hosts/defaults/main.yml

Lines changed: 1 addition & 0 deletions
@@ -30,6 +30,7 @@ kdevops_workflow_enable_sysbench: false
 kdevops_workflow_enable_fio_tests: false
 kdevops_workflow_enable_mmtests: false
 kdevops_workflow_enable_ai: false
+kdevops_workflow_enable_vllm: false
 workflows_reboot_limit: false
 kdevops_use_declared_hosts: false

playbooks/roles/gen_hosts/tasks/main.yml

Lines changed: 15 additions & 0 deletions
@@ -270,6 +270,21 @@
     - ansible_hosts_template.stat.exists
     - not kdevops_use_declared_hosts|default(false)|bool
 
+- name: Generate the Ansible hosts file for a dedicated vLLM setup
+  tags: ['hosts']
+  ansible.builtin.template:
+    src: "{{ kdevops_hosts_template }}"
+    dest: "{{ ansible_cfg_inventory }}"
+    force: true
+    trim_blocks: True
+    lstrip_blocks: True
+    mode: '0644'
+  when:
+    - kdevops_workflows_dedicated_workflow
+    - kdevops_workflow_enable_vllm|default(false)|bool
+    - ansible_hosts_template.stat.exists
+    - not kdevops_use_declared_hosts|default(false)|bool
+
 - name: Verify if final host file exists
   ansible.builtin.stat:
     path: "{{ ansible_cfg_inventory }}"
