kubeflow · SanthoshToorpu · May 26, 2026 · May 28, 2026 · May 28, 2026 · May 28, 2026
diff --git a/.actions/.gitkeep b/.actions/.gitkeep
diff --git a/.github/workflows/build-pipeline-base.yaml b/.github/workflows/build-pipeline-base.yaml
@@ -0,0 +1,39 @@
+# Builds the KFP pipeline base image from Dockerfile.pipeline and pushes it to
+# Docker Hub whenever the Dockerfile or this workflow changes, or on demand.
+name: Build Pipeline Base Image
+
+on:
+  push:
+    paths:
+      - 'docs-agent-mcp/pipelines/Dockerfile.pipeline'
+      - '.github/workflows/build-pipeline-base.yaml'
+  workflow_dispatch: # Allows manual trigger
+
+# Only a checkout is needed from GITHUB_TOKEN; Docker Hub auth uses secrets.
+permissions:
+  contents: read
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: docs-agent-mcp/pipelines
+          file: docs-agent-mcp/pipelines/Dockerfile.pipeline
+          push: true
+          # `latest` stays the moving tag; the commit-SHA tag is immutable so
+          # a specific build can be pinned or rolled back to.
+          tags: |
+            ${{ secrets.DOCKERHUB_USERNAME }}/kubeflow-pipeline-base:latest
+            ${{ secrets.DOCKERHUB_USERNAME }}/kubeflow-pipeline-base:${{ github.sha }}
diff --git a/.github/workflows/deploy-agent.yaml b/.github/workflows/deploy-agent.yaml
@@ -0,0 +1,48 @@
+# Applies docs-agent-mcp/manifests/kagent/setup.yaml to the OKE cluster
+# whenever the kagent manifests or this workflow change, or on demand.
+name: Deploy Agent and RemoteMCPServer
+
+# NOTE(review): there is no `branches:` filter, so a push to any branch that
+# touches these paths deploys to the cluster — confirm this is intended.
+on:
+  push:
+    paths:
+      - 'docs-agent-mcp/manifests/kagent/**'
+      - '.github/workflows/deploy-agent.yaml'
+  workflow_dispatch:
+
+# Cluster access uses OCI secrets; GITHUB_TOKEN only needs to read the repo.
+permissions:
+  contents: read
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Configure OCI CLI
+        uses: oracle-actions/configure-oci-cli@v1.1.1
+        with:
+          tenancy: ${{ secrets.OCI_CLI_TENANCY }}
+          user: ${{ secrets.OCI_CLI_USER }}
+          fingerprint: ${{ secrets.OCI_CLI_FINGERPRINT }}
+          key_file_content: ${{ secrets.OCI_CLI_KEY_FILE }}
+          region: ${{ secrets.OCI_CLI_REGION }}
+
+      - name: Setup kubeconfig
+        # Secrets are passed through the environment and quoted in the script
+        # rather than being templated into the shell source.
+        env:
+          OKE_CLUSTER_ID: ${{ secrets.OKE_CLUSTER_ID }}
+          OKE_REGION: ${{ secrets.OCI_CLI_REGION }}
+        run: |
+          set -euo pipefail
+          mkdir -p ~/.kube
+          oci ce cluster create-kubeconfig --cluster-id "$OKE_CLUSTER_ID" --file ~/.kube/config --region "$OKE_REGION" --token-version 2.0.0
+          chmod 600 ~/.kube/config
+
+      - name: Deploy to OKE
+        run: |
+          kubectl apply -f docs-agent-mcp/manifests/kagent/setup.yaml
diff --git a/.github/workflows/deploy-mcp.yaml b/.github/workflows/deploy-mcp.yaml
@@ -0,0 +1,81 @@
+# Builds and pushes the MCP server image, then applies its manifest to the OKE
+# cluster. Runs when the server source, its manifests, or this workflow change.
+name: Deploy MCP Server
+
+# NOTE(review): there is no `branches:` filter, so a push to any branch that
+# touches these paths builds and deploys — confirm this is intended.
+on:
+  push:
+    paths:
+      - 'docs-agent-mcp/mcp-server/**'
+      - 'docs-agent-mcp/manifests/mcp-server/**'
+      - '.github/workflows/deploy-mcp.yaml'
+  workflow_dispatch:
+
+# Registry and cluster access use secrets; GITHUB_TOKEN only reads the repo.
+permissions:
+  contents: read
+
+jobs:
+  build-push-deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: docs-agent-mcp/mcp-server
+          push: true
+          # The commit-SHA tag is what the deploy step below pins to; `latest`
+          # is kept for anything that still pulls the moving tag.
+          tags: |
+            ${{ secrets.DOCKERHUB_USERNAME }}/mcp-kubeflow-docs:latest
+            ${{ secrets.DOCKERHUB_USERNAME }}/mcp-kubeflow-docs:${{ github.sha }}
+
+      - name: Configure OCI CLI
+        uses: oracle-actions/configure-oci-cli@v1.1.1
+        with:
+          tenancy: ${{ secrets.OCI_CLI_TENANCY }}
+          user: ${{ secrets.OCI_CLI_USER }}
+          fingerprint: ${{ secrets.OCI_CLI_FINGERPRINT }}
+          key_file_content: ${{ secrets.OCI_CLI_KEY_FILE }}
+          region: ${{ secrets.OCI_CLI_REGION }}
+
+      - name: Setup kubeconfig
+        # Secrets are passed through the environment and quoted in the script
+        # rather than being templated into the shell source.
+        env:
+          OKE_CLUSTER_ID: ${{ secrets.OKE_CLUSTER_ID }}
+          OKE_REGION: ${{ secrets.OCI_CLI_REGION }}
+        run: |
+          set -euo pipefail
+          mkdir -p ~/.kube
+          oci ce cluster create-kubeconfig --cluster-id "$OKE_CLUSTER_ID" --file ~/.kube/config --region "$OKE_REGION" --token-version 2.0.0
+          chmod 600 ~/.kube/config
+
+      - name: Deploy to OKE
+        env:
+          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
+          IMAGE_TAG: ${{ github.sha }}
+        run: |
+          set -euo pipefail
+          manifest=docs-agent-mcp/manifests/mcp-server/mcp-server.yaml
+
+          # Replace placeholder with actual registry before applying
+          sed -i "s|<YOUR_DOCKERHUB_USERNAME>|${DOCKERHUB_USERNAME}|g" "$manifest"
+
+          # Pin the image built from this commit: re-applying an unchanged
+          # `:latest` reference leaves the applied spec identical, so no new
+          # rollout is triggered. Assumes the manifest references
+          # `mcp-kubeflow-docs:latest` (TODO confirm); otherwise this is a no-op.
+          sed -i "s|/mcp-kubeflow-docs:latest|/mcp-kubeflow-docs:${IMAGE_TAG}|g" "$manifest"
+
+          kubectl apply -f "$manifest"
diff --git a/.github/workflows/run-pipeline.yaml b/.github/workflows/run-pipeline.yaml
@@ -0,0 +1,87 @@
+# Compiles the Kubeflow pipeline and submits a run to the KFP API on the OKE
+# cluster through a local port-forward. Runs weekly and on manual dispatch.
+name: Compile and Run KFP Pipeline
+
+on:
+  workflow_dispatch: # Allows manual trigger from GitHub UI
+  schedule:
+    - cron: '0 0 * * 0' # Run automatically weekly on Sunday at midnight UTC
+
+# Cluster access uses OCI secrets; GITHUB_TOKEN only needs to read the repo.
+permissions:
+  contents: read
+
+jobs:
+  run-pipeline:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install KFP SDK
+        # Quoted so the shell does not treat `>=2.0.0` as an output redirect
+        # (which would install an unconstrained kfp and create a file `=2.0.0`).
+        run: pip install 'kfp>=2.0.0'
+
+      - name: Configure OCI CLI
+        uses: oracle-actions/configure-oci-cli@v1.1.1
+        with:
+          tenancy: ${{ secrets.OCI_CLI_TENANCY }}
+          user: ${{ secrets.OCI_CLI_USER }}
+          fingerprint: ${{ secrets.OCI_CLI_FINGERPRINT }}
+          key_file_content: ${{ secrets.OCI_CLI_KEY_FILE }}
+          region: ${{ secrets.OCI_CLI_REGION }}
+
+      - name: Setup kubeconfig
+        # Secrets are passed through the environment and quoted in the script
+        # rather than being templated into the shell source.
+        env:
+          OKE_CLUSTER_ID: ${{ secrets.OKE_CLUSTER_ID }}
+          OKE_REGION: ${{ secrets.OCI_CLI_REGION }}
+        run: |
+          set -euo pipefail
+          mkdir -p ~/.kube
+          oci ce cluster create-kubeconfig --cluster-id "$OKE_CLUSTER_ID" --file ~/.kube/config --region "$OKE_REGION" --token-version 2.0.0
+          chmod 600 ~/.kube/config
+
+      - name: Compile and Submit Pipeline
+        env:
+          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
+        run: |
+          set -euo pipefail
+          cd docs-agent-mcp/pipelines
+
+          # Replace placeholder with actual Docker Hub username for the base image
+          sed -i "s|<YOUR_DOCKERHUB_USERNAME>|${DOCKERHUB_USERNAME}|g" kubeflow-pipeline.py
+
+          # Start port forward to KFP API in the background
+          kubectl port-forward svc/ml-pipeline 8888:8888 -n kubeflow &
+          PF_PID=$!
+          # Stop the port forward on exit, including when a later command fails
+          trap 'kill "$PF_PID" 2>/dev/null || true' EXIT
+
+          # Wait up to 30s for the forwarded port to accept connections
+          # instead of relying on a fixed sleep
+          pf_ready=false
+          for _ in $(seq 1 30); do
+            if (exec 3<>/dev/tcp/127.0.0.1/8888) 2>/dev/null; then
+              pf_ready=true
+              break
+            fi
+            sleep 1
+          done
+          if [ "$pf_ready" != true ]; then
+            echo "Port forward to svc/ml-pipeline did not become ready" >&2
+            exit 1
+          fi
+
+          # Compile the pipeline (generates yaml)
+          python kubeflow-pipeline.py
+
+          # Submit the pipeline run via KFP SDK
+          python submit_run.py
diff --git a/.gitignore b/.gitignore
@@ -216,5 +216,5 @@ feast_repo/data/
 feast_repo/registry.db
 
 # Compiled pipeline YAML
-pipelines/*.yaml
+**/pipelines/*.yaml
 !pipelines/README.md
diff --git a/README.md b/README.md
@@ -1,5 +1,7 @@
 # Kubeflow Documentation AI Assistant
 
+**Author**: Santhosh Toorpu
+
 [![KEP-867](https://img.shields.io/badge/KEP-867-Documentation%20AI%20Assistant-blue)](https://github.com/kubeflow/community/issues/867)
 
 The official LLM implementation of the Kubeflow Documentation Assistant powered by Retrieval-Augmented Generation (RAG). This repository provides a comprehensive solution for Kubeflow users to search across documentation and get accurate, contextual answers to their queries.
@@ -707,4 +709,32 @@ This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENS
 - [Kubeflow Community](https://github.com/kubeflow/community) for the KEP-867 proposal
 - [Milvus](https://milvus.io/) for the vector database
 - [KServe](https://kserve.github.io/website/) for model serving
-- [vLLM](https://github.com/vllm-project/vllm) for high-performance LLM inference
+- [vLLM](https://github.com/vllm-project/vllm) for high-performance LLM inference
+
+---
+
+## Modern Infrastructure & CI/CD (Kagent & MCP)
+
+The project has evolved to utilize the Model Context Protocol (MCP) and **kagent** to route queries intelligently. The infrastructure is heavily automated using Terraform and GitHub Actions.
+
+### Terraform (`docs-agent-mcp/terraform/`)
+We use Terraform for declarative, reproducible cluster infrastructure on OKE.
+*   **`variables.tf`**: Single source of truth for component versions (Knative, Istio, KServe, etc.) and namespace names.
+*   **`namespaces.tf`**: Manages the `ml-infra` and `docs-agent` namespaces.
+*   **`knative.tf`**: Installs `cert-manager`, Knative Serving (Core & CRDs), Istio base/istiod, and KServe, applying crucial ConfigMap patches for scheduling.
+*   **`kubeflow_pipelines.tf`**: Deploys Kubeflow Pipelines (Standalone) into the `kubeflow` namespace without Istio sidecars (to prevent routing conflicts), persisting `fsGroup` patches for SeaweedFS.
+*   **`milvus.tf`**: Uses the Milvus Operator to deploy a lightweight Milvus Standalone instance strictly scheduled on CPU nodes, reserving GPU nodes purely for LLM inference.
+*   **`kagent.tf`**: Installs official `kagent-crds` and the `kagent` controller via OCI Helm charts, with bundled agents cleanly disabled.
+*   **`istio_policies.tf`**: Implements zero-trust networking, explicitly allowing internal cluster traffic where needed (e.g., KFP Pipeline to Milvus, Kagent to Milvus).
+
+### Pipeline Optimizations (`docs-agent-mcp/pipelines/`)
+The ingestion pipeline was rewritten to maximize efficiency and avoid Kubernetes ephemeral storage eviction:
+*   **Feast Removal**: The pipeline now writes embeddings directly to Milvus using `pymilvus`, dramatically lowering complexity.
+*   **Custom Base Image (`Dockerfile.pipeline`)**: We bake the massive PyTorch library and the Hugging Face `all-mpnet-base-v2` model directly into a custom Docker image. This reduces runtime disk usage from 5.5GB to zero, which fixes OKE pod eviction errors and avoids Hugging Face API rate limiting.
+
+### GitHub Actions CI/CD (`.github/workflows/`)
+The repository is fully automated via CI/CD, securely connecting to the OKE cluster using temporary OCI tokens:
+*   **`build-pipeline-base.yaml`**: Automatically builds and pushes the custom pipeline Docker image when `Dockerfile.pipeline` or the workflow file itself is updated; it can also be triggered manually.
+*   **`run-pipeline.yaml`**: A weekly scheduled (and manually triggerable) workflow that compiles the Kubeflow Pipeline and submits the run to the cluster via KFP API port-forwarding.
+*   **`deploy-mcp.yaml`**: Builds the MCP Server Docker image, pushes it to Docker Hub, and applies the MCP server manifest to the OKE cluster.
+*   **`deploy-agent.yaml`**: Continuously deploys your `kagent` custom resources (`setup.yaml`) to update the AI agent's configuration.
diff --git a/kagent-feast-mcp/README.md → docs-agent-mcp/README.md b/kagent-feast-mcp/README.md → docs-agent-mcp/README.md
@@ -1,48 +1,22 @@
 # kagent Setup for Kubeflow Docs Agent
 
-Deploy the Kubeflow documentation assistant using kagent, MCP, Feast, and Milvus on Kubernetes.
+Deploy the Kubeflow documentation assistant using kagent, MCP, and Milvus on Kubernetes.
 
 ## Architecture
 
-![Kubeflow Docs Agent Architecture](../assets/kagentarch.png)
+* **KAgent UI / Runner:** Chat interface that orchestrates interactions.
+* **MCP Server:** Fetches context from Milvus.
+* **LLM Service:** Qwen-2.5-14B running on KServe/vLLM.
+* **Embeddings Service:** Sentence-Transformers MPNet via Hugging Face TEI.
+* **Milvus:** Direct vector database storage (no Feast dependency).
 
 ## Prerequisites
 
 - Kubernetes cluster with Helm 3.x
 - `kubectl` access to a namespace (examples use `<YOUR_NAMESPACE>`)
 - Python 3.9+
-- A Groq API key (or other OpenAI-compatible LLM provider)
 - Container registry (e.g. Docker Hub) to push the MCP server image
 
-## Placeholders and how to fill them
-
-- **`<YOUR_NAMESPACE>`**
-  - Any Kubernetes namespace you control, for example:
-    - Create it: `kubectl create namespace docs-agent`
-    - Then use `docs-agent` as `<YOUR_NAMESPACE>`.
-  - Used in:
-    - `manifests/mcp-server/mcp-server.yaml` (`metadata.namespace`, Service namespace)
-    - `manifests/kagent/setup.yaml` (all `metadata.namespace` fields and MCP URL)
-    - Istio AuthorizationPolicies under `manifests/istio/`
-    - All `kubectl` and `helm` commands in this README.
-
-- **`<YOUR_DOCKERHUB_USERNAME>`**
-  - Your Docker Hub (or other registry) account name.
-  - Build and push the MCP image from `mcp-server/`:
-    - `docker build -t <YOUR_DOCKERHUB_USERNAME>/mcp-kubeflow-docs:latest .`
-    - `docker push <YOUR_DOCKERHUB_USERNAME>/mcp-kubeflow-docs:latest`
-  - Used in:
-    - `manifests/mcp-server/mcp-server.yaml` (`image: <YOUR_DOCKERHUB_USERNAME>/mcp-kubeflow-docs:latest`)
-
-- **`<YOUR_GROQ_API_KEY>`**
-  - Get it from the Groq console and paste it into:
-    - `manifests/kagent/setup.yaml` → `stringData.GROQ_API_KEY`.
-  - This Secret is referenced by the `ModelConfig` in the same file.
-
-- **Feast / Milvus placeholders in `feast_repo/feature_store.yaml`**
-  - The `feast_repo/` folder is a **legacy/example configuration** and is **not required** for the default pipeline flow. The pipeline code configures Feast and Milvus directly.
-  - You can safely ignore or delete `feast_repo/` unless you explicitly want to experiment with a standalone Feast repo configuration.
-
 ## Deployment Guide
 
 ### Step 1: Install Milvus
@@ -149,7 +123,7 @@ helm install kagent oci://ghcr.io/kagent-dev/kagent/helm/kagent \
   --set tools.querydoc.enabled=false
 ```
 
-Before configuring kagent, make sure you have a valid LLM API key (for example, a Groq API key) and that you have set `<YOUR_GROQ_API_KEY>` in `manifests/kagent/setup.yaml` under `stringData.GROQ_API_KEY`.
+Before configuring kagent, make sure you have the local Qwen model (`qwen-llm`) running on KServe and the stable service (`qwen-llm-stable`) created.
 
 Apply the custom agent configuration:
 

diff --git a/docs-agent-mcp/manifests/istio/allow-kubeflow-to-milvus.yaml b/docs-agent-mcp/manifests/istio/allow-kubeflow-to-milvus.yaml
@@ -0,0 +1,26 @@
+---
+# Istio AuthorizationPolicy in the ml-infra namespace: permits traffic from the
+# kubeflow namespace to the Milvus standalone workload on port 19530.
+apiVersion: security.istio.io/v1beta1
+kind: AuthorizationPolicy
+metadata:
+  name: allow-kubeflow-to-milvus
+  namespace: ml-infra
+spec:
+  action: ALLOW
+  # Workloads this policy attaches to, matched by label
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: milvus
+      component: standalone
+  rules:
+  - from:
+    # Callers must originate in the kubeflow namespace
+    - source:
+        namespaces:
+        - kubeflow
+    to:
+    # Only the listed destination port is covered by this rule
+    - operation:
+        ports:
+        - "19530"
diff --git a/...cp/manifests/istio/allow-milvus-etcd.yaml → ...cp/manifests/istio/allow-milvus-etcd.yaml b/...cp/manifests/istio/allow-milvus-etcd.yaml → ...cp/manifests/istio/allow-milvus-etcd.yaml
@@ -2,7 +2,7 @@ apiVersion: security.istio.io/v1beta1
 kind: AuthorizationPolicy
 metadata:
   name: allow-milvus-etcd
-  namespace: <YOUR_NAMESPACE>
+  namespace: ml-infra
 spec:
   selector:
     matchLabels:

diff --git a/...p/manifests/istio/allow-milvus-minio.yaml → ...p/manifests/istio/allow-milvus-minio.yaml b/...p/manifests/istio/allow-milvus-minio.yaml → ...p/manifests/istio/allow-milvus-minio.yaml
@@ -2,7 +2,7 @@ apiVersion: security.istio.io/v1beta1
 kind: AuthorizationPolicy
 metadata:
   name: allow-milvus-minio
-  namespace: <YOUR_NAMESPACE>
+  namespace: ml-infra
 spec:
   selector:
     matchLabels:

diff --git a/...ifests/istio/allow-milvus-standalone.yaml → ...ifests/istio/allow-milvus-standalone.yaml b/...ifests/istio/allow-milvus-standalone.yaml → ...ifests/istio/allow-milvus-standalone.yaml
@@ -2,7 +2,7 @@ apiVersion: security.istio.io/v1beta1
 kind: AuthorizationPolicy
 metadata:
   name: allow-milvus-standalone
-  namespace: <YOUR_NAMESPACE>
+  namespace: ml-infra
 spec:
   selector:
     matchLabels: