diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index 8febb7bf39..b2181abe6e 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -29,7 +29,7 @@
 #
 
 # Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:26.04-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:26.05-py3-min
 
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_REPO_ORGANIZATION=http://github.com/triton-inference-server
diff --git a/README.md b/README.md
index b2f3d818b0..6fc36283f3 100644
--- a/README.md
+++ b/README.md
@@ -27,11 +27,6 @@
 -->
 [![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)
 
->[!WARNING]
->You are currently on the `main` branch which tracks under-development progress
->towards the next release. The current release is version [2.68.0](https://github.com/triton-inference-server/server/releases/latest)
->and corresponds to the 26.04 container release on NVIDIA GPU Cloud (NGC).
-
 # Triton Inference Server
 
 Triton Inference Server is an open source inference serving software that
@@ -90,16 +85,16 @@ Inference Server with the
 
 ```bash
 # Step 1: Create the example model repository
-git clone -b r26.04 https://github.com/triton-inference-server/server.git
+git clone -b r26.05 https://github.com/triton-inference-server/server.git
 cd server/docs/examples
 ./fetch_models.sh
 
 # Step 2: Launch triton from the NGC Triton container
-docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:26.04-py3 tritonserver --model-repository=/models --model-control-mode explicit --load-model densenet_onnx
+docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:26.05-py3 tritonserver --model-repository=/models --model-control-mode explicit --load-model densenet_onnx
 
 # Step 3: Sending an Inference Request
 # In a separate console, launch the image_client example from the NGC Triton SDK container
-docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:26.04-py3-sdk /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
+docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:26.05-py3-sdk /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
 
 # Inference should return the following
 Image '/workspace/images/mug.jpg':
diff --git a/TRITON_VERSION b/TRITON_VERSION
index 6a166a54c5..a740b92f5e 100644
--- a/TRITON_VERSION
+++ b/TRITON_VERSION
@@ -1 +1 @@
-2.70.0dev
+2.69.0
diff --git a/build.py b/build.py
index 9baaedbfe3..e80d62f399 100755
--- a/build.py
+++ b/build.py
@@ -71,8 +71,8 @@
 #
 
 DEFAULT_TRITON_VERSION_MAP = {
-    "release_version": "2.70.0dev",
-    "triton_container_version": "26.06dev",
-    "upstream_container_version": "26.04",
+    "release_version": "2.69.0",
+    "triton_container_version": "26.05",
+    "upstream_container_version": "26.05",
     "ort_version": "1.24.4",
     "ort_openvino_version": "2026.1.0",
diff --git a/deploy/aws/values.yaml b/deploy/aws/values.yaml
index a140611d4f..c94f832aa8 100644
--- a/deploy/aws/values.yaml
+++ b/deploy/aws/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:26.04-py3
+  imageName: nvcr.io/nvidia/tritonserver:26.05-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://triton-inference-server-repository/model_repository
   numGpus: 1
diff --git a/deploy/fleetcommand/Chart.yaml b/deploy/fleetcommand/Chart.yaml
index e96abde6f5..bd360e7955 100644
--- a/deploy/fleetcommand/Chart.yaml
+++ b/deploy/fleetcommand/Chart.yaml
@@ -26,7 +26,7 @@
 
 apiVersion: v1
 # appVersion is the Triton version; update when changing release
-appVersion: 2.68.0
+appVersion: 2.69.0
 description: Triton Inference Server (Fleet Command)
 name: triton-inference-server
 # version is the Chart version; update when changing anything in the chart
diff --git a/deploy/fleetcommand/values.yaml b/deploy/fleetcommand/values.yaml
index 6ecf3b351d..b911db4afd 100644
--- a/deploy/fleetcommand/values.yaml
+++ b/deploy/fleetcommand/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:26.04-py3
+  imageName: nvcr.io/nvidia/tritonserver:26.05-py3
   pullPolicy: IfNotPresent
   numGpus: 1
   serverCommand: tritonserver
@@ -47,13 +47,13 @@ image:
     #
     # To set model control mode, uncomment and configure below
     # TODO: Fix the following url, it is invalid
-    # See https://github.com/triton-inference-server/server/blob/r26.04/docs/user_guide/model_management.md
+    # See https://github.com/triton-inference-server/server/blob/r26.05/docs/user_guide/model_management.md
     #  for more details
     #- --model-control-mode=explicit|poll|none
     #
     # Additional server args
     #
-    # see https://github.com/triton-inference-server/server/blob/r26.04/README.md
+    # see https://github.com/triton-inference-server/server/blob/r26.05/README.md
     #  for more details
 
 service:
diff --git a/deploy/gcp/values.yaml b/deploy/gcp/values.yaml
index c9900d68a0..9784c9d252 100644
--- a/deploy/gcp/values.yaml
+++ b/deploy/gcp/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:26.04-py3
+  imageName: nvcr.io/nvidia/tritonserver:26.05-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: gs://triton-inference-server-repository/model_repository
   numGpus: 1
diff --git a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
index a732a1da20..0e1347f4fd 100644
--- a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
+++ b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
@@ -33,7 +33,7 @@ metadata:
   namespace: default
 spec:
   containers:
-  - image: nvcr.io/nvidia/tritonserver:26.04-py3-sdk
+  - image: nvcr.io/nvidia/tritonserver:26.05-py3-sdk
     imagePullPolicy: Always
     name: nv-triton-client
     securityContext:
diff --git a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
index 8e00967f88..4b4468d89d 100755
--- a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
+++ b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
@@ -28,8 +28,8 @@
 export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/')
 export APP_NAME=tritonserver
-export MAJOR_VERSION=2.67
-export MINOR_VERSION=2.68.0
-export NGC_VERSION=26.04-py3
+export MAJOR_VERSION=2.69
+export MINOR_VERSION=2.69.0
+export NGC_VERSION=26.05-py3
 
 docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION
 
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
index 18f83cca68..d150f0e8d7 100644
--- a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
@@ -28,4 +28,4 @@ apiVersion: v1
-appVersion: "2.68"
+appVersion: "2.69"
 description: Triton Inference Server
 name: triton-inference-server
-version: 2.68.0
+version: 2.69.0
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
index 8cfd8171b8..362107e71a 100644
--- a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
@@ -31,14 +31,14 @@ maxReplicaCount: 3
 tritonProtocol: HTTP
 # HPA GPU utilization autoscaling target
 HPATargetAverageValue: 85
-modelRepositoryPath: gs://triton_sample_models/26.04
-publishedVersion: '2.68.0'
+modelRepositoryPath: gs://triton_sample_models/26.05
+publishedVersion: '2.69.0'
 gcpMarketplace: true
 
 image:
   registry: gcr.io
   repository: nvidia-ngc-public/tritonserver
-  tag: 26.04-py3
+  tag: 26.05-py3
   pullPolicy: IfNotPresent
   # modify the model repository here to match your GCP storage bucket
   numGpus: 1
diff --git a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
index 7583068bc6..4c312c9880 100644
--- a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.68.0'
+  publishedVersion: '2.69.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.
diff --git a/deploy/gke-marketplace-app/server-deployer/schema.yaml b/deploy/gke-marketplace-app/server-deployer/schema.yaml
index 457e13d19d..ccf3b157c4 100644
--- a/deploy/gke-marketplace-app/server-deployer/schema.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.68.0'
+  publishedVersion: '2.69.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.
@@ -89,7 +89,7 @@ properties:
   modelRepositoryPath:
     type: string
     title: Bucket where models are stored. Please make sure the user/service account to create the GKE app has permission to this GCS bucket. Read Triton documentation on configs and formatting details, supporting TensorRT, TensorFlow, Pytorch, Onnx ... etc.
-    default: gs://triton_sample_models/26.04
+    default: gs://triton_sample_models/26.05
   image.ldPreloadPath:
     type: string
     title: Leave this empty by default. Triton allows users to create custom layers for backend such as TensorRT plugin, the compiled shared library must be provided via LD_PRELOAD environment variable.
diff --git a/deploy/gke-marketplace-app/trt-engine/README.md b/deploy/gke-marketplace-app/trt-engine/README.md
index 0200987e6f..fff7466da4 100644
--- a/deploy/gke-marketplace-app/trt-engine/README.md
+++ b/deploy/gke-marketplace-app/trt-engine/README.md
@@ -33,7 +33,7 @@
 ```
 docker run --gpus all -it --network host \
     --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
-    -v ~:/scripts nvcr.io/nvidia/tensorrt:26.04-py3
+    -v ~:/scripts nvcr.io/nvidia/tensorrt:26.05-py3
 
 pip install onnx six torch tf2onnx tensorflow
 
@@ -57,7 +57,7 @@ mkdir -p engines
 
 python3 builder.py -m models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/model.ckpt -o engines/bert_large_int8_bs1_s128.engine -b 1 -s 128 -c models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/ -v models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/vocab.txt --int8 --fp16 --strict --calib-num 1 -iln -imh
 
-gsutil cp bert_large_int8_bs1_s128.engine gs://triton_sample_models/26.04/bert/1/model.plan
+gsutil cp bert_large_int8_bs1_s128.engine gs://triton_sample_models/26.05/bert/1/model.plan
 ```
 
-For each Triton upgrade, container version used to generate the model, and the model path in GCS `gs://triton_sample_models/26.04/` should be updated accordingly with the correct version.
+For each Triton upgrade, the container version used to generate the model and the model path in GCS (`gs://triton_sample_models/26.05/`) should both be updated to the matching version.
diff --git a/deploy/k8s-onprem/values.yaml b/deploy/k8s-onprem/values.yaml
index 4dc4bf2c15..3d788f3f17 100644
--- a/deploy/k8s-onprem/values.yaml
+++ b/deploy/k8s-onprem/values.yaml
@@ -30,7 +30,7 @@ tags:
   openshift: false
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:26.04-py3
+  imageName: nvcr.io/nvidia/tritonserver:26.05-py3
   pullPolicy: IfNotPresent
   modelRepositoryServer: < Replace with the IP Address of your file server >
   modelRepositoryPath: /srv/models
diff --git a/deploy/oci/values.yaml b/deploy/oci/values.yaml
index f8867069c1..df5d60066d 100644
--- a/deploy/oci/values.yaml
+++ b/deploy/oci/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:26.04-py3
+  imageName: nvcr.io/nvidia/tritonserver:26.05-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://https://<OCI_NAMESPACE>.compat.objectstorage.<OCI_REGION>.oraclecloud.com:443/triton-inference-server-repository
   numGpus: 1
diff --git a/docs/customization_guide/compose.md b/docs/customization_guide/compose.md
index e88f0c90ba..e922d27fbe 100644
--- a/docs/customization_guide/compose.md
+++ b/docs/customization_guide/compose.md
@@ -46,8 +46,8 @@ The `compose.py` script can be found in the
 Simply clone the repository and run `compose.py` to create a custom container.
 Note: Created container version will depend on the branch that was cloned.
 For example branch
- [r26.04](https://github.com/triton-inference-server/server/tree/r26.04)
-should be used to create a image based on the NGC 26.04 Triton release.
+ [r26.05](https://github.com/triton-inference-server/server/tree/r26.05)
+should be used to create an image based on the NGC 26.05 Triton release.
 
 `compose.py` provides `--backend`, `--repoagent` options that allow you to
 specify which backends and repository agents to include in the custom image.
@@ -78,20 +78,20 @@ For example, running
 ```
 python3 compose.py --backend pytorch --repoagent checksum
 ```
-on branch [r26.04](https://github.com/triton-inference-server/server/tree/r26.04) pulls:
-- `min` container `nvcr.io/nvidia/tritonserver:26.04-py3-min`
-- `full` container `nvcr.io/nvidia/tritonserver:26.04-py3`
+on branch [r26.05](https://github.com/triton-inference-server/server/tree/r26.05) pulls:
+- `min` container `nvcr.io/nvidia/tritonserver:26.05-py3-min`
+- `full` container `nvcr.io/nvidia/tritonserver:26.05-py3`
 
 Alternatively, users can specify the version of Triton container to pull from
 any branch by either:
 1. Adding flag `--container-version <container version>` to branch
 ```
-python3 compose.py --backend pytorch --repoagent checksum --container-version 26.04
+python3 compose.py --backend pytorch --repoagent checksum --container-version 26.05
 ```
 2. Specifying `--image min,<min container image name> --image full,<full container image name>`.
    The user is responsible for specifying compatible `min` and `full` containers.
 ```
-python3 compose.py --backend pytorch --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:26.04-py3-min --image full,nvcr.io/nvidia/tritonserver:26.04-py3
+python3 compose.py --backend pytorch --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:26.05-py3-min --image full,nvcr.io/nvidia/tritonserver:26.05-py3
 ```
 Method 1 and 2 will result in the same composed container. Furthermore,
 `--image` flag overrides the `--container-version` flag when both are specified.
@@ -102,8 +102,8 @@ Note:
 2. vLLM and TensorRT-LLM backends are currently not supported backends for
 `compose.py`. If you want to build additional backends on top of these backends,
 it would be better to [build it yourself](#build-it-yourself) by using
-`nvcr.io/nvidia/tritonserver:26.04-vllm-python-py3` or
-`nvcr.io/nvidia/tritonserver:26.04-trtllm-python-py3` as a `min` container.
+`nvcr.io/nvidia/tritonserver:26.05-vllm-python-py3` or
+`nvcr.io/nvidia/tritonserver:26.05-trtllm-python-py3` as a `min` container.
 
 
 ### CPU-only container composition
diff --git a/docs/getting_started/llm.md b/docs/getting_started/llm.md
index 052d7829ca..6ea9a5aa33 100644
--- a/docs/getting_started/llm.md
+++ b/docs/getting_started/llm.md
@@ -282,7 +282,7 @@ The above needs to be done manually with your favorite editor. Once finished, pl
     -v $(pwd)/all_models:/opt/all_models \
     -v $(pwd)/scripts:/opt/scripts \
     -v $(pwd)/Phi-3-mini-4k-instruct:/opt/Phi-3-mini-4k-instruct \
-    nvcr.io/nvidia/tritonserver:26.04-trtllm-python-py3
+    nvcr.io/nvidia/tritonserver:26.05-trtllm-python-py3
 
     # Launch Server
     python3 ../scripts/launch_triton_server.py --model_repo ../all_models/inflight_batcher_llm --world_size 1
@@ -308,7 +308,7 @@ The above needs to be done manually with your favorite editor. Once finished, pl
 
 <!---->
 
-    export RELEASE="26.04"
+    export RELEASE="26.05"
     docker run -it --net=host --gpus '"device=0"'  nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk
 
 17. ## Download the Phi-3 tokenizer
diff --git a/docs/introduction/release_notes.md b/docs/introduction/release_notes.md
index 295734b89f..19fc0f22f3 100644
--- a/docs/introduction/release_notes.md
+++ b/docs/introduction/release_notes.md
@@ -25,9 +25,9 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 -->
-# [Triton Inference Server Release 26.04](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/rel-26-04.html#rel-26-04)
+# [Triton Inference Server Release 26.05](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/rel-26-05.html#rel-26-05)
 
-The Triton Inference Server container image, release 26.04, is available
+The Triton Inference Server container image, release 26.05, is available
 on [NGC](https://ngc.nvidia.com/catalog/containers/nvidia:tritonserver) and
 is open source
 on [GitHub](https://github.com/triton-inference-server/server). Release notes can
diff --git a/docs/user_guide/performance_tuning.md b/docs/user_guide/performance_tuning.md
index d51ad5ab2f..ff837a4629 100644
--- a/docs/user_guide/performance_tuning.md
+++ b/docs/user_guide/performance_tuning.md
@@ -235,7 +235,7 @@ with a `tritonserver` binary.
 
 ```bash
 # Start server container
-docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:26.04-py3
+docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:26.05-py3
 
 # Start serving your models
 tritonserver --model-repository=/mnt/models
@@ -284,7 +284,7 @@ by setting the `-u` flag, such as `perf_analyzer -m densenet_onnx -u
 
 ```bash
 # Start the SDK container interactively
-docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:26.04-py3-sdk
+docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:26.05-py3-sdk
 
 # Benchmark model being served from step 3
 perf_analyzer -m densenet_onnx --concurrency-range 1:4
diff --git a/python/openai/README.md b/python/openai/README.md
index 4598a5a43f..4134e72cb6 100644
--- a/python/openai/README.md
+++ b/python/openai/README.md
@@ -46,7 +46,7 @@
 docker run -it --net=host --gpus all --rm \
   -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
   -e HF_TOKEN \
-  nvcr.io/nvidia/tritonserver:26.04-vllm-python-py3
+  nvcr.io/nvidia/tritonserver:26.05-vllm-python-py3
 ```
 
 2. Launch the OpenAI-compatible Triton Inference Server:
@@ -355,7 +355,7 @@ Currently, OpenAI-Compatible Frontend supports loading embedding models and embe
 docker run -it --net=host --gpus all --rm \
   -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
   -e HF_TOKEN \
-  nvcr.io/nvidia/tritonserver:26.04-vllm-python-py3
+  nvcr.io/nvidia/tritonserver:26.05-vllm-python-py3
 ```
 
 2. Launch the OpenAI-compatible Triton Inference Server:
@@ -451,7 +451,7 @@ docker run -it --net=host --gpus all --rm \
   -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
   -e HF_TOKEN \
   -e TRTLLM_ORCHESTRATOR=1 \
-  nvcr.io/nvidia/tritonserver:26.04-trtllm-python-py3
+  nvcr.io/nvidia/tritonserver:26.05-trtllm-python-py3
 ```
 
 2. Install dependencies inside the container:
diff --git a/qa/common/gen_jetson_trt_models b/qa/common/gen_jetson_trt_models
index 7fcdb49823..4d491fa2a1 100755
--- a/qa/common/gen_jetson_trt_models
+++ b/qa/common/gen_jetson_trt_models
@@ -34,7 +34,7 @@
 # Make all generated files accessible outside of container
 umask 0000
 # Set the version of the models
-TRITON_VERSION=${TRITON_VERSION:=26.04}
+TRITON_VERSION=${TRITON_VERSION:=26.05}
 # Set the CUDA device to use
 NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES:=0}
 # Set TensorRT image
diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository
index d6ed5738f8..a503b64a10 100755
--- a/qa/common/gen_qa_model_repository
+++ b/qa/common/gen_qa_model_repository
@@ -66,7 +66,7 @@ log_message.status "Changing working directory to the script directory to: " "${
 cd ${TRITON_MDLS_BASE_SCRIPT_DIR}
 
 log_message.status "define: default values"
-TRITON_VERSION=${TRITON_VERSION:=26.04}
+TRITON_VERSION=${TRITON_VERSION:=26.05}
 ONNX_VERSION=1.20.1
 ONNX_OPSET=0
 OPENVINO_VERSION=2024.5.0