From 50c4782ad3653e6aa51de2db73532b1af35234b6 Mon Sep 17 00:00:00 2001
From: Claas de Boer <dev@claas.plus>
Date: Fri, 10 Apr 2026 14:23:19 +0200
Subject: [PATCH] Fix pred_depth in V-JEPA 2.1 ViT-L configs to match released
 checkpoint

The released ViT-L checkpoint (vjepa2_1_vitl_dist_vitG_384.pt) has 12
predictor blocks, matching the distillation protocol described in the
paper: "we retrain a new predictor, which has 12 blocks (instead of 24
for pretraining)". The ViT-B config already correctly specifies
pred_depth: 12.

With pred_depth: 24, the checkpoint only loads because strict=False is
used, which makes the mismatch a silent partial load: predictor blocks
0-11 are loaded, but blocks 12-23 remain randomly initialized.
---
 configs/train_2_1/vitl16/cooldown-256px-64f.yaml | 2 +-
 configs/train_2_1/vitl16/pretrain-256px-16f.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/configs/train_2_1/vitl16/cooldown-256px-64f.yaml b/configs/train_2_1/vitl16/cooldown-256px-64f.yaml
index 73308856..6a74c2ea 100644
--- a/configs/train_2_1/vitl16/cooldown-256px-64f.yaml
+++ b/configs/train_2_1/vitl16/cooldown-256px-64f.yaml
@@ -118,7 +118,7 @@ model:
   model_name: vit_large
   n_registers: 0
   normalize_predictor: false
-  pred_depth: 24
+  pred_depth: 12
   pred_embed_dim: 384
   pred_num_heads: 12
   uniform_power: true
diff --git a/configs/train_2_1/vitl16/pretrain-256px-16f.yaml b/configs/train_2_1/vitl16/pretrain-256px-16f.yaml
index 3988cf9a..6ffe061e 100644
--- a/configs/train_2_1/vitl16/pretrain-256px-16f.yaml
+++ b/configs/train_2_1/vitl16/pretrain-256px-16f.yaml
@@ -120,7 +120,7 @@ model:
   n_registers: 0
   n_registers_predictor: 0
   normalize_predictor: false
-  pred_depth: 24
+  pred_depth: 12
   pred_embed_dim: 384
   pred_is_causal: false
   pred_local_window: