From 7149c03d3691197d42b4c060e449e59ac4da45a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EA=B9=80=EC=9E=AC=EC=9A=B0?= <pewpewplay315@gmail.com>
Date: Tue, 3 Mar 2026 01:07:09 +0900
Subject: [PATCH] fix: use processor.tokenizer for apply_chat_template in VLM
 preprocessing (#416)

`AutoProcessor.from_pretrained()` returns a generic processor that does
not have an `apply_chat_template()` method. The tokenizer accessible via
`processor.tokenizer` does have this method, and it uses the chat
template loaded from the model config.
---
 specforge/data/preprocessing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/specforge/data/preprocessing.py b/specforge/data/preprocessing.py
index 71a63820..e7df490e 100644
--- a/specforge/data/preprocessing.py
+++ b/specforge/data/preprocessing.py
@@ -245,7 +245,7 @@ def preprocess_vlm_conversations(
             else:
                 messages.append({"role": role, "content": sentence["content"]})
 
-        conversation = processor.apply_chat_template(
+        conversation = processor.tokenizer.apply_chat_template(
             messages,
             tokenize=False,
             add_generation_prompt=False,