Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion vllm/v1/attention/backends/triton_attn.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import torch

from vllm.config import CUDAGraphMode, VllmConfig
- >>>>>>> v0.15.1
from vllm.config.cache import CacheDType
from vllm.logger import init_logger
from vllm.model_executor.layers.quantization.utils.quant_utils import (
Expand All @@ -33,6 +32,7 @@
)
from vllm.v1.attention.ops.triton_unified_attention import unified_attention
from vllm.v1.kv_cache_interface import AttentionSpec
+ import vllm.envs as envs

logger = init_logger(__name__)

Expand Down
16 changes: 8 additions & 8 deletions vllm/v1/worker/gpu_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1707,14 +1707,14 @@ def _get_block_table(kv_cache_gid: int):
blk_table_tensor[num_reqs:num_reqs_padded].fill_(-1)
return blk_table_tensor

- if not hasattr(self, "rotate"):
- if not isinstance(self.model.model.layers[0], PPMissingLayer):
- self.rotate = self.model.model.layers[0].self_attn.rotary_emb
- else:
- for lay in self.model.model.layers:
- if not isinstance(lay, PPMissingLayer):
- self.rotate = lay.self_attn.rotary_emb
- break
+ if not hasattr(self, "rotate"):
+ if not isinstance(self.model.model.layers[0], PPMissingLayer):
+ self.rotate = self.model.model.layers[0].self_attn.rotary_emb
+ else:
+ for lay in self.model.model.layers:
+ if not isinstance(lay, PPMissingLayer):
+ self.rotate = lay.self_attn.rotary_emb
+ break

assert slot_mappings is not None
block_table_gid_0 = _get_block_table(0)
Expand Down