vllm.attention.backends.utils

Attention backend utils

PAD_SLOT_ID module-attribute

PAD_SLOT_ID = -1
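
PAD_SLOT_ID marks entries in a slot mapping that correspond to padding positions, so no key/value data is written to the KV cache for them. A minimal sketch (not part of the source; slot_mapping and padded_len are illustrative names) of padding a per-token slot mapping to a fixed length:

# Hypothetical slot mapping for 3 real tokens, padded to a fixed length of 8.
# Entries equal to PAD_SLOT_ID denote padding positions with no KV-cache slot.
slot_mapping = [1052, 1053, 1054]
padded_len = 8
slot_mapping += [PAD_SLOT_ID] * (padded_len - len(slot_mapping))
# slot_mapping is now [1052, 1053, 1054, -1, -1, -1, -1, -1]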

logger module-attribute

logger = init_logger(__name__)

MLADims dataclass

Source code in vllm/attention/backends/utils.py
@dataclass
class MLADims:
    q_lora_rank: Optional[int]
    kv_lora_rank: int
    qk_nope_head_dim: int
    qk_rope_head_dim: int
    v_head_dim: int

kv_lora_rank instance-attribute

kv_lora_rank: int

q_lora_rank instance-attribute

q_lora_rank: Optional[int]

qk_nope_head_dim instance-attribute

qk_nope_head_dim: int

qk_rope_head_dim instance-attribute

qk_rope_head_dim: int

v_head_dim instance-attribute

v_head_dim: int

__init__

__init__(
    q_lora_rank: Optional[int],
    kv_lora_rank: int,
    qk_nope_head_dim: int,
    qk_rope_head_dim: int,
    v_head_dim: int,
) -> None

get_mla_dims

get_mla_dims(model_config: ModelConfig) -> MLADims
Source code in vllm/attention/backends/utils.py
def get_mla_dims(model_config: ModelConfig) -> MLADims:
    hf_text_config = model_config.hf_text_config

    return MLADims(
        q_lora_rank=getattr(hf_text_config, "q_lora_rank", None),
        kv_lora_rank=hf_text_config.kv_lora_rank,
        qk_nope_head_dim=hf_text_config.qk_nope_head_dim,
        qk_rope_head_dim=hf_text_config.qk_rope_head_dim,
        v_head_dim=hf_text_config.v_head_dim,
    )
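
A usage sketch, assuming a ModelConfig for an MLA model (e.g. a DeepSeek-V2 family checkpoint) has already been built elsewhere through the engine's configuration path:

# `model_config` is assumed to exist; get_mla_dims reads the MLA dimensions
# off its HF text config and packs them into an MLADims instance.
dims = get_mla_dims(model_config)
print(dims.kv_lora_rank, dims.qk_nope_head_dim + dims.qk_rope_head_dim)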