vllm.model_executor.models.bert ¶
BertAttention ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
output instance-attribute
¶
output = BertSelfOutput(
hidden_size=hidden_size,
layer_norm_eps=layer_norm_eps,
quant_config=quant_config,
prefix=f"{prefix}.output",
)
self instance-attribute
¶
self = BertSelfAttention(
hidden_size=hidden_size,
num_attention_heads=num_attention_heads,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.output",
)
__init__ ¶
__init__(
hidden_size: int,
num_attention_heads: int,
layer_norm_eps: float,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
BertEmbedding ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
position_embeddings instance-attribute
¶
position_embeddings = VocabParallelEmbedding(
max_position_embeddings, hidden_size
)
token_type_embeddings instance-attribute
¶
token_type_embeddings = VocabParallelEmbedding(
type_vocab_size, hidden_size
)
word_embeddings instance-attribute
¶
word_embeddings = VocabParallelEmbedding(
vocab_size, hidden_size
)
__init__ ¶
Source code in vllm/model_executor/models/bert.py
forward ¶
forward(
input_ids: Tensor,
position_ids: Tensor,
inputs_embeds: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
BertEmbeddingModel ¶
Bases: Module, SupportsQuant
A model that uses Bert to provide embedding functionalities.
This class encapsulates the BertModel and provides an interface for embedding operations and customized pooling functions.
Attributes:
Name | Type | Description
---|---|---
model | BertModel | An instance of BertModel used for forward operations.
_pooler | Pooler | An instance of Pooler used for pooling operations.
Source code in vllm/model_executor/models/bert.py
model instance-attribute
¶
model = _build_model(
vllm_config=vllm_config,
prefix=maybe_prefix(prefix, "model"),
)
__init__ ¶
__init__(*, vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert.py
_build_model ¶
_build_model(
vllm_config: VllmConfig, prefix: str = ""
) -> BertModel
_build_pooler ¶
_build_pooler(pooler_config: PoolerConfig) -> Pooler
forward ¶
forward(
input_ids: Tensor,
positions: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
get_input_embeddings ¶
load_weights ¶
Source code in vllm/model_executor/models/bert.py
BertEncoder ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
layer instance-attribute
¶
layer = ModuleList(
[
(
BertLayer(
config=config,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.layer.{layer_idx}",
)
)
for layer_idx in (range(num_hidden_layers))
]
)
__init__ ¶
__init__(vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert.py
BertForSequenceClassification ¶
Bases: Module, SupportsCrossEncoding, SupportsQuant
A model that uses Bert to provide embedding functionalities.
This class encapsulates the BertModel and provides an interface for embedding operations and customized pooling functions.
Attributes:
Name | Type | Description
---|---|---
model | BertModel | An instance of BertModel used for forward operations.
_pooler | Pooler | An instance of Pooler used for pooling operations.
Source code in vllm/model_executor/models/bert.py
bert instance-attribute
¶
bert = BertPoolingModel(
vllm_config=vllm_config,
prefix=maybe_prefix(prefix, "bert"),
embedding_class=BertEmbedding,
)
pooler instance-attribute
¶
pooler = DispatchPooler(
{
"encode": for_encode(pooler_config),
"classify": ClassifierPooler(
pooling=pooler,
classifier=classifier,
act_fn=act_fn_for_seq_cls(model_config),
),
"score": ClassifierPooler(
pooling=pooler,
classifier=classifier,
act_fn=act_fn_for_cross_encoder(model_config),
),
}
)
__init__ ¶
__init__(*, vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert.py
forward ¶
forward(
input_ids: Optional[Tensor],
positions: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
token_type_ids: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
get_input_embeddings ¶
BertForTokenClassification ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
bert instance-attribute
¶
bert = BertModel(
vllm_config=vllm_config,
prefix=maybe_prefix(prefix, "bert"),
embedding_class=BertEmbedding,
)
__init__ ¶
__init__(*, vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert.py
forward ¶
forward(
input_ids: Optional[Tensor],
positions: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
token_type_ids: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
get_input_embeddings ¶
BertIntermediate ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
dense instance-attribute
¶
dense = ColumnParallelLinear(
input_size=hidden_size,
output_size=intermediate_size,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.dense",
)
__init__ ¶
__init__(
hidden_size: int,
intermediate_size: int,
hidden_act: str,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
forward ¶
BertLayer ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
attention instance-attribute
¶
attention = BertAttention(
hidden_size=hidden_size,
num_attention_heads=num_attention_heads,
layer_norm_eps=layer_norm_eps,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.attention",
)
intermediate instance-attribute
¶
intermediate = BertIntermediate(
hidden_size=hidden_size,
intermediate_size=intermediate_size,
hidden_act=hidden_act,
quant_config=quant_config,
prefix=f"{prefix}.intermediate",
)
output instance-attribute
¶
output = BertOutput(
hidden_size=hidden_size,
intermediate_size=intermediate_size,
layer_norm_eps=layer_norm_eps,
quant_config=quant_config,
prefix=f"{prefix}.output",
)
__init__ ¶
__init__(
config: BertConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
BertModel ¶
Bases: Module
, SupportsQuant
Source code in vllm/model_executor/models/bert.py
encoder instance-attribute
¶
encoder = BertEncoder(
vllm_config=vllm_config, prefix=f"{prefix}.encoder"
)
packed_modules_mapping class-attribute
instance-attribute
¶
__init__ ¶
__init__(
*,
vllm_config: VllmConfig,
prefix: str = "",
embedding_class: type[Module] = BertEmbedding,
) -> None
Source code in vllm/model_executor/models/bert.py
_load_weights ¶
Source code in vllm/model_executor/models/bert.py
forward ¶
forward(
input_ids: Tensor,
positions: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
get_input_embeddings ¶
load_weights ¶
Source code in vllm/model_executor/models/bert.py
BertOutput ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
dense instance-attribute
¶
dense = RowParallelLinear(
input_size=intermediate_size,
output_size=hidden_size,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.dense",
)
__init__ ¶
__init__(
hidden_size: int,
intermediate_size: int,
layer_norm_eps: float,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
forward ¶
BertPooler ¶
Bases: Pooler
Source code in vllm/model_executor/models/bert.py
__init__ ¶
forward ¶
forward(
hidden_states: Union[Tensor, list[Tensor]],
pooling_metadata: PoolingMetadata,
) -> Union[Tensor, list[Tensor]]
Source code in vllm/model_executor/models/bert.py
get_pooling_updates ¶
get_pooling_updates(
task: PoolingTask,
) -> PoolingParamsUpdate
get_supported_tasks ¶
get_supported_tasks() -> Set[PoolingTask]
BertPoolingModel ¶
Bases: BertModel
Source code in vllm/model_executor/models/bert.py
__init__ ¶
__init__(
*,
vllm_config: VllmConfig,
prefix: str = "",
embedding_class: type[Module] = BertEmbedding,
) -> None
Source code in vllm/model_executor/models/bert.py
load_weights ¶
Source code in vllm/model_executor/models/bert.py
BertSelfAttention ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
attn instance-attribute
¶
attn = EncoderOnlyAttention(
num_heads=num_heads,
head_size=head_dim,
scale=scaling,
num_kv_heads=num_kv_heads,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.attn",
)
qkv_proj instance-attribute
¶
qkv_proj = QKVParallelLinear(
hidden_size=hidden_size,
head_size=head_dim,
total_num_heads=total_num_heads,
total_num_kv_heads=total_num_kv_heads,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.qkv_proj",
)
__init__ ¶
__init__(
hidden_size: int,
num_attention_heads: int,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
forward ¶
BertSelfOutput ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
dense instance-attribute
¶
dense = RowParallelLinear(
input_size=hidden_size,
output_size=hidden_size,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.dense",
)
__init__ ¶
__init__(
hidden_size: int,
layer_norm_eps: float,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)