vllm.lora.layers.replicated_linear

ReplicatedLinearWithLoRA

Bases: BaseLinearLayerWithLoRA

Source code in vllm/lora/layers/replicated_linear.py
class ReplicatedLinearWithLoRA(BaseLinearLayerWithLoRA):

    def __init__(self, base_layer: ReplicatedLinear) -> None:
        super().__init__(base_layer)
        self.output_size = self.base_layer.output_size
        # To ensure interface compatibility, n_slices is always 1.
        self.n_slices = 1

    def forward(
        self, input_: torch.Tensor
    ) -> Union[torch.Tensor, tuple[torch.Tensor, Optional[torch.Tensor]]]:
        """Forward of ReplicatedLinearWithLoRA

        Args:
            input_: Tensor whose last dimension is `input_size`.

        Returns:
            - output
            - bias
        """
        bias = (self.base_layer.bias
                if not self.base_layer.skip_bias_add else None)

        # Matrix multiply.
        output = self.apply(input_, bias)

        output_bias = (self.base_layer.bias
                       if self.base_layer.skip_bias_add else None)

        if not self.base_layer.return_bias:
            return output

        return output, output_bias

    # ReplicatedLinear should always be replaced, regardless of the fully
    # sharded LoRAs setting, because it is, by definition, copied per GPU.
    @classmethod
    def can_replace_layer(
        cls,
        source_layer: nn.Module,
        lora_config: LoRAConfig,
        packed_modules_list: list,
        model_config: Optional[PretrainedConfig],
    ) -> bool:
        return type(source_layer) is ReplicatedLinear

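As a rough orientation, the sketch below shows how the wrapper relates to its ReplicatedLinear base layer. It is a minimal, hypothetical example, not an excerpt from vLLM: the ReplicatedLinear constructor arguments are assumptions, and in a real engine this wrapping happens automatically during LoRA model initialization (building the layers standalone may need extra setup depending on the vLLM version). The assertions mirror the two attribute entries documented just below.

from vllm.model_executor.layers.linear import ReplicatedLinear
from vllm.lora.layers.replicated_linear import ReplicatedLinearWithLoRA

# Constructor arguments are assumed for illustration only.
base = ReplicatedLinear(input_size=1024, output_size=4096, bias=True)
lora_layer = ReplicatedLinearWithLoRA(base)

assert lora_layer.n_slices == 1                      # always a single slice
assert lora_layer.output_size == base.output_size    # mirrors the base layer
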
n_slices instance-attribute

n_slices = 1

output_size instance-attribute

output_size = base_layer.output_size

__init__

__init__(base_layer: ReplicatedLinear) -> None
Source code in vllm/lora/layers/replicated_linear.py
def __init__(self, base_layer: ReplicatedLinear) -> None:
    super().__init__(base_layer)
    self.output_size = self.base_layer.output_size
    # To ensure interface compatibility, n_slices is always 1.
    self.n_slices = 1

can_replace_layer classmethod

can_replace_layer(
    source_layer: Module,
    lora_config: LoRAConfig,
    packed_modules_list: list,
    model_config: Optional[PretrainedConfig],
) -> bool
Source code in vllm/lora/layers/replicated_linear.py
@classmethod
def can_replace_layer(
    cls,
    source_layer: nn.Module,
    lora_config: LoRAConfig,
    packed_modules_list: list,
    model_config: Optional[PretrainedConfig],
) -> bool:
    return type(source_layer) is ReplicatedLinear

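For illustration, a hedged sketch of how this check might be invoked. The call site is hypothetical and the LoRAConfig values are placeholders; in practice the vLLM LoRA model wrapper performs this check while walking the model's modules. The decision reduces to an exact type match on ReplicatedLinear, and the remaining arguments belong to the shared classmethod interface.

from vllm.config import LoRAConfig
from vllm.model_executor.layers.linear import ReplicatedLinear
from vllm.lora.layers.replicated_linear import ReplicatedLinearWithLoRA

base = ReplicatedLinear(1024, 4096)

# Exact type match on ReplicatedLinear is the only criterion; the other
# arguments are ignored by this particular override.
assert ReplicatedLinearWithLoRA.can_replace_layer(
    source_layer=base,
    lora_config=LoRAConfig(max_lora_rank=16, max_loras=1),  # illustrative values
    packed_modules_list=[],
    model_config=None,
)
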
forward

forward(
    input_: Tensor,
) -> Union[Tensor, tuple[Tensor, Optional[Tensor]]]

Forward of ReplicatedLinearWithLoRA

Parameters:

    input_ (Tensor, required): Tensor whose last dimension is `input_size`.

Returns:

    Union[Tensor, tuple[Tensor, Optional[Tensor]]]: the output tensor, or a
    tuple of (output, output_bias) when `return_bias` is set on the base
    layer.
Source code in vllm/lora/layers/replicated_linear.py
def forward(
    self, input_: torch.Tensor
) -> Union[torch.Tensor, tuple[torch.Tensor, Optional[torch.Tensor]]]:
    """Forward of ReplicatedLinearWithLoRA

    Args:
        input_: Tensor whose last dimension is `input_size`.

    Returns:
        - output
        - bias
    """
    bias = (self.base_layer.bias
            if not self.base_layer.skip_bias_add else None)

    # Matrix multiply.
    output = self.apply(input_, bias)

    output_bias = (self.base_layer.bias
                   if self.base_layer.skip_bias_add else None)

    if not self.base_layer.return_bias:
        return output

    return output, output_bias
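
To make the bias handling concrete, here is a small standalone sketch that mirrors the control flow of forward. The names are illustrative rather than vLLM API, and a plain torch.nn.functional.linear stands in for self.apply, which in the real layer also adds the LoRA delta.

import torch
import torch.nn.functional as F

def forward_contract(x, weight, bias, *, skip_bias_add, return_bias):
    # When skip_bias_add is False, the bias is fused into the matmul and the
    # trailing output_bias is None; when True, the matmul omits the bias and
    # hands it back to the caller for later fusion.
    output = F.linear(x, weight, None if skip_bias_add else bias)
    output_bias = bias if skip_bias_add else None
    if not return_bias:
        return output
    return output, output_bias

x = torch.randn(2, 8)
w = torch.randn(4, 8)
b = torch.randn(4)

out, out_bias = forward_contract(x, w, b, skip_bias_add=True, return_bias=True)
assert out_bias is b          # bias deferred to the caller
out = forward_contract(x, w, b, skip_bias_add=False, return_bias=False)
assert out.shape == (2, 4)    # bias already applied inside the matmul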