vllm.v1.kv_offload.backend ¶

Backend ¶

Bases: ABC

An abstract class for allocating and returning specs for writing KV blocks to some backend.

Source code in vllm/v1/kv_offload/backend.py

class Backend(ABC):
    """
    An abstract class for allocating and returning specs for writing
    KV blocks to some backend.

    Subclasses must implement get_num_free_blocks, allocate_blocks and free.
    get_load_store_spec is optional: the default implementation raises
    NotImplementedError.
    """

    def __init__(self, block_size: int, medium: str) -> None:
        """
        Store the backend parameters on the instance.

        Args:
            block_size: stored unchanged as self.block_size.
            medium: stored unchanged as self.medium
                (presumably names the storage medium - confirm with callers).
        """
        self.block_size = block_size
        self.medium = medium

    @abstractmethod
    def get_num_free_blocks(self) -> int:
        """
        Returns the number of blocks that can currently be allocated.
        """

    @abstractmethod
    def allocate_blocks(self,
                        block_hashes: list[BlockHash]) -> list[BlockStatus]:
        """
        Allocate space for writing blocks.
        This method assumes there is enough space for allocation.
        It is unsafe to use without checking get_num_free_blocks beforehand.

        Args:
            block_hashes: the hashes identifying the blocks to be written.

        Returns:
            A list of BlockStatus for the allocated blocks.
            The ref_cnt of each returned item will be -1, meaning the block
            is not yet ready to be read.
        """

    @abstractmethod
    def free(self, block: BlockStatus) -> None:
        """
        Free a previously allocated block.
        You should only call this function with blocks returned by
        allocate_blocks, and only once per block.

        Args:
            block: The block to be freed.
        """

    def get_load_store_spec(self, block_hashes: Iterable[BlockHash],
                            blocks: Iterable[BlockStatus]) -> LoadStoreSpec:
        """
        Get backend-specific information on how to read/write blocks.

        Args:
            block_hashes: the block hashes identifying the blocks.
            blocks: the blocks themselves.

        Returns:
            A LoadStoreSpec that can be used by a worker
            to read/write the blocks.

        Raises:
            NotImplementedError: always, in this base implementation.
                This method is not abstract, so a subclass may leave it
                unimplemented and still be instantiable.
        """
        raise NotImplementedError

block_size `instance-attribute` ¶

block_size = block_size

medium `instance-attribute` ¶

medium = medium

__init__ ¶

__init__(block_size: int, medium: str)

Source code in vllm/v1/kv_offload/backend.py

def __init__(self, block_size: int, medium: str) -> None:
    """
    Store the backend parameters on the instance.

    Args:
        block_size: stored unchanged as self.block_size.
        medium: stored unchanged as self.medium.
    """
    self.block_size = block_size
    self.medium = medium

allocate_blocks `abstractmethod` ¶

allocate_blocks(
    block_hashes: list[BlockHash],
) -> list[BlockStatus]

Allocate space for writing blocks. This method assumes there is enough space for allocation. It is unsafe to use without checking get_num_free_blocks beforehand.

Parameters:

Name	Type	Description	Default
`block_hashes`	`list[BlockHash]`	the hashes identifying the blocks to be written.	required

Returns:

Type	Description
`list[BlockStatus]`	A list of BlockStatus for the allocated blocks. The ref_cnt of each returned item will be -1, meaning the block is not yet ready to be read.

Source code in vllm/v1/kv_offload/backend.py

@abstractmethod
def allocate_blocks(
    self, block_hashes: list[BlockHash]
) -> list[BlockStatus]:
    """
    Reserve space in the backend for blocks that are about to be written.

    The caller is responsible for capacity: this method assumes enough
    space is available, so calling it without first consulting
    get_num_free_blocks is unsafe.

    Args:
        block_hashes: hashes identifying the blocks that will be written.

    Returns:
        One BlockStatus per allocated block. Each returned item has
        ref_cnt set to -1, which marks the block as not yet ready
        to be read.
    """

free `abstractmethod` ¶

free(block: BlockStatus)

Free a previously allocated block. You should only call this function with blocks returned by allocate_blocks, and only once per each block.

Parameters:

Name	Type	Description	Default
`block`	`BlockStatus`	The block to be freed.	required

Source code in vllm/v1/kv_offload/backend.py

@abstractmethod
def free(self, block: BlockStatus):
    """
    Release a block that was allocated earlier.

    Only pass blocks that were returned by allocate_blocks, and free
    each such block exactly once.

    Args:
        block: the block to release.
    """

get_load_store_spec ¶

get_load_store_spec(
    block_hashes: Iterable[BlockHash],
    blocks: Iterable[BlockStatus],
) -> LoadStoreSpec

Get backend-specific information on how to read/write blocks.

Parameters:

Name	Type	Description	Default
`block_hashes`	`Iterable[BlockHash]`	the list of block hashes identifying the blocks.	required
`blocks`	`Iterable[BlockStatus]`	the list of blocks.	required

Returns:

Type	Description
`LoadStoreSpec`	A LoadStoreSpec that can be used by a worker to read/write the blocks.

Source code in vllm/v1/kv_offload/backend.py

def get_load_store_spec(
    self,
    block_hashes: Iterable[BlockHash],
    blocks: Iterable[BlockStatus],
) -> LoadStoreSpec:
    """
    Describe, in backend-specific terms, how the given blocks are
    read or written.

    Args:
        block_hashes: the block hashes identifying the blocks.
        blocks: the blocks themselves.

    Returns:
        A LoadStoreSpec that a worker can use to read/write the blocks.

    Raises:
        NotImplementedError: always, in this default implementation.
    """
    raise NotImplementedError()

get_num_free_blocks `abstractmethod` ¶

get_num_free_blocks()

Returns the number of blocks that can currently be allocated.

Source code in vllm/v1/kv_offload/backend.py

@abstractmethod
def get_num_free_blocks(self) -> int:
    """
    Returns the number of blocks that can currently be allocated.
    """

BlockStatus ¶

Bases: Structure

Offloading status for a single block of KV data. Holds the following information:

- `ref_cnt` - the current number of transfers using this block as a source. A value of -1 indicates the block is not yet ready to be read.
- `load_store_spec` - backend-specific information on how to actually read/write the block.

Source code in vllm/v1/kv_offload/backend.py

class BlockStatus(ctypes.Structure):
    """
    Offloading status for a single block of KV data, laid out as a
    ctypes structure.

    Fields:
        ref_cnt: the current number of transfers using this block as a
            source, stored as a 32-bit signed integer. A value of -1
            indicates the block is not yet ready to be read.

    NOTE(review): backend-specific load/store information
    (load_store_spec) is not declared in _fields_ here; presumably it is
    supplied by backend-specific subclasses - confirm.
    """
    _fields_ = [("ref_cnt", ctypes.c_int32)]

    def __init__(self):
        # A new block starts out "not ready", which is encoded as
        # ref_cnt = -1; the Structure constructor assigns the field.
        super().__init__(ref_cnt=-1)

    @property
    def is_ready(self) -> bool:
        """
        Tell whether the block can be read, i.e. ref_cnt is not negative.
        """
        return not self.ref_cnt < 0

_fields_ `class-attribute` `instance-attribute` ¶

_fields_ = [('ref_cnt', c_int32)]

is_ready `property` ¶

is_ready: bool

Returns whether the block is ready to be read.

ref_cnt `instance-attribute` ¶

ref_cnt = -1

__init__ ¶

__init__()

Source code in vllm/v1/kv_offload/backend.py

def __init__(self) -> None:
    """
    Create a block status whose ref_cnt starts at -1 ("not ready").
    """
    super().__init__()
    # initialize block as "not ready" (ref_cnt = -1)
    self.ref_cnt = -1

vllm.v1.kv_offload.backend ¶

Backend ¶

block_size instance-attribute ¶

medium instance-attribute ¶

__init__ ¶

allocate_blocks abstractmethod ¶

free abstractmethod ¶

get_load_store_spec ¶

get_num_free_blocks abstractmethod ¶

BlockStatus ¶

_fields_ class-attribute instance-attribute ¶

is_ready property ¶

ref_cnt instance-attribute ¶

__init__ ¶

block_size `instance-attribute` ¶

medium `instance-attribute` ¶

__init__ ¶

allocate_blocks `abstractmethod` ¶

free `abstractmethod` ¶

get_num_free_blocks `abstractmethod` ¶

_fields_ `class-attribute` `instance-attribute` ¶

is_ready `property` ¶

ref_cnt `instance-attribute` ¶

__init__ ¶