Skip to content

vllm.utils ¶

Modules:

Name	Description
`argparse_utils`	Argument parsing utilities for vLLM.
`async_utils`	Contains helpers related to asynchronous code.
`cache`
`collection_utils`	Contains helpers that are applied to collections.
`counter`
`deep_gemm`	Compatibility wrapper for DeepGEMM API changes.
`flashinfer`	Compatibility wrapper for FlashInfer API changes.
`func_utils`	Contains helpers that are applied to functions.
`gc_utils`
`hashing`
`import_utils`	Contains helpers related to importing modules.
`jsontree`	Helper functions to work with nested JSON structures.
`math_utils`	Math utility functions for vLLM.
`mem_constants`
`mem_utils`
`nccl`
`network_utils`
`platform_utils`
`profiling`
`serial_utils`
`system_utils`
`tensor_schema`
`torch_utils`

DEFAULT_MAX_NUM_BATCHED_TOKENS `module-attribute` ¶

DEFAULT_MAX_NUM_BATCHED_TOKENS = 2048

MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS `module-attribute` ¶

MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS = 5120

POOLING_MODEL_MAX_NUM_BATCHED_TOKENS `module-attribute` ¶

POOLING_MODEL_MAX_NUM_BATCHED_TOKENS = 32768

STR_BACKEND_ENV_VAR `module-attribute` ¶

STR_BACKEND_ENV_VAR: str = 'VLLM_ATTENTION_BACKEND'

STR_FLASHINFER_ATTN_VAL `module-attribute` ¶

STR_FLASHINFER_ATTN_VAL: str = 'FLASHINFER'

STR_FLASH_ATTN_VAL `module-attribute` ¶

STR_FLASH_ATTN_VAL: str = 'FLASH_ATTN'

STR_INVALID_VAL `module-attribute` ¶

STR_INVALID_VAL: str = 'INVALID'

STR_TORCH_SDPA_ATTN_VAL `module-attribute` ¶

STR_TORCH_SDPA_ATTN_VAL: str = 'TORCH_SDPA'

STR_XFORMERS_ATTN_VAL `module-attribute` ¶

STR_XFORMERS_ATTN_VAL: str = 'XFORMERS'

T `module-attribute` ¶

T = TypeVar('T')

_DEPRECATED_MAPPINGS `module-attribute` ¶

# Maps a deprecated top-level ``vllm.utils`` attribute name to the name of the
# ``vllm.utils`` submodule that provides it. The module-level ``__getattr__``
# uses this table to emit a DeprecationWarning and forward the lookup, and
# ``__dir__`` adds these keys to its listing.
_DEPRECATED_MAPPINGS = {
    "cprofile": "profiling",
    "cprofile_context": "profiling",
    "get_open_port": "network_utils",
}

logger `module-attribute` ¶

logger = init_logger(__name__)

dir ¶

__dir__() -> list[str]

Source code in vllm/utils/__init__.py

def __dir__() -> list[str]:
    """Return the sorted module attribute names, including deprecated ones.

    Deprecated names are not real module globals (they are resolved lazily),
    so they are appended explicitly to keep them visible to ``dir()`` and
    tab-completion.
    """
    names = list(globals())
    names.extend(_DEPRECATED_MAPPINGS)
    names.sort()
    return names

getattr ¶

__getattr__(name: str) -> Any

Module-level getattr to handle deprecated utilities.

Source code in vllm/utils/__init__.py

def __getattr__(name: str) -> Any:  # noqa: D401 - short deprecation docstring
    """Module-level getattr that forwards deprecated utility names.

    Names listed in ``_DEPRECATED_MAPPINGS`` are looked up in the mapped
    ``vllm.utils`` submodule after a ``DeprecationWarning`` is issued.
    Any other name raises ``AttributeError``.
    """
    # Guard clause: anything that is not a known deprecated name is simply
    # a missing attribute.
    if name not in _DEPRECATED_MAPPINGS:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    submodule_name = _DEPRECATED_MAPPINGS[name]
    message = (
        f"vllm.utils.{name} is deprecated and will be removed in a future version. "
        f"Use vllm.utils.{submodule_name}.{name} instead."
    )
    # stacklevel=2 attributes the warning to the code that accessed the
    # attribute instead of to this function.
    warnings.warn(message, DeprecationWarning, stacklevel=2)

    # A non-empty ``fromlist`` makes ``__import__`` return the submodule
    # itself instead of the top-level ``vllm`` package.
    target_module = __import__(
        f"vllm.utils.{submodule_name}", fromlist=[submodule_name]
    )
    return getattr(target_module, name)

length_from_prompt_token_ids_or_embeds ¶

length_from_prompt_token_ids_or_embeds(
    prompt_token_ids: list[int] | None,
    prompt_embeds: Tensor | None,
) -> int

Calculate the request length (in number of tokens) given either prompt_token_ids or prompt_embeds.

Source code in vllm/utils/__init__.py

def length_from_prompt_token_ids_or_embeds(
    prompt_token_ids: list[int] | None,
    prompt_embeds: torch.Tensor | None,
) -> int:
    """Calculate the request length (in number of tokens) give either
    prompt_token_ids or prompt_embeds.
    """
    prompt_token_len = None if prompt_token_ids is None else len(prompt_token_ids)
    prompt_embeds_len = None if prompt_embeds is None else len(prompt_embeds)

    if prompt_token_len is None:
        if prompt_embeds_len is None:
            raise ValueError("Neither prompt_token_ids nor prompt_embeds were defined.")
        return prompt_embeds_len
    else:
        if prompt_embeds_len is not None and prompt_embeds_len != prompt_token_len:
            raise ValueError(
                "Prompt token ids and prompt embeds had different lengths"
                f" prompt_token_ids={prompt_token_len}"
                f" prompt_embeds={prompt_embeds_len}"
            )
        return prompt_token_len

random_uuid ¶

random_uuid() -> str

Source code in vllm/utils/__init__.py

def random_uuid() -> str:
    """Return a random UUID4 as a 32-character lowercase hex string."""
    # ``UUID.hex`` is already a ``str`` without dashes.
    return uuid.uuid4().hex

warn_for_unimplemented_methods ¶

warn_for_unimplemented_methods(cls: type[T]) -> type[T]

A replacement for abc.ABC. When we use abc.ABC, subclasses will fail to instantiate if they do not implement all abstract methods. Here, we only require raise NotImplementedError in the base class, and log a warning if the method is not implemented in the subclass.

Source code in vllm/utils/__init__.py

def warn_for_unimplemented_methods(cls: type[T]) -> type[T]:
    """
    A replacement for `abc.ABC`.
    When we use `abc.ABC`, subclasses will fail to instantiate
    if they do not implement all abstract methods.
    Here, we only require `raise NotImplementedError` in the
    base class, and log a message (at debug level) if the method
    is not implemented in the subclass.

    The check is a best-effort source-text heuristic: a public method is
    reported when the source of its underlying function contains the text
    ``NotImplementedError``. Attributes that are not callable, that have no
    ``__func__`` (i.e. are not bound methods), or whose source cannot be
    retrieved are skipped.

    Args:
        cls: The class to decorate. Its `__init__` is replaced in place by
            a wrapper that runs the check after the original `__init__`.

    Returns:
        The same class object, with `__init__` wrapped.
    """

    original_init = cls.__init__

    def find_unimplemented_methods(self: object) -> None:
        unimplemented_methods = []
        for attr_name in dir(self):
            # bypass private and dunder attributes
            if attr_name.startswith("_"):
                continue

            try:
                attr = getattr(self, attr_name)
            except AttributeError:
                continue

            # Only methods can be "unimplemented". Skipping plain data
            # attributes here also prevents inspecting a stale (or not yet
            # assigned) function left over from a previous iteration.
            if not callable(attr):
                continue

            # get the func of callable method; callables without
            # `__func__` are not bound methods and are ignored
            attr_func = getattr(attr, "__func__", None)
            if attr_func is None:
                continue

            try:
                src = inspect.getsource(attr_func)
            except (OSError, TypeError):
                # Source is unavailable (e.g. built-in or dynamically
                # generated code); this diagnostic must not break __init__.
                continue

            if "NotImplementedError" in src:
                unimplemented_methods.append(attr_name)

        if unimplemented_methods:
            method_names = ",".join(unimplemented_methods)
            logger.debug("Methods %s not implemented in %s", method_names, self)

    @wraps(original_init)
    def wrapped_init(self, *args, **kwargs) -> None:
        original_init(self, *args, **kwargs)
        find_unimplemented_methods(self)

    # Bypass any custom metaclass `__setattr__` when installing the wrapper.
    type.__setattr__(cls, "__init__", wrapped_init)
    return cls