Skip to content

vllm.v1.worker.gpu.spec_decode.eagle.utils

_should_share

_should_share(
    eagle: Module, flag: str, draft, target
) -> bool

Share when the draft has no copy of its own, or when its copy matches the target's.

Source code in vllm/v1/worker/gpu/spec_decode/eagle/utils.py
def _should_share(eagle: nn.Module, flag: str, draft, target) -> bool:
    """Share when the draft has no own copy, or its copy matches the target."""

    if not getattr(eagle, flag, False) or draft is None:
        return True
    if target is None:
        return False
    # torch.equal on GPU allocates a bool mask the size of the input.
    # Use the faster GPU path when there is plenty of headroom;
    # otherwise compare on CPU.
    w = draft.weight
    if w.is_cuda and torch.cuda.mem_get_info(w.device)[0] < w.numel() * 2:
        return torch.equal(w.cpu(), target.weight.cpu())
    return torch.equal(w, target.weight)