vllm.entrypoints.pooling.scoring.protocol ¶

ScoringRequestMixin ¶

Bases: PoolingBasicRequestMixin, ClassifyRequestMixin

Source code in vllm/entrypoints/pooling/scoring/protocol.py

class ScoringRequestMixin(PoolingBasicRequestMixin, ClassifyRequestMixin):
    # Request fields and helpers shared by the scoring-style pooling requests.
    # The snippet markers below delimit the parameter block that is reused in
    # the documentation, so they must stay in place.

    # --8<-- [start:scoring-common-params]
    max_tokens_per_query: int = Field(
        default=0,
        description=(
            "Maximum number of tokens per query. "
            "Queries longer than this will be truncated to this length. "
            "0 means no query-level truncation is applied."
        ),
    )
    max_tokens_per_doc: int = Field(
        default=0,
        description=(
            "Maximum number of tokens per document. "
            "Documents longer than this will be truncated to this length. "
            "0 means no document-level truncation is applied "
            "(only truncate_prompt_tokens applies to the combined "
            "query+document)."
        ),
    )
    instruction: str | None = Field(
        default=None,
        description=(
            "Task instruction prepended to each scored pair via the chat template. "
            "Equivalent to passing chat_template_kwargs={'instruction': ...}."
        ),
    )
    chat_template_kwargs: dict[str, Any] | None = Field(
        default=None,
        description=(
            "Additional keyword args to pass to the chat template "
            "renderer. Will be accessible by the score/rerank chat template."
        ),
    )
    # --8<-- [end:scoring-common-params]

    @model_validator(mode="after")
    def _merge_instruction_into_kwargs(self) -> "ScoringRequestMixin":
        """Copy the `instruction` field into `chat_template_kwargs`.

        Callers may supply the instruction either through the dedicated
        field or through the generic kwargs dict.  An `instruction` key that
        is already present in `chat_template_kwargs` is left untouched, so
        the dict wins over the top-level field.

        Returns:
            The same model instance, with `chat_template_kwargs` replaced by
            a merged copy when `instruction` is set.
        """
        if self.instruction is None:
            # Nothing to fold in; leave chat_template_kwargs as provided.
            return self

        # Work on a copy so the caller-supplied dict is never mutated.
        template_kwargs = dict(self.chat_template_kwargs or {})
        if "instruction" not in template_kwargs:
            template_kwargs["instruction"] = self.instruction
        self.chat_template_kwargs = template_kwargs
        return self

    def build_tok_params(self, model_config: ModelConfig) -> TokenizeParams:
        """Build tokenization parameters for this request.

        Delegates to `_build_pooling_tok_params` with special tokens enabled,
        `max_total_tokens` set to `model_config.max_model_len` and
        `max_output_tokens` set to 0.
        """
        total_token_limit = model_config.max_model_len
        return self._build_pooling_tok_params(
            model_config,
            add_special_tokens=True,
            max_total_tokens=total_token_limit,
            max_output_tokens=0,
        )

    def to_pooling_params(self, task: PoolingTask = "classify"):
        """Create `PoolingParams` for `task` using this request's
        `use_activation` setting."""
        pooling_params = PoolingParams(
            task=task,
            use_activation=self.use_activation,
        )
        return pooling_params

_merge_instruction_into_kwargs ¶

_merge_instruction_into_kwargs() -> ScoringRequestMixin

Fold the top-level instruction field into chat_template_kwargs.

This allows callers to use either the convenience field or the generic dict. Explicit keys inside chat_template_kwargs take precedence over the top-level instruction field.

Source code in vllm/entrypoints/pooling/scoring/protocol.py

@model_validator(mode="after")
def _merge_instruction_into_kwargs(self) -> "ScoringRequestMixin":
    """Copy the `instruction` field into `chat_template_kwargs`.

    Callers may supply the instruction either through the dedicated field or
    through the generic kwargs dict.  An `instruction` key that is already
    present in `chat_template_kwargs` is left untouched, so the dict wins
    over the top-level field.

    Returns:
        The same model instance, with `chat_template_kwargs` replaced by a
        merged copy when `instruction` is set.
    """
    if self.instruction is None:
        # Nothing to fold in; leave chat_template_kwargs as provided.
        return self

    # Work on a copy so the caller-supplied dict is never mutated.
    template_kwargs = dict(self.chat_template_kwargs or {})
    if "instruction" not in template_kwargs:
        template_kwargs["instruction"] = self.instruction
    self.chat_template_kwargs = template_kwargs
    return self