vllm.tool_parsers.minimax_m2_tool_parser ¶

MinimaxM2ToolParser ¶

Bases: ToolParser

Source code in vllm/tool_parsers/minimax_m2_tool_parser.py

class MinimaxM2ToolParser(ToolParser):
    """Parse MiniMax M2 tool calls out of model output.

    The markup recognised by the regexes compiled in ``__init__`` is::

        <minimax:tool_call>
        <invoke name="function_name">
        <parameter name="param_name">value</parameter>
        </invoke>
        </minimax:tool_call>

    Both one-shot (``extract_tool_calls``) and streaming
    (``extract_tool_calls_streaming``) extraction are provided. In streaming
    mode a tool call is emitted only once its closing ``</invoke>`` has
    arrived, so name and arguments always travel in a single delta.
    """

    def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
        """Set up sentinel tokens, streaming state and parsing regexes.

        Args:
            tokenizer: Tokenizer forwarded to the ``ToolParser`` base class.
            tools: Optional tool definitions forwarded to the base class;
                later read through ``self.tools`` for type coercion.

        Raises:
            ValueError: If ``self.model_tokenizer`` is falsy after base
                initialisation.
            RuntimeError: If either sentinel token is missing from
                ``self.vocab``.
        """
        super().__init__(tokenizer, tools)

        # One {"name": ..., "arguments": ...} entry per tool call seen so far.
        self.prev_tool_call_arr: list[dict] = []

        # Sentinel tokens
        self.tool_call_start_token: str = "<minimax:tool_call>"
        self.tool_call_end_token: str = "</minimax:tool_call>"

        # Streaming state
        self.is_tool_call_started: bool = False
        # Number of complete <invoke> blocks already consumed while streaming.
        self.current_tool_index: int = 0

        # Regex patterns for complete parsing. Each capture starts right
        # after ``name=`` so it still contains the (possibly quoted) name,
        # the closing ``>`` of the opening tag, and the element body.
        self.tool_call_complete_regex = re.compile(
            r"<minimax:tool_call>(.*?)</minimax:tool_call>", re.DOTALL
        )
        self.invoke_complete_regex = re.compile(
            r"<invoke name=(.*?)</invoke>", re.DOTALL
        )
        self.parameter_complete_regex = re.compile(
            r"<parameter name=(.*?)</parameter>", re.DOTALL
        )

        # ``model_tokenizer`` and ``vocab`` are not assigned in this class;
        # presumably the ToolParser base provides them -- verify there.
        if not self.model_tokenizer:
            raise ValueError(
                "The model tokenizer must be passed to the ToolParser "
                "constructor during construction."
            )

        self.tool_call_start_token_id = self.vocab.get(self.tool_call_start_token)
        self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)

        if self.tool_call_start_token_id is None or self.tool_call_end_token_id is None:
            raise RuntimeError(
                "MiniMax M2 Tool parser could not locate tool call start/end "
                "tokens in the tokenizer!"
            )

        logger.debug(
            "vLLM Successfully import tool parser %s !", self.__class__.__name__
        )

    def _generate_tool_call_id(self) -> str:
        """Return ``call_`` plus the first 24 hex digits of a random UUID4."""
        return f"call_{uuid.uuid4().hex[:24]}"

    def _extract_name(self, name_str: str) -> str:
        """Strip whitespace and one pair of matching surrounding quotes.

        Args:
            name_str: Raw attribute value such as ``"get_weather"``.

        Returns:
            The value without surrounding single or double quotes; the
            whitespace-stripped input when it is not quoted.
        """
        name_str = name_str.strip()
        if (name_str.startswith('"') and name_str.endswith('"')) or (
            name_str.startswith("'") and name_str.endswith("'")
        ):
            return name_str[1:-1]
        return name_str

    def _parse_single_invoke(
        self, invoke_str: str, tools: list | None
    ) -> ToolCall | None:
        """Parse the captured interior of one ``<invoke>`` element.

        Args:
            invoke_str: Text captured by ``invoke_complete_regex``: everything
                between ``<invoke name=`` and ``</invoke>``.
            tools: Tool definitions handed to ``find_tool_properties`` to look
                up the parameter schema of the called function.

        Returns:
            A ``ToolCall`` whose arguments are a JSON object string, or
            ``None`` when no function name precedes the first ``>``.
        """
        # Extract function name: everything up to the ``>`` that closes the
        # opening tag.
        name_match = re.search(r"^([^>]+)", invoke_str)
        if not name_match:
            return None

        function_name = self._extract_name(name_match.group(1))
        tool_properties = find_tool_properties(tools, function_name)

        # Extract parameters
        param_dict = {}
        for match in self.parameter_complete_regex.findall(invoke_str):
            # Split ``name>value``; DOTALL lets values span multiple lines.
            param_match = re.search(r"^([^>]+)>(.*)", match, re.DOTALL)
            if param_match:
                param_name = self._extract_name(param_match.group(1))
                param_value = param_match.group(2).strip()
                param_types = extract_types_from_schema(
                    tool_properties.get(param_name, {})
                )
                param_dict[param_name] = coerce_to_schema_type(param_value, param_types)

        return ToolCall(
            type="function",
            function=FunctionCall(
                name=function_name,
                arguments=json.dumps(param_dict, ensure_ascii=False),
            ),
        )

    def _extract_delta_tool_calls(
        self,
        current_text: str,
        request: ChatCompletionRequest | None,
    ) -> list[DeltaToolCall]:
        """Extract DeltaToolCalls from newly completed <invoke> blocks.

        Tracks progress via ``current_tool_index`` so each block is
        extracted exactly once across successive streaming calls.

        Args:
            current_text: All text generated so far for this response.
            request: Unused here; kept for signature compatibility.

        Returns:
            One ``DeltaToolCall`` per ``<invoke>`` block that completed since
            the previous call and parsed successfully. The ``index`` of each
            delta is its position in ``prev_tool_call_arr`` /
            ``streamed_args_for_tool``.
        """
        complete_invokes = self.invoke_complete_regex.findall(current_text)
        delta_tool_calls: list[DeltaToolCall] = []

        while self.current_tool_index < len(complete_invokes):
            invoke_str = complete_invokes[self.current_tool_index]
            tool_call = self._parse_single_invoke(
                invoke_str,
                self.tools,
            )
            # The block counts as consumed whether or not it parsed.
            self.current_tool_index += 1
            if tool_call is None:
                continue

            args_json = tool_call.function.arguments
            # Number emitted calls by their slot in the bookkeeping lists, not
            # by the raw invoke counter: a skipped (unparseable) invoke must
            # not leave a gap between the emitted index and those lists.
            idx = len(self.prev_tool_call_arr)

            self.prev_tool_call_arr.append(
                {
                    "name": tool_call.function.name,
                    "arguments": json.loads(args_json),
                }
            )
            # ``streamed_args_for_tool`` is not created in this class;
            # presumably the ToolParser base initialises it -- verify there.
            self.streamed_args_for_tool.append(args_json)
            delta_tool_calls.append(
                DeltaToolCall(
                    index=idx,
                    id=self._generate_tool_call_id(),
                    function=DeltaFunctionCall(
                        name=tool_call.function.name,
                        arguments=args_json,
                    ),
                    type="function",
                )
            )

        return delta_tool_calls

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """Extract tool calls from complete model output (non-streaming).

        Args:
            model_output: The full generated text.
            request: Unused here; kept for signature compatibility.

        Returns:
            ``tools_called=True`` with the parsed calls and any text that
            precedes the first ``<minimax:tool_call>`` (``None`` when there
            is none). If no start token is present, no call parses, or
            parsing raises, ``tools_called=False`` with the whole output as
            content.
        """
        # Quick check
        if self.tool_call_start_token not in model_output:
            return ExtractedToolCallInformation(
                tools_called=False, tool_calls=[], content=model_output
            )

        try:
            tool_calls = []

            # Find all complete tool_call blocks
            for tool_call_match in self.tool_call_complete_regex.findall(model_output):
                # Find all invokes within this tool_call
                for invoke_match in self.invoke_complete_regex.findall(tool_call_match):
                    tool_call = self._parse_single_invoke(invoke_match, self.tools)
                    if tool_call:
                        tool_calls.append(tool_call)

            if not tool_calls:
                return ExtractedToolCallInformation(
                    tools_called=False, tool_calls=[], content=model_output
                )

            # Update prev_tool_call_arr. Arguments are stored decoded (as a
            # dict), matching what the streaming path records.
            self.prev_tool_call_arr.clear()
            self.prev_tool_call_arr.extend(
                {
                    "name": tool_call.function.name,
                    "arguments": json.loads(tool_call.function.arguments),
                }
                for tool_call in tool_calls
            )

            # Extract content before first tool call
            first_tool_idx = model_output.find(self.tool_call_start_token)
            content = model_output[:first_tool_idx] if first_tool_idx > 0 else None

            return ExtractedToolCallInformation(
                tools_called=True, tool_calls=tool_calls, content=content
            )

        except Exception:
            # Best effort: log and hand the raw output back as plain content.
            logger.exception("Error extracting tool calls")
            return ExtractedToolCallInformation(
                tools_called=False, tool_calls=[], content=model_output
            )

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],  # pylint: disable=unused-argument
        current_token_ids: Sequence[int],  # pylint: disable=unused-argument
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> DeltaMessage | None:
        """Extract tool calls from streaming model output.

        Uses a buffer-until-complete-invoke strategy: tokens are buffered
        until a complete ``<invoke>...</invoke>`` block is available, then
        parsed and emitted in one shot.

        Args:
            previous_text: Text generated before this step; empty on the
                first step of a response.
            current_text: Text generated so far, including this step.
            delta_text: Text added by this step.
            previous_token_ids: Unused.
            current_token_ids: Unused.
            delta_token_ids: Token ids added by this step.
            request: Forwarded to ``_extract_delta_tool_calls``.

        Returns:
            A ``DeltaMessage`` carrying plain content, newly completed tool
            calls, or an empty-content marker; ``None`` while buffering.
        """

        start_in_text = self.tool_call_start_token in delta_text
        start_in_ids = self.tool_call_start_token_id in delta_token_ids
        tool_call_starting = start_in_text or start_in_ids
        # Reset state on new request (parser is reused) or new tool-call block.
        # NOTE(review): after a reset triggered by a second start token,
        # ``current_text`` presumably still holds the earlier block's invokes,
        # which would then be matched again -- confirm that a response only
        # ever contains one <minimax:tool_call> block.
        if not previous_text or tool_call_starting:
            self.current_tool_index = 0
            self.prev_tool_call_arr.clear()
            self.streamed_args_for_tool.clear()
            self.is_tool_call_started = tool_call_starting

        # Pass through content before any tool call.
        if not self.is_tool_call_started:
            return DeltaMessage(content=delta_text) if delta_text else None

        # Capture content before the start token.
        content_before = None
        if start_in_text:
            before = delta_text[: delta_text.index(self.tool_call_start_token)]
            content_before = before or None

        # Extract newly completed <invoke> blocks as DeltaToolCalls.
        delta_tool_calls = self._extract_delta_tool_calls(current_text, request)

        if delta_tool_calls or content_before:
            return DeltaMessage(
                content=content_before,
                tool_calls=delta_tool_calls,
            )

        # EOS and </minimax:tool_call> both arrive as special tokens with
        # no decoded text. Return non-None for EOS so the serving framework
        # reaches the finish-reason handling path instead of skipping.
        if (
            not delta_text
            and delta_token_ids
            and self.prev_tool_call_arr
            and self.tool_call_end_token_id not in delta_token_ids
        ):
            return DeltaMessage(content="")

        return None

_extract_delta_tool_calls ¶

_extract_delta_tool_calls(
    current_text: str, request: ChatCompletionRequest | None
) -> list[DeltaToolCall]

Extract DeltaToolCalls from newly completed `<invoke>` blocks.

Tracks progress via current_tool_index so each block is extracted exactly once across successive streaming calls.

Source code in vllm/tool_parsers/minimax_m2_tool_parser.py

def _extract_delta_tool_calls(
    self,
    current_text: str,
    request: ChatCompletionRequest | None,
) -> list[DeltaToolCall]:
    """Extract DeltaToolCalls from newly completed <invoke> blocks.

    Tracks progress via ``current_tool_index`` so each block is
    extracted exactly once across successive streaming calls.

    Args:
        current_text: All text generated so far for this response.
        request: Unused here; kept for signature compatibility.

    Returns:
        One ``DeltaToolCall`` per ``<invoke>`` block that completed since
        the previous call and parsed successfully. The ``index`` of each
        delta is its position in ``prev_tool_call_arr`` /
        ``streamed_args_for_tool``.
    """
    complete_invokes = self.invoke_complete_regex.findall(current_text)
    delta_tool_calls: list[DeltaToolCall] = []

    while self.current_tool_index < len(complete_invokes):
        invoke_str = complete_invokes[self.current_tool_index]
        tool_call = self._parse_single_invoke(
            invoke_str,
            self.tools,
        )
        # The block counts as consumed whether or not it parsed.
        self.current_tool_index += 1
        if tool_call is None:
            continue

        args_json = tool_call.function.arguments
        # Number emitted calls by their slot in the bookkeeping lists, not by
        # the raw invoke counter: a skipped (unparseable) invoke must not
        # leave a gap between the emitted index and those lists.
        idx = len(self.prev_tool_call_arr)

        self.prev_tool_call_arr.append(
            {
                "name": tool_call.function.name,
                "arguments": json.loads(args_json),
            }
        )
        self.streamed_args_for_tool.append(args_json)
        delta_tool_calls.append(
            DeltaToolCall(
                index=idx,
                id=self._generate_tool_call_id(),
                function=DeltaFunctionCall(
                    name=tool_call.function.name,
                    arguments=args_json,
                ),
                type="function",
            )
        )

    return delta_tool_calls

_extract_name ¶

_extract_name(name_str: str) -> str

Extract name from quoted string.

Source code in vllm/tool_parsers/minimax_m2_tool_parser.py

def _extract_name(self, name_str: str) -> str:
    """Return *name_str* trimmed and without one pair of matching quotes.

    Surrounding whitespace is removed first. If the result both starts and
    ends with the same quote character (``"`` or ``'``), that first and last
    character are dropped; otherwise the trimmed text is returned unchanged.
    """
    trimmed = name_str.strip()
    for quote in ('"', "'"):
        if trimmed.startswith(quote) and trimmed.endswith(quote):
            return trimmed[1:-1]
    return trimmed

_generate_tool_call_id ¶

_generate_tool_call_id() -> str

Generate a unique tool call ID.

Source code in vllm/tool_parsers/minimax_m2_tool_parser.py

def _generate_tool_call_id(self) -> str:
    """Return a fresh tool call ID.

    The ID is the literal prefix ``call_`` followed by the first 24
    hexadecimal digits of a randomly generated UUID4.
    """
    random_hex = uuid.uuid4().hex
    return "call_" + random_hex[:24]

_parse_single_invoke ¶

_parse_single_invoke(
    invoke_str: str, tools: list | None
) -> ToolCall | None

Parse a single `<invoke>` block.

Source code in vllm/tool_parsers/minimax_m2_tool_parser.py

def _parse_single_invoke(
    self, invoke_str: str, tools: list | None
) -> ToolCall | None:
    """Turn the captured interior of one <invoke> element into a ToolCall.

    ``invoke_str`` begins with the (possibly quoted) function name, followed
    by the ``>`` that closes the opening tag and then the parameter
    elements. Each parameter value is stripped and coerced using the types
    looked up from the tool's schema. Returns ``None`` when nothing precedes
    the first ``>``.
    """
    # With no MULTILINE flag, an anchored search and ``re.match`` agree.
    header = re.match(r"([^>]+)", invoke_str)
    if header is None:
        return None

    function_name = self._extract_name(header.group(1))
    tool_properties = find_tool_properties(tools, function_name)

    arguments = {}
    for raw_param in self.parameter_complete_regex.findall(invoke_str):
        # ``name>value`` -- DOTALL keeps multi-line values intact.
        pieces = re.match(r"([^>]+)>(.*)", raw_param, re.DOTALL)
        if pieces is None:
            continue
        raw_key, raw_value = pieces.groups()
        key = self._extract_name(raw_key)
        text_value = raw_value.strip()
        schema_types = extract_types_from_schema(tool_properties.get(key, {}))
        arguments[key] = coerce_to_schema_type(text_value, schema_types)

    call_payload = FunctionCall(
        name=function_name,
        arguments=json.dumps(arguments, ensure_ascii=False),
    )
    return ToolCall(type="function", function=call_payload)

extract_tool_calls ¶

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation

Extract tool calls from complete model output (non-streaming).

Source code in vllm/tool_parsers/minimax_m2_tool_parser.py

def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """Extract tool calls from complete model output (non-streaming).

    Args:
        model_output: The full generated text.
        request: Unused here; kept for signature compatibility.

    Returns:
        ``tools_called=True`` with the parsed calls and any text that
        precedes the first ``<minimax:tool_call>`` (``None`` when there is
        none). If no start token is present, no call parses, or parsing
        raises, ``tools_called=False`` with the whole output as content.
    """
    # Quick check
    if self.tool_call_start_token not in model_output:
        return ExtractedToolCallInformation(
            tools_called=False, tool_calls=[], content=model_output
        )

    try:
        tool_calls = []

        # Find all complete tool_call blocks
        for tool_call_match in self.tool_call_complete_regex.findall(model_output):
            # Find all invokes within this tool_call
            for invoke_match in self.invoke_complete_regex.findall(tool_call_match):
                tool_call = self._parse_single_invoke(invoke_match, self.tools)
                if tool_call:
                    tool_calls.append(tool_call)

        if not tool_calls:
            return ExtractedToolCallInformation(
                tools_called=False, tool_calls=[], content=model_output
            )

        # Update prev_tool_call_arr. Arguments are stored decoded (as a
        # dict), matching what the streaming path records.
        self.prev_tool_call_arr.clear()
        self.prev_tool_call_arr.extend(
            {
                "name": tool_call.function.name,
                "arguments": json.loads(tool_call.function.arguments),
            }
            for tool_call in tool_calls
        )

        # Extract content before first tool call
        first_tool_idx = model_output.find(self.tool_call_start_token)
        content = model_output[:first_tool_idx] if first_tool_idx > 0 else None

        return ExtractedToolCallInformation(
            tools_called=True, tool_calls=tool_calls, content=content
        )

    except Exception:
        # Best effort: log and hand the raw output back as plain content.
        logger.exception("Error extracting tool calls")
        return ExtractedToolCallInformation(
            tools_called=False, tool_calls=[], content=model_output
        )

extract_tool_calls_streaming ¶

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> DeltaMessage | None

Extract tool calls from streaming model output.

Uses a buffer-until-complete-invoke strategy: tokens are buffered until a complete <invoke>...</invoke> block is available, then parsed and emitted in one shot.

Source code in vllm/tool_parsers/minimax_m2_tool_parser.py

def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],  # pylint: disable=unused-argument
    current_token_ids: Sequence[int],  # pylint: disable=unused-argument
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> DeltaMessage | None:
    """Incrementally turn streamed model output into delta messages.

    Text outside a tool call is forwarded as content. Once the tool-call
    start token has been seen, output is withheld until a whole
    ``<invoke>...</invoke>`` element is present in ``current_text``; each
    such element is then emitted as one complete tool-call delta.

    Returns ``None`` when there is nothing to emit for this step.
    """
    marker = self.tool_call_start_token
    marker_pos = delta_text.find(marker)
    text_has_start = marker_pos != -1
    ids_have_start = self.tool_call_start_token_id in delta_token_ids
    block_opening = text_has_start or ids_have_start

    # Fresh request (empty history) or a tool-call block just opened:
    # discard whatever streaming state is left over.
    if block_opening or not previous_text:
        self.current_tool_index = 0
        self.prev_tool_call_arr.clear()
        self.streamed_args_for_tool.clear()
        self.is_tool_call_started = block_opening

    # Outside any tool call: plain content goes straight through.
    if not self.is_tool_call_started:
        if delta_text:
            return DeltaMessage(content=delta_text)
        return None

    # Text sharing a delta with the start token, but preceding it.
    leading_content = None
    if text_has_start:
        leading_content = delta_text[:marker_pos] or None

    # Pick up every <invoke> element that has completed by now.
    new_calls = self._extract_delta_tool_calls(current_text, request)
    if new_calls or leading_content:
        return DeltaMessage(content=leading_content, tool_calls=new_calls)

    # What remains is either buffering (emit nothing) or a special token
    # that decoded to no text. Of the latter, the tool-call end token is
    # dropped, while anything else (e.g. EOS) after at least one emitted call
    # yields an empty message so the serving layer still reaches its
    # finish-reason handling.
    if delta_text or not delta_token_ids or not self.prev_tool_call_arr:
        return None
    if self.tool_call_end_token_id in delta_token_ids:
        return None
    return DeltaMessage(content="")