vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser ¶

logger `module-attribute` ¶

# Module-level logger obtained from init_logger for this module's name.
logger = init_logger(__name__)

Qwen3XMLToolParser ¶

Bases: ToolParser

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

@ToolParserManager.register_module("qwen3_xml")
class Qwen3XMLToolParser(ToolParser):
    """Tool parser registered under the name ``qwen3_xml``.

    Parsing is delegated to a ``StreamingXMLToolCallParser`` kept in
    ``self.parser``; this class resets that parser, forwards text to it
    and repackages its results for the non-streaming and streaming
    entry points.
    """

    def __init__(self, tokenizer: AnyTokenizer):
        """Initialise the base parser and create the XML streaming parser."""
        super().__init__(tokenizer)
        self.parser = StreamingXMLToolCallParser()

        parser_name = self.__class__.__name__
        logger.info("vLLM Successfully import tool parser %s !", parser_name)

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """Extract tool calls from a complete model output.

        Args:
            model_output: Full generated text, fed to the XML parser
                as one chunk
            request: Originating request; when truthy, its ``tools`` are
                handed to the XML parser
        Returns:
            ExtractedToolCallInformation holding the parsed content and
            every parsed tool call that has a function with a name
        """
        xml_parser = self.parser
        xml_parser.reset_streaming_state()
        if request:
            xml_parser.set_tools(request.tools)
        parsed = xml_parser.parse_single_streaming_chunks(model_output)

        # Nothing parsed as a tool call: report content only
        if not parsed.tool_calls:
            return ExtractedToolCallInformation(
                tool_calls=[],
                tools_called=False,
                content=parsed.content,
            )

        # Keep only calls that carry a function with a non-empty name
        named_calls = [
            ToolCall(
                id=call.id,
                type=call.type,
                function=FunctionCall(
                    name=call.function.name,
                    arguments=call.function.arguments,
                ),
            ) for call in parsed.tool_calls
            if call.function and call.function.name
        ]
        return ExtractedToolCallInformation(
            tool_calls=named_calls,
            tools_called=bool(named_calls),
            content=parsed.content,
        )

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """Process one streaming step and return the delta it produces.

        The XML parser is reset (and given ``request.tools`` when a
        request is present) whenever ``previous_text`` is empty. Only
        ``previous_text``, ``current_text``, ``delta_text``,
        ``delta_token_ids`` and ``request`` are read here.

        Returns:
            The delta produced by the XML parser for ``delta_text``, or
            an empty-arguments tool-call delta for a token-only step
            once all tool calls seen so far are closed
        """
        xml_parser = self.parser
        if not previous_text:
            xml_parser.reset_streaming_state()
            if request:
                xml_parser.set_tools(request.tools)

        # Tokens may arrive without any decoded text. If tool calls were
        # seen before and every one of them is closed, hand back an empty
        # tool_call delta so the outer streaming layer still outputs the
        # tool_call field.
        if not delta_text and delta_token_ids:
            opened = current_text.count(xml_parser.tool_call_start_token)
            closed = current_text.count(xml_parser.tool_call_end_token)
            if opened - closed == 0 and xml_parser.tool_call_index > 0:
                # Fall back to the last completed id when no call is open
                call_id = xml_parser.current_call_id
                if not call_id:
                    call_id = xml_parser.last_completed_call_id
                empty_call = DeltaToolCall(
                    index=xml_parser.tool_call_index - 1,
                    id=call_id,
                    function=DeltaFunctionCall(arguments=''),
                    type='function',
                )
                return DeltaMessage(tool_calls=[empty_call])

        return xml_parser.parse_single_streaming_chunks(delta_text)

parser `instance-attribute` ¶

# Assigned in __init__; the streaming XML parser that parsing is delegated to.
parser = StreamingXMLToolCallParser()

__init__ ¶

__init__(tokenizer: AnyTokenizer)

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def __init__(self, tokenizer: AnyTokenizer):
    """Initialise the base parser and create the XML streaming parser."""
    super().__init__(tokenizer)
    self.parser = StreamingXMLToolCallParser()

    parser_name = self.__class__.__name__
    logger.info("vLLM Successfully import tool parser %s !", parser_name)

extract_tool_calls ¶

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """Extract tool calls from a complete model output.

    Args:
        model_output: Full generated text, fed to the XML parser
            as one chunk
        request: Originating request; when truthy, its ``tools`` are
            handed to the XML parser
    Returns:
        ExtractedToolCallInformation holding the parsed content and
        every parsed tool call that has a function with a name
    """
    xml_parser = self.parser
    xml_parser.reset_streaming_state()
    if request:
        xml_parser.set_tools(request.tools)
    parsed = xml_parser.parse_single_streaming_chunks(model_output)

    # Nothing parsed as a tool call: report content only
    if not parsed.tool_calls:
        return ExtractedToolCallInformation(
            tool_calls=[],
            tools_called=False,
            content=parsed.content,
        )

    # Keep only calls that carry a function with a non-empty name
    named_calls = [
        ToolCall(
            id=call.id,
            type=call.type,
            function=FunctionCall(
                name=call.function.name,
                arguments=call.function.arguments,
            ),
        ) for call in parsed.tool_calls
        if call.function and call.function.name
    ]
    return ExtractedToolCallInformation(
        tool_calls=named_calls,
        tools_called=bool(named_calls),
        content=parsed.content,
    )

extract_tool_calls_streaming ¶

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """Process one streaming step and return the delta it produces.

    The XML parser is reset (and given ``request.tools`` when a request
    is present) whenever ``previous_text`` is empty. Only
    ``previous_text``, ``current_text``, ``delta_text``,
    ``delta_token_ids`` and ``request`` are read here.

    Returns:
        The delta produced by the XML parser for ``delta_text``, or an
        empty-arguments tool-call delta for a token-only step once all
        tool calls seen so far are closed
    """
    xml_parser = self.parser
    if not previous_text:
        xml_parser.reset_streaming_state()
        if request:
            xml_parser.set_tools(request.tools)

    # Tokens may arrive without any decoded text. If tool calls were
    # seen before and every one of them is closed, hand back an empty
    # tool_call delta so the outer streaming layer still outputs the
    # tool_call field.
    if not delta_text and delta_token_ids:
        opened = current_text.count(xml_parser.tool_call_start_token)
        closed = current_text.count(xml_parser.tool_call_end_token)
        if opened - closed == 0 and xml_parser.tool_call_index > 0:
            # Fall back to the last completed id when no call is open
            call_id = xml_parser.current_call_id
            if not call_id:
                call_id = xml_parser.last_completed_call_id
            empty_call = DeltaToolCall(
                index=xml_parser.tool_call_index - 1,
                id=call_id,
                function=DeltaFunctionCall(arguments=''),
                type='function',
            )
            return DeltaMessage(tool_calls=[empty_call])

    return xml_parser.parse_single_streaming_chunks(delta_text)

StreamingXMLToolCallParser ¶

Simplified streaming XML tool call parser. Supports streaming input, parsing, and output.

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

class StreamingXMLToolCallParser:
    """
    Simplified streaming XML tool call parser
    Supports streaming input, parsing, and output
    """

    def __init__(self):
        self.reset_streaming_state()

        # Tool configuration information
        self.tools: Union[list[ChatCompletionToolsParam], None] = None
        self.tool_call_start_token: str = '<tool_call>'
        self.tool_call_end_token: str = '</tool_call>'
        self.function_start_token: str = '<function='
        self.function_end_token: str = '</function>'
        self.parameter_start_token: str = '<parameter='
        self.parameter_end_token: str = '</parameter>'

    def reset_streaming_state(self):
        """Reset streaming parsing state"""

        self.deltas = []
        # state for streaming
        self.tool_call_index = 0
        self.current_call_id = None
        self.last_completed_call_id = None
        self.current_function_name = None
        self.current_function_open = False
        self.parameters = {}
        self.current_param_name = None
        self.current_param_value = ''
        self.current_param_value_converted = ''
        self.current_param_is_first = False
        self.should_emit_end_newline = False
        self.start_quote_emitted = False

        self.streaming_buffer = ''
        self.last_processed_pos = 0

        self.text_content_buffer = ''

        # state for preprocessing and deferred parsing
        self._pre_inside_parameter = False
        self._pre_param_buffer = ""
        self._pre_current_param_name = None
        self.defer_current_parameter = False
        self.deferred_param_raw_value = ""

        # recreate parser
        self.parser = ParserCreate()
        self.setup_parser()

    def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage:
        """
        Parse a single streaming XML chunk and return the delta it produced.

        The chunk is appended to ``self.streaming_buffer`` and every
        complete element in the buffer is processed. Deltas emitted while
        handling this chunk are merged into one response. When the chunk
        contains ``</function>`` or ``</tool_call>`` but the matching
        closing delta was not produced, the end events are completed
        manually through ``_end_element``.

        Args:
            xml_chunk: Single XML chunk string
        Returns:
            DeltaMessage: Delta information generated by this chunk;
            ``DeltaMessage(content=None)`` when nothing was produced
        """
        # Record delta count before processing so that only deltas
        # emitted for this chunk are inspected and merged below
        initial_delta_count = len(self.deltas)

        self.streaming_buffer += xml_chunk

        found_elements = self._process_complete_xml_elements()

        if found_elements:
            # If complete elements found, check if end events were missed
            # some tags may not have been triggered
            try:
                new_deltas = self.deltas[initial_delta_count:]
                # If this chunk contains </function>
                # but didn't generate '}', then complete it
                if (self.current_call_id is not None
                        and self.function_end_token in xml_chunk):

                    # A function close is recognised by a delta for the
                    # current call whose arguments are exactly:
                    # - '}' (non-empty parameter ending)
                    # - '{}' (empty parameter function)
                    has_function_close = any((td.tool_calls and any(
                        (tc.function and tc.id == self.current_call_id
                         and isinstance(tc.function.arguments, str) and
                         (tc.function.arguments in ('}', '{}')))
                        for tc in td.tool_calls)) for td in new_deltas)
                    if not has_function_close:
                        # Close potentially unclosed element
                        if self.current_param_name:
                            self._end_element('parameter')
                        if self.current_function_name:
                            self._end_element('function')
                # If this chunk contains </tool_call>
                # but didn't generate final empty delta, then complete it
                if (self.current_call_id is not None
                        and self.tool_call_end_token in xml_chunk):
                    # The tool_call close is recognised by a 'function'
                    # delta for the current call with empty arguments
                    has_toolcall_close = any((td.tool_calls and any(
                        (tc.type == 'function' and tc.function and tc.function.
                         arguments == '' and tc.id == self.current_call_id)
                        for tc in td.tool_calls)) for td in new_deltas)
                    if not has_toolcall_close:
                        # Close potentially unclosed element
                        if self.current_param_name:
                            self._end_element('parameter')
                        if self.current_function_name:
                            self._end_element('function')
                        self._end_element('tool_call')
            except Exception as e:
                # Best effort: a failure in the fallback is logged and
                # whatever deltas exist are still merged and returned
                logger.warning("Error with fallback parsing: %s", e)
            # Merge newly generated deltas into single response
            result_delta = self._merge_new_deltas_to_single_response(
                initial_delta_count)
            return result_delta
        else:
            # No complete elements, check if there's unoutput text content
            if self.text_content_buffer and self.tool_call_index == 0:
                # Has text content but no tool_call yet, output text content
                text_delta = DeltaMessage(content=self.text_content_buffer)
                self._emit_delta(text_delta)
                # Clear buffer to avoid duplicate output
                self.text_content_buffer = ''
                return text_delta

            # If this chunk contains end tags but wasn't triggered by parser,
            # manually complete end events.
            # NOTE(review): the intent is to act only on the call that was
            # open on entry, to avoid closing a new call in multi
            # <tool_call> scenarios; the only check visible here is that
            # current_call_id is not None - confirm.
            if (self.current_call_id is not None
                    and (self.function_end_token in xml_chunk
                         or self.tool_call_end_token in xml_chunk)):
                # Close potentially unclosed element
                if self.current_param_name:
                    self._end_element('parameter')
                if self.function_end_token in xml_chunk and \
                    self.current_function_name:
                    self._end_element('function')
                if self.tool_call_end_token in xml_chunk:
                    self._end_element('tool_call')
                # Return the merged delta result generated by this fallback
                result_delta = self._merge_new_deltas_to_single_response(
                    initial_delta_count)
                return result_delta

            # No complete elements, return empty response
            return DeltaMessage(content=None)

    def _escape_xml_special_chars(self, text: str) -> str:
        """
        Escape XML special characters
        Args:
            text: Original text
        Returns:
            Escaped text
        """
        xml_escapes = {
            '&': '&amp;',
            '<': '&lt;',
            '>': '&gt;',
            '"': '&quot;',
            "'": '&apos;'
        }

        for char, escape in xml_escapes.items():
            text = text.replace(char, escape)

        return text

    def _process_complete_xml_elements(self) -> bool:
        """
        Process complete XML elements in buffer

        Walks ``self.streaming_buffer`` from ``self.last_processed_pos``,
        taking one element at a time from ``_find_next_complete_element``.
        Elements that are not skipped are preprocessed and fed to
        ``self.parser``. ``self.last_processed_pos`` is advanced past
        every element handled, including ones whose parsing raised.

        Returns:
            bool: Whether at least one element was fed to the XML parser
            without raising
        """
        found_any = False

        while self.last_processed_pos < len(self.streaming_buffer):
            # Find next complete xml element
            element, end_pos = self._find_next_complete_element(
                self.last_processed_pos)
            if element is None:
                # No complete element found, wait for more data
                break

            # Check if this element should be skipped
            # (_should_skip_element may have collected it as plain text)
            if self._should_skip_element(element):
                self.last_processed_pos = end_pos
                continue

            # Found complete XML element, process it
            try:
                preprocessed_element = self._preprocess_xml_chunk(element)
                # Check if this is the first tool_call start
                # (either <tool_call> or a rewritten <function name=...>)
                if ((preprocessed_element.strip().startswith('<tool_call>') or
                     preprocessed_element.strip().startswith('<function name=')
                     ) and self.tool_call_index
                        == 0) and self.text_content_buffer:
                    # First tool_call starts,
                    # output previously collected text content first
                    text_delta = DeltaMessage(content=self.text_content_buffer)
                    self._emit_delta(text_delta)
                    # Clear buffer for potential subsequent text content
                    self.text_content_buffer = ''

                # If a new tool_call starts and
                # there are already completed tool_calls
                if (preprocessed_element.strip().startswith('<tool_call>')
                        and self.tool_call_index > 0 and self.current_call_id):
                    # Reset parser state but preserve generated deltas
                    if self.current_param_name:
                        self._end_element('parameter')
                    if self.current_function_open or self.current_function_name:
                        self._end_element('function')
                    # Output final tool_call tail delta
                    # (empty arguments, same index and id as the open call)
                    final_delta = DeltaMessage(
                        role=None,
                        content=None,
                        reasoning_content=None,
                        tool_calls=[
                            DeltaToolCall(index=self.tool_call_index - 1,
                                          id=self.current_call_id,
                                          type='function',
                                          function=DeltaFunctionCall(
                                              name=None, arguments=''))
                        ])
                    self._emit_delta(final_delta)
                    # Reset XML parser and current call state
                    self._reset_xml_parser_after_tool_call()
                # Parse preprocessed element
                # (False: this is not the final piece of the document)
                self.parser.Parse(preprocessed_element, False)
                found_any = True

            except Exception as e:
                # Best effort: log and move on to the next element
                logger.warning("Error when parsing XML elements: %s", e)

            # Update processed position
            self.last_processed_pos = end_pos

        return found_any

    def _should_skip_element(self, element: str) -> bool:
        """
        Determine whether an element should be skipped

        Args:
            element: Element to evaluate

        Returns:
            bool: True means should skip, False means should process
        """

        # If it's a tool_call XML tag, don't skip
        if element.startswith(
                self.tool_call_start_token) or element.startswith(
                    self.function_start_token) or element.startswith(
                        self.parameter_start_token):
            return False

        # If currently not parsing tool calls and not blank,
        # collect this text instead of skipping
        # Only process other XML elements after tool_call appears,
        # otherwise treat as plain text
        if self.current_call_id is None and element:
            # Collect text content to buffer
            self.text_content_buffer += element
            return True  # Still skip, but content has been collected

        # If currently parsing tool calls,
        # this might be parameter value, don't skip
        if self.current_call_id is not None:
            return False

        # Skip blank content
        return not element

    def _find_next_complete_element(
            self, start_pos: int) -> tuple[Optional[str], int]:
        """
        Find next complete XML element from specified position

        Args:
            start_pos: Position to start searching

        Returns:
            (Complete element string, element end position), 
            returns (None, start_pos) if no complete element found
        """
        buffer = self.streaming_buffer[start_pos:]

        if not buffer:
            return None, start_pos

        if buffer.startswith('<'):
            # Need to ensure no new < appears,
            # find the nearest one between < and >
            tag_end = buffer.find('<', 1)
            tag_end2 = buffer.find('>', 1)
            if tag_end != -1 and tag_end2 != -1:
                # Next nearest is <
                if tag_end < tag_end2:
                    return buffer[:tag_end], start_pos + tag_end
                # Next nearest is >, means found XML element
                else:
                    return buffer[:tag_end2 + 1], start_pos + tag_end2 + 1
            elif tag_end != -1:
                return buffer[:tag_end], start_pos + tag_end
            elif tag_end2 != -1:
                return buffer[:tag_end2 + 1], start_pos + tag_end2 + 1
            else:
                # If currently not parsing tool calls (entering a tool_call),
                # check if starts with <tool_call>
                if self.current_call_id is None:
                    # Check if might be start of <tool_call>
                    if buffer == '<tool_call>'[:len(buffer)]:
                        # Might be start of <tool_call>, wait for more data
                        return None, start_pos
                    else:
                        # Not start of <tool_call>, treat as text
                        return buffer, start_pos + len(buffer)
                else:
                    # When parsing tool calls,
                    # wait for more data to get complete tag
                    return None, start_pos
        else:
            # Find text content (until next < or buffer end)
            next_tag_pos = buffer.find('<')
            if next_tag_pos != -1:
                # Found text content
                text_content = buffer[:next_tag_pos]
                return text_content, start_pos + next_tag_pos
            else:
                # Buffer end is all text, process
                # (no longer wait for more data)
                remaining = buffer
                return remaining, start_pos + len(remaining)

    def _merge_new_deltas_to_single_response(
            self, initial_count: int) -> DeltaMessage:
        """
        Merge newly generated deltas from this processing
        into a single DeltaMessage

        Args:
            initial_count: Delta count before processing

        Returns:
            Merged DeltaMessage containing all newly generated delta information
        """
        if len(self.deltas) <= initial_count:
            return DeltaMessage(content=None)

        # Get newly generated deltas
        new_deltas = self.deltas[initial_count:]

        if len(new_deltas) == 1:
            # Only one new delta, return directly
            return new_deltas[0]

        # Merge multiple new deltas
        merged_tool_calls: list[DeltaToolCall] = []
        merged_content: str = ''

        for delta in new_deltas:
            if delta.content:
                merged_content += delta.content
            if delta.tool_calls:
                # For tool_calls, we need to intelligently merge arguments
                for tool_call in delta.tool_calls:
                    # Find if there's already a tool_call with the same call_id
                    existing_call = None
                    for existing in merged_tool_calls:
                        if existing.id == tool_call.id:
                            existing_call = existing
                            break

                    if existing_call and existing_call.function:
                        # Merge to existing tool_call
                        if tool_call.function and tool_call.function.name:
                            existing_call.function.name = \
                                tool_call.function.name
                        if tool_call.function \
                            and tool_call.function.arguments is not None:
                            if existing_call.function.arguments is None:
                                existing_call.function.arguments = ''

                            # For streaming JSON parameters,
                            # simply concatenate in order
                            new_args = tool_call.function.arguments
                            existing_call.function.arguments += new_args
                        if tool_call.type:
                            existing_call.type = tool_call.type
                    else:
                        # Add new tool_call
                        merged_tool_calls.append(tool_call)

        return DeltaMessage(content=merged_content if merged_content else None,
                            tool_calls=merged_tool_calls)

    def _preprocess_xml_chunk(self, chunk: str) -> str:
        """
        Preprocess XML chunk, handle non-standard formats,
        and escape special characters

        ``<function=name>`` and ``<parameter=name>`` are rewritten to
        ``<function name="name">`` and ``<parameter name="name">``.
        A parameter start tag switches on accumulation mode
        (``self._pre_inside_parameter``). While in that mode, content is
        either passed through escaped (leaving the mode) or buffered in
        ``self._pre_param_buffer`` until ``</parameter>`` arrives; at that
        point the buffered text is recorded for deferred parsing and
        returned escaped, followed by ``</parameter>``.

        Args:
            chunk: Original XML chunk

        Returns:
            Processed XML chunk; an empty string while parameter content
            is being buffered
        """

        # Check if this is a tool_call related element
        # (start or end tag of tool_call / function / parameter)
        is_tool_call = False
        if chunk.startswith(self.tool_call_start_token) or chunk.startswith(
                self.tool_call_end_token):
            is_tool_call = True
        if chunk.startswith(self.function_start_token) or chunk.startswith(
                self.function_end_token):
            is_tool_call = True
        if chunk.startswith(self.parameter_start_token) or chunk.startswith(
                self.parameter_end_token):
            is_tool_call = True
        # Handle <function=name> format -> <function name="name">
        processed = re.sub(r'<function=([^>]+)>', r'<function name="\1">',
                           chunk)
        # Handle <parameter=name> format -> <parameter name="name">
        processed = re.sub(r'<parameter=([^>]+)>', r'<parameter name="\1">',
                           processed)

        # Parameter content below uses the untouched chunk, not the
        # tag-rewritten text
        original_chunk = chunk
        # If in parameter value accumulation mode
        if self._pre_inside_parameter:
            # Parameter end: output accumulated raw text
            # safely then return </parameter>
            if processed.startswith('</parameter>'):
                body_text = self._pre_param_buffer
                # Trigger deferred parsing mode
                # literal_eval+json output in end_element
                # NOTE(review): the end-element handler is outside this
                # view - confirm how the raw value is consumed
                self.defer_current_parameter = True
                self.deferred_param_raw_value = body_text
                # Clean up state
                self._pre_inside_parameter = False
                self._pre_param_buffer = ""
                self._pre_current_param_name = None
                safe_text = self._escape_xml_special_chars(body_text)
                return f"{safe_text}</parameter>"
            else:
                # If this is the first block of content after entering parameter
                # evaluate if deferred parsing is needed;
                # If not needed, exit accumulation mode
                # and pass through directly
                if self._pre_param_buffer == "":
                    # Get current parameter type
                    # ('string' when no parameter name was captured)
                    param_type = self._get_param_type(
                        self._pre_current_param_name
                    ) if self._pre_current_param_name else 'string'
                    # Only these types need deferred parsing to
                    # handle Python literals containing single quotes
                    is_object_type = param_type in ["object"]
                    is_complex_type = (param_type
                                       in ["array", "arr", "sequence"]
                                       or param_type.startswith("dict")
                                       or param_type.startswith("list"))

                    # Container symbols hint at a literal that needs
                    # deferred parsing for object-typed parameters
                    has_container_hint = ('[' in original_chunk) or (
                        '{' in original_chunk) or ('(' in original_chunk)

                    # Determine if deferred parsing is needed
                    need_defer = False
                    if is_complex_type:
                        # Complex type, always need deferred parsing
                        need_defer = True
                    elif is_object_type and has_container_hint and (
                            "'" in original_chunk):
                        # Object type with container symbols
                        # and single quotes, need deferred parsing
                        need_defer = True

                    if not need_defer:
                        # No need for deferred parsing,
                        # exit parameter mode directly
                        self._pre_inside_parameter = False
                        return self._escape_xml_special_chars(original_chunk)
                # Deferred: keep buffering and emit nothing for now
                self._pre_param_buffer += original_chunk
                return ""

        # Parameter start: enable accumulation
        if processed.startswith('<parameter name='):
            m = re.match(r'<parameter name="([^"]+)">', processed)
            if m:
                self._pre_current_param_name = m.group(1)
            self._pre_inside_parameter = True
            self._pre_param_buffer = ""
            return processed

        # If processed doesn't contain special_token, escape processed
        # This is because XML parsing encounters special characters
        # and reports errors, so escaping is needed
        if not is_tool_call:
            processed = self._escape_xml_special_chars(processed)
        return processed

    def _emit_delta(self, delta: DeltaMessage):
        """Emit Delta response (streaming output)"""
        self.deltas.append(delta)

    def _auto_close_open_parameter_if_needed(self,
                                             incoming_tag: Optional[str] = None
                                             ):
        """Before starting to process new elements, 
        if there are unclosed tags from before, 
        automatically complete their endings to the parser.
        - If there are unclosed parameters, 
        it's equivalent to feeding `</parameter>`
        - When about to start a new function or tool_call, 
        if there are unclosed functions, complete `</function>`.
        - When about to start a new tool_call, 
        if there are unclosed tool_calls, complete `</tool_call>`.
        """
        # First close unclosed parameters
        if self.current_param_name:
            self._end_element('parameter')

        # If about to start new function or tool_call,
        # and there are unclosed functions, close function first
        if incoming_tag in ('function',
                            'tool_call') and self.current_function_name:
            self._end_element('function')

        # If about to start new tool_call,
        # and there are unclosed tool_calls, close tool_call first
        if incoming_tag == 'tool_call' and self.current_call_id:
            self._end_element('tool_call')

    def _start_element(self, name: str, attrs: dict[str, str]):
        """Handle XML start element events"""

        if name == 'root':
            return

        if name == 'tool_call':
            # Before opening new tool_call,
            # automatically complete previous unclosed tags
            self._auto_close_open_parameter_if_needed('tool_call')

            self.parameters = {}
            self.current_call_id = self._get_next_call_id()
            self.current_param_is_first = True
            self.tool_call_index += 1
        elif name.startswith('function') or (name == 'function'):
            # If missing tool_call, manually complete
            if not self.current_call_id:
                self._start_element('tool_call', {})
            # Before opening new function,
            # automatically complete previous unclosed tags (parameter/function)
            self._auto_close_open_parameter_if_needed('function')
            function_name = self._extract_function_name(name, attrs)
            self.current_function_name = function_name
            self.current_function_open = True
            if function_name:
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.tool_call_index - 1,
                                  id=self.current_call_id,
                                  type='function',
                                  function=DeltaFunctionCall(
                                      name=function_name, arguments=''))
                ])
                self._emit_delta(delta)
        elif name.startswith('parameter') or (name == 'parameter'):
            # If previous parameter hasn't ended normally,
            # complete its end first, then start new parameter
            self._auto_close_open_parameter_if_needed('parameter')
            param_name = self._extract_parameter_name(name, attrs)
            self.current_param_name = param_name
            self.current_param_value = ''
            self.current_param_value_converted = ''
            self.start_quote_emitted = False  # Reset start quote flag

            # Only output parameter name and colon,
            # don't output quotes
            # decide after parameter value type is determined
            if param_name:
                if not self.parameters:
                    # First parameter
                    # start JSON, only output parameter name and colon
                    json_start = f'{{"{param_name}": '
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.tool_call_index - 1,
                                      id=self.current_call_id,
                                      type='function',
                                      function=DeltaFunctionCall(
                                          name=None, arguments=json_start))
                    ])
                    self._emit_delta(delta)
                    self.current_param_is_first = True
                else:
                    # Subsequent parameters
                    # add comma and parameter name, no quotes
                    json_continue = f', "{param_name}": '
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.tool_call_index - 1,
                                      id=self.current_call_id,
                                      type='function',
                                      function=DeltaFunctionCall(
                                          name=None, arguments=json_continue))
                    ])
                    self._emit_delta(delta)
                    self.current_param_is_first = False

    def _char_data(self, data: str):
        """Handle XML character data events"""
        if data and self.current_param_name:
            # If preprocessing stage determines deferred parsing is needed,
            # only cache character data, no streaming output
            if self.defer_current_parameter:
                original_data = data
                if self.should_emit_end_newline:
                    original_data = '\n' + original_data
                    self.should_emit_end_newline = False
                if original_data.endswith('\n'):
                    self.should_emit_end_newline = True
                    original_data = original_data[:-1]
                self.current_param_value += original_data
                return

            param_type = self._get_param_type(self.current_param_name)

            # Check if this is the first time receiving data for this parameter
            # If this is the first packet of data and starts with \n, remove \n
            if not self.current_param_value and data.startswith('\n'):
                data = data[1:]

            # Output start quote for string type (if not already output)
            if (param_type
                    in ['string', 'str', 'text', 'varchar', 'char', 'enum']
                    and not self.start_quote_emitted):
                quote_delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.tool_call_index - 1,
                                  id=self.current_call_id,
                                  type='function',
                                  function=DeltaFunctionCall(name=None,
                                                             arguments='"'))
                ])
                self._emit_delta(quote_delta)
                self.start_quote_emitted = True

            if not data:
                return

            original_data = data
            # Delay output of trailing newline
            if self.should_emit_end_newline:
                original_data = '\n' + original_data
                self.should_emit_end_newline = False
            if original_data.endswith('\n'):
                self.should_emit_end_newline = True
                original_data = original_data[:-1]
            self.current_param_value += original_data

            # convert parameter value by param_type
            converted_value = self._convert_param_value(
                self.current_param_value, param_type)
            output_data = self._convert_for_json_streaming(
                converted_value, param_type)

            delta_data = output_data[len(self.current_param_value_converted):]
            self.current_param_value_converted = output_data

            delta = DeltaMessage(tool_calls=[
                DeltaToolCall(index=self.tool_call_index - 1,
                              id=self.current_call_id,
                              type='function',
                              function=DeltaFunctionCall(name=None,
                                                         arguments=delta_data))
            ])
            self._emit_delta(delta)

    def _end_element(self, name: str):
        """Handle XML end element events.

        Behaviour by tag:

        - ``parameter...``: finish the open parameter. A deferred parameter
          is parsed in one go and emitted as a single JSON fragment; any
          other parameter gets its closing quote if it is string-typed. The
          value is stored in ``self.parameters`` and the per-parameter state
          is cleared.
        - ``function...``: emit ``}`` when parameters were stored, otherwise
          ``{}``, and mark the function as closed.
        - ``tool_call``: close a still-open function, emit a final delta with
          empty arguments, emit buffered text content if it is not blank, and
          reset the parser state for the next tool call.

        Events for ``root`` are ignored.

        Args:
            name: Tag name reported by the XML parser (also used for the
                synthetic end events issued by this class itself).
        """

        if name == 'root':
            return

        # If function or tool_call ends and there are still unclosed parameters,
        # complete parameter end first
        # (called without an incoming tag, this only closes the parameter).
        if (name.startswith('function') or name == 'function'
                or name == 'tool_call') and self.current_param_name:
            self._auto_close_open_parameter_if_needed()

        if (name.startswith('parameter')
                or name == 'parameter') and self.current_param_name:
            # End current parameter
            param_name = self.current_param_name
            param_value = self.current_param_value

            # If in deferred parsing mode,
            # perform overall parsing on raw content
            # accumulated in preprocessing stage and output once
            if self.defer_current_parameter:
                # Prefer the raw text captured for the deferred parameter;
                # fall back to the text buffered by _char_data.
                raw_text = self.deferred_param_raw_value \
                if self.deferred_param_raw_value else param_value
                parsed_value = None
                output_arguments = None
                try:
                    # If previously delayed trailing newline,
                    # add it back before parsing
                    if self.should_emit_end_newline:
                        raw_for_parse = raw_text + '\n'
                    else:
                        raw_for_parse = raw_text
                    # Parse the text as a Python literal and re-serialise it
                    # as JSON; both steps are covered by the fallback below.
                    parsed_value = ast.literal_eval(raw_for_parse)
                    output_arguments = json.dumps(parsed_value,
                                                  ensure_ascii=False)
                except Exception:
                    # Fallback: output as string as-is
                    # (the raw text, JSON-encoded as a string)
                    output_arguments = json.dumps(raw_text, ensure_ascii=False)
                    parsed_value = raw_text

                # The whole deferred value goes out in a single delta.
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.tool_call_index - 1,
                                  id=self.current_call_id,
                                  type='function',
                                  function=DeltaFunctionCall(
                                      name=None, arguments=output_arguments))
                ])
                self._emit_delta(delta)

                # Clean up and store
                self.should_emit_end_newline = False
                self.parameters[param_name] = parsed_value
                self.current_param_name = None
                self.current_param_value = ""
                self.current_param_value_converted = ""
                self.start_quote_emitted = False
                self.defer_current_parameter = False
                self.deferred_param_raw_value = ""
                return

            param_type = self._get_param_type(param_name)

            # convert complete parameter value by param_type
            converted_value = self._convert_param_value(
                param_value, param_type)

            # Decide whether to add end quote based on parameter type
            if param_type in [
                    'string', 'str', 'text', 'varchar', 'char', 'enum'
            ]:
                # For empty string parameters, need special handling
                if not param_value and not self.start_quote_emitted:
                    # No start quote output,
                    # directly output complete empty string
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.tool_call_index - 1,
                                      id=self.current_call_id,
                                      type='function',
                                      function=DeltaFunctionCall(
                                          name=None, arguments='""'))
                    ])
                    self._emit_delta(delta)
                else:
                    # A start quote was emitted or a value was buffered:
                    # output only the end quote
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.tool_call_index - 1,
                                      id=self.current_call_id,
                                      type='function',
                                      function=DeltaFunctionCall(
                                          name=None, arguments='"'))
                    ])
                    self._emit_delta(delta)

            # A trailing newline that was still held back is dropped here.
            self.should_emit_end_newline = False
            # Store converted value
            self.parameters[param_name] = converted_value
            self.current_param_name = None
            self.current_param_value = ''
            self.current_param_value_converted = ''
            self.start_quote_emitted = False

        elif name.startswith('function') or name == 'function':
            # if there are parameters, close JSON object
            if self.parameters:
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.tool_call_index - 1,
                                  id=self.current_call_id,
                                  type='function',
                                  function=DeltaFunctionCall(name=None,
                                                             arguments='}'))
                ])
                self._emit_delta(delta)
            # return empty object
            else:
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.tool_call_index - 1,
                                  id=self.current_call_id,
                                  type='function',
                                  function=DeltaFunctionCall(name=None,
                                                             arguments='{}'))
                ])
                self._emit_delta(delta)
            self.current_function_open = False

        elif name == 'tool_call':
            # Before ending tool_call,
            # ensure function is closed to complete missing right brace
            if self.current_function_open:
                # If there are still unclosed parameters, close them first
                if self.current_param_name:
                    self._end_element('parameter')
                # Close function, ensure output '}' or '{}'
                self._end_element('function')
            # Final Delta
            # (carries the call id with empty arguments)
            delta = DeltaMessage(tool_calls=[
                DeltaToolCall(index=self.tool_call_index - 1,
                              id=self.current_call_id,
                              type='function',
                              function=DeltaFunctionCall(name=None,
                                                         arguments=''))
            ])
            self._emit_delta(delta)

            # Check if there's text content to output (between tool_calls)
            # Whitespace-only buffers are not emitted.
            if self.text_content_buffer.strip():
                text_delta = DeltaMessage(content=self.text_content_buffer)
                self._emit_delta(text_delta)

            self._reset_xml_parser_after_tool_call()

    def setup_parser(self):
        """Wire this object's callbacks into the current XML parser.

        Enables text buffering on ``self.parser`` and installs the
        start-element, end-element and character-data handlers.
        """
        xml_parser = self.parser
        xml_parser.buffer_text = True
        xml_parser.StartElementHandler = self._start_element
        xml_parser.EndElementHandler = self._end_element
        xml_parser.CharacterDataHandler = self._char_data

    def set_tools(self, tools: Union[list[ChatCompletionToolsParam], None]):
        """Remember the tool definitions of the current request.

        The stored list is what ``_get_param_type`` searches to find the
        declared type of a parameter.

        Args:
            tools: Tool definitions, or ``None`` when there are none.
        """
        self.tools = tools

    def _get_next_call_id(self):
        """Generate unique call ID"""
        return f'call_{uuid.uuid4().hex[:24]}'

    def _extract_function_name(self, name: str,
                               attrs: dict[str, str]) -> Optional[str]:
        """Extract function name from various formats"""
        if attrs and 'name' in attrs:
            return attrs['name']

        if '=' in name:
            parts = name.split('=', 1)
            if len(parts) == 2 and parts[0] == 'function':
                return parts[1]

        return None

    def _extract_parameter_name(self, name: str,
                                attrs: dict[str, str]) -> Optional[str]:
        """Extract parameter name from various formats"""
        if attrs and 'name' in attrs:
            return attrs['name']

        if '=' in name:
            parts = name.split('=', 1)
            if len(parts) == 2 and parts[0] == 'parameter':
                return parts[1]

        return None

    def _get_param_type(self, param_name: str) -> str:
        """Get parameter type based on tool configuration, defaults to string
        Args:
            param_name: Parameter name

        Returns:
            Parameter type
        """
        if not self.tools or not self.current_function_name:
            return 'string'

        for tool in self.tools:
            if not hasattr(tool, 'type') or not (hasattr(
                    tool, 'function') and hasattr(tool.function, 'name')):
                continue
            if tool.type == 'function' and \
                tool.function.name == self.current_function_name:
                if not hasattr(tool.function, 'parameters'):
                    return 'string'
                params = tool.function.parameters
                if isinstance(params, dict) and 'properties' in params:
                    properties = params['properties']
                    if param_name in properties and isinstance(
                            properties[param_name], dict):
                        return self.repair_param_type(
                            str(properties[param_name].get('type', 'string')))
                elif isinstance(params, dict) and param_name in params:
                    param_config = params[param_name]
                    if isinstance(param_config, dict):
                        return self.repair_param_type(
                            str(param_config.get('type', 'string')))
                break
        return 'string'

    def repair_param_type(self, param_type: str) -> str:
        """Map unrecognised parameter types to ``'string'``.

        A type is recognised when it is one of the known exact names or
        starts with one of the known prefixes; the comparison is
        case-sensitive.

        Args:
            param_type: Parameter type

        Returns:
            ``param_type`` unchanged when recognised, otherwise ``'string'``.
        """
        exact_names = {
            'string', 'str', 'text', 'varchar', 'char', 'enum',
            'boolean', 'bool', 'binary',
            'object', 'array', 'arr', 'sequence',
        }
        prefixes = ('int', 'uint', 'long', 'short', 'unsigned', 'num',
                    'float', 'dict', 'list')

        if param_type in exact_names or param_type.startswith(prefixes):
            return param_type
        return 'string'

    def _convert_param_value(self, param_value: str, param_type: str) -> Any:
        """Convert value based on parameter type
        Args:
            param_value: Parameter value
            param_type: Parameter type

        Returns:
            Converted value
        """
        if param_value.lower() == 'null':
            return None

        param_type = param_type.strip().lower()
        if param_type in ['string', 'str', 'text', 'varchar', 'char', 'enum']:
            return param_value
        elif (param_type.startswith('int') or param_type.startswith('uint')
              or param_type.startswith('long')
              or param_type.startswith('short')
              or param_type.startswith('unsigned')):
            try:
                return int(param_value)
            except (ValueError, TypeError):
                logger.warning(
                    "Parsed value '%s' of parameter '%s' is not an integer "
                    "in tool '%s', degenerating to string.", param_value)
            return param_value
        elif param_type.startswith('num') or param_type.startswith('float'):
            try:
                float_param_value: float = float(param_value)
                return float_param_value if float_param_value - int(
                    float_param_value) != 0 else int(float_param_value)
            except (ValueError, TypeError):
                logger.warning(
                    "Parsed value '%s' of parameter '%s' is not a float "
                    "in tool '%s', degenerating to string.", param_value)
            return param_value
        elif param_type in ['boolean', 'bool', 'binary']:
            param_value = param_value.lower()
            return param_value == 'true'
        else:
            return param_value

    def _convert_for_json_streaming(self, converted_value: Any,
                                    param_type: str) -> str:
        """Convert converted_value based on 
        whether it's empty and if type is string
        Args:
            converted_value: Converted value
            param_type: Parameter type

        Returns:
            Converted string for streaming output
        """
        # Check if value is empty, but exclude numeric 0
        if converted_value is None or converted_value == '':
            return ''

        if param_type in ['string', 'str', 'text', 'varchar', 'char', 'enum']:
            # String type, remove double quotes
            return json.dumps(converted_value, ensure_ascii=False)[1:-1]
        else:
            # Non-string type, return complete JSON string
            if not isinstance(converted_value, str):
                return json.dumps(converted_value, ensure_ascii=False)
            else:
                return converted_value

    def _reset_xml_parser_after_tool_call(self):
        """
        Each tool_call is treated as a separate XML document, 
        so we need to reset the parser after each tool_call.
        """

        # recreate XML parser
        self.parser = ParserCreate()
        self.setup_parser()

        # Reset current tool_call state
        if self.current_call_id:
            self.last_completed_call_id = self.current_call_id
        self.current_call_id = None
        self.current_function_name = None
        self.current_function_open = False
        self.parameters = {}
        self.current_param_name = None
        self.current_param_value = ''
        self.current_param_value_converted = ''
        self.current_param_is_first = False
        self.should_emit_end_newline = False
        self.start_quote_emitted = False
        self.text_content_buffer = ''

        # Reset preprocessing and deferred parsing state
        self._pre_inside_parameter = False
        self._pre_param_buffer = ""
        self._pre_current_param_name = None
        self.defer_current_parameter = False
        self.deferred_param_raw_value = ""

function_end_token `instance-attribute` ¶

function_end_token: str = '</function>'

function_start_token `instance-attribute` ¶

function_start_token: str = '<function='

parameter_end_token `instance-attribute` ¶

parameter_end_token: str = '</parameter>'

parameter_start_token `instance-attribute` ¶

parameter_start_token: str = '<parameter='

tool_call_end_token `instance-attribute` ¶

tool_call_end_token: str = '</tool_call>'

tool_call_start_token `instance-attribute` ¶

tool_call_start_token: str = '<tool_call>'

tools `instance-attribute` ¶

tools: Union[list[ChatCompletionToolsParam], None] = None

__init__ ¶

__init__()

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def __init__(self):
    self.reset_streaming_state()

    # Tool configuration information
    self.tools: Union[list[ChatCompletionToolsParam], None] = None
    self.tool_call_start_token: str = '<tool_call>'
    self.tool_call_end_token: str = '</tool_call>'
    self.function_start_token: str = '<function='
    self.function_end_token: str = '</function>'
    self.parameter_start_token: str = '<parameter='
    self.parameter_end_token: str = '</parameter>'

_auto_close_open_parameter_if_needed ¶

_auto_close_open_parameter_if_needed(
    incoming_tag: Optional[str] = None,
)

Before a new element is processed, automatically close any tags that earlier input left open: (1) if a parameter is still open, it is ended as if `</parameter>` had been received; (2) when a new function or tool_call is about to start and a function is still open, it is ended as if `</function>` had been received; (3) when a new tool_call is about to start and a tool_call is still open, it is ended as if `</tool_call>` had been received.

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _auto_close_open_parameter_if_needed(self,
                                         incoming_tag: Optional[str] = None
                                         ):
    """Before starting to process new elements, 
    if there are unclosed tags from before, 
    automatically complete their endings to the parser.
    - If there are unclosed parameters, 
    it's equivalent to feeding `</parameter>`
    - When about to start a new function or tool_call, 
    if there are unclosed functions, complete `</function>`.
    - When about to start a new tool_call, 
    if there are unclosed tool_calls, complete `</tool_call>`.
    """
    # First close unclosed parameters
    if self.current_param_name:
        self._end_element('parameter')

    # If about to start new function or tool_call,
    # and there are unclosed functions, close function first
    if incoming_tag in ('function',
                        'tool_call') and self.current_function_name:
        self._end_element('function')

    # If about to start new tool_call,
    # and there are unclosed tool_calls, close tool_call first
    if incoming_tag == 'tool_call' and self.current_call_id:
        self._end_element('tool_call')

_char_data ¶

_char_data(data: str)

Handle XML character data events

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _char_data(self, data: str):
    """Handle XML character data events"""
    if data and self.current_param_name:
        # If preprocessing stage determines deferred parsing is needed,
        # only cache character data, no streaming output
        if self.defer_current_parameter:
            original_data = data
            if self.should_emit_end_newline:
                original_data = '\n' + original_data
                self.should_emit_end_newline = False
            if original_data.endswith('\n'):
                self.should_emit_end_newline = True
                original_data = original_data[:-1]
            self.current_param_value += original_data
            return

        param_type = self._get_param_type(self.current_param_name)

        # Check if this is the first time receiving data for this parameter
        # If this is the first packet of data and starts with \n, remove \n
        if not self.current_param_value and data.startswith('\n'):
            data = data[1:]

        # Output start quote for string type (if not already output)
        if (param_type
                in ['string', 'str', 'text', 'varchar', 'char', 'enum']
                and not self.start_quote_emitted):
            quote_delta = DeltaMessage(tool_calls=[
                DeltaToolCall(index=self.tool_call_index - 1,
                              id=self.current_call_id,
                              type='function',
                              function=DeltaFunctionCall(name=None,
                                                         arguments='"'))
            ])
            self._emit_delta(quote_delta)
            self.start_quote_emitted = True

        if not data:
            return

        original_data = data
        # Delay output of trailing newline
        if self.should_emit_end_newline:
            original_data = '\n' + original_data
            self.should_emit_end_newline = False
        if original_data.endswith('\n'):
            self.should_emit_end_newline = True
            original_data = original_data[:-1]
        self.current_param_value += original_data

        # convert parameter value by param_type
        converted_value = self._convert_param_value(
            self.current_param_value, param_type)
        output_data = self._convert_for_json_streaming(
            converted_value, param_type)

        delta_data = output_data[len(self.current_param_value_converted):]
        self.current_param_value_converted = output_data

        delta = DeltaMessage(tool_calls=[
            DeltaToolCall(index=self.tool_call_index - 1,
                          id=self.current_call_id,
                          type='function',
                          function=DeltaFunctionCall(name=None,
                                                     arguments=delta_data))
        ])
        self._emit_delta(delta)

_convert_for_json_streaming ¶

_convert_for_json_streaming(
    converted_value: Any, param_type: str
) -> str

Convert `converted_value` into the text streamed inside the JSON arguments, depending on whether it is empty and whether the parameter type is a string type. Args: `converted_value`: the converted value; `param_type`: the parameter type.

Returns:

Type	Description
`str`	Converted string for streaming output

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _convert_for_json_streaming(self, converted_value: Any,
                                param_type: str) -> str:
    """Convert converted_value based on 
    whether it's empty and if type is string
    Args:
        converted_value: Converted value
        param_type: Parameter type

    Returns:
        Converted string for streaming output
    """
    # Check if value is empty, but exclude numeric 0
    if converted_value is None or converted_value == '':
        return ''

    if param_type in ['string', 'str', 'text', 'varchar', 'char', 'enum']:
        # String type, remove double quotes
        return json.dumps(converted_value, ensure_ascii=False)[1:-1]
    else:
        # Non-string type, return complete JSON string
        if not isinstance(converted_value, str):
            return json.dumps(converted_value, ensure_ascii=False)
        else:
            return converted_value

_convert_param_value ¶

_convert_param_value(
    param_value: str, param_type: str
) -> Any

Convert a parameter value based on its parameter type. Args: `param_value`: the parameter value; `param_type`: the parameter type.

Returns:

Type	Description
`Any`	Converted value

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _convert_param_value(self, param_value: str, param_type: str) -> Any:
    """Convert value based on parameter type
    Args:
        param_value: Parameter value
        param_type: Parameter type

    Returns:
        Converted value
    """
    if param_value.lower() == 'null':
        return None

    param_type = param_type.strip().lower()
    if param_type in ['string', 'str', 'text', 'varchar', 'char', 'enum']:
        return param_value
    elif (param_type.startswith('int') or param_type.startswith('uint')
          or param_type.startswith('long')
          or param_type.startswith('short')
          or param_type.startswith('unsigned')):
        try:
            return int(param_value)
        except (ValueError, TypeError):
            logger.warning(
                "Parsed value '%s' of parameter '%s' is not an integer "
                "in tool '%s', degenerating to string.", param_value)
        return param_value
    elif param_type.startswith('num') or param_type.startswith('float'):
        try:
            float_param_value: float = float(param_value)
            return float_param_value if float_param_value - int(
                float_param_value) != 0 else int(float_param_value)
        except (ValueError, TypeError):
            logger.warning(
                "Parsed value '%s' of parameter '%s' is not a float "
                "in tool '%s', degenerating to string.", param_value)
        return param_value
    elif param_type in ['boolean', 'bool', 'binary']:
        param_value = param_value.lower()
        return param_value == 'true'
    else:
        return param_value

_emit_delta ¶

_emit_delta(delta: DeltaMessage)

Emit Delta response (streaming output)

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _emit_delta(self, delta: DeltaMessage):
    """Queue a streaming delta by appending it to ``self.deltas``.

    Args:
        delta: The delta message to queue.
    """
    self.deltas.append(delta)

_end_element ¶

_end_element(name: str)

Handle XML end element events

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _end_element(self, name: str):
    """Handle XML end element events"""

    if name == 'root':
        return

    # If function or tool_call ends and there are still unclosed parameters,
    # complete parameter end first
    if (name.startswith('function') or name == 'function'
            or name == 'tool_call') and self.current_param_name:
        self._auto_close_open_parameter_if_needed()

    if (name.startswith('parameter')
            or name == 'parameter') and self.current_param_name:
        # End current parameter
        param_name = self.current_param_name
        param_value = self.current_param_value

        # If in deferred parsing mode,
        # perform overall parsing on raw content
        # accumulated in preprocessing stage and output once
        if self.defer_current_parameter:
            raw_text = self.deferred_param_raw_value \
            if self.deferred_param_raw_value else param_value
            parsed_value = None
            output_arguments = None
            try:
                # If previously delayed trailing newline,
                # add it back before parsing
                if self.should_emit_end_newline:
                    raw_for_parse = raw_text + '\n'
                else:
                    raw_for_parse = raw_text
                parsed_value = ast.literal_eval(raw_for_parse)
                output_arguments = json.dumps(parsed_value,
                                              ensure_ascii=False)
            except Exception:
                # Fallback: output as string as-is
                output_arguments = json.dumps(raw_text, ensure_ascii=False)
                parsed_value = raw_text

            delta = DeltaMessage(tool_calls=[
                DeltaToolCall(index=self.tool_call_index - 1,
                              id=self.current_call_id,
                              type='function',
                              function=DeltaFunctionCall(
                                  name=None, arguments=output_arguments))
            ])
            self._emit_delta(delta)

            # Clean up and store
            self.should_emit_end_newline = False
            self.parameters[param_name] = parsed_value
            self.current_param_name = None
            self.current_param_value = ""
            self.current_param_value_converted = ""
            self.start_quote_emitted = False
            self.defer_current_parameter = False
            self.deferred_param_raw_value = ""
            return

        param_type = self._get_param_type(param_name)

        # convert complete parameter value by param_type
        converted_value = self._convert_param_value(
            param_value, param_type)

        # Decide whether to add end quote based on parameter type
        if param_type in [
                'string', 'str', 'text', 'varchar', 'char', 'enum'
        ]:
            # For empty string parameters, need special handling
            if not param_value and not self.start_quote_emitted:
                # No start quote output,
                # directly output complete empty string
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.tool_call_index - 1,
                                  id=self.current_call_id,
                                  type='function',
                                  function=DeltaFunctionCall(
                                      name=None, arguments='""'))
                ])
                self._emit_delta(delta)
            else:
                # Non-empty parameter value, output end quote
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.tool_call_index - 1,
                                  id=self.current_call_id,
                                  type='function',
                                  function=DeltaFunctionCall(
                                      name=None, arguments='"'))
                ])
                self._emit_delta(delta)

        self.should_emit_end_newline = False
        # Store converted value
        self.parameters[param_name] = converted_value
        self.current_param_name = None
        self.current_param_value = ''
        self.current_param_value_converted = ''
        self.start_quote_emitted = False

    elif name.startswith('function') or name == 'function':
        # if there are parameters, close JSON object
        if self.parameters:
            delta = DeltaMessage(tool_calls=[
                DeltaToolCall(index=self.tool_call_index - 1,
                              id=self.current_call_id,
                              type='function',
                              function=DeltaFunctionCall(name=None,
                                                         arguments='}'))
            ])
            self._emit_delta(delta)
        # return empty object
        else:
            delta = DeltaMessage(tool_calls=[
                DeltaToolCall(index=self.tool_call_index - 1,
                              id=self.current_call_id,
                              type='function',
                              function=DeltaFunctionCall(name=None,
                                                         arguments='{}'))
            ])
            self._emit_delta(delta)
        self.current_function_open = False

    elif name == 'tool_call':
        # Before ending tool_call,
        # ensure function is closed to complete missing right brace
        if self.current_function_open:
            # If there are still unclosed parameters, close them first
            if self.current_param_name:
                self._end_element('parameter')
            # Close function, ensure output '}' or '{}'
            self._end_element('function')
        # Final Delta
        delta = DeltaMessage(tool_calls=[
            DeltaToolCall(index=self.tool_call_index - 1,
                          id=self.current_call_id,
                          type='function',
                          function=DeltaFunctionCall(name=None,
                                                     arguments=''))
        ])
        self._emit_delta(delta)

        # Check if there's text content to output (between tool_calls)
        if self.text_content_buffer.strip():
            text_delta = DeltaMessage(content=self.text_content_buffer)
            self._emit_delta(text_delta)

        self._reset_xml_parser_after_tool_call()

_escape_xml_special_chars ¶

_escape_xml_special_chars(text: str) -> str

Escape XML special characters. Args: text: Original text. Returns: Escaped text.

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _escape_xml_special_chars(self, text: str) -> str:
    """
    Escape XML special characters
    Args:
        text: Original text
    Returns:
        Escaped text
    """
    xml_escapes = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&apos;'
    }

    for char, escape in xml_escapes.items():
        text = text.replace(char, escape)

    return text

_extract_function_name ¶

_extract_function_name(
    name: str, attrs: dict[str, str]
) -> Optional[str]

Extract function name from various formats

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _extract_function_name(self, name: str,
                           attrs: dict[str, str]) -> Optional[str]:
    """Extract function name from various formats"""
    if attrs and 'name' in attrs:
        return attrs['name']

    if '=' in name:
        parts = name.split('=', 1)
        if len(parts) == 2 and parts[0] == 'function':
            return parts[1]

    return None

_extract_parameter_name ¶

_extract_parameter_name(
    name: str, attrs: dict[str, str]
) -> Optional[str]

Extract parameter name from various formats

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _extract_parameter_name(self, name: str,
                            attrs: dict[str, str]) -> Optional[str]:
    """Extract parameter name from various formats"""
    if attrs and 'name' in attrs:
        return attrs['name']

    if '=' in name:
        parts = name.split('=', 1)
        if len(parts) == 2 and parts[0] == 'parameter':
            return parts[1]

    return None

_find_next_complete_element ¶

_find_next_complete_element(
    start_pos: int,
) -> tuple[Optional[str], int]

Find next complete XML element from specified position

Parameters:

Name	Type	Description	Default
`start_pos`	`int`	Position to start searching	required

Returns:

Type	Description
`Optional[str]`	(Complete element string, element end position),
`int`	returns (None, start_pos) if no complete element found

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _find_next_complete_element(
        self, start_pos: int) -> tuple[Optional[str], int]:
    """
    Find next complete XML element from specified position

    Args:
        start_pos: Position to start searching

    Returns:
        (Complete element string, element end position), 
        returns (None, start_pos) if no complete element found
    """
    buffer = self.streaming_buffer[start_pos:]

    if not buffer:
        return None, start_pos

    if buffer.startswith('<'):
        # Need to ensure no new < appears,
        # find the nearest one between < and >
        tag_end = buffer.find('<', 1)
        tag_end2 = buffer.find('>', 1)
        if tag_end != -1 and tag_end2 != -1:
            # Next nearest is <
            if tag_end < tag_end2:
                return buffer[:tag_end], start_pos + tag_end
            # Next nearest is >, means found XML element
            else:
                return buffer[:tag_end2 + 1], start_pos + tag_end2 + 1
        elif tag_end != -1:
            return buffer[:tag_end], start_pos + tag_end
        elif tag_end2 != -1:
            return buffer[:tag_end2 + 1], start_pos + tag_end2 + 1
        else:
            # If currently not parsing tool calls (entering a tool_call),
            # check if starts with <tool_call>
            if self.current_call_id is None:
                # Check if might be start of <tool_call>
                if buffer == '<tool_call>'[:len(buffer)]:
                    # Might be start of <tool_call>, wait for more data
                    return None, start_pos
                else:
                    # Not start of <tool_call>, treat as text
                    return buffer, start_pos + len(buffer)
            else:
                # When parsing tool calls,
                # wait for more data to get complete tag
                return None, start_pos
    else:
        # Find text content (until next < or buffer end)
        next_tag_pos = buffer.find('<')
        if next_tag_pos != -1:
            # Found text content
            text_content = buffer[:next_tag_pos]
            return text_content, start_pos + next_tag_pos
        else:
            # Buffer end is all text, process
            # (no longer wait for more data)
            remaining = buffer
            return remaining, start_pos + len(remaining)

_get_next_call_id ¶

_get_next_call_id()

Generate unique call ID

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _get_next_call_id(self):
    """Generate unique call ID"""
    return f'call_{uuid.uuid4().hex[:24]}'

_get_param_type ¶

_get_param_type(param_name: str) -> str

Get parameter type based on tool configuration; defaults to string. Args: param_name: Parameter name

Returns:

Type	Description
`str`	Parameter type

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _get_param_type(self, param_name: str) -> str:
    """Get parameter type based on tool configuration, defaults to string
    Args:
        param_name: Parameter name

    Returns:
        Parameter type
    """
    if not self.tools or not self.current_function_name:
        return 'string'

    for tool in self.tools:
        if not hasattr(tool, 'type') or not (hasattr(
                tool, 'function') and hasattr(tool.function, 'name')):
            continue
        if tool.type == 'function' and \
            tool.function.name == self.current_function_name:
            if not hasattr(tool.function, 'parameters'):
                return 'string'
            params = tool.function.parameters
            if isinstance(params, dict) and 'properties' in params:
                properties = params['properties']
                if param_name in properties and isinstance(
                        properties[param_name], dict):
                    return self.repair_param_type(
                        str(properties[param_name].get('type', 'string')))
            elif isinstance(params, dict) and param_name in params:
                param_config = params[param_name]
                if isinstance(param_config, dict):
                    return self.repair_param_type(
                        str(param_config.get('type', 'string')))
            break
    return 'string'

_merge_new_deltas_to_single_response ¶

_merge_new_deltas_to_single_response(
    initial_count: int,
) -> DeltaMessage

Merge newly generated deltas from this processing into a single DeltaMessage

Parameters:

Name	Type	Description	Default
`initial_count`	`int`	Delta count before processing	required

Returns:

Type	Description
`DeltaMessage`	Merged DeltaMessage containing all newly generated delta information

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _merge_new_deltas_to_single_response(
        self, initial_count: int) -> DeltaMessage:
    """
    Merge newly generated deltas from this processing
    into a single DeltaMessage

    Args:
        initial_count: Delta count before processing

    Returns:
        Merged DeltaMessage containing all newly generated delta information
    """
    if len(self.deltas) <= initial_count:
        return DeltaMessage(content=None)

    # Get newly generated deltas
    new_deltas = self.deltas[initial_count:]

    if len(new_deltas) == 1:
        # Only one new delta, return directly
        return new_deltas[0]

    # Merge multiple new deltas
    merged_tool_calls: list[DeltaToolCall] = []
    merged_content: str = ''

    for delta in new_deltas:
        if delta.content:
            merged_content += delta.content
        if delta.tool_calls:
            # For tool_calls, we need to intelligently merge arguments
            for tool_call in delta.tool_calls:
                # Find if there's already a tool_call with the same call_id
                existing_call = None
                for existing in merged_tool_calls:
                    if existing.id == tool_call.id:
                        existing_call = existing
                        break

                if existing_call and existing_call.function:
                    # Merge to existing tool_call
                    if tool_call.function and tool_call.function.name:
                        existing_call.function.name = \
                            tool_call.function.name
                    if tool_call.function \
                        and tool_call.function.arguments is not None:
                        if existing_call.function.arguments is None:
                            existing_call.function.arguments = ''

                        # For streaming JSON parameters,
                        # simply concatenate in order
                        new_args = tool_call.function.arguments
                        existing_call.function.arguments += new_args
                    if tool_call.type:
                        existing_call.type = tool_call.type
                else:
                    # Add new tool_call
                    merged_tool_calls.append(tool_call)

    return DeltaMessage(content=merged_content if merged_content else None,
                        tool_calls=merged_tool_calls)

_preprocess_xml_chunk ¶

_preprocess_xml_chunk(chunk: str) -> str

Preprocess XML chunk, handle non-standard formats, and escape special characters

Parameters:

Name	Type	Description	Default
`chunk`	`str`	Original XML chunk	required

Returns:

Type	Description
`str`	Processed XML chunk

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _preprocess_xml_chunk(self, chunk: str) -> str:
    """
    Preprocess XML chunk, handle non-standard formats, 
    and escape special characters

    Args:
        chunk: Original XML chunk

    Returns:
        Processed XML chunk
    """

    # Check if this is a tool_call related element
    is_tool_call = False
    if chunk.startswith(self.tool_call_start_token) or chunk.startswith(
            self.tool_call_end_token):
        is_tool_call = True
    if chunk.startswith(self.function_start_token) or chunk.startswith(
            self.function_end_token):
        is_tool_call = True
    if chunk.startswith(self.parameter_start_token) or chunk.startswith(
            self.parameter_end_token):
        is_tool_call = True
    # Handle <function=name> format -> <function name="name">
    processed = re.sub(r'<function=([^>]+)>', r'<function name="\1">',
                       chunk)
    # Handle <parameter=name> format -> <parameter name="name">
    processed = re.sub(r'<parameter=([^>]+)>', r'<parameter name="\1">',
                       processed)

    original_chunk = chunk
    # If in parameter value accumulation mode
    if self._pre_inside_parameter:
        # Parameter end: output accumulated raw text
        # safely then return </parameter>
        if processed.startswith('</parameter>'):
            body_text = self._pre_param_buffer
            # Trigger deferred parsing mode
            # literal_eval+json output in end_element
            self.defer_current_parameter = True
            self.deferred_param_raw_value = body_text
            # Clean up state
            self._pre_inside_parameter = False
            self._pre_param_buffer = ""
            self._pre_current_param_name = None
            safe_text = self._escape_xml_special_chars(body_text)
            return f"{safe_text}</parameter>"
        else:
            # If this is the first block of content after entering parameter
            # evaluate if deferred parsing is needed;
            # If not needed, exit accumulation mode
            # and pass through directly
            if self._pre_param_buffer == "":
                # Get current parameter type
                param_type = self._get_param_type(
                    self._pre_current_param_name
                ) if self._pre_current_param_name else 'string'
                # Only these types need deferred parsing to
                # handle Python literals containing single quotes
                is_object_type = param_type in ["object"]
                is_complex_type = (param_type
                                   in ["array", "arr", "sequence"]
                                   or param_type.startswith("dict")
                                   or param_type.startswith("list"))

                # Only delay when contains container symbols
                # and has single quotes and is complex type
                has_container_hint = ('[' in original_chunk) or (
                    '{' in original_chunk) or ('(' in original_chunk)

                # Determine if deferred parsing is needed
                need_defer = False
                if is_complex_type:
                    # Complex type, always need deferred parsing
                    need_defer = True
                elif is_object_type and has_container_hint and (
                        "'" in original_chunk):
                    # Object type with container symbols
                    # and single quotes, need deferred parsing
                    need_defer = True

                if not need_defer:
                    # No need for deferred parsing,
                    # exit parameter mode directly
                    self._pre_inside_parameter = False
                    return self._escape_xml_special_chars(original_chunk)
            self._pre_param_buffer += original_chunk
            return ""

    # Parameter start: enable accumulation
    if processed.startswith('<parameter name='):
        m = re.match(r'<parameter name="([^"]+)">', processed)
        if m:
            self._pre_current_param_name = m.group(1)
        self._pre_inside_parameter = True
        self._pre_param_buffer = ""
        return processed

    # If processed doesn't contain special_token, escape processed
    # This is because XML parsing encounters special characters
    # and reports errors, so escaping is needed
    if not is_tool_call:
        processed = self._escape_xml_special_chars(processed)
    return processed

_process_complete_xml_elements ¶

_process_complete_xml_elements() -> bool

Process complete XML elements in buffer

Returns:

Name	Type	Description
`bool`	`bool`	Whether complete elements were found and processed

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _process_complete_xml_elements(self) -> bool:
    """
    Process complete XML elements in buffer

    Returns:
        bool: Whether complete elements were found and processed
    """
    found_any = False

    while self.last_processed_pos < len(self.streaming_buffer):
        # Find next complete xml element
        element, end_pos = self._find_next_complete_element(
            self.last_processed_pos)
        if element is None:
            # No complete element found, wait for more data
            break

        # Check if this element should be skipped
        if self._should_skip_element(element):
            self.last_processed_pos = end_pos
            continue

        # Found complete XML element, process it
        try:
            preprocessed_element = self._preprocess_xml_chunk(element)
            # Check if this is the first tool_call start
            if ((preprocessed_element.strip().startswith('<tool_call>') or
                 preprocessed_element.strip().startswith('<function name=')
                 ) and self.tool_call_index
                    == 0) and self.text_content_buffer:
                # First tool_call starts,
                # output previously collected text content first
                text_delta = DeltaMessage(content=self.text_content_buffer)
                self._emit_delta(text_delta)
                # Clear buffer for potential subsequent text content
                self.text_content_buffer = ''

            # If a new tool_call starts and
            # there are already completed tool_calls
            if (preprocessed_element.strip().startswith('<tool_call>')
                    and self.tool_call_index > 0 and self.current_call_id):
                # Reset parser state but preserve generated deltas
                if self.current_param_name:
                    self._end_element('parameter')
                if self.current_function_open or self.current_function_name:
                    self._end_element('function')
                # Output final tool_call tail delta
                final_delta = DeltaMessage(
                    role=None,
                    content=None,
                    reasoning_content=None,
                    tool_calls=[
                        DeltaToolCall(index=self.tool_call_index - 1,
                                      id=self.current_call_id,
                                      type='function',
                                      function=DeltaFunctionCall(
                                          name=None, arguments=''))
                    ])
                self._emit_delta(final_delta)
                # Reset XML parser and current call state
                self._reset_xml_parser_after_tool_call()
            # Parse preprocessed element
            self.parser.Parse(preprocessed_element, False)
            found_any = True

        except Exception as e:
            logger.warning("Error when parsing XML elements: %s", e)

        # Update processed position
        self.last_processed_pos = end_pos

    return found_any

_reset_xml_parser_after_tool_call ¶

_reset_xml_parser_after_tool_call()

Each tool_call is treated as a separate XML document, so we need to reset the parser after each tool_call.

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _reset_xml_parser_after_tool_call(self):
    """
    Each tool_call is treated as a separate XML document, 
    so we need to reset the parser after each tool_call.
    """

    # recreate XML parser
    self.parser = ParserCreate()
    self.setup_parser()

    # Reset current tool_call state
    if self.current_call_id:
        self.last_completed_call_id = self.current_call_id
    self.current_call_id = None
    self.current_function_name = None
    self.current_function_open = False
    self.parameters = {}
    self.current_param_name = None
    self.current_param_value = ''
    self.current_param_value_converted = ''
    self.current_param_is_first = False
    self.should_emit_end_newline = False
    self.start_quote_emitted = False
    self.text_content_buffer = ''

    # Reset preprocessing and deferred parsing state
    self._pre_inside_parameter = False
    self._pre_param_buffer = ""
    self._pre_current_param_name = None
    self.defer_current_parameter = False
    self.deferred_param_raw_value = ""

_should_skip_element ¶

_should_skip_element(element: str) -> bool

Determine whether an element should be skipped

Parameters:

Name	Type	Description	Default
`element`	`str`	Element to evaluate	required

Returns:

Name	Type	Description
`bool`	`bool`	True means should skip, False means should process

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _should_skip_element(self, element: str) -> bool:
    """
    Determine whether an element should be skipped

    Args:
        element: Element to evaluate

    Returns:
        bool: True means should skip, False means should process
    """

    # If it's a tool_call XML tag, don't skip
    if element.startswith(
            self.tool_call_start_token) or element.startswith(
                self.function_start_token) or element.startswith(
                    self.parameter_start_token):
        return False

    # If currently not parsing tool calls and not blank,
    # collect this text instead of skipping
    # Only process other XML elements after tool_call appears,
    # otherwise treat as plain text
    if self.current_call_id is None and element:
        # Collect text content to buffer
        self.text_content_buffer += element
        return True  # Still skip, but content has been collected

    # If currently parsing tool calls,
    # this might be parameter value, don't skip
    if self.current_call_id is not None:
        return False

    # Skip blank content
    return not element

_start_element ¶

_start_element(name: str, attrs: dict[str, str])

Handle XML start element events

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def _start_element(self, name: str, attrs: dict[str, str]):
    """Handle XML start element events"""

    if name == 'root':
        return

    if name == 'tool_call':
        # Before opening new tool_call,
        # automatically complete previous unclosed tags
        self._auto_close_open_parameter_if_needed('tool_call')

        self.parameters = {}
        self.current_call_id = self._get_next_call_id()
        self.current_param_is_first = True
        self.tool_call_index += 1
    elif name.startswith('function') or (name == 'function'):
        # If missing tool_call, manually complete
        if not self.current_call_id:
            self._start_element('tool_call', {})
        # Before opening new function,
        # automatically complete previous unclosed tags (parameter/function)
        self._auto_close_open_parameter_if_needed('function')
        function_name = self._extract_function_name(name, attrs)
        self.current_function_name = function_name
        self.current_function_open = True
        if function_name:
            delta = DeltaMessage(tool_calls=[
                DeltaToolCall(index=self.tool_call_index - 1,
                              id=self.current_call_id,
                              type='function',
                              function=DeltaFunctionCall(
                                  name=function_name, arguments=''))
            ])
            self._emit_delta(delta)
    elif name.startswith('parameter') or (name == 'parameter'):
        # If previous parameter hasn't ended normally,
        # complete its end first, then start new parameter
        self._auto_close_open_parameter_if_needed('parameter')
        param_name = self._extract_parameter_name(name, attrs)
        self.current_param_name = param_name
        self.current_param_value = ''
        self.current_param_value_converted = ''
        self.start_quote_emitted = False  # Reset start quote flag

        # Only output parameter name and colon,
        # don't output quotes
        # decide after parameter value type is determined
        if param_name:
            if not self.parameters:
                # First parameter
                # start JSON, only output parameter name and colon
                json_start = f'{{"{param_name}": '
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.tool_call_index - 1,
                                  id=self.current_call_id,
                                  type='function',
                                  function=DeltaFunctionCall(
                                      name=None, arguments=json_start))
                ])
                self._emit_delta(delta)
                self.current_param_is_first = True
            else:
                # Subsequent parameters
                # add comma and parameter name, no quotes
                json_continue = f', "{param_name}": '
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.tool_call_index - 1,
                                  id=self.current_call_id,
                                  type='function',
                                  function=DeltaFunctionCall(
                                      name=None, arguments=json_continue))
                ])
                self._emit_delta(delta)
                self.current_param_is_first = False

parse_single_streaming_chunks ¶

parse_single_streaming_chunks(
    xml_chunk: str,
) -> DeltaMessage

Parse a single streaming XML chunk and return a Delta response. This is the actual streaming interface: it receives chunks one by one and maintains internal state.

Parameters:

Name	Type	Description	Default
`xml_chunk`	`str`	Single XML chunk string	required

Returns: DeltaMessage: Contains delta information generated by this chunk, returns empty response if no complete elements

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage:
    """
    Parse one streaming XML chunk and return the delta it produced.

    This is the streaming entry point: chunks are fed in one at a time
    and the parser keeps its state on ``self`` between calls. The chunk
    is appended to ``self.streaming_buffer`` and handed to
    ``_process_complete_xml_elements``; afterwards a fallback pass closes
    any parameter / function / tool_call whose end tag text appears in
    this chunk but whose end event was not emitted.

    Args:
        xml_chunk: Single XML chunk string
    Returns:
        DeltaMessage: One of
        - the result of ``_merge_new_deltas_to_single_response`` over the
          deltas emitted during this call,
        - a content-only delta carrying the buffered plain text when no
          tool call has been started yet, or
        - ``DeltaMessage(content=None)`` when nothing was produced.
    """
    # Remember how many deltas existed before this chunk so that only the
    # deltas emitted during this call are inspected and merged below
    initial_delta_count = len(self.deltas)

    self.streaming_buffer += xml_chunk

    found_elements = self._process_complete_xml_elements()

    if found_elements:
        # Complete elements were found; check whether end events were
        # missed, since some closing tags may not have been triggered
        try:
            new_deltas = self.deltas[initial_delta_count:]
            # If this chunk contains </function>
            # but didn't generate '}', then complete it
            if (self.current_call_id is not None
                    and self.function_end_token in xml_chunk):

                # A function is considered closed if a new delta for the
                # current call carries one of these argument fragments:
                # - '}'  (non-empty parameter ending)
                # - '{}' (empty parameter function)
                has_function_close = any((td.tool_calls and any(
                    (tc.function and tc.id == self.current_call_id
                     and isinstance(tc.function.arguments, str) and
                     (tc.function.arguments in ('}', '{}')))
                    for tc in td.tool_calls)) for td in new_deltas)
                if not has_function_close:
                    # Close potentially unclosed elements, innermost
                    # (parameter) first, then the function itself
                    if self.current_param_name:
                        self._end_element('parameter')
                    if self.current_function_name:
                        self._end_element('function')
            # If this chunk contains </tool_call>
            # but didn't generate final empty delta, then complete it
            if (self.current_call_id is not None
                    and self.tool_call_end_token in xml_chunk):
                # The tool call is considered closed if a new delta for
                # the current call has an empty-string arguments fragment
                has_toolcall_close = any((td.tool_calls and any(
                    (tc.type == 'function' and tc.function and tc.function.
                     arguments == '' and tc.id == self.current_call_id)
                    for tc in td.tool_calls)) for td in new_deltas)
                if not has_toolcall_close:
                    # Close potentially unclosed elements from the inside
                    # out: parameter, function, then tool_call
                    if self.current_param_name:
                        self._end_element('parameter')
                    if self.current_function_name:
                        self._end_element('function')
                    self._end_element('tool_call')
        except Exception as e:
            # The fallback is best-effort: a failure here is logged and
            # whatever deltas were already emitted are still returned
            logger.warning("Error with fallback parsing: %s", e)
        # Merge newly generated deltas into single response
        result_delta = self._merge_new_deltas_to_single_response(
            initial_delta_count)
        return result_delta
    else:
        # No complete elements, check if there's unoutput text content
        if self.text_content_buffer and self.tool_call_index == 0:
            # Has text content but no tool_call yet, output text content.
            # This delta is returned directly rather than going through
            # the merge helper.
            text_delta = DeltaMessage(content=self.text_content_buffer)
            self._emit_delta(text_delta)
            # Clear buffer to avoid duplicate output
            self.text_content_buffer = ''
            return text_delta

        # If this chunk contains end tags but wasn't triggered by parser,
        # manually complete end events.
        # NOTE(review): the intent is to act only while still on the same
        # call as when this method was entered, so that a new call is not
        # closed by accident in multi <tool_call> scenarios. The condition
        # below only checks that some call is currently open; confirm
        # that this is sufficient.
        if (self.current_call_id is not None
                and (self.function_end_token in xml_chunk
                     or self.tool_call_end_token in xml_chunk)):
            # Close potentially unclosed elements; the function and
            # tool_call are closed only if their own end token is present
            # in this chunk
            if self.current_param_name:
                self._end_element('parameter')
            if self.function_end_token in xml_chunk and \
                self.current_function_name:
                self._end_element('function')
            if self.tool_call_end_token in xml_chunk:
                self._end_element('tool_call')
            # Return the merged delta result generated by this fallback
            result_delta = self._merge_new_deltas_to_single_response(
                initial_delta_count)
            return result_delta

        # No complete elements, return empty response
        return DeltaMessage(content=None)

repair_param_type ¶

repair_param_type(param_type: str) -> str

Repair unknown parameter types by treating them as `string`. Args: `param_type`: Parameter type.

Returns:

Type	Description
`str`	Repaired parameter type

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def repair_param_type(self, param_type: str) -> str:
    """Normalize a declared parameter type, defaulting to ``'string'``.

    A type is kept as-is when it is one of the recognized exact names or
    begins with one of the recognized prefixes; anything else is treated
    as a plain string.

    Args:
        param_type: Parameter type

    Returns:
        ``param_type`` itself when recognized, otherwise ``'string'``
    """
    # Names that must match exactly
    exact_names = (
        'string', 'str', 'text', 'varchar', 'char', 'enum',
        'boolean', 'bool', 'binary',
        "object", "array", "arr", "sequence",
    )
    # Prefixes covering families such as int32, uint8, float64, dict[...]
    accepted_prefixes = (
        'int', 'uint', 'long', 'short', 'unsigned', 'num', 'float',
        "dict", "list",
    )

    recognized = (param_type in exact_names
                  or param_type.startswith(accepted_prefixes))
    if not recognized:
        return 'string'
    return param_type

reset_streaming_state ¶

reset_streaming_state()

Reset streaming parsing state

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def reset_streaming_state(self):
    """Return the parser to its initial state before a new stream.

    Clears the emitted deltas, all per-call and per-parameter tracking,
    the input/text buffers and the preprocessing state, then builds a
    fresh expat parser and re-attaches the event handlers.
    """
    # Emitted output and tool-call bookkeeping
    self.deltas = []
    self.tool_call_index = 0
    self.current_call_id = self.last_completed_call_id = None

    # Function currently being parsed
    self.current_function_name = None
    self.current_function_open = False
    self.parameters = {}

    # Parameter currently being parsed
    self.current_param_name = None
    self.current_param_value = self.current_param_value_converted = ''
    self.current_param_is_first = False
    self.should_emit_end_newline = False
    self.start_quote_emitted = False

    # Raw input buffer, read position and pending plain-text content
    self.streaming_buffer = self.text_content_buffer = ''
    self.last_processed_pos = 0

    # Preprocessing and deferred-parameter state
    self._pre_inside_parameter = False
    self._pre_param_buffer = ""
    self._pre_current_param_name = None
    self.defer_current_parameter = False
    self.deferred_param_raw_value = ""

    # A new parser instance is created and wired up last, once all
    # other state is clean
    self.parser = ParserCreate()
    self.setup_parser()

set_tools ¶

set_tools(
    tools: Union[list[ChatCompletionToolsParam], None],
)

Set tool configuration information

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def set_tools(self, tools: Union[list[ChatCompletionToolsParam], None]):
    """Set tool configuration information

    Stores ``tools`` on ``self.tools`` unchanged. ``None`` is accepted.
    ``Qwen3XMLToolParser.extract_tool_calls`` passes ``request.tools``
    here before parsing.

    Args:
        tools: Tool definitions from the request, or ``None``
    """
    self.tools = tools

setup_parser ¶

setup_parser()

Set up XML parser event handlers

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py

def setup_parser(self):
    """Attach this object's callbacks to the current XML parser.

    Routes start-tag, end-tag and character-data events of
    ``self.parser`` to ``_start_element``, ``_end_element`` and
    ``_char_data``, and turns on the parser's ``buffer_text`` option.
    """
    xml_parser = self.parser
    xml_parser.StartElementHandler = self._start_element
    xml_parser.EndElementHandler = self._end_element
    xml_parser.CharacterDataHandler = self._char_data
    xml_parser.buffer_text = True

vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser ¶

logger module-attribute ¶

Qwen3XMLToolParser ¶

parser instance-attribute ¶

__init__ ¶

extract_tool_calls ¶

extract_tool_calls_streaming ¶

StreamingXMLToolCallParser ¶

function_end_token instance-attribute ¶

function_start_token instance-attribute ¶

parameter_end_token instance-attribute ¶

parameter_start_token instance-attribute ¶

tool_call_end_token instance-attribute ¶

tool_call_start_token instance-attribute ¶

tools instance-attribute ¶

__init__ ¶

_auto_close_open_parameter_if_needed ¶

_char_data ¶

_convert_for_json_streaming ¶

_convert_param_value ¶

_emit_delta ¶

_end_element ¶

_escape_xml_special_chars ¶

_extract_function_name ¶

_extract_parameter_name ¶

_find_next_complete_element ¶

_get_next_call_id ¶

_get_param_type ¶

_merge_new_deltas_to_single_response ¶

_preprocess_xml_chunk ¶

_process_complete_xml_elements ¶

_reset_xml_parser_after_tool_call ¶

_should_skip_element ¶

_start_element ¶

parse_single_streaming_chunks ¶

repair_param_type ¶

reset_streaming_state ¶

set_tools ¶

setup_parser ¶

logger `module-attribute` ¶

parser `instance-attribute` ¶

`__init__` ¶

function_end_token `instance-attribute` ¶

function_start_token `instance-attribute` ¶

parameter_end_token `instance-attribute` ¶

parameter_start_token `instance-attribute` ¶

tool_call_end_token `instance-attribute` ¶

tool_call_start_token `instance-attribute` ¶

tools `instance-attribute` ¶

`__init__` ¶