Skip to content

vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser

logger module-attribute

logger = init_logger(__name__)

Qwen3XMLToolParser

Bases: ToolParser

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
@ToolParserManager.register_module("qwen3_xml")
class Qwen3XMLToolParser(ToolParser):
    """Tool parser registered under the name ``qwen3_xml``.

    Both the one-shot and the streaming entry points delegate the actual
    XML handling to a ``StreamingXMLToolCallParser`` held in ``self.parser``.
    """

    def __init__(self, tokenizer: AnyTokenizer):
        """Create the parser and its underlying streaming XML parser."""
        super().__init__(tokenizer)
        self.parser = StreamingXMLToolCallParser()

        parser_name = self.__class__.__name__
        logger.info("vLLM Successfully import tool parser %s !", parser_name)

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """Parse a complete model output in one shot.

        Args:
            model_output: Full text produced by the model.
            request: Originating request; when truthy, its ``tools`` are
                handed to the underlying parser.

        Returns:
            The named tool calls found in the output together with the
            content reported by the underlying parser.
        """
        xml_parser = self.parser
        xml_parser.reset_streaming_state()
        if request:
            xml_parser.set_tools(request.tools)

        # The whole output is fed through the streaming parser as one chunk.
        parsed = xml_parser.parse_single_streaming_chunks(model_output)

        # Only calls that carry a function name are reported; an empty or
        # missing tool_calls list simply yields no entries.
        named_calls = [
            ToolCall(
                id=call.id,
                type=call.type,
                function=FunctionCall(
                    name=call.function.name,
                    arguments=call.function.arguments,
                ),
            ) for call in (parsed.tool_calls or [])
            if call.function and call.function.name
        ]
        return ExtractedToolCallInformation(
            tool_calls=named_calls,
            tools_called=bool(named_calls),
            content=parsed.content,
        )

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> Union[DeltaMessage, None]:
        """Parse one streaming step and return the delta it produced.

        Args:
            previous_text: Text generated before this step; an empty value
                marks the start of a stream and resets the parser state.
            current_text: All text generated so far, including this step.
            delta_text: Text added by this step (may be empty).
            previous_token_ids: Unused by this implementation.
            current_token_ids: Unused by this implementation.
            delta_token_ids: Token ids added by this step.
            request: Originating request; when truthy at stream start, its
                ``tools`` are handed to the underlying parser.

        Returns:
            The delta produced by the underlying parser for ``delta_text``,
            or an empty-arguments tool-call delta in the case described
            in the body.
        """
        xml_parser = self.parser

        if not previous_text:
            xml_parser.reset_streaming_state()
            if request:
                xml_parser.set_tools(request.tools)

        # Tokens can arrive without any decoded text. When that happens
        # after at least one tool call and every <tool_call> seen so far has
        # been closed, emit an empty-arguments delta so the outer streaming
        # layer still outputs the tool_call field.
        if not delta_text and delta_token_ids:
            opened = current_text.count(xml_parser.tool_call_start_token)
            closed = current_text.count(xml_parser.tool_call_end_token)
            if opened == closed and xml_parser.tool_call_index > 0:
                # Fall back to the last completed call when none is active.
                call_id = xml_parser.current_call_id
                if not call_id:
                    call_id = xml_parser.last_completed_call_id
                empty_call = DeltaToolCall(
                    index=xml_parser.tool_call_index - 1,
                    id=call_id,
                    function=DeltaFunctionCall(arguments=''),
                    type='function',
                )
                return DeltaMessage(tool_calls=[empty_call])

        return xml_parser.parse_single_streaming_chunks(delta_text)

parser instance-attribute

__init__

__init__(tokenizer: AnyTokenizer)
Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def __init__(self, tokenizer: AnyTokenizer):
    """Create the parser and its underlying streaming XML parser."""
    super().__init__(tokenizer)
    self.parser = StreamingXMLToolCallParser()

    parser_name = self.__class__.__name__
    logger.info("vLLM Successfully import tool parser %s !", parser_name)

extract_tool_calls

extract_tool_calls(
    model_output: str, request: ChatCompletionRequest
) -> ExtractedToolCallInformation
Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """Parse a complete model output in one shot.

    Args:
        model_output: Full text produced by the model.
        request: Originating request; when truthy, its ``tools`` are
            handed to the underlying parser.

    Returns:
        The named tool calls found in the output together with the
        content reported by the underlying parser.
    """
    xml_parser = self.parser
    xml_parser.reset_streaming_state()
    if request:
        xml_parser.set_tools(request.tools)

    # The whole output is fed through the streaming parser as one chunk.
    parsed = xml_parser.parse_single_streaming_chunks(model_output)

    # Only calls that carry a function name are reported; an empty or
    # missing tool_calls list simply yields no entries.
    named_calls = [
        ToolCall(
            id=call.id,
            type=call.type,
            function=FunctionCall(
                name=call.function.name,
                arguments=call.function.arguments,
            ),
        ) for call in (parsed.tool_calls or [])
        if call.function and call.function.name
    ]
    return ExtractedToolCallInformation(
        tool_calls=named_calls,
        tools_called=bool(named_calls),
        content=parsed.content,
    )

extract_tool_calls_streaming

extract_tool_calls_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]
Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """Parse one streaming step and return the delta it produced.

    Args:
        previous_text: Text generated before this step; an empty value
            marks the start of a stream and resets the parser state.
        current_text: All text generated so far, including this step.
        delta_text: Text added by this step (may be empty).
        previous_token_ids: Unused by this implementation.
        current_token_ids: Unused by this implementation.
        delta_token_ids: Token ids added by this step.
        request: Originating request; when truthy at stream start, its
            ``tools`` are handed to the underlying parser.

    Returns:
        The delta produced by the underlying parser for ``delta_text``,
        or an empty-arguments tool-call delta in the case described
        in the body.
    """
    xml_parser = self.parser

    if not previous_text:
        xml_parser.reset_streaming_state()
        if request:
            xml_parser.set_tools(request.tools)

    # Tokens can arrive without any decoded text. When that happens
    # after at least one tool call and every <tool_call> seen so far has
    # been closed, emit an empty-arguments delta so the outer streaming
    # layer still outputs the tool_call field.
    if not delta_text and delta_token_ids:
        opened = current_text.count(xml_parser.tool_call_start_token)
        closed = current_text.count(xml_parser.tool_call_end_token)
        if opened == closed and xml_parser.tool_call_index > 0:
            # Fall back to the last completed call when none is active.
            call_id = xml_parser.current_call_id
            if not call_id:
                call_id = xml_parser.last_completed_call_id
            empty_call = DeltaToolCall(
                index=xml_parser.tool_call_index - 1,
                id=call_id,
                function=DeltaFunctionCall(arguments=''),
                type='function',
            )
            return DeltaMessage(tool_calls=[empty_call])

    return xml_parser.parse_single_streaming_chunks(delta_text)

StreamingXMLToolCallParser

Simplified streaming XML tool call parser. Supports streaming input, parsing, and output.

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
  26
  27
  28
  29
  30
  31
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
class StreamingXMLToolCallParser:
    """
    Simplified streaming XML tool call parser
    Supports streaming input, parsing, and output
    """

    def __init__(self):
        """Initialise the streaming state and the tag literals used for
        matching tool-call markup."""
        # Creates every per-stream state attribute and a fresh XML parser.
        self.reset_streaming_state()

        # Tool configuration information
        self.tools: Union[list[ChatCompletionToolsParam], None] = None
        # Literal markers looked up in the model output. The function and
        # parameter start markers are deliberately open-ended
        # ('<function=' / '<parameter='): the name follows the '=' and the
        # tag is rewritten to attribute form during preprocessing.
        self.tool_call_start_token: str = '<tool_call>'
        self.tool_call_end_token: str = '</tool_call>'
        self.function_start_token: str = '<function='
        self.function_end_token: str = '</function>'
        self.parameter_start_token: str = '<parameter='
        self.parameter_end_token: str = '</parameter>'

    def reset_streaming_state(self):
        """Reset streaming parsing state

        Re-initialises every per-stream attribute and replaces the
        underlying XML parser with a new instance. Called from
        ``__init__`` and by the owning tool parser at the start of each
        request/stream.
        """

        # Deltas emitted so far; parse_single_streaming_chunks slices this
        # list to find the deltas produced by the current chunk.
        self.deltas = []
        # state for streaming
        # tool_call_index - 1 is used as the index of the tool call in
        # emitted deltas (presumably counts tool calls started so far).
        self.tool_call_index = 0
        self.current_call_id = None
        self.last_completed_call_id = None
        self.current_function_name = None
        self.current_function_open = False
        self.parameters = {}
        self.current_param_name = None
        self.current_param_value = ''
        self.current_param_value_converted = ''
        self.current_param_is_first = False
        self.should_emit_end_newline = False
        self.start_quote_emitted = False

        # All text received so far and the offset up to which it has
        # already been split into elements.
        self.streaming_buffer = ''
        self.last_processed_pos = 0

        # Plain text collected while no tool call is active
        # (filled by _should_skip_element).
        self.text_content_buffer = ''

        # state for preprocessing and deferred parsing
        # (maintained by _preprocess_xml_chunk)
        self._pre_inside_parameter = False
        self._pre_param_buffer = ""
        self._pre_current_param_name = None
        self.defer_current_parameter = False
        self.deferred_param_raw_value = ""

        # recreate parser
        # NOTE(review): ParserCreate is presumably
        # xml.parsers.expat.ParserCreate; the import is not visible here.
        self.parser = ParserCreate()
        self.setup_parser()

    def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage:
        """
        Parse single streaming XML chunk and return Delta response
        This is the actual streaming interface that receives chunks
        one by one and maintains internal state

        The chunk is appended to ``self.streaming_buffer`` and all complete
        elements in the buffer are processed. Any deltas emitted while
        doing so are merged into one ``DeltaMessage``. If the chunk
        contains a ``</function>`` or ``</tool_call>`` marker whose end
        event did not produce the expected delta, the end events are
        triggered manually as a fallback.

        Args:
            xml_chunk: Single XML chunk string
        Returns:
            DeltaMessage: Contains delta information generated by this chunk,
            returns empty response if no complete elements
        """
        # Record delta count before processing
        initial_delta_count = len(self.deltas)

        self.streaming_buffer += xml_chunk

        found_elements = self._process_complete_xml_elements()

        if found_elements:
            # If complete elements found, check if end events were missed
            # some tags may not have been triggered
            try:
                # Snapshot of the deltas emitted by element processing;
                # deltas added by the fallbacks below are not part of it.
                new_deltas = self.deltas[initial_delta_count:]
                # If this chunk contains </function>
                # but didn't generate '}', then complete it
                if (self.current_call_id is not None
                        and self.function_end_token in xml_chunk):

                    # A function close is recognised by a delta for the
                    # current call whose arguments are exactly:
                    # - Added '}' (non-empty parameter ending)
                    # - Added '{}' (empty parameter function)
                    has_function_close = any((td.tool_calls and any(
                        (tc.function and tc.id == self.current_call_id
                         and isinstance(tc.function.arguments, str) and
                         (tc.function.arguments in ('}', '{}')))
                        for tc in td.tool_calls)) for td in new_deltas)
                    if not has_function_close:
                        # Close potentially unclosed element
                        if self.current_param_name:
                            self._end_element('parameter')
                        if self.current_function_name:
                            self._end_element('function')
                # If this chunk contains </tool_call>
                # but didn't generate final empty delta, then complete it
                if (self.current_call_id is not None
                        and self.tool_call_end_token in xml_chunk):
                    # A tool_call close is recognised by a 'function' delta
                    # for the current call with empty-string arguments.
                    has_toolcall_close = any((td.tool_calls and any(
                        (tc.type == 'function' and tc.function and tc.function.
                         arguments == '' and tc.id == self.current_call_id)
                        for tc in td.tool_calls)) for td in new_deltas)
                    if not has_toolcall_close:
                        # Close potentially unclosed element
                        if self.current_param_name:
                            self._end_element('parameter')
                        if self.current_function_name:
                            self._end_element('function')
                        self._end_element('tool_call')
            except Exception as e:
                # Best effort: a failing fallback must not prevent the
                # deltas that were already produced from being returned.
                logger.warning("Error with fallback parsing: %s", e)
            # Merge newly generated deltas into single response
            result_delta = self._merge_new_deltas_to_single_response(
                initial_delta_count)
            return result_delta
        else:
            # No complete elements, check if there's unoutput text content
            if self.text_content_buffer and self.tool_call_index == 0:
                # Has text content but no tool_call yet, output text content
                text_delta = DeltaMessage(content=self.text_content_buffer)
                self._emit_delta(text_delta)
                # Clear buffer to avoid duplicate output
                self.text_content_buffer = ''
                return text_delta

            # If this chunk contains end tags but wasn't triggered by parser,
            # manually complete end events
            # NOTE(review): the intent is to do this only while still on
            # the call that was active on entry, so a new call is not
            # closed by accident in multi <tool_call> scenarios; the only
            # guard visible here is that a call is currently active.
            if (self.current_call_id is not None
                    and (self.function_end_token in xml_chunk
                         or self.tool_call_end_token in xml_chunk)):
                # Close potentially unclosed element
                if self.current_param_name:
                    self._end_element('parameter')
                if self.function_end_token in xml_chunk and \
                    self.current_function_name:
                    self._end_element('function')
                if self.tool_call_end_token in xml_chunk:
                    self._end_element('tool_call')
                # Return the merged delta result generated by this fallback
                result_delta = self._merge_new_deltas_to_single_response(
                    initial_delta_count)
                return result_delta

            # No complete elements, return empty response
            return DeltaMessage(content=None)

    def _escape_xml_special_chars(self, text: str) -> str:
        """
        Escape XML special characters
        Args:
            text: Original text
        Returns:
            Escaped text
        """
        xml_escapes = {
            '&': '&amp;',
            '<': '&lt;',
            '>': '&gt;',
            '"': '&quot;',
            "'": '&apos;'
        }

        for char, escape in xml_escapes.items():
            text = text.replace(char, escape)

        return text

    def _process_complete_xml_elements(self) -> bool:
        """
        Consume every complete element currently available in the buffer.

        Elements are taken from ``self.streaming_buffer`` starting at
        ``self.last_processed_pos``, preprocessed and fed to the XML
        parser. The loop stops at the first incomplete element.

        Returns:
            bool: Whether complete elements were found and processed
        """
        processed_any = False

        while self.last_processed_pos < len(self.streaming_buffer):
            element, next_pos = self._find_next_complete_element(
                self.last_processed_pos)
            if element is None:
                # Incomplete element: wait for more data.
                break

            if self._should_skip_element(element):
                self.last_processed_pos = next_pos
                continue

            try:
                xml_piece = self._preprocess_xml_chunk(element)
                head = xml_piece.strip()
                opens_tool_call = head.startswith('<tool_call>')

                # Before the very first tool call starts, flush the plain
                # text that has been collected so far.
                if ((opens_tool_call or head.startswith('<function name='))
                        and self.tool_call_index == 0
                        and self.text_content_buffer):
                    self._emit_delta(
                        DeltaMessage(content=self.text_content_buffer))
                    # Cleared so later text is not emitted twice.
                    self.text_content_buffer = ''

                # A new <tool_call> while an earlier call is still current:
                # close whatever is open, emit the tail delta of that call
                # and start over with a fresh XML parser.
                if (opens_tool_call and self.tool_call_index > 0
                        and self.current_call_id):
                    if self.current_param_name:
                        self._end_element('parameter')
                    if self.current_function_open or self.current_function_name:
                        self._end_element('function')
                    tail_call = DeltaToolCall(index=self.tool_call_index - 1,
                                              id=self.current_call_id,
                                              type='function',
                                              function=DeltaFunctionCall(
                                                  name=None, arguments=''))
                    self._emit_delta(
                        DeltaMessage(role=None,
                                     content=None,
                                     reasoning_content=None,
                                     tool_calls=[tail_call]))
                    # Generated deltas are preserved by this reset.
                    self._reset_xml_parser_after_tool_call()

                self.parser.Parse(xml_piece, False)
                processed_any = True

            except Exception as e:
                # Best effort: a bad element is logged and skipped.
                logger.warning("Error when parsing XML elements: %s", e)

            # Advance past the element whether or not parsing succeeded.
            self.last_processed_pos = next_pos

        return processed_any

    def _should_skip_element(self, element: str) -> bool:
        """
        Determine whether an element should be skipped

        Args:
            element: Element to evaluate

        Returns:
            bool: True means should skip, False means should process
        """

        # If it's a tool_call XML tag, don't skip
        if element.startswith(
                self.tool_call_start_token) or element.startswith(
                    self.function_start_token) or element.startswith(
                        self.parameter_start_token):
            return False

        # If currently not parsing tool calls and not blank,
        # collect this text instead of skipping
        # Only process other XML elements after tool_call appears,
        # otherwise treat as plain text
        if self.current_call_id is None and element:
            # Collect text content to buffer
            self.text_content_buffer += element
            return True  # Still skip, but content has been collected

        # If currently parsing tool calls,
        # this might be parameter value, don't skip
        if self.current_call_id is not None:
            return False

        # Skip blank content
        return not element

    def _find_next_complete_element(
            self, start_pos: int) -> tuple[Optional[str], int]:
        """
        Find next complete XML element from specified position

        Args:
            start_pos: Position to start searching

        Returns:
            (Complete element string, element end position), 
            returns (None, start_pos) if no complete element found
        """
        buffer = self.streaming_buffer[start_pos:]

        if not buffer:
            return None, start_pos

        if buffer.startswith('<'):
            # Need to ensure no new < appears,
            # find the nearest one between < and >
            tag_end = buffer.find('<', 1)
            tag_end2 = buffer.find('>', 1)
            if tag_end != -1 and tag_end2 != -1:
                # Next nearest is <
                if tag_end < tag_end2:
                    return buffer[:tag_end], start_pos + tag_end
                # Next nearest is >, means found XML element
                else:
                    return buffer[:tag_end2 + 1], start_pos + tag_end2 + 1
            elif tag_end != -1:
                return buffer[:tag_end], start_pos + tag_end
            elif tag_end2 != -1:
                return buffer[:tag_end2 + 1], start_pos + tag_end2 + 1
            else:
                # If currently not parsing tool calls (entering a tool_call),
                # check if starts with <tool_call>
                if self.current_call_id is None:
                    # Check if might be start of <tool_call>
                    if buffer == '<tool_call>'[:len(buffer)]:
                        # Might be start of <tool_call>, wait for more data
                        return None, start_pos
                    else:
                        # Not start of <tool_call>, treat as text
                        return buffer, start_pos + len(buffer)
                else:
                    # When parsing tool calls,
                    # wait for more data to get complete tag
                    return None, start_pos
        else:
            # Find text content (until next < or buffer end)
            next_tag_pos = buffer.find('<')
            if next_tag_pos != -1:
                # Found text content
                text_content = buffer[:next_tag_pos]
                return text_content, start_pos + next_tag_pos
            else:
                # Buffer end is all text, process
                # (no longer wait for more data)
                remaining = buffer
                return remaining, start_pos + len(remaining)

    def _merge_new_deltas_to_single_response(
            self, initial_count: int) -> DeltaMessage:
        """
        Collapse the deltas emitted since ``initial_count`` into one message.

        Content strings are concatenated in order. Tool-call deltas that
        share an id are folded into the first delta seen for that id, and
        their argument fragments are concatenated in order.

        Args:
            initial_count: Delta count before processing

        Returns:
            Merged DeltaMessage containing all newly generated delta
            information; an empty message if nothing new was emitted, or
            the delta itself if exactly one was emitted
        """
        if len(self.deltas) <= initial_count:
            return DeltaMessage(content=None)

        fresh = self.deltas[initial_count:]
        if len(fresh) == 1:
            # Nothing to merge.
            return fresh[0]

        text_parts: list[str] = []
        combined_calls: list[DeltaToolCall] = []

        for delta in fresh:
            if delta.content:
                text_parts.append(delta.content)

            for incoming in delta.tool_calls or ():
                # First already-merged call with the same id, if any.
                target = next(
                    (call
                     for call in combined_calls if call.id == incoming.id),
                    None)
                if not (target and target.function):
                    combined_calls.append(incoming)
                    continue

                fragment = incoming.function
                if fragment and fragment.name:
                    target.function.name = fragment.name
                if fragment and fragment.arguments is not None:
                    if target.function.arguments is None:
                        target.function.arguments = ''
                    # Streaming JSON arguments are joined in arrival order.
                    target.function.arguments += fragment.arguments
                if incoming.type:
                    target.type = incoming.type

        text = ''.join(text_parts)
        return DeltaMessage(content=text if text else None,
                            tool_calls=combined_calls)

    def _preprocess_xml_chunk(self, chunk: str) -> str:
        """
        Preprocess XML chunk, handle non-standard formats, 
        and escape special characters

        Args:
            chunk: Original XML chunk

        Returns:
            Processed XML chunk
        """

        # Check if this is a tool_call related element
        is_tool_call = False
        if chunk.startswith(self.tool_call_start_token) or chunk.startswith(
                self.tool_call_end_token):
            is_tool_call = True
        if chunk.startswith(self.function_start_token) or chunk.startswith(
                self.function_end_token):
            is_tool_call = True
        if chunk.startswith(self.parameter_start_token) or chunk.startswith(
                self.parameter_end_token):
            is_tool_call = True
        # Handle <function=name> format -> <function name="name">
        processed = re.sub(r'<function=([^>]+)>', r'<function name="\1">',
                           chunk)
        # Handle <parameter=name> format -> <parameter name="name">
        processed = re.sub(r'<parameter=([^>]+)>', r'<parameter name="\1">',
                           processed)

        original_chunk = chunk
        # If in parameter value accumulation mode
        if self._pre_inside_parameter:
            # Parameter end: output accumulated raw text
            # safely then return </parameter>
            if processed.startswith('</parameter>'):
                body_text = self._pre_param_buffer
                # Trigger deferred parsing mode
                # literal_eval+json output in end_element
                self.defer_current_parameter = True
                self.deferred_param_raw_value = body_text
                # Clean up state
                self._pre_inside_parameter = False
                self._pre_param_buffer = ""
                self._pre_current_param_name = None
                safe_text = self._escape_xml_special_chars(body_text)
                return f"{safe_text}</parameter>"
            else:
                # If this is the first block of content after entering parameter
                # evaluate if deferred parsing is needed;
                # If not needed, exit accumulation mode
                # and pass through directly
                if self._pre_param_buffer == "":
                    # Get current parameter type
                    param_type = self._get_param_type(
                        self._pre_current_param_name
                    ) if self._pre_current_param_name else 'string'
                    # Only these types need deferred parsing to
                    # handle Python literals containing single quotes
                    is_object_type = param_type in ["object"]
                    is_complex_type = (param_type
                                       in ["array", "arr", "sequence"]
                                       or param_type.startswith("dict")
                                       or param_type.startswith("list"))

                    # Only delay when contains container symbols
                    # and has single quotes and is complex type
                    has_container_hint = ('[' in original_chunk) or (
                        '{' in original_chunk) or ('(' in original_chunk)

                    # Determine if deferred parsing is needed
                    need_defer = False
                    if is_complex_type:
                        # Complex type, always need deferred parsing
                        need_defer = True
                    elif is_object_type and has_container_hint and (
                            "'" in original_chunk):
                        # Object type with container symbols
                        # and single quotes, need deferred parsing
                        need_defer = True

                    if not need_defer:
                        # No need for deferred parsing,
                        # exit parameter mode directly
                        self._pre_inside_parameter = False
                        return self._escape_xml_special_chars(original_chunk)
                self._pre_param_buffer += original_chunk
                return ""

        # Parameter start: enable accumulation
        if processed.startswith('<parameter name='):
            m = re.match(r'<parameter name="([^"]+)">', processed)
            if m:
                self._pre_current_param_name = m.group(1)
            self._pre_inside_parameter = True
            self._pre_param_buffer = ""
            return processed

        # If processed doesn't contain special_token, escape processed
        # This is because XML parsing encounters special characters
        # and reports errors, so escaping is needed
        if not is_tool_call:
            processed = self._escape_xml_special_chars(processed)
        return processed

    def _emit_delta(self, delta: DeltaMessage):
        """Queue one streaming delta at the end of ``self.deltas``.

        Args:
            delta: The delta message to append.
        """
        pending = self.deltas
        pending.append(delta)

    def _auto_close_open_parameter_if_needed(self,
                                             incoming_tag: Optional[str] = None
                                             ):
        """Before starting to process new elements, 
        if there are unclosed tags from before, 
        automatically complete their endings to the parser.
        - If there are unclosed parameters, 
        it's equivalent to feeding `</parameter>`
        - When about to start a new function or tool_call, 
        if there are unclosed functions, complete `</function>`.
        - When about to start a new tool_call, 
        if there are unclosed tool_calls, complete `</tool_call>`.
        """
        # First close unclosed parameters
        if self.current_param_name:
            self._end_element('parameter')

        # If about to start new function or tool_call,
        # and there are unclosed functions, close function first
        if incoming_tag in ('function',
                            'tool_call') and self.current_function_name:
            self._end_element('function')

        # If about to start new tool_call,
        # and there are unclosed tool_calls, close tool_call first
        if incoming_tag == 'tool_call' and self.current_call_id:
            self._end_element('tool_call')

    def _start_element(self, name: str, attrs: dict[str, str]):
        """Handle XML start element events"""

        if name == 'root':
            return

        if name == 'tool_call':
            # Before opening new tool_call,
            # automatically complete previous unclosed tags
            self._auto_close_open_parameter_if_needed('tool_call')

            self.parameters = {}
            self.current_call_id = self._get_next_call_id()
            self.current_param_is_first = True
            self.tool_call_index += 1
        elif name.startswith('function') or (name == 'function'):
            # If missing tool_call, manually complete
            if not self.current_call_id:
                self._start_element('tool_call', {})
            # Before opening new function,
            # automatically complete previous unclosed tags (parameter/function)
            self._auto_close_open_parameter_if_needed('function')
            function_name = self._extract_function_name(name, attrs)
            self.current_function_name = function_name
            self.current_function_open = True
            if function_name:
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.tool_call_index - 1,
                                  id=self.current_call_id,
                                  type='function',
                                  function=DeltaFunctionCall(
                                      name=function_name, arguments=''))
                ])
                self._emit_delta(delta)
        elif name.startswith('parameter') or (name == 'parameter'):
            # If previous parameter hasn't ended normally,
            # complete its end first, then start new parameter
            self._auto_close_open_parameter_if_needed('parameter')
            param_name = self._extract_parameter_name(name, attrs)
            self.current_param_name = param_name
            self.current_param_value = ''
            self.current_param_value_converted = ''
            self.start_quote_emitted = False  # Reset start quote flag

            # Only output parameter name and colon,
            # don't output quotes
            # decide after parameter value type is determined
            if param_name:
                if not self.parameters:
                    # First parameter
                    # start JSON, only output parameter name and colon
                    json_start = f'{{"{param_name}": '
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.tool_call_index - 1,
                                      id=self.current_call_id,
                                      type='function',
                                      function=DeltaFunctionCall(
                                          name=None, arguments=json_start))
                    ])
                    self._emit_delta(delta)
                    self.current_param_is_first = True
                else:
                    # Subsequent parameters
                    # add comma and parameter name, no quotes
                    json_continue = f', "{param_name}": '
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.tool_call_index - 1,
                                      id=self.current_call_id,
                                      type='function',
                                      function=DeltaFunctionCall(
                                          name=None, arguments=json_continue))
                    ])
                    self._emit_delta(delta)
                    self.current_param_is_first = False

    def _char_data(self, data: str):
        """Handle XML character data events"""
        if data and self.current_param_name:
            # If preprocessing stage determines deferred parsing is needed,
            # only cache character data, no streaming output
            if self.defer_current_parameter:
                original_data = data
                if self.should_emit_end_newline:
                    original_data = '\n' + original_data
                    self.should_emit_end_newline = False
                if original_data.endswith('\n'):
                    self.should_emit_end_newline = True
                    original_data = original_data[:-1]
                self.current_param_value += original_data
                return

            param_type = self._get_param_type(self.current_param_name)

            # Check if this is the first time receiving data for this parameter
            # If this is the first packet of data and starts with \n, remove \n
            if not self.current_param_value and data.startswith('\n'):
                data = data[1:]

            # Output start quote for string type (if not already output)
            if (param_type
                    in ['string', 'str', 'text', 'varchar', 'char', 'enum']
                    and not self.start_quote_emitted):
                quote_delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.tool_call_index - 1,
                                  id=self.current_call_id,
                                  type='function',
                                  function=DeltaFunctionCall(name=None,
                                                             arguments='"'))
                ])
                self._emit_delta(quote_delta)
                self.start_quote_emitted = True

            if not data:
                return

            original_data = data
            # Delay output of trailing newline
            if self.should_emit_end_newline:
                original_data = '\n' + original_data
                self.should_emit_end_newline = False
            if original_data.endswith('\n'):
                self.should_emit_end_newline = True
                original_data = original_data[:-1]
            self.current_param_value += original_data

            # convert parameter value by param_type
            converted_value = self._convert_param_value(
                self.current_param_value, param_type)
            output_data = self._convert_for_json_streaming(
                converted_value, param_type)

            delta_data = output_data[len(self.current_param_value_converted):]
            self.current_param_value_converted = output_data

            delta = DeltaMessage(tool_calls=[
                DeltaToolCall(index=self.tool_call_index - 1,
                              id=self.current_call_id,
                              type='function',
                              function=DeltaFunctionCall(name=None,
                                                         arguments=delta_data))
            ])
            self._emit_delta(delta)

    def _end_element(self, name: str):
        """Handle XML end element events.

        Depending on which element ends:

        - ``parameter`` (only while a parameter is open): finalize the
          value, emit the closing delta if one is needed, and store the
          value in ``self.parameters``.
        - ``function``: emit ``}`` when parameters were stored, otherwise
          ``{}``, and mark the function as closed.
        - ``tool_call``: close a still-open function, emit a final delta
          with empty arguments, flush buffered text content and reset the
          per-tool-call parser state.

        Args:
            name: Name of the element that ended; ``root`` is ignored.
        """

        if name == 'root':
            return

        # If function or tool_call ends and there are still unclosed parameters,
        # complete parameter end first
        if (name.startswith('function') or name == 'function'
                or name == 'tool_call') and self.current_param_name:
            self._auto_close_open_parameter_if_needed()

        if (name.startswith('parameter')
                or name == 'parameter') and self.current_param_name:
            # End current parameter
            param_name = self.current_param_name
            param_value = self.current_param_value

            # If in deferred parsing mode,
            # perform overall parsing on raw content
            # accumulated in preprocessing stage and output once
            if self.defer_current_parameter:
                # Prefer the raw text captured during preprocessing; fall
                # back to the text accumulated by _char_data when it is empty
                raw_text = self.deferred_param_raw_value \
                if self.deferred_param_raw_value else param_value
                parsed_value = None
                output_arguments = None
                try:
                    # If previously delayed trailing newline,
                    # add it back before parsing
                    if self.should_emit_end_newline:
                        raw_for_parse = raw_text + '\n'
                    else:
                        raw_for_parse = raw_text
                    # Parse the text as a Python literal and re-serialize
                    # the result as JSON
                    parsed_value = ast.literal_eval(raw_for_parse)
                    output_arguments = json.dumps(parsed_value,
                                                  ensure_ascii=False)
                except Exception:
                    # Fallback: output as string as-is
                    # (best effort: any parse/serialize failure lands here)
                    output_arguments = json.dumps(raw_text, ensure_ascii=False)
                    parsed_value = raw_text

                # The whole value is emitted in a single delta
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.tool_call_index - 1,
                                  id=self.current_call_id,
                                  type='function',
                                  function=DeltaFunctionCall(
                                      name=None, arguments=output_arguments))
                ])
                self._emit_delta(delta)

                # Clean up and store
                self.should_emit_end_newline = False
                self.parameters[param_name] = parsed_value
                self.current_param_name = None
                self.current_param_value = ""
                self.current_param_value_converted = ""
                self.start_quote_emitted = False
                self.defer_current_parameter = False
                self.deferred_param_raw_value = ""
                return

            param_type = self._get_param_type(param_name)

            # convert complete parameter value by param_type
            converted_value = self._convert_param_value(
                param_value, param_type)

            # Decide whether to add end quote based on parameter type
            # (non-string types emit nothing further at this point)
            if param_type in [
                    'string', 'str', 'text', 'varchar', 'char', 'enum'
            ]:
                # For empty string parameters, need special handling
                if not param_value and not self.start_quote_emitted:
                    # No start quote output,
                    # directly output complete empty string
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.tool_call_index - 1,
                                      id=self.current_call_id,
                                      type='function',
                                      function=DeltaFunctionCall(
                                          name=None, arguments='""'))
                    ])
                    self._emit_delta(delta)
                else:
                    # Start quote was already sent (or a value exists),
                    # so only the end quote is output
                    delta = DeltaMessage(tool_calls=[
                        DeltaToolCall(index=self.tool_call_index - 1,
                                      id=self.current_call_id,
                                      type='function',
                                      function=DeltaFunctionCall(
                                          name=None, arguments='"'))
                    ])
                    self._emit_delta(delta)

            # A held-back trailing newline is dropped when the parameter ends
            self.should_emit_end_newline = False
            # Store converted value
            self.parameters[param_name] = converted_value
            self.current_param_name = None
            self.current_param_value = ''
            self.current_param_value_converted = ''
            self.start_quote_emitted = False

        elif name.startswith('function') or name == 'function':
            # if there are parameters, close JSON object
            if self.parameters:
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.tool_call_index - 1,
                                  id=self.current_call_id,
                                  type='function',
                                  function=DeltaFunctionCall(name=None,
                                                             arguments='}'))
                ])
                self._emit_delta(delta)
            # return empty object
            else:
                delta = DeltaMessage(tool_calls=[
                    DeltaToolCall(index=self.tool_call_index - 1,
                                  id=self.current_call_id,
                                  type='function',
                                  function=DeltaFunctionCall(name=None,
                                                             arguments='{}'))
                ])
                self._emit_delta(delta)
            # current_function_name is left untouched here; only the open
            # flag is cleared
            self.current_function_open = False

        elif name == 'tool_call':
            # Before ending tool_call,
            # ensure function is closed to complete missing right brace
            if self.current_function_open:
                # If there are still unclosed parameters, close them first
                if self.current_param_name:
                    self._end_element('parameter')
                # Close function, ensure output '}' or '{}'
                self._end_element('function')
            # Final Delta
            delta = DeltaMessage(tool_calls=[
                DeltaToolCall(index=self.tool_call_index - 1,
                              id=self.current_call_id,
                              type='function',
                              function=DeltaFunctionCall(name=None,
                                                         arguments=''))
            ])
            self._emit_delta(delta)

            # Check if there's text content to output (between tool_calls)
            # (whitespace-only buffers are not emitted)
            if self.text_content_buffer.strip():
                text_delta = DeltaMessage(content=self.text_content_buffer)
                self._emit_delta(text_delta)

            # Recreates the XML parser and clears per-tool-call state
            self._reset_xml_parser_after_tool_call()

    def setup_parser(self):
        """Attach this object's callbacks to ``self.parser``.

        Turns on ``buffer_text`` and installs the start-element,
        end-element and character-data handlers.
        """
        xml_parser = self.parser
        xml_parser.buffer_text = True
        callbacks = (
            ('StartElementHandler', self._start_element),
            ('EndElementHandler', self._end_element),
            ('CharacterDataHandler', self._char_data),
        )
        for attribute, handler in callbacks:
            setattr(xml_parser, attribute, handler)

    def set_tools(self, tools: Union[list[ChatCompletionToolsParam], None]):
        """Store the tool definitions used for parameter type lookup.

        Args:
            tools: Tool definitions, or None when no tools are configured.
        """
        self.tools = tools

    def _get_next_call_id(self):
        """Generate unique call ID"""
        return f'call_{uuid.uuid4().hex[:24]}'

    def _extract_function_name(self, name: str,
                               attrs: dict[str, str]) -> Optional[str]:
        """Extract function name from various formats"""
        if attrs and 'name' in attrs:
            return attrs['name']

        if '=' in name:
            parts = name.split('=', 1)
            if len(parts) == 2 and parts[0] == 'function':
                return parts[1]

        return None

    def _extract_parameter_name(self, name: str,
                                attrs: dict[str, str]) -> Optional[str]:
        """Extract parameter name from various formats"""
        if attrs and 'name' in attrs:
            return attrs['name']

        if '=' in name:
            parts = name.split('=', 1)
            if len(parts) == 2 and parts[0] == 'parameter':
                return parts[1]

        return None

    def _get_param_type(self, param_name: str) -> str:
        """Get parameter type based on tool configuration, defaults to string
        Args:
            param_name: Parameter name

        Returns:
            Parameter type
        """
        if not self.tools or not self.current_function_name:
            return 'string'

        for tool in self.tools:
            if not hasattr(tool, 'type') or not (hasattr(
                    tool, 'function') and hasattr(tool.function, 'name')):
                continue
            if tool.type == 'function' and \
                tool.function.name == self.current_function_name:
                if not hasattr(tool.function, 'parameters'):
                    return 'string'
                params = tool.function.parameters
                if isinstance(params, dict) and 'properties' in params:
                    properties = params['properties']
                    if param_name in properties and isinstance(
                            properties[param_name], dict):
                        return self.repair_param_type(
                            str(properties[param_name].get('type', 'string')))
                elif isinstance(params, dict) and param_name in params:
                    param_config = params[param_name]
                    if isinstance(param_config, dict):
                        return self.repair_param_type(
                            str(param_config.get('type', 'string')))
                break
        return 'string'

    def repair_param_type(self, param_type: str) -> str:
        """Map unrecognized parameter types to ``'string'``.

        A type is recognized when it equals one of the known names or
        starts with one of the known prefixes; the comparison is
        case-sensitive.

        Args:
            param_type: Parameter type

        Returns:
            ``param_type`` unchanged when recognized, otherwise ``'string'``
        """
        known_names = (
            'string', 'str', 'text', 'varchar', 'char', 'enum',
            'boolean', 'bool', 'binary',
            'object', 'array', 'arr', 'sequence',
        )
        known_prefixes = (
            'int', 'uint', 'long', 'short', 'unsigned',
            'num', 'float',
            'dict', 'list',
        )
        recognized = (param_type in known_names
                      or param_type.startswith(known_prefixes))
        return param_type if recognized else 'string'

    def _convert_param_value(self, param_value: str, param_type: str) -> Any:
        """Convert value based on parameter type
        Args:
            param_value: Parameter value
            param_type: Parameter type

        Returns:
            Converted value
        """
        if param_value.lower() == 'null':
            return None

        param_type = param_type.strip().lower()
        if param_type in ['string', 'str', 'text', 'varchar', 'char', 'enum']:
            return param_value
        elif (param_type.startswith('int') or param_type.startswith('uint')
              or param_type.startswith('long')
              or param_type.startswith('short')
              or param_type.startswith('unsigned')):
            try:
                return int(param_value)
            except (ValueError, TypeError):
                logger.warning(
                    "Parsed value '%s' of parameter '%s' is not an integer "
                    "in tool '%s', degenerating to string.", param_value)
            return param_value
        elif param_type.startswith('num') or param_type.startswith('float'):
            try:
                float_param_value: float = float(param_value)
                return float_param_value if float_param_value - int(
                    float_param_value) != 0 else int(float_param_value)
            except (ValueError, TypeError):
                logger.warning(
                    "Parsed value '%s' of parameter '%s' is not a float "
                    "in tool '%s', degenerating to string.", param_value)
            return param_value
        elif param_type in ['boolean', 'bool', 'binary']:
            param_value = param_value.lower()
            return param_value == 'true'
        else:
            return param_value

    def _convert_for_json_streaming(self, converted_value: Any,
                                    param_type: str) -> str:
        """Convert converted_value based on 
        whether it's empty and if type is string
        Args:
            converted_value: Converted value
            param_type: Parameter type

        Returns:
            Converted string for streaming output
        """
        # Check if value is empty, but exclude numeric 0
        if converted_value is None or converted_value == '':
            return ''

        if param_type in ['string', 'str', 'text', 'varchar', 'char', 'enum']:
            # String type, remove double quotes
            return json.dumps(converted_value, ensure_ascii=False)[1:-1]
        else:
            # Non-string type, return complete JSON string
            if not isinstance(converted_value, str):
                return json.dumps(converted_value, ensure_ascii=False)
            else:
                return converted_value

    def _reset_xml_parser_after_tool_call(self):
        """
        Each tool_call is treated as a separate XML document, 
        so we need to reset the parser after each tool_call.
        """

        # recreate XML parser
        self.parser = ParserCreate()
        self.setup_parser()

        # Reset current tool_call state
        if self.current_call_id:
            self.last_completed_call_id = self.current_call_id
        self.current_call_id = None
        self.current_function_name = None
        self.current_function_open = False
        self.parameters = {}
        self.current_param_name = None
        self.current_param_value = ''
        self.current_param_value_converted = ''
        self.current_param_is_first = False
        self.should_emit_end_newline = False
        self.start_quote_emitted = False
        self.text_content_buffer = ''

        # Reset preprocessing and deferred parsing state
        self._pre_inside_parameter = False
        self._pre_param_buffer = ""
        self._pre_current_param_name = None
        self.defer_current_parameter = False
        self.deferred_param_raw_value = ""

function_end_token instance-attribute

function_end_token: str = '</function>'

function_start_token instance-attribute

function_start_token: str = '<function='

parameter_end_token instance-attribute

parameter_end_token: str = '</parameter>'

parameter_start_token instance-attribute

parameter_start_token: str = '<parameter='

tool_call_end_token instance-attribute

tool_call_end_token: str = '</tool_call>'

tool_call_start_token instance-attribute

tool_call_start_token: str = '<tool_call>'

tools instance-attribute

tools: Union[list[ChatCompletionToolsParam], None] = None

__init__

__init__()
Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def __init__(self):
    self.reset_streaming_state()

    # Tool configuration information
    self.tools: Union[list[ChatCompletionToolsParam], None] = None
    self.tool_call_start_token: str = '<tool_call>'
    self.tool_call_end_token: str = '</tool_call>'
    self.function_start_token: str = '<function='
    self.function_end_token: str = '</function>'
    self.parameter_start_token: str = '<parameter='
    self.parameter_end_token: str = '</parameter>'

_auto_close_open_parameter_if_needed

_auto_close_open_parameter_if_needed(
    incoming_tag: Optional[str] = None,
)

Before processing a new element, close any tags that are still open by feeding their end events to the parser:

- If a parameter is unclosed, close it (equivalent to feeding </parameter>).
- When a new function or tool_call is about to start and a function is unclosed, complete </function>.
- When a new tool_call is about to start and a tool_call is unclosed, complete </tool_call>.

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _auto_close_open_parameter_if_needed(self,
                                         incoming_tag: Optional[str] = None
                                         ):
    """Before starting to process new elements, 
    if there are unclosed tags from before, 
    automatically complete their endings to the parser.
    - If there are unclosed parameters, 
    it's equivalent to feeding `</parameter>`
    - When about to start a new function or tool_call, 
    if there are unclosed functions, complete `</function>`.
    - When about to start a new tool_call, 
    if there are unclosed tool_calls, complete `</tool_call>`.
    """
    # First close unclosed parameters
    if self.current_param_name:
        self._end_element('parameter')

    # If about to start new function or tool_call,
    # and there are unclosed functions, close function first
    if incoming_tag in ('function',
                        'tool_call') and self.current_function_name:
        self._end_element('function')

    # If about to start new tool_call,
    # and there are unclosed tool_calls, close tool_call first
    if incoming_tag == 'tool_call' and self.current_call_id:
        self._end_element('tool_call')

_char_data

_char_data(data: str)

Handle XML character data events

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _char_data(self, data: str):
    """Handle XML character data events"""
    if data and self.current_param_name:
        # If preprocessing stage determines deferred parsing is needed,
        # only cache character data, no streaming output
        if self.defer_current_parameter:
            original_data = data
            if self.should_emit_end_newline:
                original_data = '\n' + original_data
                self.should_emit_end_newline = False
            if original_data.endswith('\n'):
                self.should_emit_end_newline = True
                original_data = original_data[:-1]
            self.current_param_value += original_data
            return

        param_type = self._get_param_type(self.current_param_name)

        # Check if this is the first time receiving data for this parameter
        # If this is the first packet of data and starts with \n, remove \n
        if not self.current_param_value and data.startswith('\n'):
            data = data[1:]

        # Output start quote for string type (if not already output)
        if (param_type
                in ['string', 'str', 'text', 'varchar', 'char', 'enum']
                and not self.start_quote_emitted):
            quote_delta = DeltaMessage(tool_calls=[
                DeltaToolCall(index=self.tool_call_index - 1,
                              id=self.current_call_id,
                              type='function',
                              function=DeltaFunctionCall(name=None,
                                                         arguments='"'))
            ])
            self._emit_delta(quote_delta)
            self.start_quote_emitted = True

        if not data:
            return

        original_data = data
        # Delay output of trailing newline
        if self.should_emit_end_newline:
            original_data = '\n' + original_data
            self.should_emit_end_newline = False
        if original_data.endswith('\n'):
            self.should_emit_end_newline = True
            original_data = original_data[:-1]
        self.current_param_value += original_data

        # convert parameter value by param_type
        converted_value = self._convert_param_value(
            self.current_param_value, param_type)
        output_data = self._convert_for_json_streaming(
            converted_value, param_type)

        delta_data = output_data[len(self.current_param_value_converted):]
        self.current_param_value_converted = output_data

        delta = DeltaMessage(tool_calls=[
            DeltaToolCall(index=self.tool_call_index - 1,
                          id=self.current_call_id,
                          type='function',
                          function=DeltaFunctionCall(name=None,
                                                     arguments=delta_data))
        ])
        self._emit_delta(delta)

_convert_for_json_streaming

_convert_for_json_streaming(
    converted_value: Any, param_type: str
) -> str

Convert converted_value into the string to stream, depending on whether it is empty and whether the parameter type is string.

Args:

- converted_value: Converted value
- param_type: Parameter type

Returns:

Type Description
str

Converted string for streaming output

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _convert_for_json_streaming(self, converted_value: Any,
                                param_type: str) -> str:
    """Render a converted parameter value as text for streamed JSON.

    Args:
        converted_value: Value already converted to its target type
        param_type: Parameter type

    Returns:
        ``''`` for ``None`` or the empty string; for string-like types the
        JSON-escaped text without its surrounding double quotes; otherwise
        the JSON encoding of the value, except that values which are still
        ``str`` are returned unchanged.
    """
    # Only None and '' count as empty; numeric 0 and False are real values.
    nothing_to_emit = converted_value is None or converted_value == ''
    if nothing_to_emit:
        return ''

    string_like = ('string', 'str', 'text', 'varchar', 'char', 'enum')
    if param_type in string_like:
        # Escape as JSON, then drop the enclosing quotes.
        encoded = json.dumps(converted_value, ensure_ascii=False)
        return encoded[1:-1]

    # Non-string type: a value that stayed a string is passed through as-is,
    # anything else is emitted as complete JSON.
    if isinstance(converted_value, str):
        return converted_value
    return json.dumps(converted_value, ensure_ascii=False)

_convert_param_value

_convert_param_value(
    param_value: str, param_type: str
) -> Any

Convert a parameter value based on its parameter type.

Args:

- param_value: Parameter value
- param_type: Parameter type

Returns:

Type Description
Any

Converted value

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _convert_param_value(self, param_value: str, param_type: str) -> Any:
    """Convert value based on parameter type

    A value that cannot be converted to the requested numeric type is
    logged and returned unchanged as a string.

    Args:
        param_value: Parameter value
        param_type: Parameter type (compared after strip + lower-casing)

    Returns:
        ``None`` for a case-insensitive ``null``; the original string for
        string-like and unrecognised types; ``int`` for integer-like types;
        ``int`` or ``float`` for ``num*``/``float*`` types (whole numbers
        collapse to ``int``); ``bool`` for boolean types (``True`` only for
        a case-insensitive ``true``).
    """
    if param_value.lower() == 'null':
        return None

    # Context for the warnings below. The format strings expect the
    # parameter and tool names; read them defensively so that a missing
    # attribute can never turn a warning into an AttributeError.
    param_name = getattr(self, 'current_param_name', None)
    tool_name = getattr(self, 'current_function_name', None)

    param_type = param_type.strip().lower()
    if param_type in ['string', 'str', 'text', 'varchar', 'char', 'enum']:
        return param_value
    elif param_type.startswith(
        ('int', 'uint', 'long', 'short', 'unsigned')):
        try:
            return int(param_value)
        except (ValueError, TypeError):
            logger.warning(
                "Parsed value '%s' of parameter '%s' is not an integer "
                "in tool '%s', degenerating to string.", param_value,
                param_name, tool_name)
        return param_value
    elif param_type.startswith(('num', 'float')):
        try:
            float_param_value: float = float(param_value)
            # int() raises ValueError for NaN and OverflowError for
            # +/-infinity (e.g. 'inf' or '1e999'); both degenerate to
            # string because neither has a valid JSON number encoding.
            return float_param_value if float_param_value - int(
                float_param_value) != 0 else int(float_param_value)
        except (ValueError, TypeError, OverflowError):
            logger.warning(
                "Parsed value '%s' of parameter '%s' is not a float "
                "in tool '%s', degenerating to string.", param_value,
                param_name, tool_name)
        return param_value
    elif param_type in ['boolean', 'bool', 'binary']:
        param_value = param_value.lower()
        return param_value == 'true'
    else:
        return param_value

_emit_delta

_emit_delta(delta: DeltaMessage)

Emit Delta response (streaming output)

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _emit_delta(self, delta: DeltaMessage):
    """Queue ``delta`` at the end of ``self.deltas`` (streaming output)."""
    queued = self.deltas
    queued.append(delta)

_end_element

_end_element(name: str)

Handle XML end element events

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _end_element(self, name: str):
    """Handle an XML end-element event and emit the matching JSON deltas.

    * ``root`` is ignored.
    * A ``parameter*`` end (with a parameter open) finalises that
      parameter: in deferred mode the raw text is parsed in one go with
      ``ast.literal_eval`` and emitted as JSON (falling back to a JSON
      string); otherwise the value is converted by its declared type and,
      for string-like types, the closing quote (or ``""``) is emitted.
    * A ``function*`` end emits ``}`` when parameters were stored and
      ``{}`` otherwise.
    * A ``tool_call`` end closes a still-open parameter/function, emits a
      final empty-arguments delta, flushes buffered text if it is not
      blank, and resets the per-call parser state.
    """
    if name == 'root':
        return

    def arguments_delta(arguments):
        # Delta for the current call carrying only an arguments fragment.
        # The index and call id are read at the moment the delta is built.
        return DeltaMessage(tool_calls=[
            DeltaToolCall(index=self.tool_call_index - 1,
                          id=self.current_call_id,
                          type='function',
                          function=DeltaFunctionCall(name=None,
                                                     arguments=arguments))
        ])

    ends_function = name.startswith('function')
    ends_tool_call = name == 'tool_call'

    # A function or tool_call that ends while a parameter is still open
    # implies the end of that parameter, so complete it first.
    if (ends_function or ends_tool_call) and self.current_param_name:
        self._auto_close_open_parameter_if_needed()

    if name.startswith('parameter') and self.current_param_name:
        param_name = self.current_param_name
        param_value = self.current_param_value

        if self.defer_current_parameter:
            # Deferred mode: the raw content accumulated during
            # preprocessing is parsed as a whole and emitted once.
            raw_text = self.deferred_param_raw_value or param_value
            parsed_value = None
            output_arguments = None
            try:
                # Add back a trailing newline that was delayed earlier
                # before parsing.
                raw_for_parse = (raw_text + '\n'
                                 if self.should_emit_end_newline else
                                 raw_text)
                parsed_value = ast.literal_eval(raw_for_parse)
                output_arguments = json.dumps(parsed_value,
                                              ensure_ascii=False)
            except Exception:
                # Fallback: emit the raw text as a JSON string.
                output_arguments = json.dumps(raw_text, ensure_ascii=False)
                parsed_value = raw_text

            self._emit_delta(arguments_delta(output_arguments))

            # Store the result and clear all per-parameter state.
            self.should_emit_end_newline = False
            self.parameters[param_name] = parsed_value
            self.current_param_name = None
            self.current_param_value = ""
            self.current_param_value_converted = ""
            self.start_quote_emitted = False
            self.defer_current_parameter = False
            self.deferred_param_raw_value = ""
            return

        param_type = self._get_param_type(param_name)

        # Convert the complete parameter value by its declared type.
        converted_value = self._convert_param_value(param_value, param_type)

        # Only string-like parameters need a closing quote.
        if param_type in [
                'string', 'str', 'text', 'varchar', 'char', 'enum'
        ]:
            if not param_value and not self.start_quote_emitted:
                # Empty value and no opening quote was emitted yet:
                # output the complete empty string in one piece.
                closing = '""'
            else:
                # Otherwise only the end quote is still missing.
                closing = '"'
            self._emit_delta(arguments_delta(closing))

        self.should_emit_end_newline = False
        # Store the converted value and clear per-parameter state.
        self.parameters[param_name] = converted_value
        self.current_param_name = None
        self.current_param_value = ''
        self.current_param_value_converted = ''
        self.start_quote_emitted = False

    elif ends_function:
        # Close the JSON object, or emit an empty one when no parameter
        # was stored for this function.
        closing = '}' if self.parameters else '{}'
        self._emit_delta(arguments_delta(closing))
        self.current_function_open = False

    elif ends_tool_call:
        # Make sure the function is closed so the right brace is not lost.
        if self.current_function_open:
            # A parameter that is still open has to be closed first.
            if self.current_param_name:
                self._end_element('parameter')
            # Closing the function emits '}' or '{}'.
            self._end_element('function')

        # Final delta of this tool call.
        self._emit_delta(arguments_delta(''))

        # Text collected between tool calls is emitted if it is not blank.
        if self.text_content_buffer.strip():
            self._emit_delta(DeltaMessage(content=self.text_content_buffer))

        self._reset_xml_parser_after_tool_call()

_escape_xml_special_chars

_escape_xml_special_chars(text: str) -> str

Escape XML special characters.

Args:

- text: Original text

Returns: Escaped text

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _escape_xml_special_chars(self, text: str) -> str:
    """
    Replace the five XML special characters with their entities

    Args:
        text: Original text
    Returns:
        Text in which ``&``, ``<``, ``>``, ``"`` and ``'`` are replaced by
        ``&amp;``, ``&lt;``, ``&gt;``, ``&quot;`` and ``&apos;``
    """
    # One pass over the text; every input character is mapped exactly once,
    # so the '&' of a produced entity is never escaped again.
    entity_table = str.maketrans({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&apos;',
    })
    return text.translate(entity_table)

_extract_function_name

_extract_function_name(
    name: str, attrs: dict[str, str]
) -> Optional[str]

Extract function name from various formats

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _extract_function_name(self, name: str,
                           attrs: dict[str, str]) -> Optional[str]:
    """Return the function name carried by a start tag, if any.

    The ``name`` attribute wins when present; otherwise the tag name is
    accepted in the ``function=<name>`` form. Returns ``None`` when
    neither form applies.
    """
    if attrs and 'name' in attrs:
        return attrs['name']

    # Split at the first '=' only, so the name itself may contain '='.
    tag, separator, suffix = name.partition('=')
    if separator and tag == 'function':
        return suffix
    return None

_extract_parameter_name

_extract_parameter_name(
    name: str, attrs: dict[str, str]
) -> Optional[str]

Extract parameter name from various formats

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _extract_parameter_name(self, name: str,
                            attrs: dict[str, str]) -> Optional[str]:
    """Return the parameter name carried by a start tag, if any.

    The ``name`` attribute wins when present; otherwise the tag name is
    accepted in the ``parameter=<name>`` form. Returns ``None`` when
    neither form applies.
    """
    if attrs and 'name' in attrs:
        return attrs['name']

    # Split at the first '=' only, so the name itself may contain '='.
    tag, separator, suffix = name.partition('=')
    if separator and tag == 'parameter':
        return suffix
    return None

_find_next_complete_element

_find_next_complete_element(
    start_pos: int,
) -> tuple[Optional[str], int]

Find next complete XML element from specified position

Parameters:

Name Type Description Default
start_pos int

Position to start searching

required

Returns:

Type Description
Optional[str]

(Complete element string, element end position),

int

returns (None, start_pos) if no complete element found

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _find_next_complete_element(
        self, start_pos: int) -> tuple[Optional[str], int]:
    """
    Locate the next complete piece (tag or text run) of the streaming buffer

    Args:
        start_pos: Position to start searching

    Returns:
        (piece, position just past the piece), or (None, start_pos) when
        the buffer is exhausted or more data is needed to decide
    """
    pending = self.streaming_buffer[start_pos:]
    if not pending:
        return None, start_pos

    if not pending.startswith('<'):
        # Plain text: runs up to the next '<', or to the end of the buffer
        # (text is never held back waiting for more data).
        next_open = pending.find('<')
        text_len = len(pending) if next_open == -1 else next_open
        return pending[:text_len], start_pos + text_len

    # The piece starts with '<': see which of '<' and '>' comes next.
    next_open = pending.find('<', 1)
    next_close = pending.find('>', 1)

    if next_open != -1 and (next_close == -1 or next_open < next_close):
        # Another '<' shows up before any '>': cut right before it.
        return pending[:next_open], start_pos + next_open
    if next_close != -1:
        # '>' comes first: this is a complete XML element.
        cut = next_close + 1
        return pending[:cut], start_pos + cut

    # Neither '<' nor '>' follows, so the tag (if it is one) is unfinished.
    if self.current_call_id is not None:
        # Inside a tool call: wait for the rest of the tag.
        return None, start_pos
    if pending == '<tool_call>'[:len(pending)]:
        # Could still grow into '<tool_call>': wait for more data.
        return None, start_pos
    # Cannot become '<tool_call>': treat it as text.
    return pending, start_pos + len(pending)

_get_next_call_id

_get_next_call_id()

Generate unique call ID

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _get_next_call_id(self):
    """Return a new call ID: ``call_`` plus 24 hex digits of a UUID4"""
    random_hex = uuid.uuid4().hex
    return 'call_' + random_hex[:24]

_get_param_type

_get_param_type(param_name: str) -> str

Get the parameter type based on the tool configuration; defaults to string.

Args:

- param_name: Parameter name

Returns:

Type Description
str

Parameter type

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _get_param_type(self, param_name: str) -> str:
    """Look up the declared type of a parameter of the current function

    Only the first tool whose type is ``function`` and whose function name
    equals ``self.current_function_name`` is consulted. Its parameter
    schema may either be a dict with a ``properties`` mapping or a flat
    dict keyed by parameter name.

    Args:
        param_name: Parameter name

    Returns:
        ``self.repair_param_type`` applied to the declared type (or to
        ``'string'`` when the entry has no ``type``); ``'string'`` when no
        usable declaration is found
    """
    if not (self.tools and self.current_function_name):
        return 'string'

    for tool in self.tools:
        well_formed = (hasattr(tool, 'type') and hasattr(tool, 'function')
                       and hasattr(tool.function, 'name'))
        if not well_formed:
            continue
        is_current = (tool.type == 'function'
                      and tool.function.name == self.current_function_name)
        if not is_current:
            continue

        # First matching tool found; no later tool is looked at.
        if not hasattr(tool.function, 'parameters'):
            return 'string'

        schema = tool.function.parameters
        declaration = None
        if isinstance(schema, dict):
            if 'properties' in schema:
                # Schema with a 'properties' mapping; the flat form is not
                # tried when this key exists.
                properties = schema['properties']
                if param_name in properties:
                    declaration = properties[param_name]
            elif param_name in schema:
                # Flat form: parameters keyed directly by name.
                declaration = schema[param_name]

        if isinstance(declaration, dict):
            return self.repair_param_type(
                str(declaration.get('type', 'string')))
        return 'string'

    return 'string'

_merge_new_deltas_to_single_response

_merge_new_deltas_to_single_response(
    initial_count: int,
) -> DeltaMessage

Merge newly generated deltas from this processing into a single DeltaMessage

Parameters:

Name Type Description Default
initial_count int

Delta count before processing

required

Returns:

Type Description
DeltaMessage

Merged DeltaMessage containing all newly generated delta information

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _merge_new_deltas_to_single_response(
        self, initial_count: int) -> DeltaMessage:
    """
    Collapse the deltas produced since ``initial_count`` into one message

    Args:
        initial_count: Delta count before processing

    Returns:
        An empty DeltaMessage when nothing new was produced, the new delta
        itself when there is exactly one, otherwise a DeltaMessage with
        the concatenated content and the tool calls merged by call id
    """
    if len(self.deltas) <= initial_count:
        return DeltaMessage(content=None)

    fresh = self.deltas[initial_count:]
    if len(fresh) == 1:
        # A single new delta needs no merging.
        return fresh[0]

    combined_calls: list[DeltaToolCall] = []
    text_pieces = []

    for delta in fresh:
        if delta.content:
            text_pieces.append(delta.content)

        for call in delta.tool_calls or []:
            # First already-merged call that shares this call id, if any.
            target = next(
                (seen for seen in combined_calls if seen.id == call.id),
                None)
            if not (target and target.function):
                # Nothing to merge into: keep it as a new entry.
                combined_calls.append(call)
                continue

            incoming = call.function
            if incoming and incoming.name:
                target.function.name = incoming.name
            if incoming and incoming.arguments is not None:
                if target.function.arguments is None:
                    target.function.arguments = ''
                # Streamed JSON fragments are concatenated in order.
                target.function.arguments += incoming.arguments
            if call.type:
                target.type = call.type

    combined_text = ''.join(text_pieces)
    return DeltaMessage(content=combined_text if combined_text else None,
                        tool_calls=combined_calls)

_preprocess_xml_chunk

_preprocess_xml_chunk(chunk: str) -> str

Preprocess XML chunk, handle non-standard formats, and escape special characters

Parameters:

Name Type Description Default
chunk str

Original XML chunk

required

Returns:

Type Description
str

Processed XML chunk

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _preprocess_xml_chunk(self, chunk: str) -> str:
    """
    Turn one raw chunk into text that is safe to feed to the XML parser

    ``<function=name>`` / ``<parameter=name>`` are rewritten to the
    attribute form, special characters are escaped in everything that is
    not a tool-call tag, and the body of a parameter may be held back
    (accumulated) until ``</parameter>`` so it can be parsed as a whole.

    Args:
        chunk: Original XML chunk

    Returns:
        Processed XML chunk; ``""`` while a parameter body is accumulated
    """
    # A chunk starting with any tool-call token is structural markup.
    structural_tokens = (
        self.tool_call_start_token,
        self.tool_call_end_token,
        self.function_start_token,
        self.function_end_token,
        self.parameter_start_token,
        self.parameter_end_token,
    )
    is_tool_call = chunk.startswith(structural_tokens)

    # <function=name> -> <function name="name">
    processed = re.sub(r'<function=([^>]+)>', r'<function name="\1">',
                       chunk)
    # <parameter=name> -> <parameter name="name">
    processed = re.sub(r'<parameter=([^>]+)>', r'<parameter name="\1">',
                       processed)

    if self._pre_inside_parameter:
        if processed.startswith('</parameter>'):
            # End of the parameter: hand the accumulated raw text over for
            # deferred parsing (done in the end-element handler) and leave
            # accumulation mode.
            body_text = self._pre_param_buffer
            self.defer_current_parameter = True
            self.deferred_param_raw_value = body_text
            self._pre_inside_parameter = False
            self._pre_param_buffer = ""
            self._pre_current_param_name = None
            safe_text = self._escape_xml_special_chars(body_text)
            return f"{safe_text}</parameter>"

        if self._pre_param_buffer == "":
            # First content after the parameter start: decide whether the
            # body has to be deferred at all.
            if self._pre_current_param_name:
                param_type = self._get_param_type(
                    self._pre_current_param_name)
            else:
                param_type = 'string'

            # Container types are always deferred.
            is_complex_type = (param_type in ["array", "arr", "sequence"]
                               or param_type.startswith("dict")
                               or param_type.startswith("list"))
            # Objects are deferred only when the chunk has a container
            # symbol and a single quote (Python-literal style content).
            looks_like_literal = (param_type in ["object"]
                                  and ('[' in chunk or '{' in chunk
                                       or '(' in chunk) and "'" in chunk)

            if not (is_complex_type or looks_like_literal):
                # No deferral: leave parameter mode and pass the chunk
                # through escaped.
                self._pre_inside_parameter = False
                return self._escape_xml_special_chars(chunk)

        self._pre_param_buffer += chunk
        return ""

    if processed.startswith('<parameter name='):
        # Parameter start: remember its name and begin accumulating.
        name_match = re.match(r'<parameter name="([^"]+)">', processed)
        if name_match:
            self._pre_current_param_name = name_match.group(1)
        self._pre_inside_parameter = True
        self._pre_param_buffer = ""
        return processed

    # Anything that is not a tool-call tag is escaped, because the XML
    # parser reports errors on raw special characters.
    if is_tool_call:
        return processed
    return self._escape_xml_special_chars(processed)

_process_complete_xml_elements

_process_complete_xml_elements() -> bool

Process complete XML elements in buffer

Returns:

Name Type Description
bool bool

Whether complete elements were found and processed

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _process_complete_xml_elements(self) -> bool:
    """
    Feed every complete piece of the streaming buffer to the XML parser

    Pieces are taken from ``self.last_processed_pos`` onwards until the
    buffer is exhausted or the next piece is still incomplete. A piece
    whose handling raises is logged and stepped over.

    Returns:
        bool: Whether complete elements were found and processed
    """
    found_any = False

    while self.last_processed_pos < len(self.streaming_buffer):
        element, end_pos = self._find_next_complete_element(
            self.last_processed_pos)
        if element is None:
            # Nothing complete yet: wait for more data.
            break

        if self._should_skip_element(element):
            self.last_processed_pos = end_pos
            continue

        try:
            prepared = self._preprocess_xml_chunk(element)
            head = prepared.strip()
            opens_tool_call = head.startswith('<tool_call>')
            opens_function = head.startswith('<function name=')

            # Very first tool call: flush the text collected so far.
            if ((opens_tool_call or opens_function)
                    and self.tool_call_index == 0
                    and self.text_content_buffer):
                self._emit_delta(
                    DeltaMessage(content=self.text_content_buffer))
                # Start collecting afresh for later text content.
                self.text_content_buffer = ''

            # A new tool_call begins while an earlier one is still current:
            # close the earlier one first, keeping the generated deltas.
            if (opens_tool_call and self.tool_call_index > 0
                    and self.current_call_id):
                if self.current_param_name:
                    self._end_element('parameter')
                if self.current_function_open or self.current_function_name:
                    self._end_element('function')
                # Tail delta of the earlier tool call.
                tail_delta = DeltaMessage(
                    role=None,
                    content=None,
                    reasoning_content=None,
                    tool_calls=[
                        DeltaToolCall(index=self.tool_call_index - 1,
                                      id=self.current_call_id,
                                      type='function',
                                      function=DeltaFunctionCall(
                                          name=None, arguments=''))
                    ])
                self._emit_delta(tail_delta)
                # Fresh XML parser and per-call state.
                self._reset_xml_parser_after_tool_call()

            self.parser.Parse(prepared, False)
            found_any = True

        except Exception as e:
            logger.warning("Error when parsing XML elements: %s", e)

        # Move past the piece whether or not handling it succeeded.
        self.last_processed_pos = end_pos

    return found_any

_reset_xml_parser_after_tool_call

_reset_xml_parser_after_tool_call()

Each tool_call is treated as a separate XML document, so we need to reset the parser after each tool_call.

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _reset_xml_parser_after_tool_call(self):
    """
    Start over with a new XML parser and cleared per-call state

    Every tool_call is handled as its own XML document, which is why the
    parser is recreated after each one. The id of the call that just ended
    (if any) is kept in ``last_completed_call_id``.
    """
    # New XML parser, configured via setup_parser().
    self.parser = ParserCreate()
    self.setup_parser()

    # Remember which call just finished before its id is cleared.
    finished_call_id = self.current_call_id
    if finished_call_id:
        self.last_completed_call_id = finished_call_id

    # Current tool_call / function state
    self.current_call_id = self.current_function_name = None
    self.current_function_open = False
    self.parameters = {}

    # Current parameter state
    self.current_param_name = None
    self.current_param_value = self.current_param_value_converted = ''
    self.current_param_is_first = False
    self.should_emit_end_newline = self.start_quote_emitted = False
    self.text_content_buffer = ''

    # Preprocessing and deferred-parsing state
    self._pre_inside_parameter = self.defer_current_parameter = False
    self._pre_param_buffer = self.deferred_param_raw_value = ""
    self._pre_current_param_name = None

_should_skip_element

_should_skip_element(element: str) -> bool

Determine whether an element should be skipped

Parameters:

Name Type Description Default
element str

Element to evaluate

required

Returns:

Name Type Description
bool bool

True means should skip, False means should process

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _should_skip_element(self, element: str) -> bool:
    """
    Decide whether a piece of the buffer bypasses the XML parser

    Text seen while no tool call is being parsed is appended to
    ``self.text_content_buffer`` before it is skipped.

    Args:
        element: Element to evaluate

    Returns:
        bool: True means should skip, False means should process
    """
    # Start tags of tool_call / function / parameter are always processed.
    start_tokens = (
        self.tool_call_start_token,
        self.function_start_token,
        self.parameter_start_token,
    )
    if element.startswith(start_tokens):
        return False

    # Inside a tool call the piece might be a parameter value: process it.
    if self.current_call_id is not None:
        return False

    # Outside of tool calls everything else is plain text: collect it
    # (when not empty) and skip it either way.
    if element:
        self.text_content_buffer += element
    return True

_start_element

_start_element(name: str, attrs: dict[str, str])

Handle XML start element events

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def _start_element(self, name: str, attrs: dict[str, str]):
    """Handle XML start element events.

    Dispatches on the tag name:

    * ``root``: ignored.
    * ``tool_call``: closes any still-open tags, clears the collected
      parameters, takes a new call id and advances ``tool_call_index``.
    * ``function...``: opens an implicit ``tool_call`` when none is
      active, records the function name and, when a name was extracted,
      emits a delta carrying that name with empty arguments.
    * ``parameter...``: resets the per-parameter state and, when a name
      was extracted, emits the JSON fragment that introduces the key
      (``{"name": `` for the first key, ``, "name": `` afterwards).

    Args:
        name: Tag name reported by the parser.
        attrs: Attributes reported for the tag.
    """
    if name == 'root':
        return

    if name == 'tool_call':
        # Finish whatever the previous call left open before starting anew.
        self._auto_close_open_parameter_if_needed('tool_call')

        self.parameters = {}
        self.current_call_id = self._get_next_call_id()
        self.current_param_is_first = True
        self.tool_call_index += 1
        return

    # Prefix match; it also covers the bare tag name itself.
    if name.startswith('function'):
        # A function without an enclosing tool_call gets one implicitly.
        if not self.current_call_id:
            self._start_element('tool_call', {})
        # Close a dangling parameter/function before opening this one.
        self._auto_close_open_parameter_if_needed('function')

        fn_name = self._extract_function_name(name, attrs)
        self.current_function_name = fn_name
        self.current_function_open = True
        if not fn_name:
            return

        header = DeltaToolCall(index=self.tool_call_index - 1,
                               id=self.current_call_id,
                               type='function',
                               function=DeltaFunctionCall(name=fn_name,
                                                          arguments=''))
        self._emit_delta(DeltaMessage(tool_calls=[header]))
        return

    if name.startswith('parameter'):
        # A parameter that never saw its end tag is closed first.
        self._auto_close_open_parameter_if_needed('parameter')

        key = self._extract_parameter_name(name, attrs)
        self.current_param_name = key
        self.current_param_value = ''
        self.current_param_value_converted = ''
        self.start_quote_emitted = False  # Reset start quote flag

        if not key:
            return

        # Only the key and colon are emitted here; whether the value needs
        # quotes is decided once its type is known.
        opens_object = not self.parameters
        if opens_object:
            fragment = f'{{"{key}": '
        else:
            fragment = f', "{key}": '

        piece = DeltaToolCall(index=self.tool_call_index - 1,
                              id=self.current_call_id,
                              type='function',
                              function=DeltaFunctionCall(name=None,
                                                         arguments=fragment))
        self._emit_delta(DeltaMessage(tool_calls=[piece]))
        self.current_param_is_first = opens_object

parse_single_streaming_chunks

parse_single_streaming_chunks(
    xml_chunk: str,
) -> DeltaMessage

Parse a single streaming XML chunk and return a Delta response. This is the actual streaming interface: it receives chunks one by one and maintains internal state.

Parameters:

Name Type Description Default
xml_chunk str

Single XML chunk string

required

Returns: DeltaMessage: Contains delta information generated by this chunk, returns empty response if no complete elements

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage:
    """
    Parse a single streaming XML chunk and return a Delta response.

    This is the actual streaming interface: it receives chunks one by
    one and maintains internal state. The chunk is appended to
    ``self.streaming_buffer``, complete elements are processed, and any
    deltas appended to ``self.deltas`` during this call are merged into
    the returned message. When the chunk contains a function or
    tool-call end token whose closing delta was not produced, the
    corresponding ``_end_element`` calls are made here as a fallback.

    Args:
        xml_chunk: Single XML chunk string
    Returns:
        DeltaMessage: Contains delta information generated by this chunk,
        returns empty response (``content=None``) if no complete elements
    """
    # Remember how many deltas existed on entry so that only the ones
    # produced by this chunk are inspected and merged below.
    initial_delta_count = len(self.deltas)

    self.streaming_buffer += xml_chunk

    # Truthiness is used as "something complete was processed";
    # the helper is defined elsewhere (exact return type not visible here).
    found_elements = self._process_complete_xml_elements()

    if found_elements:
        # If complete elements found, check if end events were missed
        # some tags may not have been triggered
        try:
            # Snapshot taken BEFORE any fallback below runs: deltas emitted
            # by the first fallback are not visible to the second check.
            new_deltas = self.deltas[initial_delta_count:]
            # If this chunk contains the function end token
            # but didn't generate the closing brace, then complete it
            if (self.current_call_id is not None
                    and self.function_end_token in xml_chunk):

                # A function close is recognised by a delta for the current
                # call whose arguments string is exactly:
                # - '}'  (non-empty parameter ending)
                # - '{}' (empty parameter function)
                has_function_close = any((td.tool_calls and any(
                    (tc.function and tc.id == self.current_call_id
                     and isinstance(tc.function.arguments, str) and
                     (tc.function.arguments in ('}', '{}')))
                    for tc in td.tool_calls)) for td in new_deltas)
                if not has_function_close:
                    # Close potentially unclosed element
                    # (parameter first, then the function around it)
                    if self.current_param_name:
                        self._end_element('parameter')
                    if self.current_function_name:
                        self._end_element('function')
            # If this chunk contains the tool-call end token
            # but didn't generate final empty delta, then complete it.
            # current_call_id is re-read here, after the fallback above.
            if (self.current_call_id is not None
                    and self.tool_call_end_token in xml_chunk):
                # The tool-call close is recognised by a 'function'-typed
                # delta for the current call with empty-string arguments.
                has_toolcall_close = any((td.tool_calls and any(
                    (tc.type == 'function' and tc.function and tc.function.
                     arguments == '' and tc.id == self.current_call_id)
                    for tc in td.tool_calls)) for td in new_deltas)
                if not has_toolcall_close:
                    # Close potentially unclosed element, innermost first
                    if self.current_param_name:
                        self._end_element('parameter')
                    if self.current_function_name:
                        self._end_element('function')
                    self._end_element('tool_call')
        except Exception as e:
            # Best effort: a failing fallback is logged and whatever deltas
            # were already produced are still merged and returned.
            logger.warning("Error with fallback parsing: %s", e)
        # Merge newly generated deltas into single response
        result_delta = self._merge_new_deltas_to_single_response(
            initial_delta_count)
        return result_delta
    else:
        # No complete elements, check if there's unoutput text content
        if self.text_content_buffer and self.tool_call_index == 0:
            # Has text content but no tool_call yet, output text content
            text_delta = DeltaMessage(content=self.text_content_buffer)
            self._emit_delta(text_delta)
            # Clear buffer to avoid duplicate output
            self.text_content_buffer = ''
            return text_delta

        # If this chunk contains end tags but wasn't triggered by parser,
        # manually complete end events.
        # NOTE(review): an earlier comment here said this only executes
        # when still on the same call as on entry (to avoid closing a new
        # call in multi tool-call output), but no call id is recorded on
        # entry and no such comparison is made; the only guard is that a
        # call is currently open. Confirm whether that guard is intended.
        if (self.current_call_id is not None
                and (self.function_end_token in xml_chunk
                     or self.tool_call_end_token in xml_chunk)):
            # Close potentially unclosed element
            if self.current_param_name:
                self._end_element('parameter')
            # The function is only closed when its own end token is present.
            if self.function_end_token in xml_chunk and \
                self.current_function_name:
                self._end_element('function')
            if self.tool_call_end_token in xml_chunk:
                self._end_element('tool_call')
            # Return the merged delta result generated by this fallback
            result_delta = self._merge_new_deltas_to_single_response(
                initial_delta_count)
            return result_delta

        # No complete elements, return empty response
        return DeltaMessage(content=None)

repair_param_type

repair_param_type(param_type: str) -> str

Repair unknown parameter types by treating them as string.

Args: param_type: Parameter type

Returns:

Type Description
str

Repaired parameter type

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def repair_param_type(self, param_type: str) -> str:
    """Repair unknown parameter types by treating them as string.

    A type is kept as-is when it matches one of the known exact names
    or begins with one of the known prefixes (matching is
    case-sensitive); anything else is replaced by ``'string'``.

    Args:
        param_type: Parameter type

    Returns:
        Repaired parameter type
    """
    known_names = (
        # textual
        'string', 'str', 'text', 'varchar', 'char', 'enum',
        # boolean-like
        'boolean', 'bool', 'binary',
        # containers
        'object', 'array', 'arr', 'sequence',
    )
    known_prefixes = (
        # numeric families
        'int', 'uint', 'long', 'short', 'unsigned', 'num', 'float',
        # container families
        'dict', 'list',
    )
    if param_type in known_names or param_type.startswith(known_prefixes):
        return param_type
    return 'string'

reset_streaming_state

reset_streaming_state()

Reset streaming parsing state

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def reset_streaming_state(self):
    """Reset streaming parsing state.

    Clears the emitted deltas, the per-call and per-parameter
    bookkeeping, the streaming and text buffers and the
    preprocessing/deferred-parsing flags, then replaces ``self.parser``
    with a new object from ``ParserCreate()`` and wires it up through
    ``setup_parser()``.
    """
    # Output produced so far and tool-call counters.
    self.deltas = []
    self.tool_call_index = 0
    self.current_call_id = self.last_completed_call_id = None

    # Function / parameter currently being assembled.
    self.current_function_name = self.current_param_name = None
    self.current_function_open = False
    self.parameters = {}
    self.current_param_value = self.current_param_value_converted = ''
    self.current_param_is_first = False
    self.should_emit_end_newline = False
    self.start_quote_emitted = False

    # Raw input buffer, scan position and pending plain text.
    self.streaming_buffer = ''
    self.last_processed_pos = 0
    self.text_content_buffer = ''

    # Preprocessing and deferred-parsing state.
    self._pre_inside_parameter = False
    self._pre_param_buffer = ""
    self._pre_current_param_name = None
    self.defer_current_parameter = False
    self.deferred_param_raw_value = ""

    # A fresh parser is needed; handlers are attached by setup_parser().
    self.parser = ParserCreate()
    self.setup_parser()

set_tools

set_tools(
    tools: Union[list[ChatCompletionToolsParam], None],
)

Set tool configuration information

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def set_tools(self, tools: Union[list[ChatCompletionToolsParam], None]) -> None:
    """Set tool configuration information.

    Args:
        tools: Tool definitions for the request, or ``None``. The value
            is stored on ``self.tools`` exactly as given; nothing is
            validated or copied here.
    """
    self.tools = tools

setup_parser

setup_parser()

Set up XML parser event handlers

Source code in vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
def setup_parser(self):
    """Set up XML parser event handlers.

    Turns on ``buffer_text`` on ``self.parser`` and points its start,
    end and character-data handlers at this object's ``_start_element``,
    ``_end_element`` and ``_char_data`` methods.
    """
    xml_parser = self.parser
    xml_parser.buffer_text = True
    xml_parser.StartElementHandler = self._start_element
    xml_parser.EndElementHandler = self._end_element
    xml_parser.CharacterDataHandler = self._char_data