Skip to content

vllm.reasoning.deepseek_r1_reasoning_parser

DeepSeekR1ReasoningParser

Bases: BaseThinkingReasoningParser

Reasoning parser for DeepSeek R1 model.

The DeepSeek R1 model uses `<think>...</think>` tokens to denote reasoning text. This parser extracts the reasoning content from the model output.

Source code in vllm/reasoning/deepseek_r1_reasoning_parser.py
@ReasoningParserManager.register_module("deepseek_r1")
class DeepSeekR1ReasoningParser(BaseThinkingReasoningParser):
    """
    Reasoning parser for DeepSeek R1 model.

    The DeepSeek R1 model uses <think>...</think> tokens to denote reasoning
    text. This parser extracts the reasoning content from the model output.

    The streaming override additionally handles output in which the start
    token has not appeared in any token seen so far: such text is treated
    as reasoning until the end token is encountered.
    """

    @property
    def start_token(self) -> str:
        """The token that starts reasoning content."""
        return "<think>"

    @property
    def end_token(self) -> str:
        """The token that ends reasoning content."""
        return "</think>"

    def extract_reasoning_content_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> Union[DeltaMessage, None]:
        """Classify one streamed delta as reasoning and/or regular content.

        The base-class implementation runs first. Its result is returned
        unchanged when it is ``None`` or when the start token id occurs in
        ``previous_token_ids`` or ``delta_token_ids``. Otherwise the delta is
        re-classified using the end token alone.

        Args:
            previous_text: Forwarded to the base implementation only.
            current_text: Forwarded to the base implementation only.
            delta_text: Text of the current delta; split around the end
                token when that token is part of this delta.
            previous_token_ids: Token ids preceding the current delta.
            current_token_ids: Forwarded to the base implementation only.
            delta_token_ids: Token ids of the current delta.

        Returns:
            ``None`` or the base result as described above; otherwise a
            ``DeltaMessage`` carrying ``reasoning_content``, ``content``,
            or both.
        """
        # NOTE(review): start_token_id / end_token_id are not defined in this
        # class; presumably they are provided by the base class -- confirm.
        ret = super().extract_reasoning_content_streaming(
            previous_text,
            current_text,
            delta_text,
            previous_token_ids,
            current_token_ids,
            delta_token_ids,
        )

        # Nothing to override: the base produced no message, or a start
        # token has been seen and the base result is kept as is.
        if (ret is None or self.start_token_id in previous_token_ids
                or self.start_token_id in delta_token_ids):
            return ret

        if self.end_token_id in delta_token_ids:
            # End token arrives in this delta: text before it is reasoning,
            # text after it is content. partition() is used rather than
            # find() + slicing so that a delta whose text lacks the literal
            # end token (find() == -1) is not sliced at a bogus index; in
            # that case the whole delta is kept as reasoning content.
            reasoning_content, _, content = delta_text.partition(
                self.end_token)
            return DeltaMessage(
                reasoning_content=reasoning_content,
                content=content or None,
            )

        if self.end_token_id in previous_token_ids:
            # End token was already emitted earlier: plain content.
            return DeltaMessage(content=delta_text)

        # No end token seen yet in previous or delta: still reasoning.
        return DeltaMessage(reasoning_content=delta_text)

end_token property

end_token: str

The token that ends reasoning content.

start_token property

start_token: str

The token that starts reasoning content.

extract_reasoning_content_streaming

extract_reasoning_content_streaming(
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
) -> Union[DeltaMessage, None]
Source code in vllm/reasoning/deepseek_r1_reasoning_parser.py
def extract_reasoning_content_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
) -> Union[DeltaMessage, None]:
    """Classify one streamed delta as reasoning and/or regular content.

    The base-class implementation runs first. Its result is returned
    unchanged when it is ``None`` or when the start token id occurs in
    ``previous_token_ids`` or ``delta_token_ids``. Otherwise the delta is
    re-classified using the end token alone.

    Args:
        previous_text: Forwarded to the base implementation only.
        current_text: Forwarded to the base implementation only.
        delta_text: Text of the current delta; split around the end token
            when that token is part of this delta.
        previous_token_ids: Token ids preceding the current delta.
        current_token_ids: Forwarded to the base implementation only.
        delta_token_ids: Token ids of the current delta.

    Returns:
        ``None`` or the base result as described above; otherwise a
        ``DeltaMessage`` carrying ``reasoning_content``, ``content``, or
        both.
    """
    # NOTE(review): start_token_id / end_token_id / end_token are not defined
    # in this listing; presumably they come from the enclosing class and its
    # base -- confirm.
    ret = super().extract_reasoning_content_streaming(
        previous_text,
        current_text,
        delta_text,
        previous_token_ids,
        current_token_ids,
        delta_token_ids,
    )

    # Nothing to override: the base produced no message, or a start token
    # has been seen and the base result is kept as is.
    if (ret is None or self.start_token_id in previous_token_ids
            or self.start_token_id in delta_token_ids):
        return ret

    if self.end_token_id in delta_token_ids:
        # End token arrives in this delta: text before it is reasoning, text
        # after it is content. partition() is used rather than find() +
        # slicing so that a delta whose text lacks the literal end token
        # (find() == -1) is not sliced at a bogus index; in that case the
        # whole delta is kept as reasoning content.
        reasoning_content, _, content = delta_text.partition(self.end_token)
        return DeltaMessage(
            reasoning_content=reasoning_content,
            content=content or None,
        )

    if self.end_token_id in previous_token_ids:
        # End token was already emitted earlier: plain content.
        return DeltaMessage(content=delta_text)

    # No end token seen yet in previous or delta: still reasoning.
    return DeltaMessage(reasoning_content=delta_text)