2 changes: 1 addition & 1 deletion .github/workflows/linux.yml
@@ -782,7 +782,7 @@ jobs:
run: |
source ${{ env.INSTALL_DIR }}/setupvars.sh
chmod +x ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching
${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*"
${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*" --gtest_filter="DeepSeekR1ReasoningParserTest.*" --gtest_filter="ParserTest.*"

- name: Test Continuous Batching Tools
if: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching }}
2 changes: 1 addition & 1 deletion .github/workflows/mac.yml
@@ -695,7 +695,7 @@ jobs:
run: |
source ${{ env.INSTALL_DIR }}/setupvars.sh
chmod +x ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching
${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*"
${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*" --gtest_filter="DeepSeekR1ReasoningParserTest.*" --gtest_filter="ParserTest.*

- name: Test C++ Tools
run: |
2 changes: 1 addition & 1 deletion .github/workflows/windows.yml
@@ -866,7 +866,7 @@ jobs:
- name: gtests unit tests
run: |
. "${{ env.INSTALL_DIR }}/setupvars.ps1"
& "${{ env.INSTALL_DIR }}/tests/tests_continuous_batching.exe" --gtest_filter="-AddSecondInputTest.*"
& "${{ env.INSTALL_DIR }}/tests/tests_continuous_batching.exe" --gtest_filter="-AddSecondInputTest.*" --gtest_filter="DeepSeekR1ReasoningParserTest.*" --gtest_filter="ParserTest.*

- name: Test C++ Tools
run: |
1 change: 1 addition & 0 deletions samples/cpp/text_generation/CMakeLists.txt
@@ -29,6 +29,7 @@ set (SAMPLE_LIST
lora_greedy_causal_lm
multinomial_causal_lm
prompt_lookup_decoding_lm
parsed_output_sample
speculative_decoding_lm)

foreach(sample IN LISTS SAMPLE_LIST)
52 changes: 52 additions & 0 deletions samples/cpp/text_generation/parsed_output_sample.cpp
@@ -0,0 +1,52 @@
// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "openvino/genai/llm_pipeline.hpp"
#include "openvino/genai/parsers.hpp"
#include "openvino/genai/text_streamer.hpp"
Copilot AI commented on Sep 25, 2025:
The ParsingState type is used in a using declaration but is not defined in any of the included headers. This will result in a compilation error.
Suggested change:
#include "openvino/genai/text_streamer.hpp"
#include "openvino/genai/parsing_state.hpp"

class CurrentStreamer : public ov::genai::TextParserStreamer {
public:
    CurrentStreamer(const ov::genai::Tokenizer& tokenizer)
        : ov::genai::TextParserStreamer(tokenizer) {}
    ov::genai::StreamingStatus write(ov::genai::ParsedMessage& message) override {
        std::cout << message["content"].get_string() << std::flush;
        return ov::genai::StreamingStatus::RUNNING;
    }
};


int main(int argc, char* argv[]) try {
    if (argc < 2 || argc > 3) {
        throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> [<DEVICE>]");
    }
    std::string prompt = "<|begin▁of▁sentence|><|User|>Why is the sky blue?<|Assistant|><think>";
    std::string models_path = argv[1];

    // Default device is CPU; it can be overridden by the second argument.
    std::string device = (argc == 3) ? argv[2] : "CPU";  // GPU and NPU can be used as well.
    ov::genai::LLMPipeline pipe(models_path, device);

    ov::genai::GenerationConfig config;
    config.max_new_tokens = 1000;

    auto tok = pipe.get_tokenizer();
    std::shared_ptr<CurrentStreamer> streamer = std::make_shared<CurrentStreamer>(tok);

    pipe.generate(prompt, config, streamer);
} catch (const std::exception& error) {
    try {
        std::cerr << error.what() << '\n';
    } catch (const std::ios_base::failure&) {}
    return EXIT_FAILURE;
} catch (...) {
    try {
        std::cerr << "Non-exception object thrown\n";
    } catch (const std::ios_base::failure&) {}
    return EXIT_FAILURE;
}
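
A possible follow-up in the sample (a sketch, not part of this diff): after generate() returns, the message accumulated during streaming can be read back through get_parsed_message(); the "content" accessor follows the usage in write() above.

    // After pipe.generate(...), read back the accumulated parse result:
    ov::genai::ParsedMessage parsed = streamer->get_parsed_message();
    std::cout << "\nFull content: " << parsed["content"].get_string() << std::endl;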
33 changes: 33 additions & 0 deletions samples/python/text_generation/chat_sample.py
@@ -36,3 +36,36 @@ def main():

if '__main__' == __name__:
    main()

# Design sketch (not executable as-is): illustrates the planned parser API.
pipe = openvino_genai.LLMPipeline(args.model_dir, device)

prompt = "What is the weather in New York today?"
res = pipe.generate(prompt, max_new_tokens=100, streamer=streamer)
print(res.texts[0])

res.parsed['tool_calls']

class LlamaToolCallParser(ParserBase):
    def parse(self, parsed_data: ParsedData) -> ParsedData:
        # Process parsed_data, e.g. extract tool calls or other fields from content.
        return new_parsed_output

llama_parser = LlamaToolCallParser()
# A parser can be passed as an instance or by its registered name.
res = pipe.generate(prompt, parsers=[llama_parser], max_new_tokens=100)  # or parsers=["Llama32PythonicToolParser"]

# At the beginning msg['original_content'] is filled with the full text.
msg = res.texts[i]
for parser in m_parsers:
    msg = parser.parse(msg)

# At the end msg is filled with all parsed fields:
parsed_data = {
    'original_content': '<|system|>You are a helpful assistant... I will call the `get_weather` function with the location… \n\nfunctools[{"name": "get_weather", "arguments": {"location": "New York", "unit": "celsius"}}]<|end|>',
    'content': 'blah blah',
    'reasoning_content': '',
    'tool_calls': '[{"name":"get_weather","arguments":{"location":"New York, NY","unit":"celsius"}}]',
}

res.parsed: ParsedData
4 changes: 4 additions & 0 deletions src/cpp/include/openvino/genai/generation_config.hpp
@@ -13,6 +13,7 @@
#include "openvino/genai/tokenizer.hpp"
#include "openvino/genai/scheduler_config.hpp"
#include "openvino/genai/lora_adapter.hpp"
#include "openvino/genai/parsers.hpp"

namespace ov {
namespace genai {
@@ -348,6 +349,9 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
bool is_prompt_lookup() const;
bool is_structured_output_generation() const;

// Parsers to apply to the generated text: either registered parser names or ParserBase instances.
std::vector<std::variant<std::string, std::shared_ptr<ParserBase>>> parsers;

OPENVINO_DEPRECATED("Please, use `is_assisting_generation()` instead of `is_speculative_decoding()`. This method will be removed in 2026.0.0 release")
bool is_speculative_decoding() const;

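
A minimal usage sketch for the new parsers field (assumptions: the registered name "Llama32PythonicToolParser" from parsers.hpp, a placeholder "model_dir" path, and the JsonContainer accessors used in the sample above — not part of this diff):

    #include <iostream>

    #include "openvino/genai/llm_pipeline.hpp"
    #include "openvino/genai/parsers.hpp"

    int main() {
        ov::genai::LLMPipeline pipe("model_dir", "CPU");

        ov::genai::GenerationConfig config;
        config.max_new_tokens = 100;
        // Parsers can be given by registered name or as ParserBase instances.
        config.parsers = {std::string("Llama32PythonicToolParser")};
        // config.parsers = {std::make_shared<ov::genai::Llama32PythonicToolParser>()};

        auto res = pipe.generate("What is the weather in New York today?", config);
        // Parsed fields are expected in res.parsed (see llm/pipeline.cpp below).
        std::cout << res.parsed[0]["tool_calls"].get_string() << std::endl;
    }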
3 changes: 3 additions & 0 deletions src/cpp/include/openvino/genai/llm_pipeline.hpp
@@ -15,6 +15,7 @@
#include "openvino/genai/perf_metrics.hpp"
#include "openvino/genai/scheduler_config.hpp"
#include "openvino/genai/common_types.hpp"
#include "openvino/genai/json_container.hpp"

namespace ov {
namespace genai {
@@ -68,6 +69,8 @@ class DecodedResults {
std::vector<float> scores;
PerfMetrics perf_metrics;
std::shared_ptr<ExtendedPerfMetrics> extended_perf_metrics;
std::vector<JsonContainer> parsed;

// @brief Convert DecodedResults to a string.
operator std::string() const {
111 changes: 111 additions & 0 deletions src/cpp/include/openvino/genai/parsers.hpp
@@ -0,0 +1,111 @@
// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once
#include <string>
#include <memory>
#include <variant>
#include <map>
#include <functional>
#include <optional>
#include <vector>
#include "openvino/genai/json_container.hpp"

namespace ov {
namespace genai {

using ParsedMessage = JsonContainer;

class IncrementalParserBase {
public:
    IncrementalParserBase() = default;
    virtual ~IncrementalParserBase() = default;

    // Returns the filtered text to be appended to the message content.
    virtual std::string parse(
        ParsedMessage& msg,
        const std::string& previous_text,
        std::string& delta_text,
        const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
        const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
    ) = 0;

    virtual bool is_active() const = 0;
    static std::shared_ptr<IncrementalParserBase> get_parser(std::string name);
};

// Forward declaration
class ReasoningParserImpl;

class ReasoningParser : public IncrementalParserBase {
private:
    std::shared_ptr<ReasoningParserImpl> m_impl;
public:
    ReasoningParser(bool starts_with_thinking = true,
                    bool keep_original_content = true);

    std::string parse(
        ParsedMessage& msg,
        const std::string& previous_text,
        std::string& delta_text,
        const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
        const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
    ) override;
    bool is_active() const override;
};

class DeepSeekR1ReasoningParser : public ReasoningParser {
public:
    DeepSeekR1ReasoningParser(bool starts_with_thinking = true) : ReasoningParser(starts_with_thinking) {}
    static std::string name() { return "DeepSeekR1ReasoningParser"; }
};

class Phi4ReasoningParser : public ReasoningParser {
public:
    Phi4ReasoningParser(bool starts_with_thinking = false) : ReasoningParser(starts_with_thinking) {}
    static std::string name() { return "Phi4ReasoningParser"; }
};

class ParserBase {
public:
    ParserBase() = default;
    virtual ~ParserBase() = default;

    virtual ParsedMessage parse(ParsedMessage& msg) = 0;
    static std::shared_ptr<ParserBase> get_parser(std::string name);
};

using ParserVariant = std::variant<std::shared_ptr<IncrementalParserBase>, std::string>;

// Does not modify the original content; only extracts and adds tool calls.
class Llama32PythonicToolParser : public ParserBase {
public:
    // TODO: Check that vLLM has the same default.
    Llama32PythonicToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}

    ParsedMessage parse(ParsedMessage& input) override;
    static std::string name() { return "Llama32PythonicToolParser"; }
private:
    bool m_keep_original_content = true;
};

class BaseReasoningParser : public ParserBase {
public:
    BaseReasoningParser(bool expect_open_tag = true,
                        bool keep_original_content = true,
                        std::string open_tag = "<think>",
                        std::string close_tag = "</think>") :
        m_expect_open_tag(expect_open_tag),
        m_keep_original_content(keep_original_content),
        m_open_tag(open_tag),
        m_close_tag(close_tag) {}

    ParsedMessage parse(ParsedMessage& input) override;

private:
    bool m_expect_open_tag = true;
    bool m_keep_original_content = true;
    std::string m_open_tag = "<think>";
    std::string m_close_tag = "</think>";
};


} // namespace genai
} // namespace ov
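
For illustration, a minimal custom parser against this interface (a sketch; FunctoolsToolParser is hypothetical, and the operator[]/get_string() usage follows the sample above):

    #include <string>

    #include "openvino/genai/parsers.hpp"

    // Hypothetical parser: moves a trailing "functools[...]" payload out of
    // "content" and into "tool_calls", similar in spirit to Llama32PythonicToolParser.
    class FunctoolsToolParser : public ov::genai::ParserBase {
    public:
        ov::genai::ParsedMessage parse(ov::genai::ParsedMessage& input) override {
            std::string content = input["content"].get_string();
            const std::string marker = "functools";
            auto pos = content.find(marker);
            if (pos != std::string::npos) {
                input["tool_calls"] = content.substr(pos + marker.size());
                input["content"] = content.substr(0, pos);
            }
            return input;
        }
        static std::string name() { return "FunctoolsToolParser"; }
    };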
17 changes: 17 additions & 0 deletions src/cpp/include/openvino/genai/text_streamer.hpp
@@ -5,6 +5,7 @@

#include "openvino/genai/streamer_base.hpp"
#include "openvino/genai/tokenizer.hpp"
#include "openvino/genai/parsers.hpp"

namespace ov {
namespace genai {
@@ -46,5 +47,21 @@ class OPENVINO_GENAI_EXPORTS TextStreamer : public StreamerBase {
void compute_decoded_length_for_position(size_t cache_position);
};

class OPENVINO_GENAI_EXPORTS TextParserStreamer : public TextStreamer {
public:
    TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers = {});

    virtual StreamingStatus write(ParsedMessage& message) = 0;

    CallbackTypeVariant write(std::string message);

    ParsedMessage get_parsed_message() const { return m_parsed_message; }
    std::vector<std::shared_ptr<IncrementalParserBase>> get_parsers() const { return m_parsers; }
private:
    ParsedMessage m_parsed_message;
    std::string m_text_buffer;
    std::vector<std::shared_ptr<IncrementalParserBase>> m_parsers;
};

} // namespace genai
} // namespace ov
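
A sketch of a streamer wired with an incremental parser (assumes TextParserStreamer runs its parsers over each decoded chunk before dispatching to write(), and that the reasoning parser routes "<think>...</think>" text into "reasoning_content" rather than "content"):

    #include <iostream>

    #include "openvino/genai/text_streamer.hpp"

    // Streams only the final answer; reasoning text is filtered out by the parser.
    class AnswerOnlyStreamer : public ov::genai::TextParserStreamer {
    public:
        explicit AnswerOnlyStreamer(const ov::genai::Tokenizer& tokenizer)
            : ov::genai::TextParserStreamer(
                  tokenizer, {std::make_shared<ov::genai::DeepSeekR1ReasoningParser>()}) {}

        ov::genai::StreamingStatus write(ov::genai::ParsedMessage& message) override {
            std::cout << message["content"].get_string() << std::flush;
            return ov::genai::StreamingStatus::RUNNING;
        }
    };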
4 changes: 2 additions & 2 deletions src/cpp/src/continuous_batching/pipeline.cpp
@@ -58,7 +58,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::filesystem::p

auto model = utils::read_model(models_path, properties);
auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model);
properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path;
// properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path;
auto tokenizer = ov::genai::Tokenizer(models_path, tokenizer_properties);
auto generation_config = utils::from_config_json_if_exists(models_path);

@@ -98,7 +98,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline(

auto model = utils::read_model(models_path, properties_without_draft_model);
auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model);
properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path;
// properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path;

auto generation_config = utils::from_config_json_if_exists(models_path);

1 change: 1 addition & 0 deletions src/cpp/src/generation_config.cpp
@@ -153,6 +153,7 @@ void GenerationConfig::update_generation_config(const ov::AnyMap& properties) {

// Structured output
read_anymap_param(properties, "structured_output_config", structured_output_config);
read_anymap_param(properties, "parsers", parsers);
}


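
Since "parsers" is now read from the properties map, it should also be reachable through the AnyMap generate() overload — a sketch (assumes ov::Any holding the field's exact vector type, and a placeholder "model_dir"):

    #include "openvino/genai/llm_pipeline.hpp"
    #include "openvino/genai/parsers.hpp"

    int main() {
        ov::genai::LLMPipeline pipe("model_dir", "CPU");

        std::vector<std::variant<std::string, std::shared_ptr<ov::genai::ParserBase>>> parsers{
            std::string("Llama32PythonicToolParser")};

        // "parsers" is picked up by GenerationConfig::update_generation_config above.
        auto res = pipe.generate("What is the weather in New York today?",
                                 ov::AnyMap{{"parsers", parsers}});
    }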
48 changes: 47 additions & 1 deletion src/cpp/src/llm/pipeline.cpp
@@ -205,7 +205,53 @@ DecodedResults LLMPipeline::generate(
StringInputs inputs,
OptionalGenerationConfig generation_config,
StreamerVariant streamer) {
return m_pimpl->generate(inputs, generation_config, streamer);
auto res = m_pimpl->generate(inputs, generation_config, streamer);

    // If the streamer is a TextParserStreamer, collect its parsed message.
    if (auto streamer_obj = std::get_if<std::shared_ptr<StreamerBase>>(&streamer)) {
        if (auto parser_streamer = std::dynamic_pointer_cast<TextParserStreamer>(*streamer_obj)) {
            res.parsed.resize(res.texts.size());
            res.parsed[0] = parser_streamer->get_parsed_message();
        }
    }

    if (!generation_config.has_value() || (*generation_config).parsers.empty()) {
        return res;
    }

    // Resolve parser names to registered instances.
    std::vector<std::shared_ptr<ParserBase>> parsers;
    for (auto& parser_variant : (*generation_config).parsers) {
        if (std::holds_alternative<std::string>(parser_variant)) {
            auto parser_name = std::get<std::string>(parser_variant);
            auto parser = ParserBase::get_parser(parser_name);
            if (!parser) {
                OPENVINO_THROW("Parser with name ", parser_name, " is not registered");
            }
            parsers.push_back(parser);
        } else if (std::holds_alternative<std::shared_ptr<ParserBase>>(parser_variant)) {
            parsers.push_back(std::get<std::shared_ptr<ParserBase>>(parser_variant));
        }
    }

    res.parsed.resize(res.texts.size());

    // Apply base parsers sequentially even if an incremental parser has already run.
    for (size_t i = 0; i < res.texts.size(); ++i) {
        ParsedMessage msg;
        msg["content"] = res.texts[i];
        for (auto& parser : parsers) {
            // TODO: check if is_active() is needed here
            // TODO: check the state of the incremental parser and reset if necessary
            msg = parser->parse(msg);
        }
        res.parsed[i] = msg;
    }

    return res;
}

DecodedResults LLMPipeline::generate(StringInputs text, const ov::AnyMap& config_map) {