Skip to content

Commit d71fc3f

Browse files
committed
Fix pytests
1 parent 2be242f commit d71fc3f

File tree

3 files changed

+114
-52
lines changed

3 files changed

+114
-52
lines changed

src/cpp/src/parsers.cpp

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
2525
* @brief Ensure required fields exist in the message container.
2626
*/
2727
void ensure_message_fields(JsonContainer& message) {
28-
if (!message.contains("reasoning_content")) {
28+
// if (!message.contains("reasoning_content")) {
2929
message["reasoning_content"] = "";
30-
}
31-
if (!message.contains("content")) {
30+
// }
31+
// if (!message.contains("content")) {
3232
message["content"] = "";
33-
}
33+
// }
3434
}
3535

3636
/**
@@ -59,11 +59,15 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
5959
* @brief Handle the case where both open and close tags are found in the same chunk.
6060
*/
6161
void handle_complete_reasoning(JsonContainer& message, std::string_view txt_chunk,
62-
size_t open_idx, size_t close_idx) {
62+
size_t open_idx, size_t close_idx, std::string& delta_text) {
6363
// Extract reasoning content between tags
6464
message["reasoning_content"] = std::string(txt_chunk.substr(open_idx + m_open_tag.size(), close_idx - (open_idx + m_open_tag.size())));
6565
message["content"] = std::string(txt_chunk.substr(close_idx + m_close_tag.size()));
6666

67+
if (!m_keep_original_content) {
68+
delta_text = std::string(txt_chunk.substr(close_idx + m_close_tag.size()));
69+
}
70+
6771
m_think_tag_opened = false;
6872
m_deactivated = true;
6973
m_text_cache.clear();
@@ -73,10 +77,15 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
7377
* @brief Handle the case where only the open tag is found.
7478
*/
7579
void handle_open_tag(JsonContainer& message, std::string& reason_str,
76-
std::string_view txt_chunk, size_t open_idx) {
80+
std::string_view txt_chunk, size_t open_idx, std::string& delta_text) {
7781
// Start accumulating reasoning content
78-
reason_str.append(txt_chunk.substr(open_idx + m_open_tag.size()));
79-
message["reasoning_content"] = std::move(reason_str);
82+
message["reasoning_content"] = std::string(txt_chunk.substr(open_idx + m_open_tag.size()));
83+
84+
if (!m_keep_original_content) {
85+
delta_text.clear();
86+
} else {
87+
delta_text = txt_chunk;
88+
}
8089

8190
m_think_tag_opened = true;
8291
m_text_cache.clear();
@@ -85,10 +94,21 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
8594
/**
8695
* @brief Handle the case where the close tag is found.
8796
*/
88-
void handle_close_tag(JsonContainer& message, std::string_view txt_chunk, size_t close_idx) {
97+
void handle_close_tag(JsonContainer& message, std::string_view txt_chunk, size_t close_idx, std::string& delta_text) {
8998
// Append text before close tag to reasoning content
9099
message["reasoning_content"] = std::move(std::string(txt_chunk.substr(0, close_idx)));
91-
message["content"] = std::string(txt_chunk.substr(close_idx + m_close_tag.size()));;
100+
auto content = std::string(txt_chunk.substr(close_idx + m_close_tag.size()));
101+
message["content"] = content;
102+
103+
if (!m_keep_original_content) {
104+
// Despite the fact that we put txt_chung to delta_text it's correct.
105+
// Since txt_chunk contains some cached parts from the previous calls that were not yet processed yet
106+
// and we kept them in cache until we decide what to do with them. Here we definitely know that that cached parts
107+
// belonged to reasoning_content so we can discard them.
108+
delta_text = content;
109+
} else {
110+
delta_text = txt_chunk;
111+
}
92112

93113
m_text_cache.clear();
94114
m_think_tag_opened = false;
@@ -98,21 +118,24 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
98118
/**
99119
* @brief Handle accumulating text while inside reasoning tags.
100120
*/
101-
void handle_inside_reasoning(JsonContainer& message, std::string& reason_str, std::string_view txt_chunk) {
121+
void handle_inside_reasoning(JsonContainer& message, std::string& reason_str, std::string_view txt_chunk, std::string& delta_text) {
102122
// Find if the end of txt_chunk might be the start of a close tag
103123
const size_t num_chars_to_keep = find_close_tag_prefix_length(txt_chunk);
104124

105125
if (num_chars_to_keep > 0) {
106126
// Keep potential partial close tag in cache
107127
m_text_cache = std::string(txt_chunk.substr(txt_chunk.size() - num_chars_to_keep));
108-
reason_str.append(txt_chunk.substr(0, txt_chunk.size() - num_chars_to_keep));
128+
reason_str = txt_chunk.substr(0, txt_chunk.size() - num_chars_to_keep);
129+
delta_text = std::string(txt_chunk.substr(0, txt_chunk.size() - num_chars_to_keep));
109130
} else {
110131
// No partial close tag, accumulate all text
111-
reason_str.append(txt_chunk);
132+
reason_str = txt_chunk;
112133
m_text_cache.clear();
113134
}
114-
115135
message["reasoning_content"] = std::move(reason_str);
136+
if (!m_keep_original_content) {
137+
delta_text.clear();
138+
}
116139
}
117140

118141
public:
@@ -132,6 +155,7 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
132155
std::string& delta_text,
133156
const std::optional<std::vector<int64_t>>& delta_tokens
134157
) {
158+
ensure_message_fields(message);
135159
if (m_deactivated) {
136160
message["content"] = delta_text;
137161
return delta_text;
@@ -141,10 +165,9 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
141165
}
142166
m_first_run = false;
143167

144-
ensure_message_fields(message);
145168

146-
const std::string txt_chunk = m_text_cache + delta_text;
147-
std::string reason_str = std::move(message["reasoning_content"].get_string());
169+
std::string txt_chunk = m_text_cache + delta_text;
170+
std::string reason_str = message.contains("reasoning_content") ? std::move(message["reasoning_content"].get_string()) : "";
148171

149172
// Cache find() results to avoid redundant searches
150173
const auto open_idx = txt_chunk.find(m_open_tag);
@@ -156,18 +179,19 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
156179
? close_idx : std::string::npos;
157180

158181
if (close_idx_after_open != std::string::npos) {
159-
handle_complete_reasoning(message, txt_chunk, open_idx, close_idx_after_open);
182+
handle_complete_reasoning(message, txt_chunk, open_idx, close_idx_after_open, delta_text);
160183
} else {
161-
handle_open_tag(message, reason_str, txt_chunk, open_idx);
184+
handle_open_tag(message, reason_str, txt_chunk, open_idx, delta_text);
162185
}
163186
} else if (m_think_tag_opened && close_idx != std::string::npos) {
164-
handle_close_tag(message, txt_chunk, close_idx);
187+
handle_close_tag(message, txt_chunk, close_idx, delta_text);
165188
} else if (m_think_tag_opened) {
166-
handle_inside_reasoning(message, reason_str, txt_chunk);
189+
handle_inside_reasoning(message, reason_str, txt_chunk, delta_text);
167190
} else {
168191
// Think tag was not opened yet and not found in the current delta_text.
169192
// Accumulate text in the cache to detect if <think> is split between several delta_text pieces.
170193
m_text_cache += delta_text;
194+
delta_text.clear();
171195
}
172196

173197
return delta_text;

src/cpp/src/text_streamer.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,15 +144,14 @@ TextParserStreamerImpl(std::vector<std::shared_ptr<IncrementalParser>> parsers)
144144

145145
};
146146

147-
void concatenate_json_containers(JsonContainer& from, const JsonContainer& to, std::vector<std::string> keys_to_concatenate) {
147+
void concatenate_json_containers(const JsonContainer& from, JsonContainer& to, std::vector<std::string> keys_to_concatenate) {
148148
for (const auto& key : keys_to_concatenate) {
149149
if (to.contains(key) && from.contains(key)) {
150150
// If both are strings, concatenate
151151
if (to[key].is_string() && from[key].is_string()) {
152152
to[key] = to[key].get_string() + from[key].get_string();
153153
}
154154
} else if (from.contains(key)) {
155-
auto r = from[key];
156155
to[key] = from[key];
157156
}
158157
}
@@ -161,7 +160,9 @@ void concatenate_json_containers(JsonContainer& from, const JsonContainer& to, s
161160
TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParser>> parsers)
162161
: TextStreamer(tokenizer, [this](std::string s) -> CallbackTypeVariant {
163162
return this->write(s);
164-
}), m_pimpl{std::make_unique<TextParserStreamerImpl>(parsers)} {}
163+
}), m_pimpl{std::make_unique<TextParserStreamerImpl>(parsers)} {
164+
m_pimpl->m_parsed_message["content"] = "";
165+
}
165166

166167
CallbackTypeVariant TextParserStreamer::write(std::string message) {
167168
// When 'write' is called with string, it means new chunk of tokens is decoded into text
@@ -199,10 +200,18 @@ CallbackTypeVariant TextParserStreamer::write(std::string message) {
199200
// Message can be modified inside parser, if parser for example extracted tool calling from message content
200201
}
201202

203+
// std::cout << msg["content"].get_string() << std::endl;
204+
// std::cout << msg["reasoning_content"].get_string() << std::endl;
205+
202206
// concatenate msg with m_parsed_message
203-
concatenate_json_containers(msg, m_pimpl->m_parsed_message, {"content", "reasoning_content"});
207+
concatenate_json_containers(msg, m_pimpl->m_parsed_message, {"reasoning_content"});
208+
209+
// We have to put into DeltaMessage's "content" fields only chunks that belong to content.
210+
// But into m_parsed_message["content"] we need to accumulate full content if m_keep_original_content == True
211+
// and if m_keep_original_content == False only part that is outside reasoning tags and outside tool calls.
212+
213+
m_pimpl->m_parsed_message["content"] = m_pimpl->m_parsed_message["content"].get_string() + message;
204214

205-
// return write(m_pimpl->m_parsed_message);
206215
return write(msg);
207216
}
208217

@@ -212,6 +221,7 @@ JsonContainer TextParserStreamer::get_parsed_message() const {
212221

213222
void TextParserStreamer::reset() {
214223
m_pimpl->m_parsed_message = JsonContainer();
224+
m_pimpl->m_parsed_message["content"] = "";
215225
for (auto& parser : m_pimpl->m_parsers) {
216226
parser->reset();
217227
}

0 commit comments

Comments
 (0)