
Commit 5e45fcd

Merge pull request #13375 from colesmcintosh/fix/ollama-gpt-oss-thinking-field
2 parents 0b28930 + 8197fd7

2 files changed: +90, -3 lines changed


litellm/llms/ollama/completion/transformation.py

Lines changed: 14 additions & 1 deletion
@@ -24,6 +24,8 @@
     ModelResponse,
     ModelResponseStream,
     ProviderField,
+    StreamingChoices,
+    Delta,
 )
 
 from ..common_utils import OllamaError, _convert_image
@@ -423,7 +425,7 @@ def _handle_string_chunk(
     ) -> Union[GenericStreamingChunk, ModelResponseStream]:
         return self.chunk_parser(json.loads(str_line))
 
-    def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
+    def chunk_parser(self, chunk: dict) -> Union[GenericStreamingChunk, ModelResponseStream]:
         try:
             if "error" in chunk:
                 raise Exception(f"Ollama Error - {chunk}")
@@ -459,6 +461,17 @@ def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
                     finish_reason="stop",
                     usage=None,
                 )
+            elif "thinking" in chunk and not chunk["response"]:
+                # Return reasoning content as ModelResponseStream so UIs can render it
+                thinking_content = chunk.get("thinking") or ""
+                return ModelResponseStream(
+                    choices=[
+                        StreamingChoices(
+                            index=0,
+                            delta=Delta(reasoning_content=thinking_content),
+                        )
+                    ]
+                )
             else:
                 raise Exception(f"Unable to parse ollama chunk - {chunk}")
         except Exception as e:
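
For illustration (not part of the commit), the new branch can be exercised directly with the parser changed above. This is a minimal sketch that mirrors the added tests below and assumes only the constructor signature they use:

    from litellm.llms.ollama.completion.transformation import (
        OllamaTextCompletionResponseIterator,
    )

    iterator = OllamaTextCompletionResponseIterator(
        streaming_response=iter([]), sync_stream=True, json_mode=False
    )

    # A reasoning-only chunk (empty "response", non-empty "thinking") now
    # parses into a ModelResponseStream whose delta carries reasoning_content.
    thinking_chunk = iterator.chunk_parser(
        {"model": "gpt-oss:20b", "created_at": "2025-08-06T14:34:31Z",
         "response": "", "thinking": "User", "done": False}
    )
    print(thinking_chunk.choices[0].delta.reasoning_content)  # "User"

    # An ordinary text chunk still parses into the GenericStreamingChunk dict shape.
    text_chunk = iterator.chunk_parser(
        {"model": "gpt-oss:20b", "created_at": "2025-08-06T14:34:31Z",
         "response": "Hello", "done": False}
    )
    print(text_chunk["text"])  # "Hello"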

tests/test_litellm/llms/ollama/test_ollama_completion_transformation.py

Lines changed: 76 additions & 2 deletions
@@ -10,8 +10,11 @@
     0, os.path.abspath("../../../../..")
 )  # Adds the parent directory to the system path
 
-from litellm.llms.ollama.completion.transformation import OllamaConfig
-from litellm.types.utils import Message, ModelResponse
+from litellm.llms.ollama.completion.transformation import (
+    OllamaConfig,
+    OllamaTextCompletionResponseIterator,
+)
+from litellm.types.utils import Message, ModelResponse, ModelResponseStream
 
 
 class TestOllamaConfig:
@@ -155,3 +158,74 @@ def test_transform_response_regular_json(self):
         assert result.choices[0]["message"].content == expected_content
         assert result.choices[0]["finish_reason"] == "stop"
         # No usage assertions here as we don't need to test them in every case
+
+
+class TestOllamaTextCompletionResponseIterator:
+    def test_chunk_parser_with_thinking_field(self):
+        """Test that chunks with 'thinking' field and empty 'response' are handled correctly."""
+        iterator = OllamaTextCompletionResponseIterator(
+            streaming_response=iter([]), sync_stream=True, json_mode=False
+        )
+
+        # Test chunk with thinking field - this is the problematic case from the issue
+        chunk_with_thinking = {
+            "model": "gpt-oss:20b",
+            "created_at": "2025-08-06T14:34:31.5276077Z",
+            "response": "",
+            "thinking": "User",
+            "done": False,
+        }
+
+        result = iterator.chunk_parser(chunk_with_thinking)
+
+        # Should return a ModelResponseStream with reasoning content
+        assert isinstance(result, ModelResponseStream)
+        assert result.choices and result.choices[0].delta is not None
+        assert getattr(result.choices[0].delta, "reasoning_content") == "User"
+
+    def test_chunk_parser_normal_response(self):
+        """Test that normal response chunks still work."""
+        iterator = OllamaTextCompletionResponseIterator(
+            streaming_response=iter([]), sync_stream=True, json_mode=False
+        )
+
+        # Test normal chunk with response
+        normal_chunk = {
+            "model": "llama2",
+            "created_at": "2025-08-06T14:34:31.5276077Z",
+            "response": "Hello world",
+            "done": False,
+        }
+
+        result = iterator.chunk_parser(normal_chunk)
+
+        assert result["text"] == "Hello world"
+        assert result["is_finished"] is False
+        assert result["finish_reason"] == "stop"
+        assert result["usage"] is None
+
+    def test_chunk_parser_done_chunk(self):
+        """Test that done chunks work correctly."""
+        iterator = OllamaTextCompletionResponseIterator(
+            streaming_response=iter([]), sync_stream=True, json_mode=False
+        )
+
+        # Test done chunk
+        done_chunk = {
+            "model": "llama2",
+            "created_at": "2025-08-06T14:34:31.5276077Z",
+            "response": "",
+            "done": True,
+            "prompt_eval_count": 10,
+            "eval_count": 5,
+        }
+
+        result = iterator.chunk_parser(done_chunk)
+
+        assert result["text"] == ""
+        assert result["is_finished"] is True
+        assert result["finish_reason"] == "stop"
+        assert result["usage"] is not None
+        assert result["usage"]["prompt_tokens"] == 10
+        assert result["usage"]["completion_tokens"] == 5
+        assert result["usage"]["total_tokens"] == 15
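
End to end, the practical effect of this commit is that reasoning-only chunks from gpt-oss models no longer break text-completion streaming. A hedged sketch of how a caller might consume the new reasoning deltas (assumptions: a local Ollama server with a gpt-oss:20b model pulled, and that litellm's generic completion entry point routes "ollama/" models through the parser changed above):

    import litellm

    # Hypothetical streaming call; model availability is an assumption.
    stream = litellm.completion(
        model="ollama/gpt-oss:20b",
        messages=[{"role": "user", "content": "Why is the sky blue?"}],
        stream=True,
    )
    for chunk in stream:
        delta = chunk.choices[0].delta
        # With this fix, reasoning tokens surface on reasoning_content while
        # answer tokens keep arriving on content.
        reasoning = getattr(delta, "reasoning_content", None)
        if reasoning:
            print(f"[thinking] {reasoning}", flush=True)
        elif delta.content:
            print(delta.content, end="", flush=True)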
