 from pydantic import BaseModel
 from typing_extensions import Unpack, override

+from ..tools import convert_pydantic_to_tool_spec
 from ..types.content import ContentBlock, Messages
 from ..types.exceptions import ContextWindowOverflowException
 from ..types.streaming import StreamEvent
@@ -202,6 +203,10 @@ async def structured_output(
     ) -> AsyncGenerator[dict[str, Union[T, Any]], None]:
         """Get structured output from the model.

+        Some models do not support native structured output via response_format.
+        When requests go through a proxy, we may have no way to determine support,
+        so we fall back to tool calling to achieve structured output.
+
         Args:
             output_model: The output model to use for the agent.
             prompt: The prompt messages to use for the agent.
@@ -211,42 +216,69 @@ async def structured_output(
         Yields:
             Model events with the last being the structured output.
         """
-        supports_schema = supports_response_schema(self.get_config()["model_id"])
+        if supports_response_schema(self.get_config()["model_id"]):
+            logger.debug("structuring output using response schema")
+            result = await self._structured_output_using_response_schema(output_model, prompt, system_prompt)
+        else:
+            logger.debug("model does not support response schema, structuring output using tool approach")
+            result = await self._structured_output_using_tool(output_model, prompt, system_prompt)
+
+        yield {"output": result}
+
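For context on the branch condition above: supports_response_schema is litellm's capability lookup against its model metadata. A minimal sketch of the gate, assuming litellm is installed; the model ids are illustrative, and results for unrecognized ids vary by litellm version and model map:

# Sketch of the capability check that selects the branch above.
# Model ids are illustrative; results depend on litellm's model metadata.
from litellm.utils import supports_response_schema

print(supports_response_schema("gpt-4o"))            # True when the metadata advertises native response_format
print(supports_response_schema("my-unknown-model"))  # typically False, which routes into the tool fallback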
+    async def _structured_output_using_response_schema(
+        self, output_model: Type[T], prompt: Messages, system_prompt: Optional[str] = None
+    ) -> T:
+        """Get structured output using native response_format support."""
+        try:
+            response = await litellm.acompletion(
+                **self.client_args,
+                model=self.get_config()["model_id"],
+                messages=self.format_request(prompt, system_prompt=system_prompt)["messages"],
+                response_format=output_model,
+            )
+        except ContextWindowExceededError as e:
+            logger.warning("litellm client raised context window overflow in structured_output")
+            raise ContextWindowOverflowException(e) from e

-        # If the provider does not support response schemas, we cannot reliably parse structured output.
-        # In that case we must not call the provider and must raise the documented ValueError.
-        if not supports_schema:
-            raise ValueError("Model does not support response_format")
+        if len(response.choices) > 1:
+            raise ValueError("Multiple choices found in the response.")
+        if not response.choices or response.choices[0].finish_reason != "tool_calls":
+            raise ValueError("No tool_calls found in response")

-        # For providers that DO support response schemas, call litellm and map context-window errors.
+        choice = response.choices[0]
         try:
-            response = await litellm.acompletion(
-                **self.client_args,
-                model=self.get_config()["model_id"],
-                messages=self.format_request(prompt, system_prompt=system_prompt)["messages"],
-                response_format=output_model,
-            )
+            # Parse the message content as JSON
+            tool_call_data = json.loads(choice.message.content)
+            # Instantiate the output model with the parsed data
+            return output_model(**tool_call_data)
-        except ContextWindowExceededError as e:
-            logger.warning("litellm client raised context window overflow in structured_output")
-            raise ContextWindowOverflowException(e) from e
+        except (json.JSONDecodeError, TypeError, ValueError) as e:
+            raise ValueError(f"Failed to parse or load content into model: {e}") from e
+
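A fabricated example of the response shape this parser accepts — a finish_reason of "tool_calls" with the structured JSON carried in message.content; the values are made up for illustration:

# Sketch: a fabricated response matching what the parser above expects.
import json
from types import SimpleNamespace

choice = SimpleNamespace(
    finish_reason="tool_calls",  # anything else is rejected above
    message=SimpleNamespace(content=json.dumps({"city": "Paris", "temperature_c": 18.0})),
)
response = SimpleNamespace(choices=[choice])

print(json.loads(response.choices[0].message.content))  # {'city': 'Paris', 'temperature_c': 18.0}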
+    async def _structured_output_using_tool(
+        self, output_model: Type[T], prompt: Messages, system_prompt: Optional[str] = None
+    ) -> T:
+        """Get structured output using tool calling fallback."""
+        tool_spec = convert_pydantic_to_tool_spec(output_model)
+        request = self.format_request(prompt, [tool_spec], system_prompt, cast(ToolChoice, {"any": {}}))
+        args = {**self.client_args, **request, "stream": False}
+        try:
+            response = await litellm.acompletion(**args)
+        except ContextWindowExceededError as e:
+            logger.warning("litellm client raised context window overflow in structured_output")
+            raise ContextWindowOverflowException(e) from e

         if len(response.choices) > 1:
             raise ValueError("Multiple choices found in the response.")
+        if not response.choices or response.choices[0].finish_reason != "tool_calls":
+            raise ValueError("No tool_calls found in response")

-        # Find the first choice with tool_calls
-        for choice in response.choices:
-            if choice.finish_reason == "tool_calls":
-                try:
-                    # Parse the tool call content as JSON
-                    tool_call_data = json.loads(choice.message.content)
-                    # Instantiate the output model with the parsed data
-                    yield {"output": output_model(**tool_call_data)}
-                    return
-                except (json.JSONDecodeError, TypeError, ValueError) as e:
-                    raise ValueError(f"Failed to parse or load content into model: {e}") from e
-
-        # If no tool_calls found, raise an error
-        raise ValueError("No tool_calls found in response")
+        choice = response.choices[0]
+        try:
+            # Parse the tool call arguments as JSON
+            tool_call = choice.message.tool_calls[0]
+            tool_call_data = json.loads(tool_call.function.arguments)
+            # Instantiate the output model with the parsed data
+            return output_model(**tool_call_data)
+        except (json.JSONDecodeError, TypeError, ValueError) as e:
+            raise ValueError(f"Failed to parse or load content into model: {e}") from e

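And an end-to-end sketch of driving these paths through the public structured_output API; the import path, constructor usage, and model id are assumptions rather than part of this diff:

# Sketch: exercising structured_output end to end.
# Import path, constructor args, and model id are assumptions, not from this diff.
import asyncio

from pydantic import BaseModel

from strands.models.litellm import LiteLLMModel


class WeatherReport(BaseModel):
    city: str
    temperature_c: float


async def main() -> None:
    model = LiteLLMModel(model_id="gpt-4o")
    prompt = [{"role": "user", "content": [{"text": "It is 18C in Paris right now."}]}]
    # structured_output picks response_format or the tool fallback internally.
    async for event in model.structured_output(WeatherReport, prompt):
        if "output" in event:
            print(event["output"])


asyncio.run(main())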
     def _apply_proxy_prefix(self) -> None:
         """Apply litellm_proxy/ prefix to model_id when use_litellm_proxy is True.