@classmethod
def _split_tool_message_images(
    cls, tool_message: dict[str, Any]
) -> tuple[dict[str, Any], Optional[dict[str, Any]]]:
    """Split a tool message into an image-free tool message and an optional user message with the images.

    OpenAI API restricts images to user role messages only. This method moves every ``image_url``
    content part out of a tool message and returns those parts separately as a user message.

    Args:
        tool_message: A formatted tool message that may contain images.

    Returns:
        A tuple of (tool_message_without_images, user_message_with_images_or_None).

        The input message is returned as-is, paired with None, when its role is not ``"tool"``,
        when its content is not a list, or when it contains no image parts. Otherwise a new tool
        message is returned that carries over every field of the input except ``content``; the
        input message itself is never modified.
    """
    if tool_message.get("role") != "tool":
        return tool_message, None

    content = tool_message.get("content", [])
    if not isinstance(content, list):
        return tool_message, None

    def is_image(part: Any) -> bool:
        # Non-dict parts are treated as non-image content and stay on the tool message.
        return isinstance(part, dict) and part.get("type") == "image_url"

    image_content = [part for part in content if is_image(part)]
    if not image_content:
        return tool_message, None

    text_content = [part for part in content if not is_image(part)]

    # Copy the original message instead of rebuilding it from a fixed key list, so that fields
    # other than "content" survive the split and a missing key cannot raise on this path only.
    tool_message_clean = {
        **tool_message,
        # Keep the tool message non-empty when every content part was an image.
        "content": text_content or [{"type": "text", "text": "Tool execution completed successfully."}],
    }

    user_message_with_images = {"role": "user", "content": image_content}

    return tool_message_clean, user_message_with_images
def test_split_tool_message_images_with_image():
    """Image parts move to a user message while the text part stays on the tool message."""
    text_part = {"type": "text", "text": "Result"}
    image_part = {
        "type": "image_url",
        "image_url": {"url": "", "detail": "auto", "format": "image/png"},
    }
    original = {"role": "tool", "tool_call_id": "c1", "content": [text_part, image_part]}

    cleaned, image_message = OpenAIModel._split_tool_message_images(original)

    # The tool message keeps its role and call id, and holds only the text part
    assert cleaned["role"] == "tool"
    assert cleaned["tool_call_id"] == "c1"
    assert len(cleaned["content"]) == 1
    assert cleaned["content"][0]["type"] == "text"

    # The image is handed back in a separate user message
    assert image_message is not None
    assert image_message["role"] == "user"
    assert len(image_message["content"]) == 1
    assert image_message["content"][0]["type"] == "image_url"


def test_split_tool_message_images_without_image():
    """A tool message with no image parts comes back unchanged and without a user message."""
    original = {
        "role": "tool",
        "tool_call_id": "c1",
        "content": [{"type": "text", "text": "Result"}],
    }

    cleaned, image_message = OpenAIModel._split_tool_message_images(original)

    assert cleaned == original
    assert image_message is None


def test_split_tool_message_images_only_image():
    """A tool message made up solely of an image gets placeholder text in its place."""
    original = {
        "role": "tool",
        "tool_call_id": "c1",
        "content": [{"type": "image_url", "image_url": {"url": ""}}],
    }

    cleaned, image_message = OpenAIModel._split_tool_message_images(original)

    # The tool message carries a single default text part
    assert cleaned["role"] == "tool"
    assert len(cleaned["content"]) == 1
    placeholder_text = cleaned["content"][0]["text"]
    assert "successfully" in placeholder_text.lower()

    # The image is handed back in a separate user message
    assert image_message is not None
    assert image_message["role"] == "user"
    assert len(image_message["content"]) == 1


def test_format_request_messages_with_tool_result_containing_image():
    """A tool result holding text and an image is formatted as a tool message followed by a user message."""
    tool_result = {
        "toolUseId": "t1",
        "status": "success",
        "content": [
            {"text": "Image generated"},
            {"image": {"format": "png", "source": {"bytes": b"fake_image_data"}}},
        ],
    }
    messages = [
        {"content": [{"text": "Run the tool"}], "role": "user"},
        {
            "content": [{"toolUse": {"input": {}, "name": "image_tool", "toolUseId": "t1"}}],
            "role": "assistant",
        },
        {"content": [{"toolResult": tool_result}], "role": "user"},
    ]

    formatted = OpenAIModel.format_request_messages(messages)

    # Exactly one tool message is produced
    tool_messages = [entry for entry in formatted if entry.get("role") == "tool"]
    assert len(tool_messages) == 1

    # ...and none of its content parts is an image
    tool_message = tool_messages[0]
    assert not any(part.get("type") == "image_url" for part in tool_message["content"])

    # The message directly after it is a user message that carries the image
    position = formatted.index(tool_message)
    assert position + 1 < len(formatted)
    follow_up = formatted[position + 1]
    assert follow_up["role"] == "user"
    assert any(part.get("type") == "image_url" for part in follow_up["content"])


def test_format_request_messages_with_multiple_images_in_tool_result():
    """Several images in one tool result end up together in a single user message."""
    tool_result = {
        "toolUseId": "t1",
        "status": "success",
        "content": [
            {"text": "Two images generated"},
            {"image": {"format": "png", "source": {"bytes": b"image1"}}},
            {"image": {"format": "jpg", "source": {"bytes": b"image2"}}},
        ],
    }
    messages = [{"content": [{"toolResult": tool_result}], "role": "user"}]

    formatted = OpenAIModel.format_request_messages(messages)

    # Collect the user messages that contain at least one image part
    user_image_messages = []
    for entry in formatted:
        if entry.get("role") != "user":
            continue
        if any(part.get("type") == "image_url" for part in entry.get("content", [])):
            user_image_messages.append(entry)
    assert len(user_image_messages) == 1

    # Both images are present in that one message
    image_parts = [part for part in user_image_messages[0]["content"] if part.get("type") == "image_url"]
    assert len(image_parts) == 2