Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 59 additions & 1 deletion src/strands/models/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,57 @@ def format_request_tool_message(cls, tool_result: ToolResult) -> dict[str, Any]:
"content": [cls.format_request_message_content(content) for content in contents],
}

@classmethod
def _split_tool_message_images(
    cls, tool_message: dict[str, Any]
) -> tuple[dict[str, Any], Optional[dict[str, Any]]]:
    """Split a tool message into an image-free tool message and an optional user message.

    OpenAI API restricts images to user role messages only. This method moves every
    ``image_url`` content part out of a tool message and returns those parts separately
    as a user message.

    Args:
        tool_message: A formatted message that may contain images. Only messages whose
            role is "tool" and whose content is a list are inspected; anything else is
            returned unchanged.

    Returns:
        A tuple of (tool_message_without_images, user_message_with_images_or_None).
        When there is nothing to split, the first element is the very same object that
        was passed in and the second is None. Otherwise the first element is a shallow
        copy of the input with only its "content" replaced, so every other field
        (including "tool_call_id") is carried over as-is, and the input is not mutated.
    """
    if tool_message.get("role") != "tool":
        return tool_message, None

    content = tool_message.get("content", [])
    if not isinstance(content, list):
        return tool_message, None

    def _is_image(part: Any) -> bool:
        # Non-dict parts can never be images; they stay on the tool message.
        return isinstance(part, dict) and part.get("type") == "image_url"

    image_content = [part for part in content if _is_image(part)]
    if not image_content:
        return tool_message, None

    text_content = [part for part in content if not _is_image(part)]
    if not text_content:
        # The tool message must keep some content once its images are moved out:
        # format_request_messages filters out messages whose content is empty.
        text_content = [{"type": "text", "text": "Tool execution completed successfully."}]

    # Copy instead of rebuilding from a fixed key list so no field of the original
    # tool message is lost and a missing key does not raise only on the image path.
    tool_message_clean: dict[str, Any] = {**tool_message, "content": text_content}
    user_message_with_images: dict[str, Any] = {"role": "user", "content": image_content}

    return tool_message_clean, user_message_with_images

@classmethod
def _format_request_tool_choice(cls, tool_choice: ToolChoice | None) -> dict[str, Any]:
"""Format a tool choice for OpenAI compatibility.
Expand Down Expand Up @@ -234,7 +285,14 @@ def format_request_messages(cls, messages: Messages, system_prompt: Optional[str
**({"tool_calls": formatted_tool_calls} if formatted_tool_calls else {}),
}
formatted_messages.append(formatted_message)
formatted_messages.extend(formatted_tool_messages)

# Process tool messages to extract images into separate user messages
# OpenAI API requires images to be in user role messages only
for tool_msg in formatted_tool_messages:
tool_msg_clean, user_msg_with_images = cls._split_tool_message_images(tool_msg)
formatted_messages.append(tool_msg_clean)
if user_msg_with_images:
formatted_messages.append(user_msg_with_images)

return [message for message in formatted_messages if message["content"] or "tool_calls" in message]

Expand Down
165 changes: 165 additions & 0 deletions tests/strands/models/test_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,171 @@ def test_format_request_tool_message():
assert tru_result == exp_result


def test_split_tool_message_images_with_image():
    """Image parts move to a separate user message while text stays on the tool message."""
    text_part = {"type": "text", "text": "Result"}
    image_part = {
        "type": "image_url",
        "image_url": {"url": "", "detail": "auto", "format": "image/png"},
    }
    tool_message = {"role": "tool", "tool_call_id": "c1", "content": [text_part, image_part]}

    tool_clean, user_with_image = OpenAIModel._split_tool_message_images(tool_message)

    # The tool side keeps its identity and exactly one text part.
    assert tool_clean["role"] == "tool"
    assert tool_clean["tool_call_id"] == "c1"
    remaining_parts = tool_clean["content"]
    assert len(remaining_parts) == 1
    assert remaining_parts[0]["type"] == "text"

    # The user side carries exactly the one extracted image.
    assert user_with_image is not None
    assert user_with_image["role"] == "user"
    moved_parts = user_with_image["content"]
    assert len(moved_parts) == 1
    assert moved_parts[0]["type"] == "image_url"


def test_split_tool_message_images_without_image():
    """A tool message with no image parts comes back equal to the input, with no user message."""
    text_only_message = {
        "role": "tool",
        "tool_call_id": "c1",
        "content": [{"type": "text", "text": "Result"}],
    }

    split_result = OpenAIModel._split_tool_message_images(text_only_message)
    tool_clean, user_with_image = split_result

    assert tool_clean == text_only_message
    assert user_with_image is None


def test_split_tool_message_images_only_image():
    """An image-only tool message gets placeholder text and its image goes to a user message."""
    image_part = {"type": "image_url", "image_url": {"url": ""}}
    tool_message = {"role": "tool", "tool_call_id": "c1", "content": [image_part]}

    tool_clean, user_with_image = OpenAIModel._split_tool_message_images(tool_message)

    # With no text left over, the tool message carries a single default text part.
    assert tool_clean["role"] == "tool"
    placeholder_parts = tool_clean["content"]
    assert len(placeholder_parts) == 1
    placeholder_text = placeholder_parts[0]["text"]
    assert "successfully" in placeholder_text.lower()

    # The image itself lives on the user message.
    assert user_with_image is not None
    assert user_with_image["role"] == "user"
    assert len(user_with_image["content"]) == 1


def test_format_request_messages_with_tool_result_containing_image():
    """A tool result holding text and an image is formatted as a tool message plus a user message."""
    user_turn = {
        "content": [{"text": "Run the tool"}],
        "role": "user",
    }
    tool_use_turn = {
        "content": [
            {"toolUse": {"input": {}, "name": "image_tool", "toolUseId": "t1"}},
        ],
        "role": "assistant",
    }
    generated_image = {
        "image": {
            "format": "png",
            "source": {"bytes": b"fake_image_data"},
        }
    }
    tool_result_turn = {
        "content": [
            {
                "toolResult": {
                    "toolUseId": "t1",
                    "status": "success",
                    "content": [{"text": "Image generated"}, generated_image],
                }
            }
        ],
        "role": "user",
    }

    formatted = OpenAIModel.format_request_messages([user_turn, tool_use_turn, tool_result_turn])

    # Exactly one tool message is produced, and it carries no image parts.
    tool_messages = [message for message in formatted if message.get("role") == "tool"]
    assert len(tool_messages) == 1
    tool_msg = tool_messages[0]
    assert not any(part.get("type") == "image_url" for part in tool_msg["content"])

    # The message directly after it is a user message holding the image.
    follow_up_idx = formatted.index(tool_msg) + 1
    assert follow_up_idx < len(formatted)
    user_msg = formatted[follow_up_idx]
    assert user_msg["role"] == "user"
    assert any(part.get("type") == "image_url" for part in user_msg["content"])


def test_format_request_messages_with_multiple_images_in_tool_result():
    """All images of a single tool result end up together in one user message."""

    def image_block(image_format, raw_bytes):
        # Build one image content block in the shape the tool result uses.
        return {"image": {"format": image_format, "source": {"bytes": raw_bytes}}}

    def is_image_part(part):
        return part.get("type") == "image_url"

    tool_result = {
        "toolUseId": "t1",
        "status": "success",
        "content": [
            {"text": "Two images generated"},
            image_block("png", b"image1"),
            image_block("jpg", b"image2"),
        ],
    }
    messages = [{"content": [{"toolResult": tool_result}], "role": "user"}]

    formatted = OpenAIModel.format_request_messages(messages)

    # Only one user message should carry image parts.
    user_image_msgs = []
    for message in formatted:
        if message.get("role") == "user" and any(is_image_part(part) for part in message.get("content", [])):
            user_image_msgs.append(message)
    assert len(user_image_msgs) == 1

    # Both images must be present in that message.
    image_contents = list(filter(is_image_part, user_image_msgs[0]["content"]))
    assert len(image_contents) == 2


def test_format_request_tool_choice_auto():
tool_choice = {"auto": {}}

Expand Down
1 change: 0 additions & 1 deletion tests_integ/models/test_model_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,6 @@ def test_structured_output_multi_modal_input(agent, yellow_img, yellow_color):
assert tru_color == exp_color


@pytest.mark.skip("https://github.com/strands-agents/sdk-python/issues/320")
def test_tool_returning_images(model, yellow_img):
@tool
def tool_with_image_return():
Expand Down