fix(ai): truncate messages for google genai (#4992)

shellmayr · web-flow · commit ee3629cec4b0 · 2025-10-23T17:27:21.000+02:00
diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py
@@ -15,7 +15,11 @@
 )
 
 import sentry_sdk
-from sentry_sdk.ai.utils import set_data_normalized
+from sentry_sdk.ai.utils import (
+    set_data_normalized,
+    truncate_and_annotate_messages,
+    normalize_message_roles,
+)
 from sentry_sdk.consts import OP, SPANDATA
 from sentry_sdk.scope import should_send_default_pii
 from sentry_sdk.utils import (
@@ -462,12 +466,18 @@ def set_span_data_for_request(span, integration, model, contents, kwargs):
             messages.append({"role": "user", "content": contents_text})
 
         if messages:
-            set_data_normalized(
-                span,
-                SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                messages,
-                unpack=False,
+            normalized_messages = normalize_message_roles(messages)
+            scope = sentry_sdk.get_current_scope()
+            messages_data = truncate_and_annotate_messages(
+                normalized_messages, span, scope
             )
+            if messages_data is not None:
+                set_data_normalized(
+                    span,
+                    SPANDATA.GEN_AI_REQUEST_MESSAGES,
+                    messages_data,
+                    unpack=False,
+                )
 
     # Extract parameters directly from config (not nested under generation_config)
     for param, span_key in [
diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py
@@ -905,3 +905,51 @@ def test_tool_calls_extraction(sentry_init, capture_events, mock_genai_client):
     assert tool_calls[1]["type"] == "function_call"
     # Arguments are serialized as JSON strings
     assert json.loads(tool_calls[1]["arguments"]) == {"timezone": "PST"}
+
+
+def test_google_genai_message_truncation(
+    sentry_init, capture_events, mock_genai_client
+):
+    """Test that large messages are truncated properly in Google GenAI integration."""
+    sentry_init(
+        integrations=[GoogleGenAIIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    large_content = (
+        "This is a very long message that will exceed our size limits. " * 1000
+    )  # one sentence repeated 1000 times to build an oversized payload
+    small_content = "This is a small user message"
+
+    mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON)
+
+    with mock.patch.object(  # swap _api_client.request for a stub returning the canned response
+        mock_genai_client._api_client, "request", return_value=mock_http_response
+    ):
+        with start_transaction(name="google_genai"):
+            mock_genai_client.models.generate_content(
+                model="gemini-1.5-flash",
+                contents=small_content,  # short user prompt
+                config=create_test_config(
+                    system_instruction=large_content,  # oversized system instruction
+                ),
+            )
+
+    (event,) = events  # unpacking fails unless exactly one event was captured
+    invoke_span = event["spans"][0]  # first span recorded on the transaction event
+    assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"]
+
+    messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
+    assert isinstance(messages_data, str)  # stored as a string; parsed as JSON below
+
+    parsed_messages = json.loads(messages_data)
+    assert isinstance(parsed_messages, list)
+    assert len(parsed_messages) == 1  # only a single message is left on the span
+    assert parsed_messages[0]["role"] == "user"  # the surviving message is the user prompt
+    assert small_content in parsed_messages[0]["content"]
+
+    assert (
+        event["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 2
+    )  # NOTE(review): "len" presumably records the pre-truncation message count — confirm