Skip to content

Commit 814cd5a

Browse files
authored
fix(ai): introduce message truncation for openai (#4946)
1 parent ca4df94 commit 814cd5a

File tree

6 files changed

+445
-13
lines changed

6 files changed

+445
-13
lines changed

sentry_sdk/ai/utils.py

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
11
import json
2-
2+
from collections import deque
33
from typing import TYPE_CHECKING
4+
from sys import getsizeof
45

56
if TYPE_CHECKING:
6-
from typing import Any, Callable
7+
from typing import Any, Callable, Dict, List, Optional, Tuple
8+
79
from sentry_sdk.tracing import Span
810

911
import sentry_sdk
1012
from sentry_sdk.utils import logger
1113

14+
MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB
15+
1216

1317
class GEN_AI_ALLOWED_MESSAGE_ROLES:
1418
SYSTEM = "system"
@@ -95,3 +99,48 @@ def get_start_span_function():
9599
current_span is not None and current_span.containing_transaction is not None
96100
)
97101
return sentry_sdk.start_span if transaction_exists else sentry_sdk.start_transaction
102+
103+
104+
def _find_truncation_index(messages, max_bytes):
105+
# type: (List[Dict[str, Any]], int) -> int
106+
"""
107+
Find the index of the first message that would exceed the max bytes limit.
108+
Compute the individual message sizes, and return the index of the first message from the back
109+
of the list that would exceed the max bytes limit.
110+
"""
111+
running_sum = 0
112+
for idx in range(len(messages) - 1, -1, -1):
113+
size = len(json.dumps(messages[idx], separators=(",", ":")).encode("utf-8"))
114+
running_sum += size
115+
if running_sum > max_bytes:
116+
return idx + 1
117+
118+
return 0
119+
120+
121+
def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
    # type: (List[Dict[str, Any]], int) -> Tuple[List[Dict[str, Any]], int]
    """
    Truncate ``messages`` so their compact-JSON serialization fits within
    ``max_bytes``, dropping the oldest (front) entries first.

    Returns a tuple of ``(messages, removed_count)`` where ``removed_count``
    is the number of messages dropped from the front; the list is returned
    untouched (with a count of 0) when it already fits.
    """
    encoded_size = len(
        json.dumps(messages, separators=(",", ":")).encode("utf-8")
    )
    if encoded_size <= max_bytes:
        return messages, 0

    cut_at = _find_truncation_index(messages, max_bytes)
    return messages[cut_at:], cut_at
131+
132+
133+
def truncate_and_annotate_messages(
    messages, span, scope, max_bytes=MAX_GEN_AI_MESSAGE_BYTES
):
    # type: (Optional[List[Dict[str, Any]]], Any, Any, int) -> Optional[List[Dict[str, Any]]]
    """
    Truncate ``messages`` to fit ``max_bytes`` and record on ``scope`` how
    many entries were dropped for ``span``.

    Returns the (possibly truncated) message list, or ``None`` when there
    are no messages to process.  When messages were dropped, the drop count
    is stored per span id in ``scope._gen_ai_messages_truncated`` so the
    event can later be annotated with the original length.
    """
    if not messages:
        return None

    kept, dropped = truncate_messages_by_size(messages, max_bytes)
    if dropped > 0:
        scope._gen_ai_messages_truncated[span.span_id] = len(messages) - len(kept)

    return kept

sentry_sdk/client.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,24 @@ def _prepare_event(
598598
if event_scrubber:
599599
event_scrubber.scrub_event(event)
600600

601+
if scope is not None and scope._gen_ai_messages_truncated:
602+
spans = event.get("spans", []) # type: List[Dict[str, Any]] | AnnotatedValue
603+
if isinstance(spans, list):
604+
for span in spans:
605+
span_id = span.get("span_id", None)
606+
span_data = span.get("data", {})
607+
if (
608+
span_id
609+
and span_id in scope._gen_ai_messages_truncated
610+
and SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data
611+
):
612+
span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] = AnnotatedValue(
613+
span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES],
614+
{
615+
"len": scope._gen_ai_messages_truncated[span_id]
616+
+ len(span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES])
617+
},
618+
)
601619
if previous_total_spans is not None:
602620
event["spans"] = AnnotatedValue(
603621
event.get("spans", []), {"len": previous_total_spans}
@@ -606,6 +624,7 @@ def _prepare_event(
606624
event["breadcrumbs"] = AnnotatedValue(
607625
event.get("breadcrumbs", []), {"len": previous_total_breadcrumbs}
608626
)
627+
609628
# Postprocess the event here so that annotated types do
610629
# generally not surface in before_send
611630
if event is not None:

sentry_sdk/integrations/openai.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
from functools import wraps
2-
from collections.abc import Iterable
32

43
import sentry_sdk
54
from sentry_sdk import consts
65
from sentry_sdk.ai.monitoring import record_token_usage
7-
from sentry_sdk.ai.utils import set_data_normalized, normalize_message_roles
6+
from sentry_sdk.ai.utils import (
7+
set_data_normalized,
8+
normalize_message_roles,
9+
truncate_and_annotate_messages,
10+
)
811
from sentry_sdk.consts import SPANDATA
912
from sentry_sdk.integrations import DidNotEnable, Integration
1013
from sentry_sdk.scope import should_send_default_pii
@@ -18,7 +21,7 @@
1821
from typing import TYPE_CHECKING
1922

2023
if TYPE_CHECKING:
21-
from typing import Any, List, Optional, Callable, AsyncIterator, Iterator
24+
from typing import Any, Iterable, List, Optional, Callable, AsyncIterator, Iterator
2225
from sentry_sdk.tracing import Span
2326

2427
try:
@@ -189,9 +192,12 @@ def _set_input_data(span, kwargs, operation, integration):
189192
and integration.include_prompts
190193
):
191194
normalized_messages = normalize_message_roles(messages)
192-
set_data_normalized(
193-
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, normalized_messages, unpack=False
194-
)
195+
scope = sentry_sdk.get_current_scope()
196+
messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
197+
if messages_data is not None:
198+
set_data_normalized(
199+
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
200+
)
195201

196202
# Input attributes: Common
197203
set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai")

sentry_sdk/scope.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ class Scope:
188188
"_extras",
189189
"_breadcrumbs",
190190
"_n_breadcrumbs_truncated",
191+
"_gen_ai_messages_truncated",
191192
"_event_processors",
192193
"_error_processors",
193194
"_should_capture",
@@ -213,6 +214,7 @@ def __init__(self, ty=None, client=None):
213214
self._name = None # type: Optional[str]
214215
self._propagation_context = None # type: Optional[PropagationContext]
215216
self._n_breadcrumbs_truncated = 0 # type: int
217+
self._gen_ai_messages_truncated = {} # type: Dict[str, int]
216218

217219
self.client = NonRecordingClient() # type: sentry_sdk.client.BaseClient
218220

@@ -247,6 +249,7 @@ def __copy__(self):
247249

248250
rv._breadcrumbs = copy(self._breadcrumbs)
249251
rv._n_breadcrumbs_truncated = self._n_breadcrumbs_truncated
252+
rv._gen_ai_messages_truncated = self._gen_ai_messages_truncated.copy()
250253
rv._event_processors = self._event_processors.copy()
251254
rv._error_processors = self._error_processors.copy()
252255
rv._propagation_context = self._propagation_context
@@ -1583,6 +1586,8 @@ def update_from_scope(self, scope):
15831586
self._n_breadcrumbs_truncated = (
15841587
self._n_breadcrumbs_truncated + scope._n_breadcrumbs_truncated
15851588
)
1589+
if scope._gen_ai_messages_truncated:
1590+
self._gen_ai_messages_truncated.update(scope._gen_ai_messages_truncated)
15861591
if scope._span:
15871592
self._span = scope._span
15881593
if scope._attachments:

tests/integrations/openai/test_openai.py

Lines changed: 58 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import pytest
23

34
from sentry_sdk.utils import package_version
@@ -6,7 +7,6 @@
67
from openai import NOT_GIVEN
78
except ImportError:
89
NOT_GIVEN = None
9-
1010
try:
1111
from openai import omit
1212
except ImportError:
@@ -44,6 +44,9 @@
4444
OpenAIIntegration,
4545
_calculate_token_usage,
4646
)
47+
from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES
48+
from sentry_sdk._types import AnnotatedValue
49+
from sentry_sdk.serializer import serialize
4750

4851
from unittest import mock # python 3.3 and above
4952

@@ -1456,6 +1459,7 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools):
14561459

14571460
def test_openai_message_role_mapping(sentry_init, capture_events):
14581461
"""Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'"""
1462+
14591463
sentry_init(
14601464
integrations=[OpenAIIntegration(include_prompts=True)],
14611465
traces_sample_rate=1.0,
@@ -1465,7 +1469,6 @@ def test_openai_message_role_mapping(sentry_init, capture_events):
14651469

14661470
client = OpenAI(api_key="z")
14671471
client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION)
1468-
14691472
# Test messages with mixed roles including "ai" that should be mapped to "assistant"
14701473
test_messages = [
14711474
{"role": "system", "content": "You are helpful."},
@@ -1476,11 +1479,9 @@ def test_openai_message_role_mapping(sentry_init, capture_events):
14761479

14771480
with start_transaction(name="openai tx"):
14781481
client.chat.completions.create(model="test-model", messages=test_messages)
1479-
1482+
# Verify that the span was created correctly
14801483
(event,) = events
14811484
span = event["spans"][0]
1482-
1483-
# Verify that the span was created correctly
14841485
assert span["op"] == "gen_ai.chat"
14851486
assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]
14861487

@@ -1505,3 +1506,55 @@ def test_openai_message_role_mapping(sentry_init, capture_events):
15051506
# Verify no "ai" roles remain
15061507
roles = [msg["role"] for msg in stored_messages]
15071508
assert "ai" not in roles
1509+
1510+
1511+
def test_openai_message_truncation(sentry_init, capture_events):
    """Test that large messages are truncated properly in OpenAI integration."""
    sentry_init(
        integrations=[OpenAIIntegration(include_prompts=True)],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )
    events = capture_events()

    client = OpenAI(api_key="z")
    client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION)

    oversized_text = (
        "This is a very long message that will exceed our size limits. " * 1000
    )
    oversized_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": oversized_text},
        {"role": "assistant", "content": oversized_text},
        {"role": "user", "content": oversized_text},
    ]

    with start_transaction(name="openai tx"):
        client.chat.completions.create(
            model="some-model",
            messages=oversized_messages,
        )

    (event,) = events
    chat_span = event["spans"][0]
    assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"]

    # The stored payload is a JSON string holding at most the original
    # number of messages (oldest entries are dropped first).
    serialized = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
    assert isinstance(serialized, str)

    kept_messages = json.loads(serialized)
    assert isinstance(kept_messages, list)
    assert len(kept_messages) <= len(oversized_messages)

    # When messages were dropped, the _meta annotation for the span data
    # should carry the original message count under the "len" key.
    was_truncated = len(kept_messages) < len(oversized_messages)
    if "_meta" in event and was_truncated:
        span_meta = (
            event["_meta"].get("spans", {}).get("0", {}).get("data", {})
        )
        if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_meta:
            messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES]
            assert "len" in messages_meta.get("", {})

0 commit comments

Comments
 (0)