feat(agent): Add opt-in flag to include tool specs in traces for evaluation

Ratish1 · Ratish1 · commit 1e709948104c · 2025-11-04T22:45:10.000+04:00
diff --git a/src/strands/agent/agent.py b/src/strands/agent/agent.py
@@ -658,7 +658,10 @@ async def stream_async(
         # Process input and get message to add (if any)
         messages = self._convert_prompt_to_messages(prompt)
 
-        self.trace_span = self._start_agent_trace_span(messages)
+        self.trace_span = self._start_agent_trace_span(
+            messages,
+            all_tools_config=self.tool_registry.get_all_tools_config() or {},
+        )
 
         with trace_api.use_span(self.trace_span):
             try:
@@ -922,22 +925,41 @@ def _record_tool_execution(
         self._append_message(tool_result_msg)
         self._append_message(assistant_msg)
 
-    def _start_agent_trace_span(self, messages: Messages) -> trace_api.Span:
+    def _start_agent_trace_span(self, messages: Messages, all_tools_config: Optional[dict] = None) -> trace_api.Span:
         """Starts a trace span for the agent.
 
         Args:
             messages: The input messages.
+            all_tools_config: Tool specs keyed by tool name; attached to the span only when the tracer opts in.
         """
         model_id = self.model.config.get("model_id") if hasattr(self.model, "config") else None
-        return self.tracer.start_agent_span(
+        span = self.tracer.start_agent_span(
             messages=messages,
             agent_name=self.name,
             model_id=model_id,
-            tools=self.tool_names,
             system_prompt=self.system_prompt,
             custom_trace_attributes=self.trace_attributes,
         )
 
+        if self.tracer.include_tool_definitions and all_tools_config:
+            try:
+                tool_details = [
+                    {
+                        "name": name,
+                        "description": spec.get("description"),
+                        "inputSchema": spec.get("inputSchema"),
+                        "outputSchema": spec.get("outputSchema"),
+                    }
+                    for name, spec in all_tools_config.items()
+                ]
+                serialized_tools = serialize(tool_details)
+                span.set_attribute("gen_ai.tool.definitions", serialized_tools)
+            except Exception:
+                # Telemetry is best-effort: log the failure and still return the span to the caller
+                logger.exception("failed to attach tool metadata to agent span")
+
+        return span
+
     def _end_agent_trace_span(
         self,
         response: Optional[AgentResult] = None,
diff --git a/src/strands/telemetry/config.py b/src/strands/telemetry/config.py
@@ -89,6 +89,8 @@ def __init__(
         Args:
             tracer_provider: Optional pre-configured tracer provider.
                 If None, a new one will be created and set as global.
+            include_tool_definitions: Whether to include tool definitions in traces.
+                Defaults to False.
 
         The instance is ready to use immediately after initialization, though
         trace exporters must be configured separately using the setup methods.
diff --git a/src/strands/telemetry/tracer.py b/src/strands/telemetry/tracer.py
@@ -81,9 +81,7 @@ class Tracer:
     are sent to the OTLP endpoint.
     """
 
-    def __init__(
-        self,
-    ) -> None:
+    def __init__(self) -> None:
         """Initialize the tracer."""
         self.service_name = __name__
         self.tracer_provider: Optional[trace_api.TracerProvider] = None
@@ -92,17 +90,18 @@ def __init__(
         ThreadingInstrumentor().instrument()
 
         # Read OTEL_SEMCONV_STABILITY_OPT_IN environment variable
-        self.use_latest_genai_conventions = self._parse_semconv_opt_in()
+        opt_in_values = self._parse_semconv_opt_in()
+        self.use_latest_genai_conventions = "gen_ai_latest_experimental" in opt_in_values
+        self.include_tool_definitions = "gen_ai_tool_definitions" in opt_in_values
 
-    def _parse_semconv_opt_in(self) -> bool:
+    def _parse_semconv_opt_in(self) -> set[str]:
         """Parse the OTEL_SEMCONV_STABILITY_OPT_IN environment variable.
 
         Returns:
-            Set of opt-in values from the environment variable
+            The comma-separated opt-in values, whitespace-stripped, with empty entries dropped.
         """
         opt_in_env = os.getenv("OTEL_SEMCONV_STABILITY_OPT_IN", "")
-
-        return "gen_ai_latest_experimental" in opt_in_env
+        return {value.strip() for value in opt_in_env.split(",") if value.strip()}
 
     def _start_span(
         self,
diff --git a/tests/strands/agent/test_agent.py b/tests/strands/agent/test_agent.py
@@ -1353,7 +1353,6 @@ def test_agent_call_creates_and_ends_span_on_success(mock_get_tracer, mock_model
         messages=[{"content": [{"text": "test prompt"}], "role": "user"}],
         agent_name="Strands Agents",
         model_id=unittest.mock.ANY,
-        tools=agent.tool_names,
         system_prompt=agent.system_prompt,
         custom_trace_attributes=agent.trace_attributes,
     )
@@ -1387,7 +1386,6 @@ async def test_event_loop(*args, **kwargs):
         messages=[{"content": [{"text": "test prompt"}], "role": "user"}],
         agent_name="Strands Agents",
         model_id=unittest.mock.ANY,
-        tools=agent.tool_names,
         system_prompt=agent.system_prompt,
         custom_trace_attributes=agent.trace_attributes,
     )
@@ -1425,7 +1423,6 @@ def test_agent_call_creates_and_ends_span_on_exception(mock_get_tracer, mock_mod
         messages=[{"content": [{"text": "test prompt"}], "role": "user"}],
         agent_name="Strands Agents",
         model_id=unittest.mock.ANY,
-        tools=agent.tool_names,
         system_prompt=agent.system_prompt,
         custom_trace_attributes=agent.trace_attributes,
     )
@@ -1461,7 +1458,6 @@ async def test_agent_stream_async_creates_and_ends_span_on_exception(mock_get_tr
         messages=[{"content": [{"text": "test prompt"}], "role": "user"}],
         agent_name="Strands Agents",
         model_id=unittest.mock.ANY,
-        tools=agent.tool_names,
         system_prompt=agent.system_prompt,
         custom_trace_attributes=agent.trace_attributes,
     )
@@ -2162,6 +2158,68 @@ def shell(command: str):
     assert agent.messages[-1] == {"content": [{"text": "I invoked a tool!"}], "role": "assistant"}
 
 
+def test_agent_does_not_include_tools_in_trace_by_default(tool_decorated, monkeypatch):
+    """Verify that by default, the agent does not add tool specs to the trace."""
+    monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "")
+    with unittest.mock.patch("strands.agent.agent.get_tracer") as mock_get_tracer:
+        # Tracer parses OTEL_SEMCONV_STABILITY_OPT_IN in __init__, so constructing
+        # a fresh instance after setenv is enough to observe the value set above.
+        # Do not importlib.reload() the module here: that re-executes its
+        # import-time code and rebinds its classes for the rest of the session,
+        # which can leak into unrelated tests.
+        from strands.telemetry import tracer
+
+        mock_tracer_instance = tracer.Tracer()
+        mock_span = unittest.mock.MagicMock()
+        mock_tracer_instance.start_agent_span = unittest.mock.MagicMock(return_value=mock_span)
+        mock_get_tracer.return_value = mock_tracer_instance
+
+        mock_model = MockedModelProvider([{"role": "assistant", "content": [{"text": "hello!"}]}])
+
+        agent = Agent(tools=[tool_decorated], model=mock_model)
+        agent("test prompt")
+
+        # Check that set_attribute was not called for our specific key
+        called_attributes = [call.args[0] for call in mock_span.set_attribute.call_args_list]
+        assert "gen_ai.tool.definitions" not in called_attributes
+
+
+def test_agent_includes_tools_in_trace_when_enabled(tool_decorated, monkeypatch):
+    """Verify that the agent adds tool specs to the trace when the flag is enabled."""
+    monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_tool_definitions")
+    with unittest.mock.patch("strands.agent.agent.get_tracer") as mock_get_tracer:
+        # Tracer parses OTEL_SEMCONV_STABILITY_OPT_IN in __init__, so constructing
+        # a fresh instance after setenv is enough to observe the value set above.
+        # Do not importlib.reload() the module here: that re-executes its
+        # import-time code and rebinds its classes for the rest of the session,
+        # which can leak into unrelated tests.
+        from strands.telemetry import tracer
+
+        mock_tracer_instance = tracer.Tracer()
+        mock_span = unittest.mock.MagicMock()
+        mock_tracer_instance.start_agent_span = unittest.mock.MagicMock(return_value=mock_span)
+        mock_get_tracer.return_value = mock_tracer_instance
+
+        mock_model = MockedModelProvider([{"role": "assistant", "content": [{"text": "hello!"}]}])
+
+        agent = Agent(tools=[tool_decorated], model=mock_model)
+        agent("test prompt")
+
+        # Verify the correct data is serialized and set as an attribute
+        tool_spec = tool_decorated.tool_spec
+        expected_tool_details = [
+            {
+                "name": tool_spec.get("name"),
+                "description": tool_spec.get("description"),
+                "inputSchema": tool_spec.get("inputSchema"),
+                "outputSchema": tool_spec.get("outputSchema"),
+            }
+        ]
+        expected_json = serialize(expected_tool_details)
+
+        mock_span.set_attribute.assert_any_call("gen_ai.tool.definitions", expected_json)
+
+
 @pytest.mark.parametrize(
     "content, expected",
     [