CHANGELOG.md: 3 additions & 0 deletions
@@ -42,6 +42,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
([#3882](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3882))
- `opentelemetry-instrumentation-aiohttp-server`: delay initialization of tracer, meter and excluded urls to instrumentation for testability
([#3836](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3836))
- `opentelemetry-instrumentation-elasticsearch`: Enhance elasticsearch query body sanitization
([#3919](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3919))


## Version 1.38.0/0.59b0 (2025-10-16)

@@ -13,52 +13,25 @@
# limitations under the License.
import json

sanitized_keys = (
"message",
"should",
"filter",
"query",
"queries",
"intervals",
"match",
)
sanitized_value = "?"


# pylint: disable=C0103
def _flatten_dict(d, parent_key=""):
items = []
for k, v in d.items():
new_key = parent_key + "." + k if parent_key else k
# recursive call _flatten_dict for a non-empty dict value
if isinstance(v, dict) and v:
items.extend(_flatten_dict(v, new_key).items())
else:
items.append((new_key, v))
return dict(items)


def _unflatten_dict(d):
res = {}
for k, v in d.items():
keys = k.split(".")
d = res
for key in keys[:-1]:
if key not in d:
d[key] = {}
d = d[key]
d[keys[-1]] = v
return res
def _mask_leaf_nodes(obj):
"""
Recursively traverses a JSON-like structure and masks leaf node values.
Leaf nodes are values that are neither a dict nor a list.
"""
if isinstance(obj, dict):
return {key: _mask_leaf_nodes(value) for key, value in obj.items()}
if isinstance(obj, list):
return [_mask_leaf_nodes(item) for item in obj]
# Mask leaf node
return sanitized_value


def sanitize_body(body) -> str:
if isinstance(body, str):
body = json.loads(body)

flatten_body = _flatten_dict(body)

for key in flatten_body:
if key.endswith(sanitized_keys):
flatten_body[key] = sanitized_value

return str(_unflatten_dict(flatten_body))
masked_body = _mask_leaf_nodes(body)
return str(masked_body)
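
With leaf-node masking, `sanitize_body` no longer matches a fixed list of sensitive key names; every scalar value in the (possibly JSON-encoded) body is replaced with `"?"` while the key structure is preserved. A minimal usage sketch follows; the import path is an assumption based on the package layout and is not shown in this diff.

```python
# Usage sketch only: the import path below is assumed from the layout of
# opentelemetry-instrumentation-elasticsearch and may differ.
from opentelemetry.instrumentation.elasticsearch.utils import sanitize_body

# Dict bodies keep their key structure; only leaf values are masked.
body = {"query": {"match": {"message": {"query": "find this text"}}}, "size": 10}
print(sanitize_body(body))
# {'query': {'match': {'message': {'query': '?'}}}, 'size': '?'}

# A JSON string body is decoded first, then masked the same way.
print(sanitize_body('{"query": {"term": {"user_email": "john.doe@company.com"}}}'))
# {'query': {'term': {'user_email': '?'}}}
```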
@@ -18,11 +18,11 @@ class Index:
"doc": {
"properties": {
"title": {
"analyzer": "snowball",
"fields": {"raw": {"type": "keyword"}},
"type": "text",
"analyzer": "?",
"fields": {"raw": {"type": "?"}},
"type": "?",
},
"body": {"analyzer": "snowball", "type": "text"},
"body": {"analyzer": "?", "type": "?"},
}
}
}
@@ -17,11 +17,11 @@ class Index:
"mappings": {
"properties": {
"title": {
"analyzer": "snowball",
"fields": {"raw": {"type": "keyword"}},
"type": "text",
"analyzer": "?",
"fields": {"raw": {"type": "?"}},
"type": "?",
},
"body": {"analyzer": "snowball", "type": "text"},
"body": {"analyzer": "?", "type": "?"},
}
}
}
@@ -29,11 +29,11 @@ class Index:
"mappings": {
"properties": {
"title": {
"analyzer": "snowball",
"fields": {"raw": {"type": "keyword"}},
"type": "text",
"analyzer": "?",
"fields": {"raw": {"type": "?"}},
"type": "?",
},
"body": {"analyzer": "snowball", "type": "text"},
"body": {"analyzer": "?", "type": "?"},
}
}
}
@@ -44,22 +44,149 @@
}
}

term_query = {
"query": {
"bool": {
"must": [
{"term": {"user_email": "john.doe@company.com"}},
{"term": {"ssn": "123-45-6789"}},
{"term": {"credit_card": "4111-1111-1111-1111"}},
]
}
}
}

interval_query_sanitized = {
"query": {
"intervals": {
"my_text": {"all_of": {"ordered": True, "intervals": "?"}}
"my_text": {
"all_of": {
"ordered": "?",
"intervals": [
{
"match": {
"query": "?",
"max_gaps": "?",
"ordered": "?",
}
},
{
"any_of": {
"intervals": [
{"match": {"query": "?"}},
{"match": {"query": "?"}},
]
}
},
],
}
}
}
}
}

match_query_sanitized = {"query": {"match": {"message": {"query": "?"}}}}

filter_query_sanitized = {
"query": {
"bool": {
"must": [
{"match": {"title": "Search"}},
{"match": {"content": "Elasticsearch"}},
{"match": {"title": "?"}},
{"match": {"content": "?"}},
],
"filter": "?",
"filter": [
{"term": {"status": "?"}},
{"range": {"publish_date": {"gte": "?"}}},
],
}
}
}

term_query_sanitized = {
"query": {
"bool": {
"must": [
{"term": {"user_email": "?"}},
{"term": {"ssn": "?"}},
{"term": {"credit_card": "?"}},
]
}
}
}

aggregation_query = {
"query": {"match_all": {}},
"aggs": {
"price_ranges": {
"range": {
"field": "price",
"ranges": [
{"to": 50, "key": "cheap"},
{"from": 50, "to": 100, "key": "medium"},
{"from": 100, "key": "expensive"},
],
}
},
"avg_price": {"avg": {"field": "price"}},
"top_tags": {
"terms": {"field": "tags", "size": 10, "order": {"_count": "desc"}}
},
},
}

aggregation_query_sanitized = {
"query": {"match_all": {}},
"aggs": {
"price_ranges": {
"range": {
"field": "?",
"ranges": [
{"to": "?", "key": "?"},
{"from": "?", "to": "?", "key": "?"},
{"from": "?", "key": "?"},
],
}
},
"avg_price": {"avg": {"field": "?"}},
"top_tags": {
"terms": {"field": "?", "size": "?", "order": {"_count": "?"}}
},
},
}

script_query = {
"query": {
"script": {
"script": {
"source": "doc['price'].value > params.threshold",
"lang": "painless",
"params": {"threshold": 100},
}
}
},
"script_fields": {
"discounted_price": {
"script": {
"source": "doc['price'].value * params.discount",
"params": {"discount": 0.9},
}
}
},
}

script_query_sanitized = {
"query": {
"script": {
"script": {
"source": "?",
"lang": "?",
"params": {"threshold": "?"},
}
}
},
"script_fields": {
"discounted_price": {
"script": {"source": "?", "params": {"discount": "?"}}
}
},
}
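
Each fixture above pairs a raw query with the masked form the tests expect. The aggregation and script bodies are worth noting: none of their flattened keys end with an entry of the old `sanitized_keys` tuple, so the previous approach left values such as the script source and `params` untouched, whereas leaf masking sanitizes them too. Below is a hedged sketch of how these pairs are checked, mirroring the assertions added to `test_body_sanitization` further down (the `sanitize_body` import path is an assumption).

```python
# sanitization_queries is the fixture module shown above; the sanitize_body
# import path is assumed and may differ in the actual package.
from opentelemetry.instrumentation.elasticsearch.utils import sanitize_body

import sanitization_queries

pairs = [
    (sanitization_queries.term_query, sanitization_queries.term_query_sanitized),
    (
        sanitization_queries.aggregation_query,
        sanitization_queries.aggregation_query_sanitized,
    ),
    (sanitization_queries.script_query, sanitization_queries.script_query_sanitized),
]

for raw, masked in pairs:
    # sanitize_body returns str() of the masked dict, so the expected value
    # is the str() of the corresponding *_sanitized fixture.
    assert sanitize_body(raw) == str(masked)
```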
@@ -83,7 +83,9 @@ class TestElasticsearchIntegration(TestBase):
"elasticsearch.url": "/test-index/_search",
"elasticsearch.method": helpers.dsl_search_method,
"elasticsearch.target": "test-index",
DB_STATEMENT: str({"query": {"bool": {"filter": "?"}}}),
DB_STATEMENT: str(
{"query": {"bool": {"filter": [{"term": {"author": "?"}}]}}}
),
}

create_attributes = {
@@ -419,8 +421,8 @@ def test_dsl_index(self, request_mock):
self.assertEqual(
literal_eval(span.attributes[DB_STATEMENT]),
{
"body": "A few words here, a few words there",
"title": "About searching",
"body": "?",
"title": "?",
},
)

@@ -582,6 +584,18 @@ def test_body_sanitization(self, _):
sanitize_body(json.dumps(sanitization_queries.interval_query)),
str(sanitization_queries.interval_query_sanitized),
)
self.assertEqual(
sanitize_body(sanitization_queries.term_query),
str(sanitization_queries.term_query_sanitized),
)
self.assertEqual(
sanitize_body(sanitization_queries.aggregation_query),
str(sanitization_queries.aggregation_query_sanitized),
)
self.assertEqual(
sanitize_body(sanitization_queries.script_query),
str(sanitization_queries.script_query_sanitized),
)

def test_bulk(self, request_mock):
request_mock.return_value = helpers.mock_response("{}")