CHANGELOG.md: 3 additions & 0 deletions
@@ -42,6 +42,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
([#3882](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3882))
- `opentelemetry-instrumentation-aiohttp-server`: delay initialization of tracer, meter and excluded urls to instrumentation for testability
([#3836](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3836))
- `opentelemetry-instrumentation-elasticsearch`: Enhance elasticsearch query body sanitization
([#3919](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3919))


## Version 1.38.0/0.59b0 (2025-10-16)

@@ -13,52 +13,25 @@
# limitations under the License.
import json

sanitized_keys = (
"message",
"should",
"filter",
"query",
"queries",
"intervals",
"match",
)
sanitized_value = "?"


# pylint: disable=C0103
def _flatten_dict(d, parent_key=""):
items = []
for k, v in d.items():
new_key = parent_key + "." + k if parent_key else k
# recursive call _flatten_dict for a non-empty dict value
if isinstance(v, dict) and v:
items.extend(_flatten_dict(v, new_key).items())
else:
items.append((new_key, v))
return dict(items)


def _unflatten_dict(d):
res = {}
for k, v in d.items():
keys = k.split(".")
d = res
for key in keys[:-1]:
if key not in d:
d[key] = {}
d = d[key]
d[keys[-1]] = v
return res
def _mask_leaf_nodes(obj):
"""
Recursively traverses a JSON-like structure and masks leaf node values.
Leaf nodes are values that are neither a dict nor a list.
"""
if isinstance(obj, dict):
return {key: _mask_leaf_nodes(value) for key, value in obj.items()}
if isinstance(obj, list):
return [_mask_leaf_nodes(item) for item in obj]
# Mask leaf node
return sanitized_value


def sanitize_body(body) -> str:
if isinstance(body, str):
body = json.loads(body)

flatten_body = _flatten_dict(body)

for key in flatten_body:
if key.endswith(sanitized_keys):
flatten_body[key] = sanitized_value

return str(_unflatten_dict(flatten_body))
masked_body = _mask_leaf_nodes(body)
return str(masked_body)
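
With leaf-node masking, `sanitize_body` no longer matches a fixed list of sensitive key names; every scalar value in the (possibly JSON-encoded) body is replaced with `"?"` while the key structure is preserved. A minimal usage sketch follows; the import path is an assumption based on the package layout and is not shown in this diff.

```python
# Usage sketch only: the import path below is assumed from the layout of
# opentelemetry-instrumentation-elasticsearch and may differ.
from opentelemetry.instrumentation.elasticsearch.utils import sanitize_body

# Dict bodies keep their key structure; only leaf values are masked.
body = {"query": {"match": {"message": {"query": "find this text"}}}, "size": 10}
print(sanitize_body(body))
# {'query': {'match': {'message': {'query': '?'}}}, 'size': '?'}

# A JSON string body is decoded first, then masked the same way.
print(sanitize_body('{"query": {"term": {"user_email": "john.doe@company.com"}}}'))
# {'query': {'term': {'user_email': '?'}}}
```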
@@ -18,11 +18,11 @@ class Index:
"doc": {
"properties": {
"title": {
"analyzer": "snowball",
"fields": {"raw": {"type": "keyword"}},
"type": "text",
"analyzer": "?",
"fields": {"raw": {"type": "?"}},
"type": "?",
},
"body": {"analyzer": "snowball", "type": "text"},
"body": {"analyzer": "?", "type": "?"},
}
}
}
@@ -17,11 +17,11 @@ class Index:
"mappings": {
"properties": {
"title": {
"analyzer": "snowball",
"fields": {"raw": {"type": "keyword"}},
"type": "text",
"analyzer": "?",
"fields": {"raw": {"type": "?"}},
"type": "?",
},
"body": {"analyzer": "snowball", "type": "text"},
"body": {"analyzer": "?", "type": "?"},
}
}
}
@@ -29,11 +29,11 @@ class Index:
"mappings": {
"properties": {
"title": {
"analyzer": "snowball",
"fields": {"raw": {"type": "keyword"}},
"type": "text",
"analyzer": "?",
"fields": {"raw": {"type": "?"}},
"type": "?",
},
"body": {"analyzer": "snowball", "type": "text"},
"body": {"analyzer": "?", "type": "?"},
}
}
}
@@ -44,22 +44,149 @@
}
}

term_query = {
"query": {
"bool": {
"must": [
{"term": {"user_email": "john.doe@company.com"}},
{"term": {"ssn": "123-45-6789"}},
{"term": {"credit_card": "4111-1111-1111-1111"}},
]
}
}
}

interval_query_sanitized = {
"query": {
"intervals": {
"my_text": {"all_of": {"ordered": True, "intervals": "?"}}
"my_text": {
"all_of": {
"ordered": "?",
"intervals": [
{
"match": {
"query": "?",
"max_gaps": "?",
"ordered": "?",
}
},
{
"any_of": {
"intervals": [
{"match": {"query": "?"}},
{"match": {"query": "?"}},
]
}
},
],
}
}
}
}
}

match_query_sanitized = {"query": {"match": {"message": {"query": "?"}}}}

filter_query_sanitized = {
"query": {
"bool": {
"must": [
{"match": {"title": "Search"}},
{"match": {"content": "Elasticsearch"}},
{"match": {"title": "?"}},
{"match": {"content": "?"}},
],
"filter": "?",
"filter": [
{"term": {"status": "?"}},
{"range": {"publish_date": {"gte": "?"}}},
],
}
}
}

term_query_sanitized = {
"query": {
"bool": {
"must": [
{"term": {"user_email": "?"}},
{"term": {"ssn": "?"}},
{"term": {"credit_card": "?"}},
]
}
}
}

aggregation_query = {
"query": {"match_all": {}},
"aggs": {
"price_ranges": {
"range": {
"field": "price",
"ranges": [
{"to": 50, "key": "cheap"},
{"from": 50, "to": 100, "key": "medium"},
{"from": 100, "key": "expensive"},
],
}
},
"avg_price": {"avg": {"field": "price"}},
"top_tags": {
"terms": {"field": "tags", "size": 10, "order": {"_count": "desc"}}
},
},
}

aggregation_query_sanitized = {
"query": {"match_all": {}},
"aggs": {
"price_ranges": {
"range": {
"field": "?",
"ranges": [
{"to": "?", "key": "?"},
{"from": "?", "to": "?", "key": "?"},
{"from": "?", "key": "?"},
],
}
},
"avg_price": {"avg": {"field": "?"}},
"top_tags": {
"terms": {"field": "?", "size": "?", "order": {"_count": "?"}}
},
},
}

script_query = {
"query": {
"script": {
"script": {
"source": "doc['price'].value > params.threshold",
"lang": "painless",
"params": {"threshold": 100},
}
}
},
"script_fields": {
"discounted_price": {
"script": {
"source": "doc['price'].value * params.discount",
"params": {"discount": 0.9},
}
}
},
}

script_query_sanitized = {
"query": {
"script": {
"script": {
"source": "?",
"lang": "?",
"params": {"threshold": "?"},
}
}
},
"script_fields": {
"discounted_price": {
"script": {"source": "?", "params": {"discount": "?"}}
}
},
}
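
Each fixture above pairs a raw query with the masked form the tests expect. The aggregation and script bodies are worth noting: none of their flattened keys end with an entry of the old `sanitized_keys` tuple, so the previous approach left values such as the script source and `params` untouched, whereas leaf masking sanitizes them too. Below is a hedged sketch of how these pairs are checked, mirroring the assertions added to `test_body_sanitization` further down (the `sanitize_body` import path is an assumption).

```python
# sanitization_queries is the fixture module shown above; the sanitize_body
# import path is assumed and may differ in the actual package.
from opentelemetry.instrumentation.elasticsearch.utils import sanitize_body

import sanitization_queries

pairs = [
    (sanitization_queries.term_query, sanitization_queries.term_query_sanitized),
    (
        sanitization_queries.aggregation_query,
        sanitization_queries.aggregation_query_sanitized,
    ),
    (sanitization_queries.script_query, sanitization_queries.script_query_sanitized),
]

for raw, masked in pairs:
    # sanitize_body returns str() of the masked dict, so the expected value
    # is the str() of the corresponding *_sanitized fixture.
    assert sanitize_body(raw) == str(masked)
```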
@@ -83,7 +83,9 @@ class TestElasticsearchIntegration(TestBase):
"elasticsearch.url": "/test-index/_search",
"elasticsearch.method": helpers.dsl_search_method,
"elasticsearch.target": "test-index",
DB_STATEMENT: str({"query": {"bool": {"filter": "?"}}}),
DB_STATEMENT: str(
{"query": {"bool": {"filter": [{"term": {"author": "?"}}]}}}
),
}

create_attributes = {
@@ -419,8 +421,8 @@ def test_dsl_index(self, request_mock):
self.assertEqual(
literal_eval(span.attributes[DB_STATEMENT]),
{
"body": "A few words here, a few words there",
"title": "About searching",
"body": "?",
"title": "?",
},
)

@@ -582,6 +584,18 @@ def test_body_sanitization(self, _):
sanitize_body(json.dumps(sanitization_queries.interval_query)),
str(sanitization_queries.interval_query_sanitized),
)
self.assertEqual(
sanitize_body(sanitization_queries.term_query),
str(sanitization_queries.term_query_sanitized),
)
self.assertEqual(
sanitize_body(sanitization_queries.aggregation_query),
str(sanitization_queries.aggregation_query_sanitized),
)
self.assertEqual(
sanitize_body(sanitization_queries.script_query),
str(sanitization_queries.script_query_sanitized),
)

def test_bulk(self, request_mock):
request_mock.return_value = helpers.mock_response("{}")