Skip to content

Commit fc4629a

Browse files
committed
enhance elasticsearch body sanitization
1 parent f9b8d5a commit fc4629a

File tree

3 files changed

+157
-44
lines changed

3 files changed

+157
-44
lines changed

instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py

Lines changed: 14 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -13,52 +13,26 @@
1313
# limitations under the License.
1414
import json
1515

16-
sanitized_keys = (
17-
"message",
18-
"should",
19-
"filter",
20-
"query",
21-
"queries",
22-
"intervals",
23-
"match",
24-
)
2516
sanitized_value = "?"
2617

2718

28-
# pylint: disable=C0103
29-
def _flatten_dict(d, parent_key=""):
30-
items = []
31-
for k, v in d.items():
32-
new_key = parent_key + "." + k if parent_key else k
33-
# recursive call _flatten_dict for a non-empty dict value
34-
if isinstance(v, dict) and v:
35-
items.extend(_flatten_dict(v, new_key).items())
36-
else:
37-
items.append((new_key, v))
38-
return dict(items)
39-
40-
41-
def _unflatten_dict(d):
42-
res = {}
43-
for k, v in d.items():
44-
keys = k.split(".")
45-
d = res
46-
for key in keys[:-1]:
47-
if key not in d:
48-
d[key] = {}
49-
d = d[key]
50-
d[keys[-1]] = v
51-
return res
19+
def _mask_leaf_nodes(obj):
    """Return a copy of ``obj`` with every leaf value replaced by the mask.

    Dicts and lists are walked recursively: dict keys are kept unchanged
    and list length and order are preserved. Any value that is neither a
    dict nor a list is a leaf and is replaced with ``sanitized_value``.
    """
    if isinstance(obj, list):
        return [_mask_leaf_nodes(element) for element in obj]
    if not isinstance(obj, dict):
        # Not a container, so this is a leaf value: hide it.
        return sanitized_value
    masked = {}
    for name, child in obj.items():
        masked[name] = _mask_leaf_nodes(child)
    return masked
5231

5332

5433
def sanitize_body(body) -> str:
    """Return a string form of a request body with all values masked.

    A ``str`` body is parsed as JSON first; any other body is used as
    given. Every leaf value of the resulting structure is then replaced
    by ``sanitized_value`` via ``_mask_leaf_nodes`` and the masked
    structure is rendered with ``str()``.

    If a ``str`` body is not a single valid JSON document (for example a
    newline-delimited payload), the whole body is treated as one leaf and
    the bare ``sanitized_value`` is returned instead of raising.
    """
    if isinstance(body, str):
        try:
            body = json.loads(body)
        except ValueError:
            # json.JSONDecodeError subclasses ValueError. An unparseable
            # body must not raise out of the sanitizer, and its raw text
            # must not be returned, so mask it entirely.
            return sanitized_value

    return str(_mask_leaf_nodes(body))

instrumentation/opentelemetry-instrumentation-elasticsearch/tests/sanitization_queries.py

Lines changed: 131 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,22 +44,149 @@
4444
}
4545
}
4646

47+
term_query = {
48+
"query": {
49+
"bool": {
50+
"must": [
51+
{"term": {"user_email": "john.doe@company.com"}},
52+
{"term": {"ssn": "123-45-6789"}},
53+
{"term": {"credit_card": "4111-1111-1111-1111"}},
54+
]
55+
}
56+
}
57+
}
58+
4759
interval_query_sanitized = {
4860
"query": {
4961
"intervals": {
50-
"my_text": {"all_of": {"ordered": True, "intervals": "?"}}
62+
"my_text": {
63+
"all_of": {
64+
"ordered": "?",
65+
"intervals": [
66+
{
67+
"match": {
68+
"query": "?",
69+
"max_gaps": "?",
70+
"ordered": "?",
71+
}
72+
},
73+
{
74+
"any_of": {
75+
"intervals": [
76+
{"match": {"query": "?"}},
77+
{"match": {"query": "?"}},
78+
]
79+
}
80+
},
81+
],
82+
}
83+
}
5184
}
5285
}
5386
}
87+
5488
match_query_sanitized = {"query": {"match": {"message": {"query": "?"}}}}
89+
5590
filter_query_sanitized = {
5691
"query": {
5792
"bool": {
5893
"must": [
59-
{"match": {"title": "Search"}},
60-
{"match": {"content": "Elasticsearch"}},
94+
{"match": {"title": "?"}},
95+
{"match": {"content": "?"}},
6196
],
62-
"filter": "?",
97+
"filter": [
98+
{"term": {"status": "?"}},
99+
{"range": {"publish_date": {"gte": "?"}}},
100+
],
101+
}
102+
}
103+
}
104+
105+
term_query_sanitized = {
106+
"query": {
107+
"bool": {
108+
"must": [
109+
{"term": {"user_email": "?"}},
110+
{"term": {"ssn": "?"}},
111+
{"term": {"credit_card": "?"}},
112+
]
63113
}
64114
}
65115
}
116+
117+
aggregation_query = {
118+
"query": {"match_all": {}},
119+
"aggs": {
120+
"price_ranges": {
121+
"range": {
122+
"field": "price",
123+
"ranges": [
124+
{"to": 50, "key": "cheap"},
125+
{"from": 50, "to": 100, "key": "medium"},
126+
{"from": 100, "key": "expensive"},
127+
],
128+
}
129+
},
130+
"avg_price": {"avg": {"field": "price"}},
131+
"top_tags": {
132+
"terms": {"field": "tags", "size": 10, "order": {"_count": "desc"}}
133+
},
134+
},
135+
}
136+
137+
aggregation_query_sanitized = {
138+
"query": {"match_all": {}},
139+
"aggs": {
140+
"price_ranges": {
141+
"range": {
142+
"field": "?",
143+
"ranges": [
144+
{"to": "?", "key": "?"},
145+
{"from": "?", "to": "?", "key": "?"},
146+
{"from": "?", "key": "?"},
147+
],
148+
}
149+
},
150+
"avg_price": {"avg": {"field": "?"}},
151+
"top_tags": {
152+
"terms": {"field": "?", "size": "?", "order": {"_count": "?"}}
153+
},
154+
},
155+
}
156+
157+
script_query = {
158+
"query": {
159+
"script": {
160+
"script": {
161+
"source": "doc['price'].value > params.threshold",
162+
"lang": "painless",
163+
"params": {"threshold": 100},
164+
}
165+
}
166+
},
167+
"script_fields": {
168+
"discounted_price": {
169+
"script": {
170+
"source": "doc['price'].value * params.discount",
171+
"params": {"discount": 0.9},
172+
}
173+
}
174+
},
175+
}
176+
177+
script_query_sanitized = {
178+
"query": {
179+
"script": {
180+
"script": {
181+
"source": "?",
182+
"lang": "?",
183+
"params": {"threshold": "?"},
184+
}
185+
}
186+
},
187+
"script_fields": {
188+
"discounted_price": {
189+
"script": {"source": "?", "params": {"discount": "?"}}
190+
}
191+
},
192+
}

instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,18 @@ def test_body_sanitization(self, _):
582582
sanitize_body(json.dumps(sanitization_queries.interval_query)),
583583
str(sanitization_queries.interval_query_sanitized),
584584
)
585+
self.assertEqual(
586+
sanitize_body(sanitization_queries.term_query),
587+
str(sanitization_queries.term_query_sanitized),
588+
)
589+
self.assertEqual(
590+
sanitize_body(sanitization_queries.aggregation_query),
591+
str(sanitization_queries.aggregation_query_sanitized),
592+
)
593+
self.assertEqual(
594+
sanitize_body(sanitization_queries.script_query),
595+
str(sanitization_queries.script_query_sanitized),
596+
)
585597

586598
def test_bulk(self, request_mock):
587599
request_mock.return_value = helpers.mock_response("{}")

0 commit comments

Comments
 (0)