Skip to content

Commit 8733248

Browse files
authored
Add system tests for API Security Custom Data Classification (RFC-0980) (#5468)
Add RFC-0980 system tests: processor overrides v2 (include/exclude), custom scanners with user-defined tags, RC capabilities 16/17, and schema extraction validation.
1 parent 16ae7a8 commit 8733248

File tree

10 files changed

+226
-3
lines changed

10 files changed

+226
-3
lines changed

manifests/cpp_nginx.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@ tests/:
1919
Test_API_Security_RC_ASM_DD_processors: v1.8.0
2020
Test_API_Security_RC_ASM_DD_scanners: v1.8.0
2121
test_apisec_sampling.py: "irrelevant (sampling works differently in proxies: RFC 1035)"
22+
test_custom_data_classification.py:
23+
Test_API_Security_Custom_Data_Classification_Capabilities: missing_feature
24+
Test_API_Security_Custom_Data_Classification_Multiple_Scanners: missing_feature
25+
Test_API_Security_Custom_Data_Classification_Negative: missing_feature
26+
Test_API_Security_Custom_Data_Classification_Processor_Override: missing_feature
27+
Test_API_Security_Custom_Data_Classification_Scanner: missing_feature
2228
test_endpoint_discovery.py: irrelevant (not applicable to proxies)
2329
test_schemas.py:
2430
Test_Scanners: v1.8.0

manifests/dotnet.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ tests/:
1616
Test_API_Security_Sampling_Different_Status: v2.46.0
1717
Test_API_Security_Sampling_Rate: v2.46.0
1818
Test_API_Security_Sampling_With_Delay: v2.50.0
19+
test_custom_data_classification.py:
20+
Test_API_Security_Custom_Data_Classification_Capabilities: missing_feature
21+
Test_API_Security_Custom_Data_Classification_Multiple_Scanners: missing_feature
22+
Test_API_Security_Custom_Data_Classification_Negative: missing_feature
23+
Test_API_Security_Custom_Data_Classification_Processor_Override: missing_feature
24+
Test_API_Security_Custom_Data_Classification_Scanner: missing_feature
1925
test_endpoint_discovery.py:
2026
Test_Endpoint_Discovery: v3.24.0
2127
test_schemas.py:

manifests/golang.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ tests/:
2323
net-http: irrelevant (net-http doesn't handle path params)
2424
net-http-orchestrion: irrelevant (net-http doesn't handle path params)
2525
Test_API_Security_Sampling_With_Delay: missing_feature
26+
test_custom_data_classification.py:
27+
Test_API_Security_Custom_Data_Classification_Capabilities: missing_feature
28+
Test_API_Security_Custom_Data_Classification_Multiple_Scanners: missing_feature
29+
Test_API_Security_Custom_Data_Classification_Negative: missing_feature
30+
Test_API_Security_Custom_Data_Classification_Processor_Override: missing_feature
31+
Test_API_Security_Custom_Data_Classification_Scanner: missing_feature
2632
test_endpoint_discovery.py:
2733
Test_Endpoint_Discovery: missing_feature
2834
test_schemas.py:

manifests/java.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,22 @@ tests/:
7070
spring-boot-3-native: irrelevant (GraalVM. Tracing support only)
7171
vertx3: v1.51.0
7272
vertx4: v1.51.0
73+
test_custom_data_classification.py:
74+
Test_API_Security_Custom_Data_Classification_Capabilities:
75+
'*': missing_feature
76+
spring-boot-3-native: irrelevant (GraalVM. Tracing support only)
77+
Test_API_Security_Custom_Data_Classification_Multiple_Scanners:
78+
'*': missing_feature
79+
spring-boot-3-native: irrelevant (GraalVM. Tracing support only)
80+
Test_API_Security_Custom_Data_Classification_Negative:
81+
'*': missing_feature
82+
spring-boot-3-native: irrelevant (GraalVM. Tracing support only)
83+
Test_API_Security_Custom_Data_Classification_Processor_Override:
84+
'*': missing_feature
85+
spring-boot-3-native: irrelevant (GraalVM. Tracing support only)
86+
Test_API_Security_Custom_Data_Classification_Scanner:
87+
'*': missing_feature
88+
spring-boot-3-native: irrelevant (GraalVM. Tracing support only)
7389
test_endpoint_discovery.py:
7490
Test_Endpoint_Discovery:
7591
'*': missing_feature

manifests/nodejs.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,12 @@ tests/:
9090
Test_API_Security_Sampling_Different_Status: *ref_5_27_0
9191
Test_API_Security_Sampling_Rate: irrelevant (new api security sampling algorithm implemented)
9292
Test_API_Security_Sampling_With_Delay: *ref_5_27_0
93+
test_custom_data_classification.py:
94+
Test_API_Security_Custom_Data_Classification_Capabilities: missing_feature
95+
Test_API_Security_Custom_Data_Classification_Multiple_Scanners: missing_feature
96+
Test_API_Security_Custom_Data_Classification_Negative: missing_feature
97+
Test_API_Security_Custom_Data_Classification_Processor_Override: missing_feature
98+
Test_API_Security_Custom_Data_Classification_Scanner: missing_feature
9399
test_endpoint_discovery.py:
94100
Test_Endpoint_Discovery:
95101
'*': missing_feature

manifests/php.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ tests/:
1616
Test_API_Security_Sampling_Different_Status: v1.11.0
1717
Test_API_Security_Sampling_Rate: irrelevant (new sampling algorithm implemented)
1818
Test_API_Security_Sampling_With_Delay: v1.11.0
19+
test_custom_data_classification.py:
20+
Test_API_Security_Custom_Data_Classification_Capabilities: missing_feature
21+
Test_API_Security_Custom_Data_Classification_Multiple_Scanners: missing_feature
22+
Test_API_Security_Custom_Data_Classification_Negative: missing_feature
23+
Test_API_Security_Custom_Data_Classification_Processor_Override: missing_feature
24+
Test_API_Security_Custom_Data_Classification_Scanner: missing_feature
1925
test_endpoint_discovery.py:
2026
Test_Endpoint_Discovery: missing_feature
2127
test_schemas.py:

manifests/python.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ tests/:
1616
Test_API_Security_Sampling_Different_Status: v2.6.0
1717
Test_API_Security_Sampling_Rate: irrelevant (new api security sampling algorithm implemented)
1818
Test_API_Security_Sampling_With_Delay: v2.6.0
19+
test_custom_data_classification.py:
20+
Test_API_Security_Custom_Data_Classification_Capabilities: missing_feature
21+
Test_API_Security_Custom_Data_Classification_Multiple_Scanners: missing_feature
22+
Test_API_Security_Custom_Data_Classification_Negative: missing_feature
23+
Test_API_Security_Custom_Data_Classification_Processor_Override: missing_feature
24+
Test_API_Security_Custom_Data_Classification_Scanner: missing_feature
1925
test_endpoint_discovery.py:
2026
Test_Endpoint_Discovery:
2127
'*': v3.13.0.dev

manifests/ruby.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ tests/:
2020
rack: irrelevant (rack does not have path parameters support)
2121
Test_API_Security_Sampling_Rate: irrelevant
2222
Test_API_Security_Sampling_With_Delay: v2.18.0
23+
test_custom_data_classification.py:
24+
Test_API_Security_Custom_Data_Classification_Capabilities: missing_feature
25+
Test_API_Security_Custom_Data_Classification_Multiple_Scanners: missing_feature
26+
Test_API_Security_Custom_Data_Classification_Negative: missing_feature
27+
Test_API_Security_Custom_Data_Classification_Processor_Override: missing_feature
28+
Test_API_Security_Custom_Data_Classification_Scanner: missing_feature
2329
test_endpoint_discovery.py:
2430
Test_Endpoint_Discovery:
2531
"*": v2.22.0.dev
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
# Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0.
2+
# This product includes software developed at Datadog (https://www.datadoghq.com/).
3+
# Copyright 2021 Datadog, Inc.
4+
5+
from utils import interfaces, rfc, scenarios, weblog, features, logger
6+
from utils.dd_constants import Capabilities
7+
8+
from tests.appsec.api_security.utils import BaseAppsecApiSecurityRcTest
9+
10+
11+
def get_schema(request, address):
    """Return the API security schema recorded for *request*, or None.

    Iterates over the spans the library interface reports for *request* and
    returns the first non-None meta value stored under
    ``"_dd.appsec.s." + address``. Every span lacking that key is logged at
    info level; None is returned when no span carries the schema.
    """
    for _, _, span in interfaces.library.get_spans(request):
        span_meta = span.get("meta", {})
        key = "_dd.appsec.s." + address
        schema = span_meta.get(key)
        if schema is None:
            logger.info(f"Schema not found in span meta for {key}")
            continue
        return schema
    return None
22+
23+
24+
@rfc("https://docs.google.com/document/d/1wBrd-ShGoA9-aP96o0VIe46eBgw73GL1315R8QjuMoc/edit?tab=t.0")
@scenarios.appsec_api_security_rc
@features.api_security_configuration
class Test_API_Security_Custom_Data_Classification_Capabilities(BaseAppsecApiSecurityRcTest):
    """Check that the library reports the custom data classification RC capabilities"""

    def setup_capabilities_check(self):
        """Run the scenario setup inherited from the base class before the assertions"""
        self.setup_scenario()

    def test_capabilities_check(self):
        """ASM_PROCESSOR_OVERRIDES and ASM_CUSTOM_DATA_SCANNERS must both be advertised"""
        expected_capabilities = (
            Capabilities.ASM_PROCESSOR_OVERRIDES,  # capability 16
            Capabilities.ASM_CUSTOM_DATA_SCANNERS,  # capability 17
        )
        for capability in expected_capabilities:
            interfaces.library.assert_rc_capability(capability)
41+
42+
43+
@rfc("https://docs.google.com/document/d/1wBrd-ShGoA9-aP96o0VIe46eBgw73GL1315R8QjuMoc/edit?tab=t.0")
@scenarios.appsec_api_security_rc
@features.api_security_configuration
class Test_API_Security_Custom_Data_Classification_Processor_Override(BaseAppsecApiSecurityRcTest):
    """Test API Security - Custom Data Classification with Processor Override"""

    def setup_request_method(self):
        """Run the scenario setup, then send a GET whose query string carries ``testcard``"""
        self.setup_scenario()
        self.request = weblog.get("/tag_value/api_rc_processor/200?testcard=1234567890")

    def test_request_method(self):
        """Verify the ``testcard`` query parameter is present in the ``req.querytest`` schema"""
        schema = get_schema(self.request, "req.querytest")
        assert self.request.status_code == 200
        assert schema is not None, "Schema should be present in the span"
        assert isinstance(schema, list), "Schema should be a list"

        # An empty schema must fail the test: guarding the check below behind
        # `if len(schema) > 0` would let a library that extracts nothing pass.
        assert len(schema) > 0, "Schema should not be empty"
        assert "testcard" in schema[0], "testcard parameter should be in the schema"
64+
65+
66+
@rfc("https://docs.google.com/document/d/1wBrd-ShGoA9-aP96o0VIe46eBgw73GL1315R8QjuMoc/edit?tab=t.0")
@scenarios.appsec_api_security_rc
@features.api_security_configuration
class Test_API_Security_Custom_Data_Classification_Scanner(BaseAppsecApiSecurityRcTest):
    """Test API Security - Custom Data Classification with Custom Scanner"""

    def _find_classification(self, node):
        """Return the first dict with a ``category`` key nested in the lists of *node*, else None.

        The field entry is expected to look like
        ``[[[value_length, classification]], metadata]``; searching the nested
        lists avoids hard-coding that depth.
        NOTE(review): other entry shapes (e.g. a scalar ``[type, classification]``)
        are presumed possible depending on the weblog — confirm.
        """
        if isinstance(node, dict):
            return node if "category" in node else None
        if isinstance(node, list):
            for child in node:
                found = self._find_classification(child)
                if found is not None:
                    return found
        return None

    def setup_request_method(self):
        """Run the scenario setup, then POST a form body carrying ``testcard``"""
        self.setup_scenario()
        self.request = weblog.post("/tag_value/api_rc_scanner/200", data={"testcard": "1234567890"})

    def test_request_method(self):
        """Verify custom scanner detects and classifies sensitive data in request body"""
        schema = get_schema(self.request, "req.bodytest")
        assert self.request.status_code == 200
        assert schema is not None, "Schema should be present in the span"
        assert isinstance(schema, list), "Schema should be a list"

        # Every step below is asserted rather than guarded by `if`: a missing
        # field or missing classification must fail, not silently pass.
        assert len(schema) > 0, "Schema should not be empty"
        assert "testcard" in schema[0], "testcard field should be in the schema"

        classification = self._find_classification(schema[0]["testcard"])
        assert classification is not None, "testcard should carry a classification"
        assert classification["category"] == "testcategory", "Category should be testcategory"
        assert "type" in classification, "Classification should include type"
        assert classification["type"] == "card", "Type should be card"
99+
100+
101+
@rfc("https://docs.google.com/document/d/1wBrd-ShGoA9-aP96o0VIe46eBgw73GL1315R8QjuMoc/edit?tab=t.0")
@scenarios.appsec_api_security_rc
@features.api_security_configuration
class Test_API_Security_Custom_Data_Classification_Multiple_Scanners(BaseAppsecApiSecurityRcTest):
    """Test API Security - Multiple Custom Scanners"""

    def setup_request_method(self):
        """Run the scenario setup, then POST a form body with both ``mail`` and ``testcard``"""
        self.setup_scenario()
        # NOTE(review): the address literal reads "[email protected]" in the reviewed
        # copy, which is not an email at all; a valid placeholder is used here.
        # Confirm the intended address against the repository.
        self.request = weblog.post(
            "/tag_value/api_rc_scanner/200", data={"mail": "test@example.com", "testcard": "1234567890"}
        )

    def test_request_method(self):
        """Verify both the ``mail`` and ``testcard`` fields appear in the ``req.bodytest`` schema"""
        schema = get_schema(self.request, "req.bodytest")
        assert self.request.status_code == 200
        assert schema is not None, "Schema should be present in the span"
        assert isinstance(schema, list), "Schema should be a list"

        # An empty schema must fail the test instead of skipping the field checks.
        assert len(schema) > 0, "Schema should not be empty"
        # Check for email detection by standard scanner
        assert "mail" in schema[0], "mail field should be in the schema"
        # Check for testcard detection by custom scanner
        assert "testcard" in schema[0], "testcard field should be in the schema"
126+
127+
128+
@rfc("https://docs.google.com/document/d/1wBrd-ShGoA9-aP96o0VIe46eBgw73GL1315R8QjuMoc/edit?tab=t.0")
@scenarios.appsec_api_security_rc
@features.api_security_configuration
class Test_API_Security_Custom_Data_Classification_Negative(BaseAppsecApiSecurityRcTest):
    """Negative cases for API Security custom data classification"""

    def setup_request_method(self):
        """Run the scenario setup, then POST a form body holding only an ordinary field"""
        self.setup_scenario()
        self.request = weblog.post("/tag_value/api_rc_scanner/200", data={"normalfield": "normalvalue"})

    def test_request_method(self):
        """Ordinary data must not receive a ``pii`` or ``testcategory`` classification"""
        schema = get_schema(self.request, "req.bodytest")
        assert self.request.status_code == 200
        assert schema is not None, "Schema should be present in the span"

        # Each early return below is a passing outcome: there is simply no
        # classification attached to the field to object to.
        if len(schema) == 0 or "normalfield" not in schema[0]:
            return

        # Expected entry layout: [[[value_length, classification]], metadata]
        entry = schema[0]["normalfield"]
        if not isinstance(entry, list) or len(entry) == 0:
            return

        values = entry[0]
        if not (isinstance(values, list) and len(values) > 0 and isinstance(values[0], list)):
            return
        if len(values[0]) <= 1:
            return

        tags = values[0][1]
        if isinstance(tags, dict) and "category" in tags:
            sensitive_categories = ["pii", "testcategory"]
            assert tags["category"] not in sensitive_categories, "Normal fields should not be classified as sensitive"

tests/appsec/api_security/utils.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,16 @@ def setup_scenario(self) -> None:
1010
rc_state.set_config(
1111
"datadog/2/ASM/ASM-base/config",
1212
{
13-
"processor_override": [
14-
{"target": ["extract-content"], "scanners": ["test-scanner-002", "test-scanner-custom-001"]}
13+
"processor_overrides": [
14+
{
15+
"target": [{"id": "extract-content"}],
16+
"scanners": {
17+
"include": [{"id": "test-scanner-001"}, {"id": "test-scanner-custom-001"}],
18+
"exclude": [],
19+
},
20+
}
1521
],
16-
"custom_scanners": [
22+
"scanners": [
1723
{
1824
"id": "test-scanner-custom-001",
1925
"name": "Custom scanner",

0 commit comments

Comments
 (0)