docs(examples): add Colang 2.0 example for sensitive data detection (#1301)

lapinek · web-flow · commit 5d33e2b783d8 · 2025-08-18T13:14:25.000+02:00
* fix: Add explicit global declarations in sensitive_data_detection Colang v2 flows
diff --git a/examples/configs/sensitive_data_detection_v2/README.md b/examples/configs/sensitive_data_detection_v2/README.md
@@ -0,0 +1,41 @@
+# Presidio-based Sensitive Data Detection Example
+
+This example demonstrates how to detect and redact sensitive data using [Presidio](https://github.com/Microsoft/presidio).
+
+## Prerequisites
+
+- `Presidio`
+
+  You can install it with:
+
+  ```bash
+  poetry run pip install presidio-analyzer presidio-anonymizer
+  ```
+
+  > **Note**
+  >
+  > Installing Presidio may replace `numpy` with a version that this project does not support. To restore the supported version, run:
+  > ```bash
+  > poetry install
+  > ```
+
+- `en_core_web_lg` spaCy model
+
+  You can download it with:
+
+  ```bash
+  poetry run python -m spacy download en_core_web_lg
+  ```
+
+## Running the example
+
+To test this configuration, run the CLI chat from the `examples/configs/sensitive_data_detection_v2` directory:
+
+```bash
+poetry run nemoguardrails chat --config=.
+```
+
+## Documentation
+
+- [Presidio-based Sensitive Data Detection configuration](../../../docs/user-guides/guardrails-library.md#presidio-based-sensitive-data-detection)
+- [Presidio Integration guide](../../../docs/user-guides/community/presidio.md)
diff --git a/examples/configs/sensitive_data_detection_v2/config.yml b/examples/configs/sensitive_data_detection_v2/config.yml
@@ -0,0 +1,29 @@
+colang_version: "2.x"
+
+models:
+  - type: main
+    engine: openai
+    model: gpt-4o-mini
+
+rails:
+  config:
+    sensitive_data_detection:
+      input:
+        score_threshold: 0.4
+        entities:
+          - PERSON
+          - EMAIL_ADDRESS
+          - PHONE_NUMBER
+          - CREDIT_CARD
+          - US_SSN
+          - LOCATION
+
+      output:
+        score_threshold: 0.4
+        entities:
+          - PERSON
+          - EMAIL_ADDRESS
+          - PHONE_NUMBER
+          - CREDIT_CARD
+          - US_SSN
+          - LOCATION
diff --git a/examples/configs/sensitive_data_detection_v2/flows.co b/examples/configs/sensitive_data_detection_v2/flows.co
@@ -0,0 +1,10 @@
+import guardrails
+import nemoguardrails.library.sensitive_data_detection
+
+flow input rails $input_text
+  """Mask sensitive data in user utterances before they get further processed."""
+  await mask sensitive data on input
+
+flow output rails $output_text
+  """Mask sensitive data in the response before sending it to the user."""
+  await mask sensitive data on output
diff --git a/examples/configs/sensitive_data_detection_v2/main.co b/examples/configs/sensitive_data_detection_v2/main.co
@@ -0,0 +1,5 @@
+import core
+import llm
+
+flow main
+  activate llm continuation
diff --git a/nemoguardrails/library/sensitive_data_detection/flows.co b/nemoguardrails/library/sensitive_data_detection/flows.co
@@ -11,6 +11,7 @@ flow detect sensitive data on input
 
 flow mask sensitive data on input
   """Mask any sensitive data found in the user input."""
+  global $user_message
   $user_message = await MaskSensitiveDataAction(source="input", text=$user_message)
 
 
@@ -28,10 +29,11 @@ flow detect sensitive data on output
 
 flow mask sensitive data on output
   """Mask any sensitive data found in the bot output."""
+  global $bot_message
   $bot_message = await MaskSensitiveDataAction(source="output", text=$bot_message)
 
 
-# RETRIVAL RAILS
+# RETRIEVAL RAILS
 
 
 flow detect sensitive data on retrieval
@@ -45,4 +47,5 @@ flow detect sensitive data on retrieval
 
 flow mask sensitive data on retrieval
   """Mask any sensitive data found in the relevant chunks from the knowledge base."""
+  global $relevant_chunks
   $relevant_chunks = await MaskSensitiveDataAction(source="retrieval", text=$relevant_chunks)