Spaces:
Running
Running
rename to be more appropriate and add better test cases for pii
Browse files- guardrails_genie/guardrails/{pii → entity_recognition}/__init__.py +0 -0
- guardrails_genie/guardrails/entity_recognition/llm_judge_entity_recognition_guardrail.py +3 -0
- guardrails_genie/guardrails/{banned_terms/llm_judge.py → entity_recognition/pii_examples/pii_benchmark.py} +0 -0
- guardrails_genie/guardrails/entity_recognition/pii_examples/pii_test_examples.py +150 -0
- guardrails_genie/guardrails/entity_recognition/pii_examples/run_presidio_model.py +42 -0
- guardrails_genie/guardrails/entity_recognition/pii_examples/run_regex_model.py +42 -0
- guardrails_genie/guardrails/entity_recognition/pii_examples/run_transformers.py +43 -0
- guardrails_genie/guardrails/{pii/presidio_pii_guardrail.py → entity_recognition/presidio_entity_recognition_guardrail.py} +27 -27
- guardrails_genie/guardrails/{pii/regex_pii_guardrail.py → entity_recognition/regex_entity_recognition_guardrail.py} +26 -26
- guardrails_genie/guardrails/{pii/transformers_pipeline_guardrail.py → entity_recognition/transformers_entity_recognition_guardrail.py} +34 -34
- guardrails_genie/guardrails/pii/run_presidio_model.py +0 -36
- guardrails_genie/guardrails/pii/run_regex_model.py +0 -21
- guardrails_genie/guardrails/pii/run_transformers.py +0 -35
guardrails_genie/guardrails/{pii → entity_recognition}/__init__.py
RENAMED
File without changes
|
guardrails_genie/guardrails/entity_recognition/llm_judge_entity_recognition_guardrail.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
## Word consistency
|
2 |
+
# - Scent -> Odor
|
3 |
+
# - odour -> Odor
|
guardrails_genie/guardrails/{banned_terms/llm_judge.py → entity_recognition/pii_examples/pii_benchmark.py}
RENAMED
File without changes
|
guardrails_genie/guardrails/entity_recognition/pii_examples/pii_test_examples.py
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Collection of PII test examples with expected outcomes for entity recognition testing.
|
3 |
+
Each example includes the input text and expected entities to be detected.
|
4 |
+
"""
|
5 |
+
|
6 |
+
PII_TEST_EXAMPLES = [
|
7 |
+
{
|
8 |
+
"description": "Business Context - Employee Record",
|
9 |
+
"input_text": """
|
10 |
+
Please update our records for employee John Smith:
|
11 |
+
Email: [email protected]
|
12 |
+
Phone: 123-456-7890
|
13 |
+
SSN: 123-45-6789
|
14 |
+
Emergency Contact: Mary Johnson (Tel: 098-765-4321)
|
15 |
+
""",
|
16 |
+
"expected_entities": {
|
17 |
+
"GIVENNAME": ["John", "Mary"],
|
18 |
+
"SURNAME": ["Smith", "Johnson"],
|
19 |
+
"EMAIL": ["[email protected]"],
|
20 |
+
"PHONE_NUMBER": ["123-456-7890", "098-765-4321"],
|
21 |
+
"SOCIALNUM": ["123-45-6789"]
|
22 |
+
}
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"description": "Meeting Notes with Attendees",
|
26 |
+
"input_text": """
|
27 |
+
Meeting Notes - Project Alpha
|
28 |
+
Date: 2024-03-15
|
29 |
+
Attendees:
|
30 |
+
- Sarah Williams ([email protected])
|
31 |
+
- Robert Brown ([email protected])
|
32 |
+
- Tom Wilson (555-0123-4567)
|
33 |
+
|
34 |
+
Action Items:
|
35 |
+
1. Sarah to review documentation
|
36 |
+
2. Contact Bob at his alternate number: 777-888-9999
|
37 |
+
""",
|
38 |
+
"expected_entities": {
|
39 |
+
"GIVENNAME": ["Sarah", "Robert", "Tom", "Bob"],
|
40 |
+
"SURNAME": ["Williams", "Brown", "Wilson"],
|
41 |
+
"EMAIL": ["[email protected]", "[email protected]"],
|
42 |
+
"PHONE_NUMBER": ["555-0123-4567", "777-888-9999"]
|
43 |
+
}
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"description": "Medical Record",
|
47 |
+
"input_text": """
|
48 |
+
Patient: Emma Thompson
|
49 |
+
DOB: 05/15/1980
|
50 |
+
Medical Record #: MR-12345
|
51 |
+
Primary Care: Dr. James Wilson
|
52 |
+
Contact: [email protected]
|
53 |
+
Insurance ID: INS-987654321
|
54 |
+
Emergency Contact: Michael Thompson (555-123-4567)
|
55 |
+
""",
|
56 |
+
"expected_entities": {
|
57 |
+
"GIVENNAME": ["Emma", "James", "Michael"],
|
58 |
+
"SURNAME": ["Thompson", "Wilson", "Thompson"],
|
59 |
+
"EMAIL": ["[email protected]"],
|
60 |
+
"PHONE_NUMBER": ["555-123-4567"]
|
61 |
+
}
|
62 |
+
},
|
63 |
+
{
|
64 |
+
"description": "No PII Content",
|
65 |
+
"input_text": """
|
66 |
+
Project Status Update:
|
67 |
+
- All deliverables are on track
|
68 |
+
- Budget is within limits
|
69 |
+
- Next review scheduled for next week
|
70 |
+
""",
|
71 |
+
"expected_entities": {}
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"description": "Mixed Format Phone Numbers",
|
75 |
+
"input_text": """
|
76 |
+
Contact Directory:
|
77 |
+
Main Office: (555) 123-4567
|
78 |
+
Support: 555.987.6543
|
79 |
+
International: +1-555-321-7890
|
80 |
+
Emergency: 555 444 3333
|
81 |
+
""",
|
82 |
+
"expected_entities": {
|
83 |
+
"PHONE_NUMBER": [
|
84 |
+
"(555) 123-4567",
|
85 |
+
"555.987.6543",
|
86 |
+
"+1-555-321-7890",
|
87 |
+
"555 444 3333"
|
88 |
+
]
|
89 |
+
}
|
90 |
+
}
|
91 |
+
]
|
92 |
+
|
93 |
+
# Additional examples can be added to test specific edge cases or formats
|
94 |
+
EDGE_CASE_EXAMPLES = [
|
95 |
+
{
|
96 |
+
"description": "Mixed Case and Special Characters",
|
97 |
+
"input_text": """
|
98 | |
99 | |
100 | |
101 |
+
""",
|
102 |
+
"expected_entities": {
|
103 |
+
"EMAIL": [
|
104 |
+
"[email protected]",
|
105 |
+
"[email protected]",
|
106 | |
107 |
+
],
|
108 |
+
"GIVENNAME": ["John", "Jane", "Bob"],
|
109 |
+
"SURNAME": ["Doe", "Smith", "Jones"]
|
110 |
+
}
|
111 |
+
}
|
112 |
+
]
|
113 |
+
|
114 |
+
def validate_entities(detected: dict, expected: dict) -> bool:
|
115 |
+
"""Compare detected entities with expected entities"""
|
116 |
+
if set(detected.keys()) != set(expected.keys()):
|
117 |
+
return False
|
118 |
+
return all(set(detected[k]) == set(expected[k]) for k in expected.keys())
|
119 |
+
|
120 |
+
def run_test_case(guardrail, test_case, test_type="Main"):
|
121 |
+
"""Run a single test case and print results"""
|
122 |
+
print(f"\n{test_type} Test Case: {test_case['description']}")
|
123 |
+
print("-" * 50)
|
124 |
+
|
125 |
+
result = guardrail.guard(test_case['input_text'])
|
126 |
+
expected = test_case['expected_entities']
|
127 |
+
|
128 |
+
# Validate results
|
129 |
+
matches = validate_entities(result.detected_entities, expected)
|
130 |
+
|
131 |
+
print(f"Test Status: {'✓ PASS' if matches else '✗ FAIL'}")
|
132 |
+
print(f"Contains PII: {result.contains_entities}")
|
133 |
+
|
134 |
+
if not matches:
|
135 |
+
print("\nEntity Comparison:")
|
136 |
+
all_entity_types = set(list(result.detected_entities.keys()) + list(expected.keys()))
|
137 |
+
for entity_type in all_entity_types:
|
138 |
+
detected = set(result.detected_entities.get(entity_type, []))
|
139 |
+
expected_set = set(expected.get(entity_type, []))
|
140 |
+
print(f"\nEntity Type: {entity_type}")
|
141 |
+
print(f" Expected: {sorted(expected_set)}")
|
142 |
+
print(f" Detected: {sorted(detected)}")
|
143 |
+
if detected != expected_set:
|
144 |
+
print(f" Missing: {sorted(expected_set - detected)}")
|
145 |
+
print(f" Extra: {sorted(detected - expected_set)}")
|
146 |
+
|
147 |
+
if result.anonymized_text:
|
148 |
+
print(f"\nAnonymized Text:\n{result.anonymized_text}")
|
149 |
+
|
150 |
+
return matches
|
guardrails_genie/guardrails/entity_recognition/pii_examples/run_presidio_model.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from guardrails_genie.guardrails.entity_recognition.presidio_entity_recognition_guardrail import PresidioEntityRecognitionGuardrail
|
2 |
+
from guardrails_genie.guardrails.entity_recognition.pii_examples.pii_test_examples import PII_TEST_EXAMPLES, EDGE_CASE_EXAMPLES, run_test_case, validate_entities
|
3 |
+
import weave
|
4 |
+
|
5 |
+
def test_pii_detection():
|
6 |
+
"""Test PII detection scenarios using predefined test cases"""
|
7 |
+
weave.init("guardrails-genie-pii-presidio-model")
|
8 |
+
|
9 |
+
# Create the guardrail with default entities and anonymization enabled
|
10 |
+
pii_guardrail = PresidioEntityRecognitionGuardrail(
|
11 |
+
should_anonymize=True,
|
12 |
+
show_available_entities=True
|
13 |
+
)
|
14 |
+
|
15 |
+
# Test statistics
|
16 |
+
total_tests = len(PII_TEST_EXAMPLES) + len(EDGE_CASE_EXAMPLES)
|
17 |
+
passed_tests = 0
|
18 |
+
|
19 |
+
# Test main PII examples
|
20 |
+
print("\nRunning Main PII Tests")
|
21 |
+
print("=" * 80)
|
22 |
+
for test_case in PII_TEST_EXAMPLES:
|
23 |
+
if run_test_case(pii_guardrail, test_case):
|
24 |
+
passed_tests += 1
|
25 |
+
|
26 |
+
# Test edge cases
|
27 |
+
print("\nRunning Edge Cases")
|
28 |
+
print("=" * 80)
|
29 |
+
for test_case in EDGE_CASE_EXAMPLES:
|
30 |
+
if run_test_case(pii_guardrail, test_case, "Edge"):
|
31 |
+
passed_tests += 1
|
32 |
+
|
33 |
+
# Print summary
|
34 |
+
print("\nTest Summary")
|
35 |
+
print("=" * 80)
|
36 |
+
print(f"Total Tests: {total_tests}")
|
37 |
+
print(f"Passed: {passed_tests}")
|
38 |
+
print(f"Failed: {total_tests - passed_tests}")
|
39 |
+
print(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")
|
40 |
+
|
41 |
+
if __name__ == "__main__":
|
42 |
+
test_pii_detection()
|
guardrails_genie/guardrails/entity_recognition/pii_examples/run_regex_model.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from guardrails_genie.guardrails.entity_recognition.regex_entity_recognition_guardrail import RegexEntityRecognitionGuardrail
|
2 |
+
from guardrails_genie.guardrails.entity_recognition.pii_examples.pii_test_examples import PII_TEST_EXAMPLES, EDGE_CASE_EXAMPLES, run_test_case, validate_entities
|
3 |
+
import weave
|
4 |
+
|
5 |
+
def test_pii_detection():
|
6 |
+
"""Test PII detection scenarios using predefined test cases"""
|
7 |
+
weave.init("guardrails-genie-pii-regex-model")
|
8 |
+
|
9 |
+
# Create the guardrail with default entities and anonymization enabled
|
10 |
+
pii_guardrail = RegexEntityRecognitionGuardrail(
|
11 |
+
should_anonymize=True,
|
12 |
+
show_available_entities=True
|
13 |
+
)
|
14 |
+
|
15 |
+
# Test statistics
|
16 |
+
total_tests = len(PII_TEST_EXAMPLES) + len(EDGE_CASE_EXAMPLES)
|
17 |
+
passed_tests = 0
|
18 |
+
|
19 |
+
# Test main PII examples
|
20 |
+
print("\nRunning Main PII Tests")
|
21 |
+
print("=" * 80)
|
22 |
+
for test_case in PII_TEST_EXAMPLES:
|
23 |
+
if run_test_case(pii_guardrail, test_case):
|
24 |
+
passed_tests += 1
|
25 |
+
|
26 |
+
# Test edge cases
|
27 |
+
print("\nRunning Edge Cases")
|
28 |
+
print("=" * 80)
|
29 |
+
for test_case in EDGE_CASE_EXAMPLES:
|
30 |
+
if run_test_case(pii_guardrail, test_case, "Edge"):
|
31 |
+
passed_tests += 1
|
32 |
+
|
33 |
+
# Print summary
|
34 |
+
print("\nTest Summary")
|
35 |
+
print("=" * 80)
|
36 |
+
print(f"Total Tests: {total_tests}")
|
37 |
+
print(f"Passed: {passed_tests}")
|
38 |
+
print(f"Failed: {total_tests - passed_tests}")
|
39 |
+
print(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")
|
40 |
+
|
41 |
+
if __name__ == "__main__":
|
42 |
+
test_pii_detection()
|
guardrails_genie/guardrails/entity_recognition/pii_examples/run_transformers.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from guardrails_genie.guardrails.entity_recognition.transformers_entity_recognition_guardrail import TransformersEntityRecognitionGuardrail
|
2 |
+
from guardrails_genie.guardrails.entity_recognition.pii_examples.pii_test_examples import PII_TEST_EXAMPLES, EDGE_CASE_EXAMPLES, run_test_case, validate_entities
|
3 |
+
import weave
|
4 |
+
|
5 |
+
def test_pii_detection():
|
6 |
+
"""Test PII detection scenarios using predefined test cases"""
|
7 |
+
weave.init("guardrails-genie-pii-transformers-pipeline-model")
|
8 |
+
|
9 |
+
# Create the guardrail with default entities and anonymization enabled
|
10 |
+
pii_guardrail = TransformersEntityRecognitionGuardrail(
|
11 |
+
selected_entities=["GIVENNAME", "SURNAME", "EMAIL", "PHONE_NUMBER", "SOCIALNUM"],
|
12 |
+
should_anonymize=True,
|
13 |
+
show_available_entities=True
|
14 |
+
)
|
15 |
+
|
16 |
+
# Test statistics
|
17 |
+
total_tests = len(PII_TEST_EXAMPLES) + len(EDGE_CASE_EXAMPLES)
|
18 |
+
passed_tests = 0
|
19 |
+
|
20 |
+
# Test main PII examples
|
21 |
+
print("\nRunning Main PII Tests")
|
22 |
+
print("=" * 80)
|
23 |
+
for test_case in PII_TEST_EXAMPLES:
|
24 |
+
if run_test_case(pii_guardrail, test_case):
|
25 |
+
passed_tests += 1
|
26 |
+
|
27 |
+
# Test edge cases
|
28 |
+
print("\nRunning Edge Cases")
|
29 |
+
print("=" * 80)
|
30 |
+
for test_case in EDGE_CASE_EXAMPLES:
|
31 |
+
if run_test_case(pii_guardrail, test_case, "Edge"):
|
32 |
+
passed_tests += 1
|
33 |
+
|
34 |
+
# Print summary
|
35 |
+
print("\nTest Summary")
|
36 |
+
print("=" * 80)
|
37 |
+
print(f"Total Tests: {total_tests}")
|
38 |
+
print(f"Passed: {passed_tests}")
|
39 |
+
print(f"Failed: {total_tests - passed_tests}")
|
40 |
+
print(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")
|
41 |
+
|
42 |
+
if __name__ == "__main__":
|
43 |
+
test_pii_detection()
|
guardrails_genie/guardrails/{pii/presidio_pii_guardrail.py → entity_recognition/presidio_entity_recognition_guardrail.py}
RENAMED
@@ -7,19 +7,19 @@ from presidio_anonymizer import AnonymizerEngine
|
|
7 |
|
8 |
from ..base import Guardrail
|
9 |
|
10 |
-
class
|
11 |
-
|
12 |
-
|
13 |
explanation: str
|
14 |
anonymized_text: Optional[str] = None
|
15 |
|
16 |
-
class
|
17 |
-
|
18 |
explanation: str
|
19 |
anonymized_text: Optional[str] = None
|
20 |
|
21 |
#TODO: Add support for transformers workflow and not just Spacy
|
22 |
-
class
|
23 |
@staticmethod
|
24 |
def get_available_entities() -> List[str]:
|
25 |
registry = RecognizerRegistry()
|
@@ -103,15 +103,15 @@ class PresidioPIIGuardrail(Guardrail):
|
|
103 |
)
|
104 |
|
105 |
@weave.op()
|
106 |
-
def guard(self, prompt: str, return_detected_types: bool = True, **kwargs) ->
|
107 |
"""
|
108 |
-
Check if the input prompt contains any
|
109 |
|
110 |
Args:
|
111 |
prompt: The text to analyze
|
112 |
-
return_detected_types: If True, returns detailed
|
113 |
"""
|
114 |
-
# Analyze text for
|
115 |
analyzer_results = self.analyzer.analyze(
|
116 |
text=prompt,
|
117 |
entities=self.selected_entities,
|
@@ -119,31 +119,31 @@ class PresidioPIIGuardrail(Guardrail):
|
|
119 |
)
|
120 |
|
121 |
# Group results by entity type
|
122 |
-
|
123 |
for result in analyzer_results:
|
124 |
entity_type = result.entity_type
|
125 |
text_slice = prompt[result.start:result.end]
|
126 |
-
if entity_type not in
|
127 |
-
|
128 |
-
|
129 |
|
130 |
# Create explanation
|
131 |
explanation_parts = []
|
132 |
-
if
|
133 |
-
explanation_parts.append("Found the following
|
134 |
-
for
|
135 |
-
explanation_parts.append(f"- {
|
136 |
else:
|
137 |
-
explanation_parts.append("No
|
138 |
|
139 |
# Add information about what was checked
|
140 |
-
explanation_parts.append("\nChecked for these
|
141 |
for entity in self.selected_entities:
|
142 |
explanation_parts.append(f"- {entity}")
|
143 |
|
144 |
# Anonymize if requested
|
145 |
anonymized_text = None
|
146 |
-
if self.should_anonymize and
|
147 |
anonymized_result = self.anonymizer.anonymize(
|
148 |
text=prompt,
|
149 |
analyzer_results=analyzer_results
|
@@ -151,19 +151,19 @@ class PresidioPIIGuardrail(Guardrail):
|
|
151 |
anonymized_text = anonymized_result.text
|
152 |
|
153 |
if return_detected_types:
|
154 |
-
return
|
155 |
-
|
156 |
-
|
157 |
explanation="\n".join(explanation_parts),
|
158 |
anonymized_text=anonymized_text
|
159 |
)
|
160 |
else:
|
161 |
-
return
|
162 |
-
|
163 |
explanation="\n".join(explanation_parts),
|
164 |
anonymized_text=anonymized_text
|
165 |
)
|
166 |
|
167 |
@weave.op()
|
168 |
-
def predict(self, prompt: str, return_detected_types: bool = True, **kwargs) ->
|
169 |
return self.guard(prompt, return_detected_types=return_detected_types, **kwargs)
|
|
|
7 |
|
8 |
from ..base import Guardrail
|
9 |
|
10 |
+
class PresidioEntityRecognitionResponse(BaseModel):
|
11 |
+
contains_entities: bool
|
12 |
+
detected_entities: Dict[str, List[str]]
|
13 |
explanation: str
|
14 |
anonymized_text: Optional[str] = None
|
15 |
|
16 |
+
class PresidioEntityRecognitionSimpleResponse(BaseModel):
|
17 |
+
contains_entities: bool
|
18 |
explanation: str
|
19 |
anonymized_text: Optional[str] = None
|
20 |
|
21 |
#TODO: Add support for transformers workflow and not just Spacy
|
22 |
+
class PresidioEntityRecognitionGuardrail(Guardrail):
|
23 |
@staticmethod
|
24 |
def get_available_entities() -> List[str]:
|
25 |
registry = RecognizerRegistry()
|
|
|
103 |
)
|
104 |
|
105 |
@weave.op()
|
106 |
+
def guard(self, prompt: str, return_detected_types: bool = True, **kwargs) -> PresidioEntityRecognitionResponse | PresidioEntityRecognitionSimpleResponse:
|
107 |
"""
|
108 |
+
Check if the input prompt contains any entities using Presidio.
|
109 |
|
110 |
Args:
|
111 |
prompt: The text to analyze
|
112 |
+
return_detected_types: If True, returns detailed entity type information
|
113 |
"""
|
114 |
+
# Analyze text for entities
|
115 |
analyzer_results = self.analyzer.analyze(
|
116 |
text=prompt,
|
117 |
entities=self.selected_entities,
|
|
|
119 |
)
|
120 |
|
121 |
# Group results by entity type
|
122 |
+
detected_entities = {}
|
123 |
for result in analyzer_results:
|
124 |
entity_type = result.entity_type
|
125 |
text_slice = prompt[result.start:result.end]
|
126 |
+
if entity_type not in detected_entities:
|
127 |
+
detected_entities[entity_type] = []
|
128 |
+
detected_entities[entity_type].append(text_slice)
|
129 |
|
130 |
# Create explanation
|
131 |
explanation_parts = []
|
132 |
+
if detected_entities:
|
133 |
+
explanation_parts.append("Found the following entities in the text:")
|
134 |
+
for entity_type, instances in detected_entities.items():
|
135 |
+
explanation_parts.append(f"- {entity_type}: {len(instances)} instance(s)")
|
136 |
else:
|
137 |
+
explanation_parts.append("No entities detected in the text.")
|
138 |
|
139 |
# Add information about what was checked
|
140 |
+
explanation_parts.append("\nChecked for these entity types:")
|
141 |
for entity in self.selected_entities:
|
142 |
explanation_parts.append(f"- {entity}")
|
143 |
|
144 |
# Anonymize if requested
|
145 |
anonymized_text = None
|
146 |
+
if self.should_anonymize and detected_entities:
|
147 |
anonymized_result = self.anonymizer.anonymize(
|
148 |
text=prompt,
|
149 |
analyzer_results=analyzer_results
|
|
|
151 |
anonymized_text = anonymized_result.text
|
152 |
|
153 |
if return_detected_types:
|
154 |
+
return PresidioEntityRecognitionResponse(
|
155 |
+
contains_entities=bool(detected_entities),
|
156 |
+
detected_entities=detected_entities,
|
157 |
explanation="\n".join(explanation_parts),
|
158 |
anonymized_text=anonymized_text
|
159 |
)
|
160 |
else:
|
161 |
+
return PresidioEntityRecognitionSimpleResponse(
|
162 |
+
contains_entities=bool(detected_entities),
|
163 |
explanation="\n".join(explanation_parts),
|
164 |
anonymized_text=anonymized_text
|
165 |
)
|
166 |
|
167 |
@weave.op()
|
168 |
+
def predict(self, prompt: str, return_detected_types: bool = True, **kwargs) -> PresidioEntityRecognitionResponse | PresidioEntityRecognitionSimpleResponse:
|
169 |
return self.guard(prompt, return_detected_types=return_detected_types, **kwargs)
|
guardrails_genie/guardrails/{pii/regex_pii_guardrail.py → entity_recognition/regex_entity_recognition_guardrail.py}
RENAMED
@@ -7,25 +7,25 @@ from ...regex_model import RegexModel
|
|
7 |
from ..base import Guardrail
|
8 |
|
9 |
|
10 |
-
class
|
11 |
-
|
12 |
-
|
13 |
explanation: str
|
14 |
anonymized_text: Optional[str] = None
|
15 |
|
16 |
|
17 |
-
class
|
18 |
-
|
19 |
explanation: str
|
20 |
anonymized_text: Optional[str] = None
|
21 |
|
22 |
|
23 |
-
class
|
24 |
regex_model: RegexModel
|
25 |
patterns: Dict[str, str] = {}
|
26 |
should_anonymize: bool = False
|
27 |
|
28 |
-
|
29 |
"email": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
|
30 |
"phone_number": r"\b(?:\+?1[-.]?)?\(?(?:[0-9]{3})\)?[-.]?(?:[0-9]{3})[-.]?(?:[0-9]{4})\b",
|
31 |
"ssn": r"\b\d{3}[-]?\d{2}[-]?\d{4}\b",
|
@@ -41,7 +41,7 @@ class RegexPIIGuardrail(Guardrail):
|
|
41 |
def __init__(self, use_defaults: bool = True, should_anonymize: bool = False, **kwargs):
|
42 |
patterns = {}
|
43 |
if use_defaults:
|
44 |
-
patterns = self.
|
45 |
if kwargs.get("patterns"):
|
46 |
patterns.update(kwargs["patterns"])
|
47 |
|
@@ -56,30 +56,30 @@ class RegexPIIGuardrail(Guardrail):
|
|
56 |
)
|
57 |
|
58 |
@weave.op()
|
59 |
-
def guard(self, prompt: str, return_detected_types: bool = True, **kwargs) ->
|
60 |
"""
|
61 |
-
Check if the input prompt contains any
|
62 |
|
63 |
Args:
|
64 |
-
prompt: Input text to check for
|
65 |
-
return_detected_types: If True, returns detailed
|
66 |
|
67 |
Returns:
|
68 |
-
|
69 |
"""
|
70 |
result = self.regex_model.check(prompt)
|
71 |
|
72 |
# Create detailed explanation
|
73 |
explanation_parts = []
|
74 |
if result.matched_patterns:
|
75 |
-
explanation_parts.append("Found the following
|
76 |
-
for
|
77 |
-
explanation_parts.append(f"- {
|
78 |
else:
|
79 |
-
explanation_parts.append("No
|
80 |
|
81 |
if result.failed_patterns:
|
82 |
-
explanation_parts.append("\nChecked but did not find these
|
83 |
for pattern in result.failed_patterns:
|
84 |
explanation_parts.append(f"- {pattern}")
|
85 |
|
@@ -87,25 +87,25 @@ class RegexPIIGuardrail(Guardrail):
|
|
87 |
anonymized_text = None
|
88 |
if getattr(self, 'should_anonymize', False) and result.matched_patterns:
|
89 |
anonymized_text = prompt
|
90 |
-
for
|
91 |
for match in matches:
|
92 |
-
replacement = f"[{
|
93 |
anonymized_text = anonymized_text.replace(match, replacement)
|
94 |
|
95 |
if return_detected_types:
|
96 |
-
return
|
97 |
-
|
98 |
-
|
99 |
explanation="\n".join(explanation_parts),
|
100 |
anonymized_text=anonymized_text
|
101 |
)
|
102 |
else:
|
103 |
-
return
|
104 |
-
|
105 |
explanation="\n".join(explanation_parts),
|
106 |
anonymized_text=anonymized_text
|
107 |
)
|
108 |
|
109 |
@weave.op()
|
110 |
-
def predict(self, prompt: str, return_detected_types: bool = True, **kwargs) ->
|
111 |
return self.guard(prompt, return_detected_types=return_detected_types, **kwargs)
|
|
|
7 |
from ..base import Guardrail
|
8 |
|
9 |
|
10 |
+
class RegexEntityRecognitionResponse(BaseModel):
|
11 |
+
contains_entities: bool
|
12 |
+
detected_entities: Dict[str, list[str]]
|
13 |
explanation: str
|
14 |
anonymized_text: Optional[str] = None
|
15 |
|
16 |
|
17 |
+
class RegexEntityRecognitionSimpleResponse(BaseModel):
|
18 |
+
contains_entities: bool
|
19 |
explanation: str
|
20 |
anonymized_text: Optional[str] = None
|
21 |
|
22 |
|
23 |
+
class RegexEntityRecognitionGuardrail(Guardrail):
|
24 |
regex_model: RegexModel
|
25 |
patterns: Dict[str, str] = {}
|
26 |
should_anonymize: bool = False
|
27 |
|
28 |
+
DEFAULT_PATTERNS: ClassVar[Dict[str, str]] = {
|
29 |
"email": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
|
30 |
"phone_number": r"\b(?:\+?1[-.]?)?\(?(?:[0-9]{3})\)?[-.]?(?:[0-9]{3})[-.]?(?:[0-9]{4})\b",
|
31 |
"ssn": r"\b\d{3}[-]?\d{2}[-]?\d{4}\b",
|
|
|
41 |
def __init__(self, use_defaults: bool = True, should_anonymize: bool = False, **kwargs):
|
42 |
patterns = {}
|
43 |
if use_defaults:
|
44 |
+
patterns = self.DEFAULT_PATTERNS.copy()
|
45 |
if kwargs.get("patterns"):
|
46 |
patterns.update(kwargs["patterns"])
|
47 |
|
|
|
56 |
)
|
57 |
|
58 |
@weave.op()
|
59 |
+
def guard(self, prompt: str, return_detected_types: bool = True, **kwargs) -> RegexEntityRecognitionResponse | RegexEntityRecognitionSimpleResponse:
|
60 |
"""
|
61 |
+
Check if the input prompt contains any entities based on the regex patterns.
|
62 |
|
63 |
Args:
|
64 |
+
prompt: Input text to check for entities
|
65 |
+
return_detected_types: If True, returns detailed entity type information
|
66 |
|
67 |
Returns:
|
68 |
+
RegexEntityRecognitionResponse or RegexEntityRecognitionSimpleResponse containing detection results
|
69 |
"""
|
70 |
result = self.regex_model.check(prompt)
|
71 |
|
72 |
# Create detailed explanation
|
73 |
explanation_parts = []
|
74 |
if result.matched_patterns:
|
75 |
+
explanation_parts.append("Found the following entities in the text:")
|
76 |
+
for entity_type, matches in result.matched_patterns.items():
|
77 |
+
explanation_parts.append(f"- {entity_type}: {len(matches)} instance(s)")
|
78 |
else:
|
79 |
+
explanation_parts.append("No entities detected in the text.")
|
80 |
|
81 |
if result.failed_patterns:
|
82 |
+
explanation_parts.append("\nChecked but did not find these entity types:")
|
83 |
for pattern in result.failed_patterns:
|
84 |
explanation_parts.append(f"- {pattern}")
|
85 |
|
|
|
87 |
anonymized_text = None
|
88 |
if getattr(self, 'should_anonymize', False) and result.matched_patterns:
|
89 |
anonymized_text = prompt
|
90 |
+
for entity_type, matches in result.matched_patterns.items():
|
91 |
for match in matches:
|
92 |
+
replacement = f"[{entity_type.upper()}]"
|
93 |
anonymized_text = anonymized_text.replace(match, replacement)
|
94 |
|
95 |
if return_detected_types:
|
96 |
+
return RegexEntityRecognitionResponse(
|
97 |
+
contains_entities=not result.passed,
|
98 |
+
detected_entities=result.matched_patterns,
|
99 |
explanation="\n".join(explanation_parts),
|
100 |
anonymized_text=anonymized_text
|
101 |
)
|
102 |
else:
|
103 |
+
return RegexEntityRecognitionSimpleResponse(
|
104 |
+
contains_entities=not result.passed,
|
105 |
explanation="\n".join(explanation_parts),
|
106 |
anonymized_text=anonymized_text
|
107 |
)
|
108 |
|
109 |
@weave.op()
|
110 |
+
def predict(self, prompt: str, return_detected_types: bool = True, **kwargs) -> RegexEntityRecognitionResponse | RegexEntityRecognitionSimpleResponse:
|
111 |
return self.guard(prompt, return_detected_types=return_detected_types, **kwargs)
|
guardrails_genie/guardrails/{pii/transformers_pipeline_guardrail.py → entity_recognition/transformers_entity_recognition_guardrail.py}
RENAMED
@@ -5,19 +5,19 @@ from pydantic import BaseModel
|
|
5 |
from ..base import Guardrail
|
6 |
import weave
|
7 |
|
8 |
-
class
|
9 |
-
|
10 |
-
|
11 |
explanation: str
|
12 |
anonymized_text: Optional[str] = None
|
13 |
|
14 |
-
class
|
15 |
-
|
16 |
explanation: str
|
17 |
anonymized_text: Optional[str] = None
|
18 |
|
19 |
-
class
|
20 |
-
"""Generic guardrail for detecting
|
21 |
|
22 |
_pipeline: Optional[object] = None
|
23 |
selected_entities: List[str]
|
@@ -82,7 +82,7 @@ class TransformersPipelinePIIGuardrail(Guardrail):
|
|
82 |
|
83 |
def _print_available_entities(self, entities: List[str]):
|
84 |
"""Print all available entity types that can be detected by the model."""
|
85 |
-
print("\nAvailable
|
86 |
print("=" * 25)
|
87 |
for entity in entities:
|
88 |
print(f"- {entity}")
|
@@ -92,23 +92,23 @@ class TransformersPipelinePIIGuardrail(Guardrail):
|
|
92 |
"""Print all available entity types that can be detected by the model."""
|
93 |
self._print_available_entities(self.available_entities)
|
94 |
|
95 |
-
def
|
96 |
-
"""Detect
|
97 |
results = self._pipeline(text)
|
98 |
|
99 |
# Group findings by entity type
|
100 |
-
|
101 |
for entity in results:
|
102 |
entity_type = entity['entity_group']
|
103 |
if entity_type in self.selected_entities:
|
104 |
-
if entity_type not in
|
105 |
-
|
106 |
-
|
107 |
|
108 |
-
return
|
109 |
|
110 |
def _anonymize_text(self, text: str, aggregate_redaction: bool = True) -> str:
|
111 |
-
"""Anonymize detected
|
112 |
results = self._pipeline(text)
|
113 |
|
114 |
# Sort entities by start position in reverse order to avoid offset issues
|
@@ -131,49 +131,49 @@ class TransformersPipelinePIIGuardrail(Guardrail):
|
|
131 |
return ' '.join(result.split())
|
132 |
|
133 |
@weave.op()
|
134 |
-
def guard(self, prompt: str, return_detected_types: bool = True, aggregate_redaction: bool = True) ->
|
135 |
-
"""Check if the input prompt contains any
|
136 |
|
137 |
Args:
|
138 |
prompt: The text to analyze
|
139 |
-
return_detected_types: If True, returns detailed
|
140 |
aggregate_redaction: If True, uses generic [redacted] instead of entity type
|
141 |
"""
|
142 |
-
# Detect
|
143 |
-
|
144 |
|
145 |
# Create explanation
|
146 |
explanation_parts = []
|
147 |
-
if
|
148 |
-
explanation_parts.append("Found the following
|
149 |
-
for
|
150 |
-
explanation_parts.append(f"- {
|
151 |
else:
|
152 |
-
explanation_parts.append("No
|
153 |
|
154 |
-
explanation_parts.append("\nChecked for these
|
155 |
for entity in self.selected_entities:
|
156 |
explanation_parts.append(f"- {entity}")
|
157 |
|
158 |
# Anonymize if requested
|
159 |
anonymized_text = None
|
160 |
-
if self.should_anonymize and
|
161 |
anonymized_text = self._anonymize_text(prompt, aggregate_redaction)
|
162 |
|
163 |
if return_detected_types:
|
164 |
-
return
|
165 |
-
|
166 |
-
|
167 |
explanation="\n".join(explanation_parts),
|
168 |
anonymized_text=anonymized_text
|
169 |
)
|
170 |
else:
|
171 |
-
return
|
172 |
-
|
173 |
explanation="\n".join(explanation_parts),
|
174 |
anonymized_text=anonymized_text
|
175 |
)
|
176 |
|
177 |
@weave.op()
|
178 |
-
def predict(self, prompt: str, return_detected_types: bool = True, aggregate_redaction: bool = True, **kwargs) ->
|
179 |
return self.guard(prompt, return_detected_types=return_detected_types, aggregate_redaction=aggregate_redaction, **kwargs)
|
|
|
5 |
from ..base import Guardrail
|
6 |
import weave
|
7 |
|
8 |
+
class TransformersEntityRecognitionResponse(BaseModel):
|
9 |
+
contains_entities: bool
|
10 |
+
detected_entities: Dict[str, List[str]]
|
11 |
explanation: str
|
12 |
anonymized_text: Optional[str] = None
|
13 |
|
14 |
+
class TransformersEntityRecognitionSimpleResponse(BaseModel):
|
15 |
+
contains_entities: bool
|
16 |
explanation: str
|
17 |
anonymized_text: Optional[str] = None
|
18 |
|
19 |
+
class TransformersEntityRecognitionGuardrail(Guardrail):
|
20 |
+
"""Generic guardrail for detecting entities using any token classification model."""
|
21 |
|
22 |
_pipeline: Optional[object] = None
|
23 |
selected_entities: List[str]
|
|
|
82 |
|
83 |
def _print_available_entities(self, entities: List[str]):
|
84 |
"""Print all available entity types that can be detected by the model."""
|
85 |
+
print("\nAvailable entity types:")
|
86 |
print("=" * 25)
|
87 |
for entity in entities:
|
88 |
print(f"- {entity}")
|
|
|
92 |
"""Print all available entity types that can be detected by the model."""
|
93 |
self._print_available_entities(self.available_entities)
|
94 |
|
95 |
+
def _detect_entities(self, text: str) -> Dict[str, List[str]]:
|
96 |
+
"""Detect entities in the text using the pipeline."""
|
97 |
results = self._pipeline(text)
|
98 |
|
99 |
# Group findings by entity type
|
100 |
+
detected_entities = {}
|
101 |
for entity in results:
|
102 |
entity_type = entity['entity_group']
|
103 |
if entity_type in self.selected_entities:
|
104 |
+
if entity_type not in detected_entities:
|
105 |
+
detected_entities[entity_type] = []
|
106 |
+
detected_entities[entity_type].append(entity['word'])
|
107 |
|
108 |
+
return detected_entities
|
109 |
|
110 |
def _anonymize_text(self, text: str, aggregate_redaction: bool = True) -> str:
|
111 |
+
"""Anonymize detected entities in text using the pipeline."""
|
112 |
results = self._pipeline(text)
|
113 |
|
114 |
# Sort entities by start position in reverse order to avoid offset issues
|
|
|
131 |
return ' '.join(result.split())
|
132 |
|
133 |
@weave.op()
def guard(self, prompt: str, return_detected_types: bool = True, aggregate_redaction: bool = True) -> TransformersEntityRecognitionResponse | TransformersEntityRecognitionSimpleResponse:
    """Check whether the prompt contains any of the selected entities.

    Args:
        prompt: The text to analyze.
        return_detected_types: If True, the response includes the detected
            words grouped by entity type; otherwise the compact response
            without that breakdown is returned.
        aggregate_redaction: Forwarded to ``_anonymize_text``; if True, uses
            a generic [redacted] marker instead of the entity type.

    Returns:
        A ``TransformersEntityRecognitionResponse`` when
        ``return_detected_types`` is True, otherwise a
        ``TransformersEntityRecognitionSimpleResponse``.
    """
    found = self._detect_entities(prompt)
    has_entities = bool(found)

    # Summarize what was found, one line per detected entity type.
    if found:
        summary = ["Found the following entities in the text:"]
        summary.extend(
            f"- {entity_type}: {len(instances)} instance(s)"
            for entity_type, instances in found.items()
        )
    else:
        summary = ["No entities detected in the text."]

    # Always list what was looked for, so a clean result is interpretable.
    summary.append("\nChecked for these entities:")
    summary.extend(f"- {entity}" for entity in self.selected_entities)
    explanation = "\n".join(summary)

    # Only redact when anonymization is enabled and there is something to redact.
    anonymized_text = (
        self._anonymize_text(prompt, aggregate_redaction)
        if self.should_anonymize and found
        else None
    )

    if not return_detected_types:
        return TransformersEntityRecognitionSimpleResponse(
            contains_entities=has_entities,
            explanation=explanation,
            anonymized_text=anonymized_text
        )

    return TransformersEntityRecognitionResponse(
        contains_entities=has_entities,
        detected_entities=found,
        explanation=explanation,
        anonymized_text=anonymized_text
    )
|
176 |
|
177 |
@weave.op()
def predict(self, prompt: str, return_detected_types: bool = True, aggregate_redaction: bool = True, **kwargs) -> TransformersEntityRecognitionResponse | TransformersEntityRecognitionSimpleResponse:
    """Run the guardrail on ``prompt``; thin wrapper around ``guard``.

    Args:
        prompt: The text to analyze.
        return_detected_types: Passed through to ``guard``.
        aggregate_redaction: Passed through to ``guard``.
        **kwargs: Accepted so callers may supply extra keyword arguments,
            but intentionally ignored: ``guard`` declares no additional
            keyword parameters, so forwarding them would raise ``TypeError``.

    Returns:
        Whatever ``guard`` returns for the same arguments.
    """
    return self.guard(prompt, return_detected_types=return_detected_types, aggregate_redaction=aggregate_redaction)
|
guardrails_genie/guardrails/pii/run_presidio_model.py
DELETED
@@ -1,36 +0,0 @@
|
|
1 |
-
from guardrails_genie.guardrails.pii.presidio_pii_guardrail import PresidioPIIGuardrail
|
2 |
-
import weave
|
3 |
-
|
4 |
-
def run_presidio_model():
|
5 |
-
weave.init("guardrails-genie-pii-presidio-model")
|
6 |
-
|
7 |
-
# Create the guardrail with default entities and anonymization enabled
|
8 |
-
pii_guardrail = PresidioPIIGuardrail(
|
9 |
-
selected_entities=["PERSON", "EMAIL_ADDRESS", "PHONE_NUMBER"],
|
10 |
-
should_anonymize=True
|
11 |
-
)
|
12 |
-
|
13 |
-
# Check a prompt
|
14 |
-
prompt = "Please contact [email protected] or call 123-456-7890. My SSN is 123-45-6789"
|
15 |
-
result = pii_guardrail.guard(prompt)
|
16 |
-
print(result)
|
17 |
-
|
18 |
-
# Result will contain:
|
19 |
-
# - contains_pii: True
|
20 |
-
# - detected_pii_types: {
|
21 |
-
# "EMAIL_ADDRESS": ["[email protected]"],
|
22 |
-
# "PHONE_NUMBER": ["123-456-7890"],
|
23 |
-
# "US_SSN": ["123-45-6789"]
|
24 |
-
# }
|
25 |
-
# - safe_to_process: False
|
26 |
-
# - explanation: Detailed explanation of findings
|
27 |
-
# - anonymized_text: "Please contact <EMAIL_ADDRESS> or call <PHONE_NUMBER>. My SSN is <US_SSN>"
|
28 |
-
|
29 |
-
# Example with no PII
|
30 |
-
safe_prompt = "The weather is nice today"
|
31 |
-
safe_result = pii_guardrail.guard(safe_prompt)
|
32 |
-
print("\nSafe prompt result:")
|
33 |
-
print(safe_result)
|
34 |
-
|
35 |
-
if __name__ == "__main__":
|
36 |
-
run_presidio_model()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
guardrails_genie/guardrails/pii/run_regex_model.py
DELETED
@@ -1,21 +0,0 @@
|
|
1 |
-
from guardrails_genie.guardrails.pii.regex_pii_guardrail import RegexPIIGuardrail
|
2 |
-
import weave
|
3 |
-
|
4 |
-
def run_regex_model():
|
5 |
-
weave.init("guardrails-genie-pii-regex-model")
|
6 |
-
# Create the guardrail
|
7 |
-
pii_guardrail = RegexPIIGuardrail(use_defaults=True, should_anonymize=True)
|
8 |
-
|
9 |
-
# Check a prompt
|
10 |
-
prompt = "Please contact [email protected] or call 123-456-7890"
|
11 |
-
result = pii_guardrail.guard(prompt)
|
12 |
-
print(result)
|
13 |
-
|
14 |
-
# Result will contain:
|
15 |
-
# - contains_pii: True
|
16 |
-
# - detected_pii_types: {"email": ["[email protected]"], "phone_number": ["123-456-7890"]}
|
17 |
-
# - safe_to_process: False
|
18 |
-
# - explanation: Detailed explanation of findings
|
19 |
-
|
20 |
-
if __name__ == "__main__":
|
21 |
-
run_regex_model()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
guardrails_genie/guardrails/pii/run_transformers.py
DELETED
@@ -1,35 +0,0 @@
|
|
1 |
-
from guardrails_genie.guardrails.pii.transformers_pipeline_guardrail import TransformersPipelinePIIGuardrail
|
2 |
-
import weave
|
3 |
-
|
4 |
-
def run_transformers_pipeline():
|
5 |
-
weave.init("guardrails-genie-pii-transformers-pipeline-model")
|
6 |
-
|
7 |
-
# Create the guardrail with default entities and anonymization enabled
|
8 |
-
pii_guardrail = TransformersPipelinePIIGuardrail(
|
9 |
-
selected_entities=["GIVENNAME", "SURNAME", "EMAIL", "TELEPHONENUM", "SOCIALNUM", "PHONE_NUMBER"],
|
10 |
-
should_anonymize=True,
|
11 |
-
model_name="lakshyakh93/deberta_finetuned_pii",
|
12 |
-
show_available_entities=True
|
13 |
-
)
|
14 |
-
|
15 |
-
# Check a prompt
|
16 |
-
prompt = "Please contact John Smith at [email protected] or call 123-456-7890. My SSN is 123-45-6789"
|
17 |
-
result = pii_guardrail.guard(prompt, aggregate_redaction=False)
|
18 |
-
print(result)
|
19 |
-
|
20 |
-
# Result will contain:
|
21 |
-
# - contains_pii: True
|
22 |
-
# - detected_pii_types: {
|
23 |
-
# "GIVENNAME": ["John"],
|
24 |
-
# "SURNAME": ["Smith"],
|
25 |
-
# "EMAIL": ["[email protected]"],
|
26 |
-
# "TELEPHONENUM": ["123-456-7890"],
|
27 |
-
# "SOCIALNUM": ["123-45-6789"]
|
28 |
-
# }
|
29 |
-
# - safe_to_process: False
|
30 |
-
# - explanation: Detailed explanation of findings
|
31 |
-
# - anonymized_text: "Please contact [redacted] [redacted] at [redacted] or call [redacted]. My SSN is [redacted]"
|
32 |
-
|
33 |
-
|
34 |
-
if __name__ == "__main__":
|
35 |
-
run_transformers_pipeline()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|