geekyrakshit's picture
update: docs
2633ee9
raw
history blame
6.31 kB
import hashlib
import json
import pathlib
from enum import Enum
from typing import Optional, Union
import weave
from pydantic import BaseModel
from guardrails_genie.guardrails.base import Guardrail
from guardrails_genie.regex_model import RegexModel
def load_secrets_patterns() -> dict[str, list[str]]:
    """
    Load secret patterns from a JSONL file and return them as a dictionary.

    The file ``secrets_patterns.jsonl`` is read from the directory that
    contains this module. Every non-blank line is parsed as a JSON object
    that must provide a ``"name"`` key and a ``"patterns"`` key holding the
    regex patterns for that name. If the same name appears more than once,
    the last occurrence wins.

    Returns:
        dict: A dictionary where keys are pattern names and values are lists of regex patterns.

    Raises:
        FileNotFoundError: If ``secrets_patterns.jsonl`` is missing.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks the ``"name"`` or ``"patterns"`` key.
    """
    patterns_file = pathlib.Path(__file__).parent.absolute() / "secrets_patterns.jsonl"
    default_patterns: dict[str, list[str]] = {}
    # Explicit UTF-8 so the loaded regexes do not depend on the platform locale.
    # Iterating the file handle splits on real newlines only; str.splitlines()
    # would also split on characters such as U+2028 that may legally appear
    # unescaped inside a JSON string.
    with patterns_file.open(encoding="utf-8") as handle:
        for line in handle:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line in the file).
                continue
            record = json.loads(line)
            default_patterns[record["name"]] = [str(pat) for pat in record["patterns"]]
    return default_patterns
# Default secret patterns, loaded once at import time from the JSONL file
# located next to this module (see load_secrets_patterns).
DEFAULT_SECRETS_PATTERNS = load_secrets_patterns()
class REDACTION(str, Enum):
    """
    Enum for different types of redaction methods.
    """

    # Keep only the first two and last two characters of the secret visible.
    REDACT_PARTIAL = "REDACT_PARTIAL"
    # Replace every character of the secret with "*".
    REDACT_ALL = "REDACT_ALL"
    # Replace the secret with the hex MD5 digest of its UTF-8 encoding.
    REDACT_HASH = "REDACT_HASH"
    # Leave the text untouched.
    REDACT_NONE = "REDACT_NONE"


def redact(text: str, matches: list[str], redaction_type: REDACTION) -> str:
    """
    Redact the given matches in the text based on the redaction type.

    All occurrences of every match are replaced in a single pass over the
    text, so a replacement that was just inserted is never scanned again.
    When one match is a prefix of another, the longer one wins, which keeps
    the tail of the longer secret from being left in clear text. Empty
    matches are ignored.

    Args:
        text (str): The input text to redact.
        matches (list[str]): List of strings to be redacted.
        redaction_type (REDACTION): The type of redaction to apply. Any value
            other than REDACT_PARTIAL, REDACT_ALL or REDACT_HASH leaves the
            text unchanged.

    Returns:
        str: The redacted text.
    """
    import re  # function-scope import: the module header does not import re

    active_modes = (
        REDACTION.REDACT_PARTIAL,
        REDACTION.REDACT_ALL,
        REDACTION.REDACT_HASH,
    )
    if redaction_type not in active_modes:
        return text

    # Drop empty strings (str.replace("", x) would insert x between every
    # character), de-duplicate, and try longer secrets first.
    candidates = sorted(
        dict.fromkeys(match for match in matches if match),
        key=len,
        reverse=True,
    )
    if not candidates:
        return text

    def _replacement(secret: str) -> str:
        """Build the marker-wrapped replacement for one matched secret."""
        if redaction_type == REDACTION.REDACT_PARTIAL:
            if len(secret) <= 4:
                # Showing 2 + 2 characters would disclose the whole secret.
                masked = "*" * len(secret)
            else:
                masked = secret[:2] + ".." + secret[-2:]
        elif redaction_type == REDACTION.REDACT_ALL:
            masked = "*" * len(secret)
        else:
            # NOTE(review): MD5 is kept so existing outputs stay stable; it is
            # not a strong hash for low-entropy secrets.
            masked = hashlib.md5(secret.encode()).hexdigest()
        return "[REDACTED:]" + masked + "[:REDACTED]"

    secret_re = re.compile("|".join(re.escape(secret) for secret in candidates))
    return secret_re.sub(lambda found: _replacement(found.group(0)), text)
class SecretsDetectionSimpleResponse(BaseModel):
    """
    Minimal result of a secrets-detection run.

    Attributes:
        contains_secrets (bool): True when at least one secret was detected.
        explanation (str): Human-readable summary of the detection result.
        redacted_text (Optional[str]): The text after redaction; defaults to None.
    """

    contains_secrets: bool
    explanation: str
    redacted_text: Optional[str] = None

    @property
    def safe(self) -> bool:
        """
        Tell whether the checked text is free of secrets.

        Returns:
            bool: The negation of ``contains_secrets``.
        """
        secrets_found = self.contains_secrets
        return not secrets_found
class SecretsDetectionResponse(SecretsDetectionSimpleResponse):
    """
    Secrets-detection result that also carries the matched secrets.

    Extends SecretsDetectionSimpleResponse with one extra field.

    Attributes:
        detected_secrets (dict[str, list[str]]): Detected secrets, as lists of
            strings keyed by string.
    """

    detected_secrets: dict[str, list[str]]
class SecretsDetectionGuardrail(Guardrail):
    """
    A guardrail for detecting secrets in text using regex patterns.

    reference: SecretBench: A Dataset of Software Secrets
    https://arxiv.org/abs/2303.06729

    Attributes:
        regex_model (RegexModel): The regex model used for detection.
        patterns (Union[dict[str, str], dict[str, list[str]]]): The patterns used for detection.
        redaction (REDACTION): The type of redaction to apply.
    """

    regex_model: RegexModel
    patterns: Union[dict[str, str], dict[str, list[str]]] = {}
    redaction: REDACTION

    def __init__(
        self,
        use_defaults: bool = True,
        redaction: REDACTION = REDACTION.REDACT_ALL,
        **kwargs,
    ):
        """
        Initialize the SecretsDetectionGuardrail.

        Args:
            use_defaults (bool): Whether to start from a copy of DEFAULT_SECRETS_PATTERNS.
            redaction (REDACTION): The type of redaction to apply.
            **kwargs: Only the ``patterns`` key is read; when present and
                non-empty it is merged over the defaults, overriding entries
                with the same name. Other keys are ignored.
        """
        patterns = {}
        if use_defaults:
            # Copy so the merge below never mutates the module-level defaults.
            patterns = DEFAULT_SECRETS_PATTERNS.copy()
        if kwargs.get("patterns"):
            patterns.update(kwargs["patterns"])

        regex_model = RegexModel(patterns=patterns)
        super().__init__(
            regex_model=regex_model,
            patterns=patterns,
            redaction=redaction,
        )

    @weave.op()
    def guard(
        self,
        prompt: str,
        return_detected_secrets: bool = True,
        **kwargs,
    ) -> SecretsDetectionResponse | SecretsDetectionSimpleResponse:
        """
        Check if the input prompt contains any secrets based on the regex patterns.

        Args:
            prompt (str): Input text to check for secrets.
            return_detected_secrets (bool): If True, returns detailed secrets type information.
            **kwargs: Accepted but not used by this method.

        Returns:
            SecretsDetectionResponse or SecretsDetectionSimpleResponse: Detection
            results. The detailed response is returned when
            ``return_detected_secrets`` is True, the simple one otherwise.
        """
        result = self.regex_model.check(prompt)
        matched_patterns = result.matched_patterns

        redacted_text = prompt
        if matched_patterns:
            explanation_parts = ["Found the following secrets in the text:"]
            explanation_parts.extend(
                f"- {secret_type}: {len(matches)} instance(s)"
                for secret_type, matches in matched_patterns.items()
            )
            # Hand every match to redact() in one call (same order as the
            # per-type loop) so overlapping matches from different secret
            # types are resolved together rather than type by type.
            all_matches = [
                match for matches in matched_patterns.values() for match in matches
            ]
            redacted_text = redact(prompt, all_matches, self.redaction)
        else:
            explanation_parts = ["No secrets detected in the text."]

        explanation = "\n".join(explanation_parts)
        contains_secrets = not result.passed

        if return_detected_secrets:
            return SecretsDetectionResponse(
                contains_secrets=contains_secrets,
                detected_secrets=matched_patterns,
                explanation=explanation,
                redacted_text=redacted_text,
            )
        return SecretsDetectionSimpleResponse(
            contains_secrets=contains_secrets,
            explanation=explanation,
            redacted_text=redacted_text,
        )