Spaces:

wandb
/

guardrails-genie

Running

App Files Files Community

param-bharat commited on Dec 3, 2024

Commit

993988f

1 Parent(s): 46466a5

feat: implement secrets detection guardrail

Browse files

Files changed (2) hide show

guardrails_genie/guardrails/secrets_detection/__init__.py +17 -0
guardrails_genie/guardrails/secrets_detection/secrets_detection.py +183 -0

guardrails_genie/guardrails/secrets_detection/__init__.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from guardrails_genie.guardrails.secrets_detection.secrets_detection import (
+    DEFAULT_SECRETS_PATTERNS,
+    SecretsDetectionGuardrail,
+    SecretsDetectionSimpleResponse,
+    SecretsDetectionResponse,
+    REDACTION,
+    redact,
+)
+__all__ = [
+    "DEFAULT_SECRETS_PATTERNS",
+    "SecretsDetectionGuardrail",
+    "SecretsDetectionSimpleResponse",
+    "SecretsDetectionResponse",
+    "REDACTION",
+    "redact",
+]

guardrails_genie/guardrails/secrets_detection/secrets_detection.py ADDED Viewed

	@@ -0,0 +1,183 @@

+import hashlib
+import json
+import pathlib
+from enum import Enum
+from typing import Union, Optional
+import weave
+from pydantic import BaseModel
+from guardrails_genie.guardrails.base import Guardrail
+from guardrails_genie.regex_model import RegexModel
+def load_secrets_patterns():
+    default_patterns = {}
+    patterns = (
+        pathlib.Path(__file__).parent.absolute() / "secrets_patterns.jsonl"
+    ).read_text()
+    for pattern in patterns.splitlines():
+        pattern = json.loads(pattern)
+        default_patterns[pattern["name"]] = [rf"{pat}" for pat in pattern["patterns"]]
+    return default_patterns
+DEFAULT_SECRETS_PATTERNS = load_secrets_patterns()
+class REDACTION(str, Enum):
+    REDACT_PARTIAL = "REDACT_PARTIAL"
+    REDACT_ALL = "REDACT_ALL"
+    REDACT_HASH = "REDACT_HASH"
+    REDACT_NONE = "REDACT_NONE"
+def redact(text: str, matches: list[str], redaction_type: REDACTION) -> str:
+    for match in matches:
+        if redaction_type == REDACTION.REDACT_PARTIAL:
+            replacement = "[REDACTED:]" + match[:2] + ".." + match[-2:] + "[:REDACTED]"
+        elif redaction_type == REDACTION.REDACT_ALL:
+            replacement = "[REDACTED:]" + ("*" * len(match)) + "[:REDACTED]"
+        elif redaction_type == REDACTION.REDACT_HASH:
+            replacement = (
+                "[REDACTED:]" + hashlib.md5(match.encode()).hexdigest() + "[:REDACTED]"
+            )
+        else:
+            replacement = match
+        text = text.replace(match, replacement)
+    return text
+class SecretsDetectionSimpleResponse(BaseModel):
+    contains_secrets: bool
+    explanation: str
+    redacted_text: Optional[str] = None
+    @property
+    def safe(self) -> bool:
+        return not self.contains_entities
+class SecretsDetectionResponse(SecretsDetectionSimpleResponse):
+    detected_secrets: dict[str, list[str]]
+class SecretsDetectionGuardrail(Guardrail):
+    regex_model: RegexModel
+    patterns: Union[dict[str, str], dict[str, list[str]]] = {}
+    redaction: REDACTION
+    def __init__(
+        self,
+        use_defaults: bool = True,
+        redaction: REDACTION = REDACTION.REDACT_ALL,
+        **kwargs,
+    ):
+        patterns = {}
+        if use_defaults:
+            patterns = DEFAULT_SECRETS_PATTERNS.copy()
+        if kwargs.get("patterns"):
+            patterns.update(kwargs["patterns"])
+        # Create the RegexModel instance
+        regex_model = RegexModel(patterns=patterns)
+        # Initialize the base class with both the regex_model and patterns
+        super().__init__(
+            regex_model=regex_model,
+            patterns=patterns,
+            redaction=redaction,
+        )
+    @weave.op()
+    def guard(
+        self,
+        prompt: str,
+        return_detected_types: bool = True,
+        **kwargs,
+    ) -> SecretsDetectionResponse | SecretsDetectionResponse:
+        """
+        Check if the input prompt contains any entities based on the regex patterns.
+        Args:
+            prompt: Input text to check for entities
+            return_detected_types: If True, returns detailed entity type information
+        Returns:
+            SecretsDetectionResponse or SecretsDetectionResponse containing detection results
+        """
+        result = self.regex_model.check(prompt)
+        # Create detailed explanation
+        explanation_parts = []
+        if result.matched_patterns:
+            explanation_parts.append("Found the following secrets in the text:")
+            for secret_type, matches in result.matched_patterns.items():
+                explanation_parts.append(f"- {secret_type}: {len(matches)} instance(s)")
+        else:
+            explanation_parts.append("No secrets detected in the text.")
+        redacted_text = prompt
+        if result.matched_patterns:
+            for secret_type, matches in result.matched_patterns.items():
+                redacted_text = redact(redacted_text, matches, self.redaction)
+        if return_detected_types:
+            return SecretsDetectionResponse(
+                contains_secrets=not result.passed,
+                detected_secrets=result.matched_patterns,
+                explanation="\n".join(explanation_parts),
+                redacted_text=redacted_text,
+            )
+        else:
+            return SecretsDetectionSimpleResponse(
+                contains_entities=not result.passed,
+                explanation="\n".join(explanation_parts),
+                redacted_text=redacted_text,
+            )
+def main():
+    weave.init(project_name="parambharat/guardrails-genie")
+    guardrail = SecretsDetectionGuardrail(redaction=REDACTION.REDACT_ALL)
+    dataset = [
+        {
+            "input": 'I need to pass a key\naws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"',
+        },
+        {
+            "input": "My github token is: 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",
+        },
+        {
+            "input": "My JWT token is: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c",
+        },
+    ]
+    for item in dataset:
+        # Check text for entities
+        result = guardrail.guard(prompt=item["input"])
+        # Access results
+        print(f"Contains entities: {result.contains_secrets}")
+        print(f"Detected entities: {result.detected_secrets}")
+        print(f"Explanation: {result.explanation}")
+        print(f"Anonymized text: {result.redacted_text}")
+    # import regex as re
+    #
+    # sample_input = "My JWT token is: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c"
+    # jwt_pattern = DEFAULT_SECRETS_PATTERNS["JwtToken"][0]
+    # print(jwt_pattern)
+    # pattern = re.compile(jwt_pattern)
+    # print(pattern)
+    # print(pattern.findall(sample_input))
+    # import pandas as pd
+    #
+    # df = pd.read_json("secrets_patterns_bak.jsonl", lines=True)
+    # df.loc[:, "patterns"] = df["patterns"].map(lambda x: [i[2:-1] for i in x])
+    # df.to_json("secrets_patterns.jsonl", orient="records", lines=True)
+if __name__ == "__main__":
+    main()