Spaces:
Running
Running
geekyrakshit
commited on
Commit
·
b077b7d
1
Parent(s):
a1c5338
add: guardrails manager
Browse files
guardrails_genie/guardrails/__init__.py
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
from .injection import SurveyGuardrail
|
|
|
2 |
|
3 |
-
__all__ = ["SurveyGuardrail"]
|
|
|
1 |
from .injection import SurveyGuardrail
|
2 |
+
from .manager import GuardrailManager
|
3 |
|
4 |
+
__all__ = ["SurveyGuardrail", "GuardrailManager"]
|
guardrails_genie/guardrails/base.py
CHANGED
@@ -11,7 +11,3 @@ class Guardrail(weave.Model):
|
|
11 |
@weave.op()
|
12 |
def guard(self, prompt: str, **kwargs) -> list[str]:
|
13 |
pass
|
14 |
-
|
15 |
-
@weave.op()
|
16 |
-
def predict(self, prompt: str, **kwargs) -> list[str]:
|
17 |
-
return self.guard(prompt, **kwargs)
|
|
|
11 |
@weave.op()
|
12 |
def guard(self, prompt: str, **kwargs) -> list[str]:
|
13 |
pass
|
|
|
|
|
|
|
|
guardrails_genie/guardrails/injection/survey_guardrail.py
CHANGED
@@ -17,7 +17,7 @@ class SurveyGuardrailResponse(BaseModel):
|
|
17 |
|
18 |
class SurveyGuardrail(Guardrail):
|
19 |
llm_model: OpenAIModel
|
20 |
-
|
21 |
@weave.op()
|
22 |
def load_prompt_injection_survey(self) -> str:
|
23 |
prompt_injection_survey_path = os.path.join(
|
@@ -61,7 +61,7 @@ Here are some strict instructions that you must follow:
|
|
61 |
return user_prompt, system_prompt
|
62 |
|
63 |
@weave.op()
|
64 |
-
def
|
65 |
user_prompt, system_prompt = self.format_prompts(prompt)
|
66 |
chat_completion = self.llm_model.predict(
|
67 |
user_prompts=user_prompt,
|
@@ -70,3 +70,8 @@ Here are some strict instructions that you must follow:
|
|
70 |
**kwargs,
|
71 |
)
|
72 |
return chat_completion.choices[0].message.parsed
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
class SurveyGuardrail(Guardrail):
|
19 |
llm_model: OpenAIModel
|
20 |
+
|
21 |
@weave.op()
|
22 |
def load_prompt_injection_survey(self) -> str:
|
23 |
prompt_injection_survey_path = os.path.join(
|
|
|
61 |
return user_prompt, system_prompt
|
62 |
|
63 |
@weave.op()
|
64 |
+
def predict(self, prompt: str, **kwargs) -> list[str]:
|
65 |
user_prompt, system_prompt = self.format_prompts(prompt)
|
66 |
chat_completion = self.llm_model.predict(
|
67 |
user_prompts=user_prompt,
|
|
|
70 |
**kwargs,
|
71 |
)
|
72 |
return chat_completion.choices[0].message.parsed
|
73 |
+
|
74 |
+
@weave.op()
|
75 |
+
def guard(self, prompt: str, **kwargs) -> list[str]:
|
76 |
+
response = self.predict(prompt, **kwargs)
|
77 |
+
return {"verdict": response.injection_prompt}
|
guardrails_genie/guardrails/manager.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import weave
|
2 |
+
from weave.flow.obj import Object as WeaveObject
|
3 |
+
|
4 |
+
from .base import Guardrail
|
5 |
+
|
6 |
+
|
7 |
+
class GuardrailManager(WeaveObject):
|
8 |
+
guardrails: list[Guardrail]
|
9 |
+
|
10 |
+
@weave.op()
|
11 |
+
def guard(self, prompt: str, **kwargs) -> dict:
|
12 |
+
alerts = []
|
13 |
+
for guardrail in self.guardrails:
|
14 |
+
response = guardrail.guard(prompt, **kwargs)
|
15 |
+
alerts.append({guardrail.name: response})
|
16 |
+
return alerts
|