Spaces:

wandb
/

guardrails-genie

Running

App Files Community

geekyrakshit committed on Dec 4, 2024

Commit

8647e3b

1 Parent(s): a70d6a8

refactor: classifier training

Browse files

Files changed (5) hide show

application_pages/train_classifier.py +1 -1
guardrails_genie/guardrails/entity_recognition/pii_examples/pii_benchmark_weave.py +1 -1
guardrails_genie/guardrails/injection/classifier_guardrail.py +1 -2
guardrails_genie/{train_classifier.py → train/train_classifier.py} +2 -39
guardrails_genie/utils.py +43 -0

application_pages/train_classifier.py CHANGED Viewed

@@ -3,7 +3,7 @@ import os
 import streamlit as st
 from dotenv import load_dotenv
-from guardrails_genie.train_classifier import train_binary_classifier
 def initialize_session_state():

 import streamlit as st
 from dotenv import load_dotenv
+from guardrails_genie.train.train_classifier import train_binary_classifier
 def initialize_session_state():

guardrails_genie/guardrails/entity_recognition/pii_examples/pii_benchmark_weave.py CHANGED Viewed

@@ -362,7 +362,7 @@ def main():
             preprocess_model_input=preprocess_model_input,
         )
-        results = asyncio.run(evaluation.evaluate(guardrail))
 if __name__ == "__main__":

             preprocess_model_input=preprocess_model_input,
         )
+        asyncio.run(evaluation.evaluate(guardrail))
 if __name__ == "__main__":

guardrails_genie/guardrails/injection/classifier_guardrail.py CHANGED Viewed

@@ -1,12 +1,11 @@
 from typing import Optional
 import torch
 import weave
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
 from transformers.pipelines.base import Pipeline
-import wandb
 from ..base import Guardrail

 from typing import Optional
 import torch
+import wandb
 import weave
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
 from transformers.pipelines.base import Pipeline
 from ..base import Guardrail

guardrails_genie/{train_classifier.py → train/train_classifier.py} RENAMED Viewed

@@ -1,54 +1,17 @@
 import evaluate
 import numpy as np
 import streamlit as st
 from datasets import load_dataset
 from transformers import (
     AutoModelForSequenceClassification,
     AutoTokenizer,
     DataCollatorWithPadding,
     Trainer,
-    TrainerCallback,
     TrainingArguments,
 )
-from transformers.trainer_callback import TrainerControl, TrainerState
-import wandb
-class StreamlitProgressbarCallback(TrainerCallback):
-    """
-    StreamlitProgressbarCallback is a custom callback for the Hugging Face Trainer
-    that integrates a progress bar into a Streamlit application. This class updates
-    the progress bar at each training step, providing real-time feedback on the
-    training process within the Streamlit interface.
-    Attributes:
-        progress_bar (streamlit.delta_generator.DeltaGenerator): A Streamlit progress
-            bar object initialized to 0 with the text "Training".
-    Methods:
-        on_step_begin(args, state, control, **kwargs):
-            Updates the progress bar at the beginning of each training step. The progress
-            is calculated as the percentage of completed steps out of the total steps.
-            The progress bar text is updated to show the current step and the total steps.
-    """
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.progress_bar = st.progress(0, text="Training")
-    def on_step_begin(
-        self,
-        args: TrainingArguments,
-        state: TrainerState,
-        control: TrainerControl,
-        **kwargs,
-    ):
-        super().on_step_begin(args, state, control, **kwargs)
-        self.progress_bar.progress(
-            (state.global_step * 100 // state.max_steps) + 1,
-            text=f"Training {state.global_step} / {state.max_steps}",
-        )
 def train_binary_classifier(

 import evaluate
 import numpy as np
 import streamlit as st
+import wandb
 from datasets import load_dataset
 from transformers import (
     AutoModelForSequenceClassification,
     AutoTokenizer,
     DataCollatorWithPadding,
     Trainer,
     TrainingArguments,
 )
+from guardrails_genie.utils import StreamlitProgressbarCallback
 def train_binary_classifier(

guardrails_genie/utils.py CHANGED Viewed

@@ -1,5 +1,12 @@
 import pandas as pd
 import weave
 class EvaluationCallManager:
@@ -91,3 +98,39 @@ class EvaluationCallManager:
                 call["score"]["correct"] for call in guardrail_call["calls"]
             ]
         return pd.DataFrame(dataframe)

 import pandas as pd
+import streamlit as st
 import weave
+from transformers.trainer_callback import (
+    TrainerCallback,
+    TrainerControl,
+    TrainerState,
+    TrainingArguments,
+)
 class EvaluationCallManager:
                 call["score"]["correct"] for call in guardrail_call["calls"]
             ]
         return pd.DataFrame(dataframe)
+class StreamlitProgressbarCallback(TrainerCallback):
+    """
+    StreamlitProgressbarCallback is a custom callback for the Hugging Face Trainer
+    that integrates a progress bar into a Streamlit application. This class updates
+    the progress bar at each training step, providing real-time feedback on the
+    training process within the Streamlit interface.
+    Attributes:
+        progress_bar (streamlit.delta_generator.DeltaGenerator): A Streamlit progress
+            bar object initialized to 0 with the text "Training".
+    Methods:
+        on_step_begin(args, state, control, **kwargs):
+            Updates the progress bar at the beginning of each training step. The progress
+            is calculated as the percentage of completed steps out of the total steps.
+            The progress bar text is updated to show the current step and the total steps.
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.progress_bar = st.progress(0, text="Training")
+    def on_step_begin(
+        self,
+        args: TrainingArguments,
+        state: TrainerState,
+        control: TrainerControl,
+        **kwargs,
+    ):
+        super().on_step_begin(args, state, control, **kwargs)
+        self.progress_bar.progress(
+            (state.global_step * 100 // state.max_steps) + 1,
+            text=f"Training {state.global_step} / {state.max_steps}",
+        )