emmatliu committed on
Commit
694a2f7
·
verified ·
1 Parent(s): 8a51291

Update agentic_classifier.py

Browse files
Files changed (1) hide show
  1. agentic_classifier.py +24 -32
agentic_classifier.py CHANGED
@@ -36,39 +36,31 @@ def run_inference(df, INPUT, TASK, classifier, label_mapping, rev_map, task_labe
36
 
37
  return inferences
38
 
39
- # TODO: remove when model is fixed :/
40
- def compute_agentic_communal(df, hallucination=False):
41
- df['per_ac'] = np.random.rand(len(df))
42
- df['con_ac'] = np.random.rand(len(df))
43
- return df
44
 
45
- # Need clarification on model lol
46
- # def compute_agentic_communal(df,hallucination=False):
47
- # model_path = "./checkpoints/checkpoint-48" #
48
- # tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
49
- # model = AutoModelForSequenceClassification.from_pretrained(model_path)
50
- # classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
51
- # rev_map = {v: k for k, v in model.config.id2label.items()}
52
 
53
- # if hallucination:
54
- # INPUT = "hallucination"
55
- # else:
56
- # INPUT = "TEXT" # need to tell users what this should be called TODO: change this to the correct column name
 
 
 
 
 
 
 
57
 
58
- # TASK = "ac_classifier"
59
- # task_label_mapping = {
60
- # # Track percentage agentic / percentage agentic + percentage communal
61
- # "ac_classifier": ("agentic", "communal"),
62
- # }
63
- # label_mapping = {
64
- # "ac_classifier": {
65
- # 0: "communal",
66
- # 1: "agentic",
67
- # }
68
- # }
69
 
70
- # inferences = run_inference(df, INPUT, TASK, classifier, label_mapping, rev_map, task_label_mapping)
71
- # df["per_ac"] = [i[0] for i in inferences]
72
- # df["con_ac"] = [i[1] for i in inferences]
73
-
74
- # return df
 
36
 
37
  return inferences
38
 
39
def compute_agentic_communal(df, hallucination=False):
    """Annotate ``df`` with agentic/communal classifier outputs.

    Loads the ``emmatliu/language-agency-classifier`` tokenizer and model,
    wraps them in a ``text-classification`` pipeline, and delegates the
    scoring to ``run_inference``. Elements 0 and 1 of every returned
    inference are stored in the ``per_ac`` and ``con_ac`` columns.

    Args:
        df: Table holding the text to classify (presumably a pandas
            DataFrame -- confirm against callers). It is modified in place.
        hallucination: When truthy the ``"hallucination"`` column is
            classified; otherwise the ``"text"`` column is used.

    Returns:
        The same ``df`` object, with ``per_ac`` and ``con_ac`` assigned.
    """
    checkpoint = "emmatliu/language-agency-classifier"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    classifier = pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
    )

    # Invert the model's id -> label table so labels can be looked up by name.
    label_to_id = {label: idx for idx, label in model.config.id2label.items()}

    text_column = "hallucination" if hallucination else "text"
    task = "ac_classifier"

    # Label pair tracked for the task:
    # percentage agentic / (percentage agentic + percentage communal).
    tracked_labels = {task: ("agentic", "communal")}
    id_to_name = {task: {0: "communal", 1: "agentic"}}

    results = run_inference(
        df,
        text_column,
        task,
        classifier,
        id_to_name,
        label_to_id,
        tracked_labels,
    )

    df["per_ac"] = [row[0] for row in results]
    df["con_ac"] = [row[1] for row in results]
    return df