Spaces:
Runtime error
Runtime error
import pandas as pd | |
import numpy as np | |
from tqdm import tqdm | |
from collections import Counter | |
from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
from transformers import pipeline | |
def run_inference(df, INPUT, TASK, classifier, label_mapping, rev_map, task_label_mapping, is_sentencelevel=True):
    """Run ``classifier`` over the ``INPUT`` column of ``df``, one row at a time.

    Args:
        df: DataFrame whose ``INPUT`` column holds the texts to classify.
        INPUT: Name of the column to read.
        TASK: Key used to index ``label_mapping`` and ``task_label_mapping``.
        classifier: Callable taking a string and returning a sequence whose
            first element is a mapping with ``"label"`` and ``"score"`` keys.
        label_mapping: ``{TASK: {label_id: label_name}}``.
        rev_map: Maps the classifier's ``"label"`` value to a ``label_id``.
        task_label_mapping: ``{TASK: (label_tracked, other_label)}``; only
            read in sentence-level mode.
        is_sentencelevel: When true, split each text on ``"."`` and aggregate
            per-fragment predictions; otherwise classify the whole text once.

    Returns:
        A list with one tuple per row, in row order.

        * Sentence-level: ``(ratio, confidence)`` where ``ratio`` is
          ``count(label_tracked) / (count(label_tracked) + count(other_label))``
          and ``confidence`` is the mean classifier score over the classified
          fragments. A component is ``nan`` when it is undefined, i.e. no
          fragment was classified or no fragment received either tracked
          label (previously this raised ``ZeroDivisionError``).
        * Whole-text: ``(label_name, score)``.
    """
    inferences = []
    for i in tqdm(range(len(df)), ascii=True):
        text = df.iloc[i, :][INPUT]

        if not is_sentencelevel:
            output = classifier(text)[0]
            inferences.append(
                (label_mapping[TASK][rev_map[output["label"]]], output["score"])
            )
            continue

        labels = []
        scores = []
        # NOTE: splitting on "." leaves an empty trailing fragment when the
        # text ends with a period; it is still classified (as ".") to keep
        # results identical to earlier runs.
        for sentence in text.split("."):
            # Skip very long fragments; presumably this keeps inputs within
            # the model's maximum sequence length -- TODO confirm.
            if len(sentence) >= 800:
                continue
            output = classifier((sentence + ".").lower())[0]
            labels.append(label_mapping[TASK][rev_map[output["label"]]])
            scores.append(output["score"])

        label_tracked, other_label = task_label_mapping[TASK]
        counts = Counter(labels)
        tracked_total = counts[label_tracked] + counts[other_label]

        # Every fragment may have been skipped above, so guard both divisions
        # instead of crashing the whole run on a single row.
        ratio = counts[label_tracked] / tracked_total if tracked_total else float("nan")
        confidence = sum(scores) / len(scores) if scores else float("nan")
        inferences.append((ratio, confidence))
    return inferences
def compute_agentic_communal(df, hallucination=False):
    """Add agentic/communal classifier columns to ``df`` and return it.

    Loads the ``emmatliu/language-agency-classifier`` tokenizer and model,
    wraps them in a text-classification pipeline, and passes every row of the
    chosen column through ``run_inference`` in its default (sentence-level)
    mode.

    Args:
        df: DataFrame containing a ``"text"`` column, or a ``"hallucination"``
            column when ``hallucination`` is true.
        hallucination: Selects which of the two columns is classified.

    Returns:
        The same ``df`` object, modified in place with two new columns:
        ``"per_ac"`` (first element of each inference tuple) and ``"con_ac"``
        (second element).
    """
    model_name = "emmatliu/language-agency-classifier"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)

    # Invert id -> label so pipeline label strings can be mapped back to ids.
    rev_map = {label: idx for idx, label in model.config.id2label.items()}

    INPUT = "hallucination" if hallucination else "text"
    TASK = "ac_classifier"

    # Track percentage agentic / (percentage agentic + percentage communal).
    task_label_mapping = {TASK: ("agentic", "communal")}
    label_mapping = {TASK: {0: "communal", 1: "agentic"}}

    inferences = run_inference(
        df,
        INPUT,
        TASK,
        classifier,
        label_mapping,
        rev_map,
        task_label_mapping,
    )

    df["per_ac"] = [result[0] for result in inferences]
    df["con_ac"] = [result[1] for result in inferences]
    return df