Spaces:
Runtime error
Runtime error
File size: 2,565 Bytes
a269338 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
# Run example: `python3 classifier.py --task formality`
import pandas as pd
from transformers import pipeline
from collections import Counter
# Classifier labels per task, written as (tracked label, other label).
# `predict` reports the share of sentences that received the tracked label.
# Alternative label set for a three-class sentiment model:
#   ("positive", "neutral", "negative")
task_label_mapping = dict(
    sentiment=("POSITIVE", "NEGATIVE"),
    formality=("formal", "informal"),
)
# Classify a single text for the given task, either as a whole or sentence by sentence
def predict(
    text,
    classifier,
    task,
    output_type="csv",
    is_sentencelevel=True,
    max_sentence_chars=800,
):
    """Classify ``text`` and summarise the predicted labels.

    Args:
        text: The string to classify.
        classifier: Callable that takes a string and returns a sequence whose
            first item is a dict with ``"label"`` and ``"score"`` keys.
        task: Key into ``task_label_mapping``; only read when
            ``output_type == "csv"`` in sentence-level mode.
        output_type: ``"csv"`` returns the share of the tracked label; any
            other value returns the most frequent label.
        is_sentencelevel: When true, ``text`` is split on ``"."`` and every
            fragment is classified separately; otherwise the whole text is
            classified in a single call.
        max_sentence_chars: Fragments with at least this many characters are
            skipped (presumably to stay within the model's input limit --
            TODO confirm).

    Returns:
        A 2-tuple.

        * Whole-text mode: ``(label, score)`` of the first classifier result.
        * Sentence mode, ``"csv"``: ``(tracked_share, mean_score)`` where
          ``tracked_share`` is the count of the tracked label divided by the
          count of all labels listed for ``task``.
        * Sentence mode, otherwise: ``(most_common_label, mean_score)``.

        When no fragment could be classified, the mean score is ``nan`` and
        the first element is ``nan`` (``"csv"``) or ``None`` (otherwise). When
        fragments were classified but none of their labels is listed for
        ``task``, the share is ``nan``.
    """
    if not is_sentencelevel:
        result = classifier(text)[0]
        return result["label"], result["score"]

    nan = float("nan")
    labels = []
    scores = []
    for sentence in text.split("."):
        # Splitting on "." leaves blank fragments (always one after a final
        # period); classifying a lone "." would distort counts and scores.
        if not sentence.strip() or len(sentence) >= max_sentence_chars:
            continue
        result = classifier(sentence + ".")[0]
        labels.append(result["label"])
        scores.append(result["score"])

    if not labels:
        # Nothing was classified: report "no data" instead of dividing by zero.
        return (nan if output_type == "csv" else None), nan

    confidence = sum(scores) / len(scores)
    counts = Counter(labels)

    if output_type == "csv":
        # The first mapped label is the tracked one; any number may follow.
        label_tracked, *other_labels = task_label_mapping[task]
        mapped_total = counts[label_tracked] + sum(
            counts[name] for name in other_labels
        )
        if mapped_total == 0:
            # The classifier only emitted labels that are not in the mapping.
            return nan, confidence
        return counts[label_tracked] / mapped_total, confidence

    # Most frequent label; ties resolve to the label encountered first.
    return counts.most_common(1)[0][0], confidence
def compute_sentiment_and_formality(df, hallucination=False):
    """Add sentiment and formality columns to ``df`` and return it.

    The text is read from the ``'hallucination'`` column when
    ``hallucination`` is true, otherwise from the ``'text'`` column. Each
    value is passed to ``predict`` with its default arguments, once per
    classifier.

    Columns written on ``df`` (the frame is modified in place):
        per_pos / con_pos: first / second element of the sentiment result.
        per_for / con_for: first / second element of the formality result.
    """
    source_column = 'hallucination' if hallucination else 'text'

    # No model is named here, so the library's default sentiment model is used.
    # https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english?text=I+like+you.+I+love+you
    classifier_sentiment = pipeline("sentiment-analysis")
    # https://huggingface.co/s-nlp/roberta-base-formality-ranker
    classifier_formality = pipeline(
        "text-classification", "s-nlp/roberta-base-formality-ranker"
    )

    def run(classifier, task):
        # One predict() call per row of the selected column.
        return df[source_column].apply(
            lambda value: predict(value, classifier, task)
        )

    formality_outputs = run(classifier_formality, "formality")
    sentiment_outputs = run(classifier_sentiment, "sentiment")

    # Split each (value, confidence) pair into its two result columns.
    df["per_pos"] = [pair[0] for pair in sentiment_outputs]
    df["con_pos"] = [pair[1] for pair in sentiment_outputs]
    df["per_for"] = [pair[0] for pair in formality_outputs]
    df["con_for"] = [pair[1] for pair in formality_outputs]
    return df