Spaces:
Runtime error
Runtime error
# Run example: `python3 classifier.py --task formality`
import pandas as pd | |
from transformers import pipeline | |
from collections import Counter | |
# Per task: (tracked label, other label), exactly as the task's classifier
# emits them. `predict` unpacks each entry into exactly two names, so a
# three-way label set such as ("positive", "neutral", "negative") cannot be
# used here without changing `predict` as well.
task_label_mapping = {
    "sentiment": (
        "POSITIVE",  # tracked
        "NEGATIVE",  # other
    ),
    "formality": (
        "formal",  # tracked
        "informal",  # other
    ),
}
# Classify one text, either as a whole or sentence by sentence.
def predict(
    text,
    classifier,
    task,
    output_type="csv",
    is_sentencelevel=True,
    max_sentence_chars=800,
):
    """Classify ``text`` with ``classifier`` and summarise the outcome.

    Args:
        text: String to classify.
        classifier: Callable taking a string and returning a sequence whose
            first item is a mapping with ``"label"`` and ``"score"`` keys.
        task: Key into ``task_label_mapping``. Only looked up in
            sentence-level mode when ``output_type == "csv"``.
        output_type: ``"csv"`` returns the share of sentences carrying the
            task's tracked label; any other value returns the most frequent
            label instead.
        is_sentencelevel: When true, split ``text`` on ``"."`` and classify
            each fragment on its own; otherwise classify ``text`` in one call.
        max_sentence_chars: Fragments with at least this many characters are
            skipped. Defaults to 800.

    Returns:
        A ``(value, confidence)`` tuple.

        * Whole-text mode: ``(label, score)`` straight from the classifier.
        * Sentence-level, ``"csv"``: ``(share, mean_score)`` where ``share`` is
          ``tracked / (tracked + other)`` over the classified fragments.
        * Sentence-level, otherwise: ``(most_frequent_label, mean_score)``;
          ties go to the label seen first.

        When no fragment could be classified, ``mean_score`` is NaN, the
        share is NaN and the label is ``None``. The share is also NaN when
        neither mapped label was produced.

    Raises:
        KeyError: If ``task`` is not in ``task_label_mapping`` (``"csv"``
            output in sentence-level mode only).
    """
    if not is_sentencelevel:
        result = classifier(text)[0]
        return result["label"], result["score"]

    labels = []
    scores = []
    for sentence in text.split("."):
        # Splitting on "." leaves empty fragments behind (trailing period,
        # ellipsis). Classifying a bare "." would add a meaningless vote.
        if not sentence.strip() or len(sentence) >= max_sentence_chars:
            continue
        result = classifier(sentence + ".")[0]
        labels.append(result["label"])
        scores.append(result["score"])

    # NaN rather than ZeroDivisionError when nothing was classified, so one
    # unusable row does not abort a whole DataFrame.apply run.
    nan = float("nan")
    confidence = sum(scores) / len(scores) if scores else nan
    counts = Counter(labels)

    if output_type == "csv":
        label_tracked, other_label = task_label_mapping[task]
        tracked = counts[label_tracked]
        total = tracked + counts[other_label]
        share = tracked / total if total else nan
        return share, confidence

    if not counts:
        return None, confidence
    # most_common keeps first-seen order among equal counts, so ties are
    # resolved the same way on every run.
    return counts.most_common(1)[0][0], confidence
def compute_sentiment_and_formality(df, hallucination=False):
    """Add sentiment and formality columns to ``df`` and return it.

    Args:
        df: DataFrame with a ``"text"`` column, or a ``"hallucination"``
            column when ``hallucination`` is true.
        hallucination: Selects which of those two columns is scored.

    Returns:
        The same ``df`` object, modified in place with four new columns:
        ``per_pos`` / ``con_pos`` hold the first / second element of each
        ``predict`` result for the sentiment classifier, and ``per_for`` /
        ``con_for`` hold the same for the formality classifier.

    Both classifiers are built on every call.
    """
    column = "hallucination" if hallucination else "text"

    # Default sentiment pipeline; the original author pointed at
    # https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english
    sentiment_model = pipeline("sentiment-analysis")
    # Formality model: https://huggingface.co/s-nlp/roberta-base-formality-ranker
    formality_model = pipeline(
        "text-classification", "s-nlp/roberta-base-formality-ranker"
    )

    # Score every row; formality runs first, then sentiment.
    formality_results = df[column].apply(
        lambda row_text: predict(row_text, formality_model, "formality")
    )
    sentiment_results = df[column].apply(
        lambda row_text: predict(row_text, sentiment_model, "sentiment")
    )

    # Split each (value, confidence) pair into its two columns. Sentiment
    # columns are written before the formality ones.
    for results, value_col, confidence_col in (
        (sentiment_results, "per_pos", "con_pos"),
        (formality_results, "per_for", "con_for"),
    ):
        df[value_col] = [pair[0] for pair in results]
        df[confidence_col] = [pair[1] for pair in results]

    return df