from .cleaning import remove_citations, split_data, split_text, chunk_data | |
import pandas as pd | |
import numpy as np | |
import json | |
# Load the contents of the two JSON files once, at import time (presumably
# the id -> label and label -> id mappings used by the classifier; confirm
# against the files themselves).
# NOTE: the paths are relative to the current working directory.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's default locale encoding.
with open("utils/id2label.json", "r", encoding="utf-8") as j:
    id2label = json.load(j)
with open("utils/label2id.json", "r", encoding="utf-8") as j:
    label2id = json.load(j)
def average_text(text, model):
    """Run *model* on *text* and average the per-label scores over all outputs.

    Args:
        text: Input passed unchanged to ``model``.
        model: Callable invoked as ``model(text)``. It must return an iterable
            of iterables of dicts, each dict having a ``"label"`` and a
            ``"score"`` key (presumably one inner iterable per text chunk;
            confirm against the caller).

    Returns:
        A tuple ``(summary, per_item)``:

        * ``summary`` maps each label to the mean of its scores, rounded to
          two decimals, ordered from highest to lowest mean.
        * ``per_item`` is a list with one ``{label: score}`` dict per inner
          iterable, scores rounded to two decimals.
    """
    scores_by_label = {}
    per_item = []

    # Single pass over the model output: the output may be a one-shot
    # iterator/generator, which would be exhausted by a second traversal.
    for predictions in model(text):
        item_scores = {}
        for prediction in predictions:
            label = prediction["label"]
            # Each score is rounded before averaging (existing behaviour,
            # kept so numeric results stay unchanged for callers).
            score = round(prediction["score"], 2)
            scores_by_label.setdefault(label, []).append(score)
            item_scores[label] = score
        per_item.append(item_scores)

    summary = {
        label: round(sum(scores) / len(scores), 2)
        for label, scores in scores_by_label.items()
    }
    ranked = dict(sorted(summary.items(), key=lambda kv: kv[1], reverse=True))
    return ranked, per_item
# def find_case_by_name(df, name):
#     return display(
#         HTML(
#             df[df["case_name"].str.contains(name)]
#             .iloc[:, :-1]
#             .to_html(render_links=True, escape=False)
#         )
#     )

# def head_df(df):
#     return display(
#         HTML(df.iloc[:, :-1].head().to_html(render_links=True, escape=False))
#     )