File size: 1,487 Bytes
81d4aee
 
 
 
 
8ddc567
 
81d4aee
8ddc567
 
81d4aee
 
fb7fb6c
 
 
 
 
 
 
8ddc567
fb7fb6c
 
 
 
 
 
 
 
 
8ddc567
fb7fb6c
 
 
 
8ddc567
fb7fb6c
8ddc567
fb7fb6c
 
8ddc567
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from .cleaning import remove_citations, split_data, split_text, chunk_data
import pandas as pd
import numpy as np
import json

# Label lookup tables, loaded once when this module is imported.
# The paths are relative, so they resolve against the current working
# directory of the running process.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's default text encoding.
with open("utils/id2label.json", "r", encoding="utf-8") as j:
    id2label = json.load(j)

with open("utils/label2id.json", "r", encoding="utf-8") as j:
    label2id = json.load(j)


def normaliz_dict(d, target=1.0):
    """Rescale the values of ``d`` so that they sum to ``target``.

    Args:
        d: Mapping of keys to numeric values.
        target: Desired sum of the returned values.

    Returns:
        A new dict with the same keys, each value multiplied by
        ``target / sum(d.values())``. When the values sum to zero (which
        includes an empty ``d``) no scaling factor exists, so an unscaled
        copy of ``d`` is returned instead of raising ``ZeroDivisionError``.
    """
    raw = sum(d.values())
    if raw == 0:
        # Nothing to scale by: hand back a copy so callers still receive a
        # fresh dict, as they do on the normal path.
        return dict(d)
    factor = target / raw
    return {key: value * factor for key, value in d.items()}


def average_text(text, model, judges):
    """Score ``text`` with ``model`` and average the scores per label.

    ``model(text)`` is iterated as a sequence of items, where each item is
    itself a sequence of dicts carrying ``"label"`` and ``"score"`` keys.
    (Presumably one item per text chunk from a classification pipeline;
    confirm against the caller.)

    Args:
        text: Input passed straight to ``model``.
        model: Callable producing the nested label/score structure above.
        judges: Container of labels to keep; other labels are dropped.

    Returns:
        A ``(summary, per_item)`` tuple. ``per_item`` is a list with one
        dict per model output item: the kept labels' scores, rounded to two
        decimals and rescaled by ``normaliz_dict``. ``summary`` maps each
        label to the rounded mean of its per-item values, rescaled by
        ``normaliz_dict`` and ordered from highest to lowest value.
    """
    outputs = model(text)

    # Per item: keep only the requested labels, then rescale.
    per_item = []
    for scored_labels in outputs:
        kept = {
            entry["label"]: round(entry["score"], 2)
            for entry in scored_labels
            if entry["label"] in judges
        }
        per_item.append(normaliz_dict(kept))

    # Gather every label's rescaled values across all items.
    collected = {}
    for distribution in per_item:
        for label, value in distribution.items():
            collected.setdefault(label, []).append(round(value, 2))

    # Average per label, rescale once more, and rank in descending order.
    means = {
        label: round(sum(values) / len(values), 2)
        for label, values in collected.items()
    }
    means = normaliz_dict(means)
    ranked = sorted(means.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked), per_item


# def find_case_by_name(df, name):
#     return display(
#         HTML(
#             df[df["case_name"].str.contains(name)]
#             .iloc[:, :-1]
#             .to_html(render_links=True, escape=False)
#         )
#     )


# def head_df(df):
#     return display(
#         HTML(df.iloc[:, :-1].head().to_html(render_links=True, escape=False))
#     )