File size: 3,077 Bytes
c3dcec1
09d10e0
c3dcec1
 
 
09d10e0
 
 
 
def1d66
af9d904
09d10e0
 
af9d904
09d10e0
 
 
def1d66
09d10e0
 
 
def1d66
 
09d10e0
 
def1d66
09d10e0
 
 
 
 
 
 
c3dcec1
 
 
 
09d10e0
c3dcec1
def1d66
 
c3dcec1
 
09d10e0
49fe7f1
09d10e0
af9d904
 
 
c3dcec1
 
09d10e0
c3dcec1
 
af9d904
 
 
c3dcec1
 
 
 
 
 
 
 
 
 
def1d66
 
c3dcec1
49fe7f1
c3dcec1
af9d904
 
 
c3dcec1
 
def1d66
af9d904
c3dcec1
af9d904
 
 
c3dcec1
af9d904
c3dcec1
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import copy
import numpy as np
import pandas as pd


def _placeholder_to_nan(values):
    """Replace "-" placeholders with NaN and cast what remains to float."""
    return values.replace("-", np.nan).astype(float)


def _as_percent(scores, raw):
    """Scale ``scores`` to 0-100, keeping "-" on the rows where ``raw`` is "-"."""
    return (scores * 100).where(raw != "-", "-")


def process_plot_data(df, flag=False):
    """Condense the raw leaderboard metrics into one 0-100 score per task.

    Args:
        df: DataFrame holding "Model" and "Domain" plus, per task, either a
            "<task>-F1" column, a "NER-Acc" column, or the three columns
            "<task>-ROUGE-1", "<task>-ROUGE-2" and "<task>-ROUGE-L". Cells
            may hold the string "-" as a missing-value placeholder.
        flag: When true, also write the result to ``scores.xlsx``
            (sheet "Sheet1", no index). Defaults to False, i.e. no file
            is written.

    Returns:
        A new DataFrame with columns "Model", "Domain" and one column per
        task, in the order of ``columns_names`` below. F1 and accuracy
        values are multiplied by 100; ROUGE tasks get the row-wise mean of
        their three ROUGE columns multiplied by 100. A row keeps "-" when
        the source cell (for ROUGE tasks: the ROUGE-1 cell) is "-".

    Raises:
        KeyError: if one of the expected source columns is absent.
    """
    f1_tasks = ("AR", "ER", "CR", "CFM", "SCM", "CP", "PTP", "CTP", "LQA")
    rouge_tasks = ("JRG", "LC", "JS", "CU", "JRG-TAG", "LC-TAG")
    columns_names = ["Model", "Domain", "AR", "ER", "NER", "JS", "CR", "CFM", "SCM",
                     "CP", "PTP", "CTP", "LQA", "JRG", "CU", "LC", "JRG-TAG", "LC-TAG"]

    # Keep only the identifying columns; every score column is derived below.
    df2 = df[["Model", "Domain"]].copy()

    for col in columns_names[2:]:
        if col in f1_tasks:
            raw = df[f"{col}-F1"]
            # Convert before scaling: multiplying a "-" string by 100 would
            # repeat the string instead of producing a number.
            df2[col] = _as_percent(_placeholder_to_nan(raw), raw)
        elif col == "NER":
            raw = df[f"{col}-Acc"]
            df2[col] = _as_percent(_placeholder_to_nan(raw), raw)
        elif col in rouge_tasks:
            rouge_cols = [f"{col}-ROUGE-1", f"{col}-ROUGE-2", f"{col}-ROUGE-L"]
            # mean(axis=1) skips NaN, so a partially filled row is averaged
            # over the ROUGE values that are present.
            rouge_mean = _placeholder_to_nan(df[rouge_cols]).mean(axis=1)
            # The ROUGE-1 cell alone decides whether the row shows "-".
            df2[col] = _as_percent(rouge_mean, df[f"{col}-ROUGE-1"])

    # reindex returns a new frame, so the result has to be kept.
    df2 = df2.reindex(columns=columns_names)

    if flag:
        # Save to an Excel file.
        with pd.ExcelWriter('scores.xlsx') as writer:
            df2.to_excel(writer, sheet_name="Sheet1", index=False)

    return df2

def plot_data():
    """Build the per-category score tables used for plotting.

    Reads the first 18 data rows of columns A:BE from sheet "Sheet2" of
    ``leaderboard.xlsx``, condenses them with ``process_plot_data`` and
    selects the task columns of each category by name.

    Returns:
        dict mapping a category title to a DataFrame whose first column is
        "Model" followed by that category's task scores. "Overall" holds
        the tasks of all three categories.
    """
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:BE',
        nrows=18)
    # Missing cells are left as NaN; no "-" placeholder is substituted
    # before the scores are scaled.
    df = process_plot_data(leaderboard_df)

    bir_tasks = ["AR", "ER", "NER", "JS", "CR"]
    lfi_tasks = ["CFM", "SCM", "CP", "PTP", "CTP", "LQA"]
    cla_tasks = ["JRG", "CU", "LC"]

    df_BIR = df[["Model", *bir_tasks]]
    df_LFI = df[["Model", *lfi_tasks]]
    df_CLA = df[["Model", *cla_tasks]]

    # Overall is the union of the three categories. The previous positional
    # slice stopped one column early and left out "LC" even though the
    # Complex Legal Application table includes it.
    # NOTE(review): confirm that "LC" is meant to appear in the overall plot.
    df_overall = df[["Model", *bir_tasks, *lfi_tasks, *cla_tasks]]

    plot_df_dict = {
        "Overall": df_overall,
        "Basic Information Retrieval": df_BIR,
        "Legal Foundation Inference": df_LFI,
        "Complex Legal Application": df_CLA,
    }
    return plot_df_dict


def tab_data():
    """Load the raw leaderboard sheet and split it into per-category tables.

    Reads the first 18 data rows of columns A:BE from sheet "Sheet2" of
    ``leaderboard.xlsx``, replaces missing cells with "-", and slices the
    frame by column position.

    Returns:
        dict mapping a category title to a DataFrame. Every table starts
        with the first two sheet columns (presumably Model and Domain --
        verify against the sheet), followed by that category's metric
        columns; "Overall" holds the first 56 columns.
    """
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:BE',
        nrows=18)
    # fillna returns a new frame rather than modifying in place, so the
    # result must be kept for the "-" placeholder to reach the tables.
    leaderboard_df = leaderboard_df.fillna("-")

    leading_cols = [0, 1]
    bir_cols = list(range(2, 18))
    lfi_cols = list(range(18, 42))
    cla_cols = list(range(42, 56))

    df_BIR = leaderboard_df.iloc[:, leading_cols + bir_cols]
    df_LFI = leaderboard_df.iloc[:, leading_cols + lfi_cols]
    df_CLA = leaderboard_df.iloc[:, leading_cols + cla_cols]

    # NOTE(review): A:BE spans 57 sheet columns but only positions 0-55 are
    # used, so the last column read is in no table -- confirm this is intended.
    df_overall = leaderboard_df.iloc[:, leading_cols + bir_cols + lfi_cols + cla_cols]

    table_df_dict = {
        "Overall": df_overall,
        "Basic Information Retrieval": df_BIR,
        "Legal Foundation Inference": df_LFI,
        "Complex Legal Application": df_CLA,
    }
    return table_df_dict


if __name__ == "__main__":
    # Script entry point: build the plot tables first, then the raw tables.
    # Both results are dictionaries of DataFrames keyed by category title.
    df1, df2 = plot_data(), tab_data()