Spaces:
Sleeping
Sleeping
File size: 3,077 Bytes
c3dcec1 09d10e0 c3dcec1 09d10e0 def1d66 af9d904 09d10e0 af9d904 09d10e0 def1d66 09d10e0 def1d66 09d10e0 def1d66 09d10e0 c3dcec1 09d10e0 c3dcec1 def1d66 c3dcec1 09d10e0 49fe7f1 09d10e0 af9d904 c3dcec1 09d10e0 c3dcec1 af9d904 c3dcec1 def1d66 c3dcec1 49fe7f1 c3dcec1 af9d904 c3dcec1 def1d66 af9d904 c3dcec1 af9d904 c3dcec1 af9d904 c3dcec1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import copy
import numpy as np
import pandas as pd
def _percent_score(df, source_columns, placeholder="-"):
    """Average ``source_columns`` row-wise and scale the result to percent.

    Cells that cannot be parsed as numbers (such as the ``"-"`` placeholder)
    are skipped by the average.  Rows whose *first* source column holds the
    placeholder itself yield the placeholder instead of a number.
    """
    raw = df[source_columns]
    unavailable = raw.iloc[:, 0] == placeholder
    # Coerce explicitly instead of relying on ``replace`` to downcast object
    # columns, which recent pandas releases deprecate.
    numeric = raw.apply(pd.to_numeric, errors="coerce")
    score = numeric.mean(axis=1) * 100
    if unavailable.any():
        # Switch to object dtype only when a placeholder has to be stored.
        score = score.astype(object).mask(unavailable, placeholder)
    return score


def process_plot_data(df, flag=False):
    """Condense the raw leaderboard into one percentage score per task.

    Args:
        df: DataFrame holding "Model" and "Domain" plus the raw metric
            columns read below: ``<task>-F1`` for AR, ER, CR, CFM, SCM, CP,
            PTP, CTP and LQA; ``NER-Acc``; and ``<task>-ROUGE-1``,
            ``<task>-ROUGE-2``, ``<task>-ROUGE-L`` for JRG, LC, JS, CU,
            JRG-TAG and LC-TAG.
        flag: When true, additionally write the result to ``scores.xlsx``
            (sheet "Sheet1", without the index).  The original body forced
            this to ``True`` on every call, which made the parameter
            meaningless; it is now honoured.

    Returns:
        A new DataFrame with "Model", "Domain" and one column per task, in
        the order of ``columns_names``.  Scores are multiplied by 100; ROUGE
        tasks use the mean of their three ROUGE columns.  A ``"-"`` in the
        first source column of a task is carried over as ``"-"``.

    Raises:
        KeyError: If any of the expected source columns is missing.
    """
    f1_tasks = ("AR", "ER", "CR", "CFM", "SCM", "CP", "PTP", "CTP", "LQA")
    acc_tasks = ("NER",)
    columns_names = ["Model", "Domain", "AR", "ER", "NER", "JS", "CR", "CFM", "SCM",
                     "CP", "PTP", "CTP", "LQA", "JRG", "CU", "LC", "JRG-TAG", "LC-TAG"]

    # Keep only the "Model" and "Domain" columns; everything else is derived.
    df2 = df[["Model", "Domain"]].copy()

    # Compute the value of each new task column.
    for col in columns_names[2:]:
        if col in f1_tasks:
            source_columns = [f"{col}-F1"]
        elif col in acc_tasks:
            source_columns = [f"{col}-Acc"]
        else:
            source_columns = [f"{col}-ROUGE-1", f"{col}-ROUGE-2", f"{col}-ROUGE-L"]
        df2[col] = _percent_score(df, source_columns)

    # reindex returns a new frame, so the result must be rebound.
    df2 = df2.reindex(columns=columns_names)

    if flag:
        # Save to an Excel file.
        with pd.ExcelWriter('scores.xlsx') as writer:
            df2.to_excel(writer, sheet_name="Sheet1", index=False)
    return df2
def plot_data():
    """Build the per-category score tables used for plotting.

    Reads columns A:BE (first 18 data rows, header in the first row) from
    sheet "Sheet2" of ``leaderboard.xlsx``, condenses them with
    ``process_plot_data`` and slices the result by column position.

    Returns:
        dict: Category title -> DataFrame containing the "Model" column
        followed by that category's score columns.
    """
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:BE',
        nrows=18)
    # Missing cells are intentionally left as NaN.  The previous
    # ``leaderboard_df.fillna("-")`` statement discarded its result and so
    # never changed anything; it is dropped rather than "fixed" because
    # string placeholders would leak into the score arithmetic.
    df = process_plot_data(leaderboard_df)

    # Column positions in df (see ``columns_names`` in process_plot_data):
    # 0 Model, 1 Domain, 2-6 AR/ER/NER/JS/CR, 7-12 CFM/SCM/CP/PTP/CTP/LQA,
    # 13-15 JRG/CU/LC, 16-17 JRG-TAG/LC-TAG.
    df_BIR = df.iloc[:, [0] + list(range(2, 7))]
    df_LFI = df.iloc[:, [0] + list(range(7, 13))]
    df_CLA = df.iloc[:, [0] + list(range(13, 16))]
    # NOTE(review): the overall view stops at position 14 (CU), so LC is
    # part of df_CLA but not of "Overall" -- confirm this is intended.
    df_overall = df.iloc[:, [0] + list(range(2, 15))]

    plot_df_dict = {
        "Overall": df_overall,
        "Basic Information Retrieval": df_BIR,
        "Legal Foundation Inference": df_LFI,
        "Complex Legal Application": df_CLA,
    }
    return plot_df_dict
def tab_data():
    """Build the per-category raw metric tables used for tabular display.

    Reads columns A:BE (first 18 data rows, header in the first row) from
    sheet "Sheet2" of ``leaderboard.xlsx``, replaces missing cells with
    ``"-"`` and slices the sheet by column position.

    Returns:
        dict: Category title -> DataFrame.  "Overall" and "Basic Information
        Retrieval" start at the first column; the other two categories carry
        the first two columns followed by their own metric columns.
    """
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:BE',
        nrows=18)
    # fillna returns a new frame; without rebinding the NaN cells were
    # never actually replaced by the "-" placeholder.
    leaderboard_df = leaderboard_df.fillna("-")

    df_BIR = leaderboard_df.iloc[:, list(range(0, 18))]
    df_LFI = leaderboard_df.iloc[:, list(range(0, 2)) + list(range(18, 42))]
    df_CLA = leaderboard_df.iloc[:, list(range(0, 2)) + list(range(42, 56))]
    # NOTE(review): usecols 'A:BE' spans 57 columns, yet every slice here
    # stops at position 55, so a 57th column would never be shown --
    # confirm against the sheet layout.
    df_overall = leaderboard_df.iloc[:, list(range(0, 56))]

    table_df_dict = {
        "Overall": df_overall,
        "Basic Information Retrieval": df_BIR,
        "Legal Foundation Inference": df_LFI,
        "Complex Legal Application": df_CLA,
    }
    return table_df_dict
if __name__ == "__main__":
    # Manual smoke run: build the plotting tables first, then the display
    # tables.  Both results are dictionaries keyed by category title.
    df1, df2 = plot_data(), tab_data()
|