Spaces:
Sleeping
Sleeping
piotr-szleg-bards-ai
commited on
Commit
·
b7b9e52
1
Parent(s):
fc8c467
2024-03-05 12:06:07 Publish script update
Browse files- app.py +78 -78
- app_constants.py +71 -0
- data/combined_plots.csv +0 -0
- data/general_plots.csv +173 -126
- data/summary_metrics_plots.csv +0 -0
- pipeline/config.py +9 -0
app.py
CHANGED
@@ -9,47 +9,7 @@ import plotly.express as px
|
|
9 |
from pandas.api.types import is_numeric_dtype
|
10 |
|
11 |
from pipeline.config import LLMBoardConfig, QueriesConfig
|
12 |
-
|
13 |
-
README = """
|
14 |
-
Projects compares different large language models and their providers for real time applications and mass data processing.
|
15 |
-
While other benchmarks compare LLMs on different human intelligence tasks, this benchmark focuses on features related to business and engineering aspects such as response times, pricing and data streaming capabilities.
|
16 |
-
|
17 |
-
To perform the evaluation we chose the task of newspaper article summarization from the [GEM/xlsum](https://huggingface.co/datasets/GEM/xlsum) dataset, as it represents a very standard type of task where a model has to understand unstructured natural language text, process it and output text in a specified format.
|
18 |
-
For this version we chose the English and Japanese languages, with Japanese representing languages using logographic writing systems. This also enables us to validate the effectiveness of the LLMs for different language groups.
|
19 |
-
|
20 |
-
Each of the models was asked to summarize the text using the following prompt:
|
21 |
-
|
22 |
-
```
|
23 |
-
{}
|
24 |
-
```
|
25 |
-
|
26 |
-
Where {{language}} stands for the original language of the text, as we wanted to avoid the model translating the text to English during summarization.
|
27 |
-
|
28 |
-
Each LLM was asked to return the output in three formats: markdown, JSON and function call. Note that currently function calls are only supported by the OpenAI API.
|
29 |
-
To do that we added the following text to the query:
|
30 |
-
|
31 |
-
{}
|
32 |
-
|
33 |
-
All of the calls were made from the same machine with the same internet connection using the LiteLLM library, which may add some time overhead compared to pure curl calls. Calls were made from Poland, UTC+1.
|
34 |
-
|
35 |
-
Please take a look at the following project and let us know if you have any questions or suggestions.
|
36 |
-
"""
|
37 |
-
|
38 |
-
time_periods_explanation_df = pd.DataFrame(
|
39 |
-
{
|
40 |
-
"time_of_day": [
|
41 |
-
"early morning",
|
42 |
-
"morning",
|
43 |
-
"afternoon",
|
44 |
-
"late afternoon",
|
45 |
-
"evening",
|
46 |
-
"late evening",
|
47 |
-
"midnight",
|
48 |
-
"night",
|
49 |
-
],
|
50 |
-
"hour_range": ["6-8", "9-11", "12-14", "15-17", "18-20", "21-23", "0-2", "3-5"],
|
51 |
-
}
|
52 |
-
)
|
53 |
|
54 |
queries_config = QueriesConfig()
|
55 |
|
@@ -62,7 +22,9 @@ time_of_day_comparison_df = pd.read_csv("data/time_of_day_comparison.csv")
|
|
62 |
general_plots = pd.read_csv("data/general_plots.csv")
|
63 |
model_costs_df = pd.read_csv("data/model_costs.csv")
|
64 |
time_of_day_plots = pd.read_csv("data/time_of_day_plots.csv")
|
|
|
65 |
output_plots = pd.read_csv("data/output_plots.csv")
|
|
|
66 |
|
67 |
searched_query = ""
|
68 |
collapse_languages = False
|
@@ -73,7 +35,7 @@ def filter_dataframes(input: str):
|
|
73 |
global searched_query
|
74 |
input = input.lower()
|
75 |
searched_query = input
|
76 |
-
return
|
77 |
|
78 |
|
79 |
def collapse_languages_toggle():
|
@@ -84,7 +46,7 @@ def collapse_languages_toggle():
|
|
84 |
else:
|
85 |
collapse_languages = True
|
86 |
button_text = "Un-collapse languages"
|
87 |
-
return
|
88 |
|
89 |
|
90 |
def collapse_output_method_toggle():
|
@@ -95,9 +57,9 @@ def collapse_output_method_toggle():
|
|
95 |
else:
|
96 |
collapse_output_method = True
|
97 |
button_text = "Un-collapse output method"
|
98 |
-
return
|
99 |
|
100 |
-
def
|
101 |
if not searched_model_names:
|
102 |
return df
|
103 |
filter_series = df.model == "" # False values
|
@@ -105,7 +67,7 @@ def filter_dataframe(df, searched_model_names):
|
|
105 |
filter_series = filter_series | df.model.str.lower().str.contains(n)
|
106 |
return df[filter_series]
|
107 |
|
108 |
-
def
|
109 |
global collapse_languages, collapse_output_method, searched_query, summary_df, time_of_day_comparison_df, model_costs_df
|
110 |
|
111 |
summary_df_columns = summary_df.columns.to_list()
|
@@ -124,7 +86,7 @@ def dataframes():
|
|
124 |
searched_model_names = [n for n in searched_model_names if n]
|
125 |
|
126 |
def for_dataframe(df):
|
127 |
-
return dataframe_style(
|
128 |
|
129 |
return (
|
130 |
for_dataframe(summary_df_processed),
|
@@ -155,22 +117,26 @@ def dataframe_style(df: pd.DataFrame):
|
|
155 |
df = df.style.format(column_formats, na_rep="")
|
156 |
return df
|
157 |
|
158 |
-
|
159 |
def snake_case_to_title(text):
|
160 |
# Convert snake_case to title-case
|
161 |
words = re.split(r"_", text)
|
162 |
title_words = [word.capitalize() for word in words]
|
163 |
return " ".join(title_words)
|
164 |
|
165 |
-
|
166 |
-
filter_textbox = gr.Textbox(label="Model name parts *", scale=2)
|
167 |
-
filter_button = gr.Button("Filter", scale=1)
|
168 |
-
collapse_languages_button = gr.Button("Collapse languages")
|
169 |
-
collapse_output_method_button = gr.Button("Collapse output method")
|
170 |
-
last_textbox = 0
|
171 |
plots = []
|
172 |
-
single_model_plots = []
|
173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
|
175 |
def filter_plots(searched_query: str):
|
176 |
searched_model_names = searched_query.split("|")
|
@@ -183,8 +149,12 @@ def filter_plots(searched_query: str):
|
|
183 |
if "df" in row and pd.notna(row["df"]):
|
184 |
buffer = io.StringIO(row["df"])
|
185 |
df = pd.read_csv(buffer)
|
186 |
-
df =
|
187 |
-
|
|
|
|
|
|
|
|
|
188 |
plot.update_layout(autosize=True)
|
189 |
elif "for model" in row["header"] and searched_model_names:
|
190 |
plot_model = row["header"].split("for model")[1].lower()
|
@@ -195,22 +165,16 @@ def filter_plots(searched_query: str):
|
|
195 |
|
196 |
return results
|
197 |
|
198 |
-
|
199 |
-
def display_plot(plot_df_row):
|
200 |
-
row = dict(plot_df_row)
|
201 |
-
plot = plotly.io.from_json(row["plot_json"])
|
202 |
-
plot.update_layout(autosize=True)
|
203 |
-
plots.append((gr.Plot(plot, label=row["header"], scale=1), plot, row))
|
204 |
-
if "description" in row and pd.notna(row["description"]):
|
205 |
-
gr.Markdown(str(row["description"]))
|
206 |
-
|
207 |
-
|
208 |
-
with gr.Blocks(theme=gr.themes.Default(text_size="lg")) as demo:
|
209 |
gr.HTML("<h1>Performance LLM Board</h1>")
|
210 |
|
211 |
with gr.Row():
|
212 |
-
filter_textbox.
|
213 |
-
filter_button.
|
|
|
|
|
|
|
|
|
214 |
gr.Markdown(
|
215 |
' \* You can use `|` operator to display multiple models at once, for example "gpt|mistral|zephyr"'
|
216 |
)
|
@@ -223,9 +187,9 @@ with gr.Blocks(theme=gr.themes.Default(text_size="lg")) as demo:
|
|
223 |
)
|
224 |
with gr.Tab("Performance by time of the day"):
|
225 |
# display only first plot for all models
|
226 |
-
time_of_day_plots[0:1].apply(
|
227 |
time_periods_explanation_ui = gr.DataFrame(
|
228 |
-
dataframe_style(
|
229 |
)
|
230 |
time_of_day_comparison_ui = gr.DataFrame(dataframe_style(time_of_day_comparison_df), label="Time of day")
|
231 |
gr.Markdown(
|
@@ -240,11 +204,11 @@ Measurements were made during a normal work week.
|
|
240 |
"""
|
241 |
)
|
242 |
# display rest of the plots
|
243 |
-
time_of_day_plots[1:].apply(
|
244 |
with gr.Tab("Output characteristics"):
|
245 |
with gr.Row():
|
246 |
-
collapse_languages_button.
|
247 |
-
collapse_output_method_button.
|
248 |
summary_ui = gr.DataFrame(dataframe_style(summary_df), label="Output characteristics")
|
249 |
gr.Markdown(
|
250 |
"""\
|
@@ -256,7 +220,7 @@ To count words we split the output string by whitespace `\w` regex character.
|
|
256 |
|
257 |
Chunk sizes are measured in the characters count."""
|
258 |
)
|
259 |
-
output_plots.apply(
|
260 |
with gr.Tab("Costs comparison"):
|
261 |
models_costs_ui = gr.DataFrame(dataframe_style(model_costs_df), label="Costs comparison")
|
262 |
gr.Markdown(
|
@@ -269,9 +233,11 @@ for models hosted this way we calculated "Cost Per Token" column using data coll
|
|
269 |
Note that pause and resume time cost was not included in the "Cost Per Token" column calculation.
|
270 |
"""
|
271 |
)
|
272 |
-
general_plots[general_plots.plot_name == "execution_costs"].apply(
|
|
|
|
|
273 |
with gr.Tab("Context length and parameters count"):
|
274 |
-
general_plots[general_plots.plot_name != "execution_costs"].apply(
|
275 |
gr.Markdown(
|
276 |
"""
|
277 |
LLM models context length and parameters count are based on release blogs and documentation of their respective developers.
|
@@ -281,6 +247,40 @@ A lot of models had to be omitted due to their developers not disclosing their p
|
|
281 |
Mainly OpenAI's GPT models and Google's Palm 2.
|
282 |
"""
|
283 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
284 |
filter_button.click(
|
285 |
fn=filter_dataframes,
|
286 |
inputs=filter_textbox,
|
|
|
9 |
from pandas.api.types import is_numeric_dtype
|
10 |
|
11 |
from pipeline.config import LLMBoardConfig, QueriesConfig
|
12 |
+
from app_constants import README, JS, TIME_PERIODS_EXPLANATION_DF
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
queries_config = QueriesConfig()
|
15 |
|
|
|
22 |
general_plots = pd.read_csv("data/general_plots.csv")
|
23 |
model_costs_df = pd.read_csv("data/model_costs.csv")
|
24 |
time_of_day_plots = pd.read_csv("data/time_of_day_plots.csv")
|
25 |
+
summary_metrics_plots = pd.read_csv("data/summary_metrics_plots.csv")
|
26 |
output_plots = pd.read_csv("data/output_plots.csv")
|
27 |
+
combined_plots = pd.read_csv("data/combined_plots.csv")
|
28 |
|
29 |
searched_query = ""
|
30 |
collapse_languages = False
|
|
|
35 |
global searched_query
|
36 |
input = input.lower()
|
37 |
searched_query = input
|
38 |
+
return get_updated_dataframes()
|
39 |
|
40 |
|
41 |
def collapse_languages_toggle():
|
|
|
46 |
else:
|
47 |
collapse_languages = True
|
48 |
button_text = "Un-collapse languages"
|
49 |
+
return get_updated_dataframes()[0], button_text
|
50 |
|
51 |
|
52 |
def collapse_output_method_toggle():
|
|
|
57 |
else:
|
58 |
collapse_output_method = True
|
59 |
button_text = "Un-collapse output method"
|
60 |
+
return get_updated_dataframes()[0], button_text
|
61 |
|
62 |
+
def filter_dataframe_by_models(df, searched_model_names):
|
63 |
if not searched_model_names:
|
64 |
return df
|
65 |
filter_series = df.model == "" # False values
|
|
|
67 |
filter_series = filter_series | df.model.str.lower().str.contains(n)
|
68 |
return df[filter_series]
|
69 |
|
70 |
+
def get_updated_dataframes():
|
71 |
global collapse_languages, collapse_output_method, searched_query, summary_df, time_of_day_comparison_df, model_costs_df
|
72 |
|
73 |
summary_df_columns = summary_df.columns.to_list()
|
|
|
86 |
searched_model_names = [n for n in searched_model_names if n]
|
87 |
|
88 |
def for_dataframe(df):
|
89 |
+
return dataframe_style(filter_dataframe_by_models(df, searched_model_names))
|
90 |
|
91 |
return (
|
92 |
for_dataframe(summary_df_processed),
|
|
|
117 |
df = df.style.format(column_formats, na_rep="")
|
118 |
return df
|
119 |
|
|
|
120 |
def snake_case_to_title(text):
|
121 |
# Convert snake_case to title-case
|
122 |
words = re.split(r"_", text)
|
123 |
title_words = [word.capitalize() for word in words]
|
124 |
return " ".join(title_words)
|
125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
plots = []
|
|
|
127 |
|
128 |
+
def display_plot(plot_df_row):
|
129 |
+
row = dict(plot_df_row)
|
130 |
+
plot = plotly.io.from_json(row["plot_json"])
|
131 |
+
plot.update_layout(autosize=True)
|
132 |
+
return (gr.Plot(plot, label=row["header"], scale=1), plot)
|
133 |
+
|
134 |
+
def display_filtered_plot(plot_df_row):
|
135 |
+
row = dict(plot_df_row)
|
136 |
+
plot_element, plot = display_plot(plot_df_row)
|
137 |
+
plots.append((plot_element, plot, row))
|
138 |
+
if "description" in row and pd.notna(row["description"]):
|
139 |
+
gr.Markdown(str(row["description"]))
|
140 |
|
141 |
def filter_plots(searched_query: str):
|
142 |
searched_model_names = searched_query.split("|")
|
|
|
149 |
if "df" in row and pd.notna(row["df"]):
|
150 |
buffer = io.StringIO(row["df"])
|
151 |
df = pd.read_csv(buffer)
|
152 |
+
df = filter_dataframe_by_models(df, searched_model_names)
|
153 |
+
plot_constructor = px.bar
|
154 |
+
if "plot_type" in row and pd.notna(row["plot_type"]) and row["plot_type"]:
|
155 |
+
if row["plot_type"] == "scatter":
|
156 |
+
plot_constructor = px.scatter
|
157 |
+
plot = plot_constructor(df, **json.loads(row["arguments"]))
|
158 |
plot.update_layout(autosize=True)
|
159 |
elif "for model" in row["header"] and searched_model_names:
|
160 |
plot_model = row["header"].split("for model")[1].lower()
|
|
|
165 |
|
166 |
return results
|
167 |
|
168 |
+
with gr.Blocks(theme=gr.themes.Default(text_size="lg"), js=JS) as demo:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
gr.HTML("<h1>Performance LLM Board</h1>")
|
170 |
|
171 |
with gr.Row():
|
172 |
+
filter_textbox = gr.Textbox(label="Model name parts *", scale=2, elem_id="filter-textbox")
|
173 |
+
filter_button = gr.Button("Filter", scale=1, elem_id="filter-button")
|
174 |
+
with gr.Column(scale=1):
|
175 |
+
open_ai_button = gr.Button("Compare Open AI models", elem_id="open-ai-button", scale=1)
|
176 |
+
google_button = gr.Button("Compare Google Models", elem_id="google-button", scale=1)
|
177 |
+
# gr.Button("Open Models", size="sm")
|
178 |
gr.Markdown(
|
179 |
' \* You can use `|` operator to display multiple models at once, for example "gpt|mistral|zephyr"'
|
180 |
)
|
|
|
187 |
)
|
188 |
with gr.Tab("Performance by time of the day"):
|
189 |
# display only first plot for all models
|
190 |
+
time_of_day_plots[0:1].apply(display_filtered_plot, axis=1)
|
191 |
time_periods_explanation_ui = gr.DataFrame(
|
192 |
+
dataframe_style(TIME_PERIODS_EXPLANATION_DF), label="Times of day ranges"
|
193 |
)
|
194 |
time_of_day_comparison_ui = gr.DataFrame(dataframe_style(time_of_day_comparison_df), label="Time of day")
|
195 |
gr.Markdown(
|
|
|
204 |
"""
|
205 |
)
|
206 |
# display rest of the plots
|
207 |
+
time_of_day_plots[1:].apply(display_filtered_plot, axis=1)
|
208 |
with gr.Tab("Output characteristics"):
|
209 |
with gr.Row():
|
210 |
+
collapse_languages_button = gr.Button("Collapse languages")
|
211 |
+
collapse_output_method_button = gr.Button("Collapse output method")
|
212 |
summary_ui = gr.DataFrame(dataframe_style(summary_df), label="Output characteristics")
|
213 |
gr.Markdown(
|
214 |
"""\
|
|
|
220 |
|
221 |
Chunk sizes are measured in the characters count."""
|
222 |
)
|
223 |
+
output_plots.apply(display_filtered_plot, axis=1)
|
224 |
with gr.Tab("Costs comparison"):
|
225 |
models_costs_ui = gr.DataFrame(dataframe_style(model_costs_df), label="Costs comparison")
|
226 |
gr.Markdown(
|
|
|
233 |
Note that pause and resume time cost was not included in the "Cost Per Token" column calculation.
|
234 |
"""
|
235 |
)
|
236 |
+
general_plots[general_plots.plot_name == "execution_costs"].apply(display_filtered_plot, axis=1)
|
237 |
+
with gr.Tab("Summary metrics"):
|
238 |
+
summary_metrics_plots.apply(display_filtered_plot, axis=1)
|
239 |
with gr.Tab("Context length and parameters count"):
|
240 |
+
general_plots[general_plots.plot_name != "execution_costs"].apply(display_filtered_plot, axis=1)
|
241 |
gr.Markdown(
|
242 |
"""
|
243 |
LLM models context length and parameters count are based on release blogs and documentation of their respective developers.
|
|
|
247 |
Mainly OpenAI's GPT models and Google's Palm 2.
|
248 |
"""
|
249 |
)
|
250 |
+
with gr.Tab("Combined plots"):
|
251 |
+
with gr.Row():
|
252 |
+
choices = combined_plots.header
|
253 |
+
choices = choices[choices.str.contains("for model")]
|
254 |
+
choices = choices.str.split("for model").apply(lambda x: x[1])
|
255 |
+
def handle_dropdown(dropdown, plot_element):
|
256 |
+
def dropdown_change_handler(value):
|
257 |
+
for _, row in combined_plots.iterrows():
|
258 |
+
if value in row["header"]:
|
259 |
+
return display_plot(row)[0]
|
260 |
+
dropdown.change(
|
261 |
+
fn=dropdown_change_handler,
|
262 |
+
inputs=[dropdown],
|
263 |
+
outputs=[plot_element],
|
264 |
+
api_name="dropdown_change_handler",
|
265 |
+
)
|
266 |
+
with gr.Column():
|
267 |
+
dropdown = gr.Dropdown(choices.tolist(), label="First model for comparison", value=choices.iloc[0])
|
268 |
+
plot_element, plot = display_plot(combined_plots.iloc[0])
|
269 |
+
handle_dropdown(dropdown, plot_element)
|
270 |
+
with gr.Column():
|
271 |
+
dropdown = gr.Dropdown(choices.tolist(), label="Second model for comparison", value=choices.iloc[1])
|
272 |
+
plot_element, plot = display_plot(combined_plots.iloc[1])
|
273 |
+
handle_dropdown(dropdown, plot_element)
|
274 |
+
gr.Markdown("""
|
275 |
+
Radial plots are used to compare the most important aspects of each model researched on this board using single images.
|
276 |
+
|
277 |
+
All values are normalized and scaled into the 0.25 to 1 range; 0 is reserved for unknown values.
|
278 |
+
|
279 |
+
To compare the parameters more thoroughly use the filtering box on top of this page and inspect individual tabs.
|
280 |
+
|
281 |
+
In addition to side by side comparison all of the radial plots are displayed below.
|
282 |
+
""")
|
283 |
+
combined_plots.apply(display_filtered_plot, axis=1)
|
284 |
filter_button.click(
|
285 |
fn=filter_dataframes,
|
286 |
inputs=filter_textbox,
|
app_constants.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
README = """
|
4 |
+
This project compares different large language models and their providers for real time applications and mass data processing.
|
5 |
+
While other benchmarks compare LLMs on different human intelligence tasks, this benchmark focuses on features related to business and engineering aspects such as response times, pricing and data streaming capabilities.
|
6 |
+
|
7 |
+
To perform the evaluation we chose the task of newspaper article summarization from the [GEM/xlsum](https://huggingface.co/datasets/GEM/xlsum) dataset, as it represents a very standard type of task where a model has to understand unstructured natural language text, process it and output text in a specified format.
|
8 |
+
For this version we chose the English and Japanese languages, with Japanese representing languages using logographic writing systems. This also enables us to validate the effectiveness of the LLMs for different language groups.
|
9 |
+
|
10 |
+
Each of the models was asked to summarize the text using the following prompt:
|
11 |
+
|
12 |
+
```
|
13 |
+
{}
|
14 |
+
```
|
15 |
+
|
16 |
+
Where {{language}} stands for the original language of the text, as we wanted to avoid the model translating the text to English during summarization.
|
17 |
+
|
18 |
+
Each LLM was asked to return the output in three formats: markdown, JSON and function call. Note that currently function calls are only supported by the OpenAI API.
|
19 |
+
To do that we added the following text to the query:
|
20 |
+
|
21 |
+
{}
|
22 |
+
|
23 |
+
All of the calls were made from the same machine with the same internet connection using the LiteLLM library, which may add some time overhead compared to pure curl calls. Calls were made from Poland, UTC+1.
|
24 |
+
|
25 |
+
Please take a look at the following project and let us know if you have any questions or suggestions.
|
26 |
+
"""
|
27 |
+
|
28 |
+
JS = """
|
29 |
+
function test() {
|
30 |
+
var google_button = document.querySelector('#google-button')
|
31 |
+
var open_ai_button = document.querySelector('#open-ai-button')
|
32 |
+
var filter_textbox = document.querySelector('#filter-textbox textarea')
|
33 |
+
var filter_button = document.querySelector('#filter-button')
|
34 |
+
|
35 |
+
console.log(google_button, filter_textbox, filter_button)
|
36 |
+
function for_button(button, search_query) {
|
37 |
+
button.onclick = function() {
|
38 |
+
filter_textbox.value = search_query
|
39 |
+
|
40 |
+
var input_event = new InputEvent('input', {
|
41 |
+
bubbles: true,
|
42 |
+
cancelable: true,
|
43 |
+
composed: true
|
44 |
+
})
|
45 |
+
filter_textbox.dispatchEvent(input_event);
|
46 |
+
setTimeout(
|
47 |
+
()=>filter_button.click(),
|
48 |
+
1000
|
49 |
+
)
|
50 |
+
}
|
51 |
+
}
|
52 |
+
for_button(google_button, "gemini-pro | PaLM 2")
|
53 |
+
for_button(open_ai_button, "gpt-4 | gpt-4-turbo | gpt-3.5-turbo")
|
54 |
+
}
|
55 |
+
"""
|
56 |
+
|
57 |
+
TIME_PERIODS_EXPLANATION_DF = pd.DataFrame(
|
58 |
+
{
|
59 |
+
"time_of_day": [
|
60 |
+
"early morning",
|
61 |
+
"morning",
|
62 |
+
"afternoon",
|
63 |
+
"late afternoon",
|
64 |
+
"evening",
|
65 |
+
"late evening",
|
66 |
+
"midnight",
|
67 |
+
"night",
|
68 |
+
],
|
69 |
+
"hour_range": ["6-8", "9-11", "12-14", "15-17", "18-20", "21-23", "0-2", "3-5"],
|
70 |
+
}
|
71 |
+
)
|
data/combined_plots.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/general_plots.csv
CHANGED
@@ -10,7 +10,7 @@ execution_costs,./html/plots/execution_costs.html,"Figure({
|
|
10 |
'showlegend': True,
|
11 |
'textposition': 'auto',
|
12 |
'type': 'bar',
|
13 |
-
'x': array([
|
14 |
'xaxis': 'x',
|
15 |
'y': array(['gpt-4'], dtype=object),
|
16 |
'yaxis': 'y'},
|
@@ -24,261 +24,303 @@ execution_costs,./html/plots/execution_costs.html,"Figure({
|
|
24 |
'showlegend': True,
|
25 |
'textposition': 'auto',
|
26 |
'type': 'bar',
|
27 |
-
'x': array([
|
28 |
'xaxis': 'x',
|
29 |
'y': array(['gpt-4-turbo'], dtype=object),
|
30 |
'yaxis': 'y'},
|
31 |
{'alignmentgroup': 'True',
|
32 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
33 |
-
'legendgroup': '
|
34 |
'marker': {'color': '#00cc96', 'pattern': {'shape': ''}},
|
35 |
-
'name': '
|
36 |
-
'offsetgroup': '
|
37 |
'orientation': 'h',
|
38 |
'showlegend': True,
|
39 |
'textposition': 'auto',
|
40 |
'type': 'bar',
|
41 |
-
'x': array([
|
42 |
'xaxis': 'x',
|
43 |
-
'y': array(['
|
44 |
'yaxis': 'y'},
|
45 |
{'alignmentgroup': 'True',
|
46 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
47 |
-
'legendgroup': '
|
48 |
'marker': {'color': '#ab63fa', 'pattern': {'shape': ''}},
|
49 |
-
'name': '
|
50 |
-
'offsetgroup': '
|
51 |
'orientation': 'h',
|
52 |
'showlegend': True,
|
53 |
'textposition': 'auto',
|
54 |
'type': 'bar',
|
55 |
-
'x': array([0.
|
56 |
'xaxis': 'x',
|
57 |
-
'y': array(['
|
58 |
'yaxis': 'y'},
|
59 |
{'alignmentgroup': 'True',
|
60 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
61 |
-
'legendgroup': '
|
62 |
'marker': {'color': '#FFA15A', 'pattern': {'shape': ''}},
|
63 |
-
'name': '
|
64 |
-
'offsetgroup': '
|
65 |
'orientation': 'h',
|
66 |
'showlegend': True,
|
67 |
'textposition': 'auto',
|
68 |
'type': 'bar',
|
69 |
-
'x': array([0.
|
70 |
'xaxis': 'x',
|
71 |
-
'y': array(['
|
72 |
'yaxis': 'y'},
|
73 |
{'alignmentgroup': 'True',
|
74 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
75 |
-
'legendgroup': '
|
76 |
'marker': {'color': '#19d3f3', 'pattern': {'shape': ''}},
|
77 |
-
'name': '
|
78 |
-
'offsetgroup': '
|
79 |
'orientation': 'h',
|
80 |
'showlegend': True,
|
81 |
'textposition': 'auto',
|
82 |
'type': 'bar',
|
83 |
-
'x': array([0.
|
84 |
'xaxis': 'x',
|
85 |
-
'y': array(['
|
86 |
'yaxis': 'y'},
|
87 |
{'alignmentgroup': 'True',
|
88 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
89 |
-
'legendgroup': '
|
90 |
'marker': {'color': '#FF6692', 'pattern': {'shape': ''}},
|
91 |
-
'name': '
|
92 |
-
'offsetgroup': '
|
93 |
'orientation': 'h',
|
94 |
'showlegend': True,
|
95 |
'textposition': 'auto',
|
96 |
'type': 'bar',
|
97 |
-
'x': array([0.
|
98 |
'xaxis': 'x',
|
99 |
-
'y': array(['
|
100 |
'yaxis': 'y'},
|
101 |
{'alignmentgroup': 'True',
|
102 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
103 |
-
'legendgroup': '
|
104 |
'marker': {'color': '#B6E880', 'pattern': {'shape': ''}},
|
105 |
-
'name': '
|
106 |
-
'offsetgroup': '
|
107 |
'orientation': 'h',
|
108 |
'showlegend': True,
|
109 |
'textposition': 'auto',
|
110 |
'type': 'bar',
|
111 |
-
'x': array([0.
|
112 |
'xaxis': 'x',
|
113 |
-
'y': array(['
|
114 |
'yaxis': 'y'},
|
115 |
{'alignmentgroup': 'True',
|
116 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
117 |
-
'legendgroup': '
|
118 |
'marker': {'color': '#FF97FF', 'pattern': {'shape': ''}},
|
119 |
-
'name': '
|
120 |
-
'offsetgroup': '
|
121 |
'orientation': 'h',
|
122 |
'showlegend': True,
|
123 |
'textposition': 'auto',
|
124 |
'type': 'bar',
|
125 |
-
'x': array([0.
|
126 |
'xaxis': 'x',
|
127 |
-
'y': array(['
|
128 |
'yaxis': 'y'},
|
129 |
{'alignmentgroup': 'True',
|
130 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
131 |
-
'legendgroup': '
|
132 |
'marker': {'color': '#FECB52', 'pattern': {'shape': ''}},
|
133 |
-
'name': '
|
134 |
-
'offsetgroup': '
|
135 |
'orientation': 'h',
|
136 |
'showlegend': True,
|
137 |
'textposition': 'auto',
|
138 |
'type': 'bar',
|
139 |
-
'x': array([0.
|
140 |
'xaxis': 'x',
|
141 |
-
'y': array(['
|
142 |
'yaxis': 'y'},
|
143 |
{'alignmentgroup': 'True',
|
144 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
145 |
-
'legendgroup': '
|
146 |
'marker': {'color': '#636efa', 'pattern': {'shape': ''}},
|
147 |
-
'name': '
|
148 |
-
'offsetgroup': '
|
149 |
'orientation': 'h',
|
150 |
'showlegend': True,
|
151 |
'textposition': 'auto',
|
152 |
'type': 'bar',
|
153 |
-
'x': array([0.
|
154 |
'xaxis': 'x',
|
155 |
-
'y': array(['
|
156 |
'yaxis': 'y'},
|
157 |
{'alignmentgroup': 'True',
|
158 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
159 |
-
'legendgroup': '
|
160 |
'marker': {'color': '#EF553B', 'pattern': {'shape': ''}},
|
161 |
-
'name': '
|
162 |
-
'offsetgroup': '
|
163 |
'orientation': 'h',
|
164 |
'showlegend': True,
|
165 |
'textposition': 'auto',
|
166 |
'type': 'bar',
|
167 |
-
'x': array([0.
|
168 |
'xaxis': 'x',
|
169 |
-
'y': array(['
|
170 |
'yaxis': 'y'},
|
171 |
{'alignmentgroup': 'True',
|
172 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
173 |
-
'legendgroup': '
|
174 |
'marker': {'color': '#00cc96', 'pattern': {'shape': ''}},
|
175 |
-
'name': '
|
176 |
-
'offsetgroup': '
|
177 |
'orientation': 'h',
|
178 |
'showlegend': True,
|
179 |
'textposition': 'auto',
|
180 |
'type': 'bar',
|
181 |
-
'x': array([0.
|
182 |
'xaxis': 'x',
|
183 |
-
'y': array(['
|
184 |
'yaxis': 'y'},
|
185 |
{'alignmentgroup': 'True',
|
186 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
187 |
-
'legendgroup': '
|
188 |
'marker': {'color': '#ab63fa', 'pattern': {'shape': ''}},
|
189 |
-
'name': '
|
190 |
-
'offsetgroup': '
|
191 |
'orientation': 'h',
|
192 |
'showlegend': True,
|
193 |
'textposition': 'auto',
|
194 |
'type': 'bar',
|
195 |
-
'x': array([0.
|
196 |
'xaxis': 'x',
|
197 |
-
'y': array(['
|
198 |
'yaxis': 'y'},
|
199 |
{'alignmentgroup': 'True',
|
200 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
201 |
-
'legendgroup': '
|
202 |
'marker': {'color': '#FFA15A', 'pattern': {'shape': ''}},
|
203 |
-
'name': '
|
204 |
-
'offsetgroup': '
|
205 |
'orientation': 'h',
|
206 |
'showlegend': True,
|
207 |
'textposition': 'auto',
|
208 |
'type': 'bar',
|
209 |
-
'x': array([0.
|
210 |
'xaxis': 'x',
|
211 |
-
'y': array(['
|
212 |
'yaxis': 'y'},
|
213 |
{'alignmentgroup': 'True',
|
214 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
215 |
-
'legendgroup': '
|
216 |
'marker': {'color': '#19d3f3', 'pattern': {'shape': ''}},
|
217 |
-
'name': '
|
218 |
-
'offsetgroup': '
|
219 |
'orientation': 'h',
|
220 |
'showlegend': True,
|
221 |
'textposition': 'auto',
|
222 |
'type': 'bar',
|
223 |
-
'x': array([0.
|
224 |
'xaxis': 'x',
|
225 |
-
'y': array(['
|
226 |
'yaxis': 'y'},
|
227 |
{'alignmentgroup': 'True',
|
228 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
229 |
-
'legendgroup': '
|
230 |
'marker': {'color': '#FF6692', 'pattern': {'shape': ''}},
|
231 |
-
'name': '
|
232 |
-
'offsetgroup': '
|
233 |
'orientation': 'h',
|
234 |
'showlegend': True,
|
235 |
'textposition': 'auto',
|
236 |
'type': 'bar',
|
237 |
-
'x': array([0.
|
238 |
'xaxis': 'x',
|
239 |
-
'y': array(['
|
240 |
'yaxis': 'y'},
|
241 |
{'alignmentgroup': 'True',
|
242 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
243 |
-
'legendgroup': '
|
244 |
'marker': {'color': '#B6E880', 'pattern': {'shape': ''}},
|
245 |
-
'name': '
|
246 |
-
'offsetgroup': '
|
247 |
'orientation': 'h',
|
248 |
'showlegend': True,
|
249 |
'textposition': 'auto',
|
250 |
'type': 'bar',
|
251 |
-
'x': array([0.
|
252 |
'xaxis': 'x',
|
253 |
-
'y': array(['
|
254 |
'yaxis': 'y'},
|
255 |
{'alignmentgroup': 'True',
|
256 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
257 |
-
'legendgroup': '
|
258 |
'marker': {'color': '#FF97FF', 'pattern': {'shape': ''}},
|
259 |
-
'name': '
|
260 |
-
'offsetgroup': '
|
261 |
'orientation': 'h',
|
262 |
'showlegend': True,
|
263 |
'textposition': 'auto',
|
264 |
'type': 'bar',
|
265 |
-
'x': array([0.
|
266 |
'xaxis': 'x',
|
267 |
-
'y': array(['
|
268 |
'yaxis': 'y'},
|
269 |
{'alignmentgroup': 'True',
|
270 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
271 |
-
'legendgroup': '
|
272 |
'marker': {'color': '#FECB52', 'pattern': {'shape': ''}},
|
273 |
-
'name': '
|
274 |
-
'offsetgroup': '
|
275 |
'orientation': 'h',
|
276 |
'showlegend': True,
|
277 |
'textposition': 'auto',
|
278 |
'type': 'bar',
|
279 |
-
'x': array([0.
|
280 |
'xaxis': 'x',
|
281 |
-
'y': array(['
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
282 |
'yaxis': 'y'}],
|
283 |
'layout': {'barmode': 'relative',
|
284 |
'legend': {'title': {'text': 'Model'}, 'tracegroupgap': 0},
|
@@ -286,43 +328,48 @@ execution_costs,./html/plots/execution_costs.html,"Figure({
|
|
286 |
'title': {'text': 'Costs of execution of 6660 test queries per model'},
|
287 |
'xaxis': {'anchor': 'y', 'domain': [0.0, 1.0], 'title': {'text': 'Execution cost ($)'}},
|
288 |
'yaxis': {'anchor': 'x',
|
289 |
-
'categoryarray': [
|
290 |
-
|
291 |
-
|
292 |
-
(
|
293 |
-
Instruct
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
(
|
299 |
-
|
300 |
-
llama-2-70b-chat,
|
301 |
-
|
|
|
|
|
302 |
'categoryorder': 'array',
|
303 |
'domain': [0.0, 1.0],
|
304 |
'title': {'text': 'Model'}}}
|
305 |
-
})",Costs of execution of 6660 test queries per model,,"{""data"":[{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-4"",""marker"":{""color"":""#636efa"",""pattern"":{""shape"":""""}},""name"":""gpt-4"",""offsetgroup"":""gpt-4"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[45.870000000000005],""xaxis"":""x"",""y"":[""gpt-4""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-4-turbo"",""marker"":{""color"":""#EF553B"",""pattern"":{""shape"":""""}},""name"":""gpt-4-turbo"",""offsetgroup"":""gpt-4-turbo"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[19.2168],""xaxis"":""x"",""y"":[""gpt-4-turbo""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-3.5-turbo"",""marker"":{""color"":""#00cc96"",""pattern"":{""shape"":""""}},""name"":""gpt-3.5-turbo"",""offsetgroup"":""gpt-3.5-turbo"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[1.75176],""xaxis"":""x"",""y"":[""gpt-3.5-turbo""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""llama-2-70b-chat"",""marker"":{""color"":""#ab63fa"",""pattern"":{""shape"":""""}},""name"":""llama-2-70b-chat"",""offsetgroup"":""llama-2-70b-chat"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.65934],""xaxis"":""x"",""y"":[""llama-2-70b-chat""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost 
($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Mixtral-8x7B-Instruct-v0.1"",""marker"":{""color"":""#FFA15A"",""pattern"":{""shape"":""""}},""name"":""Mixtral-8x7B-Instruct-v0.1"",""offsetgroup"":""Mixtral-8x7B-Instruct-v0.1"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.65934],""xaxis"":""x"",""y"":[""Mixtral-8x7B-Instruct-v0.1""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""01-ai Yi Chat (34B)"",""marker"":{""color"":""#19d3f3"",""pattern"":{""shape"":""""}},""name"":""01-ai Yi Chat (34B)"",""offsetgroup"":""01-ai Yi Chat (34B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.5818400000000001],""xaxis"":""x"",""y"":[""01-ai Yi Chat (34B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Snorkel Mistral PairRM DPO (7B)"",""marker"":{""color"":""#FF6692"",""pattern"":{""shape"":""""}},""name"":""Snorkel Mistral PairRM DPO (7B)"",""offsetgroup"":""Snorkel Mistral PairRM DPO (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.334256],""xaxis"":""x"",""y"":[""Snorkel Mistral PairRM DPO (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Chronos Hermes (13B)"",""marker"":{""color"":""#B6E880"",""pattern"":{""shape"":""""}},""name"":""Chronos Hermes (13B)"",""offsetgroup"":""Chronos Hermes (13B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.27396],""xaxis"":""x"",""y"":[""Chronos Hermes 
(13B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""WizardLM v1.2 (13B)"",""marker"":{""color"":""#FF97FF"",""pattern"":{""shape"":""""}},""name"":""WizardLM v1.2 (13B)"",""offsetgroup"":""WizardLM v1.2 (13B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.21207],""xaxis"":""x"",""y"":[""WizardLM v1.2 (13B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gemini-pro"",""marker"":{""color"":""#FECB52"",""pattern"":{""shape"":""""}},""name"":""gemini-pro"",""offsetgroup"":""gemini-pro"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.18315],""xaxis"":""x"",""y"":[""gemini-pro""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""chat-bison (PaLM 2)"",""marker"":{""color"":""#636efa"",""pattern"":{""shape"":""""}},""name"":""chat-bison (PaLM 2)"",""offsetgroup"":""chat-bison (PaLM 2)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.18315],""xaxis"":""x"",""y"":[""chat-bison (PaLM 2)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""chat-bison-32k (PaLM 2 32K)"",""marker"":{""color"":""#EF553B"",""pattern"":{""shape"":""""}},""name"":""chat-bison-32k (PaLM 2 32K)"",""offsetgroup"":""chat-bison-32k (PaLM 2 32K)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.18315],""xaxis"":""x"",""y"":[""chat-bison-32k (PaLM 2 
32K)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Upstage SOLAR Instruct v1 (11B)"",""marker"":{""color"":""#00cc96"",""pattern"":{""shape"":""""}},""name"":""Upstage SOLAR Instruct v1 (11B)"",""offsetgroup"":""Upstage SOLAR Instruct v1 (11B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.180288],""xaxis"":""x"",""y"":[""Upstage SOLAR Instruct v1 (11B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Mistral (7B) Instruct v0.2 (Together AI)"",""marker"":{""color"":""#ab63fa"",""pattern"":{""shape"":""""}},""name"":""Mistral (7B) Instruct v0.2 (Together AI)"",""offsetgroup"":""Mistral (7B) Instruct v0.2 (Together AI)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.16515400000000002],""xaxis"":""x"",""y"":[""Mistral (7B) Instruct v0.2 (Together AI)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""LLaMA-2 Chat (7B)"",""marker"":{""color"":""#FFA15A"",""pattern"":{""shape"":""""}},""name"":""LLaMA-2 Chat (7B)"",""offsetgroup"":""LLaMA-2 Chat (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.16329600000000002],""xaxis"":""x"",""y"":[""LLaMA-2 Chat (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""OpenHermes-2.5-Mistral (7B)"",""marker"":{""color"":""#19d3f3"",""pattern"":{""shape"":""""}},""name"":""OpenHermes-2.5-Mistral (7B)"",""offsetgroup"":""OpenHermes-2.5-Mistral 
(7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.14182000000000003],""xaxis"":""x"",""y"":[""OpenHermes-2.5-Mistral (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Qwen 1.5 Chat (7B)"",""marker"":{""color"":""#FF6692"",""pattern"":{""shape"":""""}},""name"":""Qwen 1.5 Chat (7B)"",""offsetgroup"":""Qwen 1.5 Chat (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.13759200000000002],""xaxis"":""x"",""y"":[""Qwen 1.5 Chat (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Vicuna v1.5 (7B)"",""marker"":{""color"":""#B6E880"",""pattern"":{""shape"":""""}},""name"":""Vicuna v1.5 (7B)"",""offsetgroup"":""Vicuna v1.5 (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.12588],""xaxis"":""x"",""y"":[""Vicuna v1.5 (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Falcon Instruct (7B)"",""marker"":{""color"":""#FF97FF"",""pattern"":{""shape"":""""}},""name"":""Falcon Instruct (7B)"",""offsetgroup"":""Falcon Instruct (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.12476800000000002],""xaxis"":""x"",""y"":[""Falcon Instruct (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""RedPajama-INCITE Chat (7B)"",""marker"":{""color"":""#FECB52"",""pattern"":{""shape"":""""}},""name"":""RedPajama-INCITE Chat (7B)"",""offsetgroup"":""RedPajama-INCITE Chat 
(7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.12342400000000002],""xaxis"":""x"",""y"":[""RedPajama-INCITE Chat (7B)""],""yaxis"":""y"",""type"":""bar""}],""layout"":{""template"":{""data"":{""histogram2dcontour"":[{""type"":""histogram2dcontour"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""choropleth"":[{""type"":""choropleth"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""histogram2d"":[{""type"":""histogram2d"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""heatmap"":[{""type"":""heatmap"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""heatmapgl"":[{""type"":""heatmapgl"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""contourcarpet"":[{""type"":""co
ntourcarpet"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""contour"":[{""type"":""contour"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""surface"":[{""type"":""surface"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""mesh3d"":[{""type"":""mesh3d"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""scatter"":[{""fillpattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2},""type"":""scatter""}],""parcoords"":[{""type"":""parcoords"",""line"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterpolargl"":[{""type"":""scatterpolargl"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""bar"":[{""error_x"":{""color"":""#2a3f5f""},""error_y"":{""color"":""#2a3f5f""},""marker"":{""line"":{""color"":""#E5ECF6"",""width"":0.5},""pattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2}},""type"":""bar""}],""scattergeo"":[{""type"":""scattergeo"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterpolar"":[{""type"":""scatterpolar"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""histogram"":[{""marker"":{""pattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2}},""type"":""histogram""}],""scattergl"":[{""type"":""scattergl"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatter3d"":[{""type"":""scatter3d"",""line"":{""col
orbar"":{""outlinewidth"":0,""ticks"":""""}},""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scattermapbox"":[{""type"":""scattermapbox"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterternary"":[{""type"":""scatterternary"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scattercarpet"":[{""type"":""scattercarpet"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""carpet"":[{""aaxis"":{""endlinecolor"":""#2a3f5f"",""gridcolor"":""white"",""linecolor"":""white"",""minorgridcolor"":""white"",""startlinecolor"":""#2a3f5f""},""baxis"":{""endlinecolor"":""#2a3f5f"",""gridcolor"":""white"",""linecolor"":""white"",""minorgridcolor"":""white"",""startlinecolor"":""#2a3f5f""},""type"":""carpet""}],""table"":[{""cells"":{""fill"":{""color"":""#EBF0F8""},""line"":{""color"":""white""}},""header"":{""fill"":{""color"":""#C8D4E3""},""line"":{""color"":""white""}},""type"":""table""}],""barpolar"":[{""marker"":{""line"":{""color"":""#E5ECF6"",""width"":0.5},""pattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2}},""type"":""barpolar""}],""pie"":[{""automargin"":true,""type"":""pie""}]},""layout"":{""autotypenumbers"":""strict"",""colorway"":[""#636efa"",""#EF553B"",""#00cc96"",""#ab63fa"",""#FFA15A"",""#19d3f3"",""#FF6692"",""#B6E880"",""#FF97FF"",""#FECB52""],""font"":{""color"":""#2a3f5f""},""hovermode"":""closest"",""hoverlabel"":{""align"":""left""},""paper_bgcolor"":""white"",""plot_bgcolor"":""#E5ECF6"",""polar"":{""bgcolor"":""#E5ECF6"",""angularaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""},""radialaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""}},""ternary"":{""bgcolor"":""#E5ECF6"",""aaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""},""baxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""},""caxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""}},""coloraxis"":{""colorbar"":{"
"outlinewidth"":0,""ticks"":""""}},""colorscale"":{""sequential"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]],""sequentialminus"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]],""diverging"":[[0,""#8e0152""],[0.1,""#c51b7d""],[0.2,""#de77ae""],[0.3,""#f1b6da""],[0.4,""#fde0ef""],[0.5,""#f7f7f7""],[0.6,""#e6f5d0""],[0.7,""#b8e186""],[0.8,""#7fbc41""],[0.9,""#4d9221""],[1,""#276419""]]},""xaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":"""",""title"":{""standoff"":15},""zerolinecolor"":""white"",""automargin"":true,""zerolinewidth"":2},""yaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":"""",""title"":{""standoff"":15},""zerolinecolor"":""white"",""automargin"":true,""zerolinewidth"":2},""scene"":{""xaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2},""yaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2},""zaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2}},""shapedefaults"":{""line"":{""color"":""#2a3f5f""}},""annotationdefaults"":{""arrowcolor"":""#2a3f5f"",""arrowhead"":0,""arrowwidth"":1},""geo"":{""bgcolor"":""white"",""landcolor"":""#E5ECF6"",""subunitcolor"":""white"
",""showland"":true,""showlakes"":true,""lakecolor"":""white""},""title"":{""x"":0.05},""mapbox"":{""style"":""light""}}},""xaxis"":{""anchor"":""y"",""domain"":[0.0,1.0],""title"":{""text"":""Execution cost ($)""}},""yaxis"":{""anchor"":""x"",""domain"":[0.0,1.0],""title"":{""text"":""Model""},""categoryorder"":""array"",""categoryarray"":[""RedPajama-INCITE Chat (7B)"",""Falcon Instruct (7B)"",""Vicuna v1.5 (7B)"",""Qwen 1.5 Chat (7B)"",""OpenHermes-2.5-Mistral (7B)"",""LLaMA-2 Chat (7B)"",""Mistral (7B) Instruct v0.2 (Together AI)"",""Upstage SOLAR Instruct v1 (11B)"",""chat-bison-32k (PaLM 2 32K)"",""chat-bison (PaLM 2)"",""gemini-pro"",""WizardLM v1.2 (13B)"",""Chronos Hermes (13B)"",""Snorkel Mistral PairRM DPO (7B)"",""01-ai Yi Chat (34B)"",""Mixtral-8x7B-Instruct-v0.1"",""llama-2-70b-chat"",""gpt-3.5-turbo"",""gpt-4-turbo"",""gpt-4""]},""legend"":{""title"":{""text"":""Model""},""tracegroupgap"":0},""title"":{""text"":""Costs of execution of 6660 test queries per model""},""barmode"":""relative""}}","{""y"": ""model"", ""x"": ""model_query_costs"", ""color"": ""model"", ""orientation"": ""h"", ""title"": ""Costs of execution of 6660 test queries per model"", ""labels"": {""model"": ""Model"", ""model_query_costs"": ""Execution cost ($)""}}",",model_query_costs,model
|
306 |
-
2,
|
307 |
-
1,
|
308 |
-
0
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
|
|
|
|
|
|
326 |
"
|
327 |
model_sizes,./html/plots/model_sizes.html,"Figure({
|
328 |
'data': [{'alignmentgroup': 'True',
|
|
|
10 |
'showlegend': True,
|
11 |
'textposition': 'auto',
|
12 |
'type': 'bar',
|
13 |
+
'x': array([9.1329]),
|
14 |
'xaxis': 'x',
|
15 |
'y': array(['gpt-4'], dtype=object),
|
16 |
'yaxis': 'y'},
|
|
|
24 |
'showlegend': True,
|
25 |
'textposition': 'auto',
|
26 |
'type': 'bar',
|
27 |
+
'x': array([6.7599]),
|
28 |
'xaxis': 'x',
|
29 |
'y': array(['gpt-4-turbo'], dtype=object),
|
30 |
'yaxis': 'y'},
|
31 |
{'alignmentgroup': 'True',
|
32 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
33 |
+
'legendgroup': 'Mixtral-8x7B-Instruct-v0.1',
|
34 |
'marker': {'color': '#00cc96', 'pattern': {'shape': ''}},
|
35 |
+
'name': 'Mixtral-8x7B-Instruct-v0.1',
|
36 |
+
'offsetgroup': 'Mixtral-8x7B-Instruct-v0.1',
|
37 |
'orientation': 'h',
|
38 |
'showlegend': True,
|
39 |
'textposition': 'auto',
|
40 |
'type': 'bar',
|
41 |
+
'x': array([0.539613]),
|
42 |
'xaxis': 'x',
|
43 |
+
'y': array(['Mixtral-8x7B-Instruct-v0.1'], dtype=object),
|
44 |
'yaxis': 'y'},
|
45 |
{'alignmentgroup': 'True',
|
46 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
47 |
+
'legendgroup': 'zephyr-7b-beta',
|
48 |
'marker': {'color': '#ab63fa', 'pattern': {'shape': ''}},
|
49 |
+
'name': 'zephyr-7b-beta',
|
50 |
+
'offsetgroup': 'zephyr-7b-beta',
|
51 |
'orientation': 'h',
|
52 |
'showlegend': True,
|
53 |
'textposition': 'auto',
|
54 |
'type': 'bar',
|
55 |
+
'x': array([0.49900073]),
|
56 |
'xaxis': 'x',
|
57 |
+
'y': array(['zephyr-7b-beta'], dtype=object),
|
58 |
'yaxis': 'y'},
|
59 |
{'alignmentgroup': 'True',
|
60 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
61 |
+
'legendgroup': '01-ai Yi Chat (34B)',
|
62 |
'marker': {'color': '#FFA15A', 'pattern': {'shape': ''}},
|
63 |
+
'name': '01-ai Yi Chat (34B)',
|
64 |
+
'offsetgroup': '01-ai Yi Chat (34B)',
|
65 |
'orientation': 'h',
|
66 |
'showlegend': True,
|
67 |
'textposition': 'auto',
|
68 |
'type': 'bar',
|
69 |
+
'x': array([0.45192]),
|
70 |
'xaxis': 'x',
|
71 |
+
'y': array(['01-ai Yi Chat (34B)'], dtype=object),
|
72 |
'yaxis': 'y'},
|
73 |
{'alignmentgroup': 'True',
|
74 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
75 |
+
'legendgroup': 'llama-2-70b-chat',
|
76 |
'marker': {'color': '#19d3f3', 'pattern': {'shape': ''}},
|
77 |
+
'name': 'llama-2-70b-chat',
|
78 |
+
'offsetgroup': 'llama-2-70b-chat',
|
79 |
'orientation': 'h',
|
80 |
'showlegend': True,
|
81 |
'textposition': 'auto',
|
82 |
'type': 'bar',
|
83 |
+
'x': array([0.355275]),
|
84 |
'xaxis': 'x',
|
85 |
+
'y': array(['llama-2-70b-chat'], dtype=object),
|
86 |
'yaxis': 'y'},
|
87 |
{'alignmentgroup': 'True',
|
88 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
89 |
+
'legendgroup': 'gpt-3.5-turbo',
|
90 |
'marker': {'color': '#FF6692', 'pattern': {'shape': ''}},
|
91 |
+
'name': 'gpt-3.5-turbo',
|
92 |
+
'offsetgroup': 'gpt-3.5-turbo',
|
93 |
'orientation': 'h',
|
94 |
'showlegend': True,
|
95 |
'textposition': 'auto',
|
96 |
'type': 'bar',
|
97 |
+
'x': array([0.33931]),
|
98 |
'xaxis': 'x',
|
99 |
+
'y': array(['gpt-3.5-turbo'], dtype=object),
|
100 |
'yaxis': 'y'},
|
101 |
{'alignmentgroup': 'True',
|
102 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
103 |
+
'legendgroup': 'Mistral-7B-Instruct-v0.2',
|
104 |
'marker': {'color': '#B6E880', 'pattern': {'shape': ''}},
|
105 |
+
'name': 'Mistral-7B-Instruct-v0.2',
|
106 |
+
'offsetgroup': 'Mistral-7B-Instruct-v0.2',
|
107 |
'orientation': 'h',
|
108 |
'showlegend': True,
|
109 |
'textposition': 'auto',
|
110 |
'type': 'bar',
|
111 |
+
'x': array([0.29065089]),
|
112 |
'xaxis': 'x',
|
113 |
+
'y': array(['Mistral-7B-Instruct-v0.2'], dtype=object),
|
114 |
'yaxis': 'y'},
|
115 |
{'alignmentgroup': 'True',
|
116 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
117 |
+
'legendgroup': 'Snorkel Mistral PairRM DPO (7B)',
|
118 |
'marker': {'color': '#FF97FF', 'pattern': {'shape': ''}},
|
119 |
+
'name': 'Snorkel Mistral PairRM DPO (7B)',
|
120 |
+
'offsetgroup': 'Snorkel Mistral PairRM DPO (7B)',
|
121 |
'orientation': 'h',
|
122 |
'showlegend': True,
|
123 |
'textposition': 'auto',
|
124 |
'type': 'bar',
|
125 |
+
'x': array([0.176236]),
|
126 |
'xaxis': 'x',
|
127 |
+
'y': array(['Snorkel Mistral PairRM DPO (7B)'], dtype=object),
|
128 |
'yaxis': 'y'},
|
129 |
{'alignmentgroup': 'True',
|
130 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
131 |
+
'legendgroup': 'Chronos Hermes (13B)',
|
132 |
'marker': {'color': '#FECB52', 'pattern': {'shape': ''}},
|
133 |
+
'name': 'Chronos Hermes (13B)',
|
134 |
+
'offsetgroup': 'Chronos Hermes (13B)',
|
135 |
'orientation': 'h',
|
136 |
'showlegend': True,
|
137 |
'textposition': 'auto',
|
138 |
'type': 'bar',
|
139 |
+
'x': array([0.158268]),
|
140 |
'xaxis': 'x',
|
141 |
+
'y': array(['Chronos Hermes (13B)'], dtype=object),
|
142 |
'yaxis': 'y'},
|
143 |
{'alignmentgroup': 'True',
|
144 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
145 |
+
'legendgroup': 'WizardLM v1.2 (13B)',
|
146 |
'marker': {'color': '#636efa', 'pattern': {'shape': ''}},
|
147 |
+
'name': 'WizardLM v1.2 (13B)',
|
148 |
+
'offsetgroup': 'WizardLM v1.2 (13B)',
|
149 |
'orientation': 'h',
|
150 |
'showlegend': True,
|
151 |
'textposition': 'auto',
|
152 |
'type': 'bar',
|
153 |
+
'x': array([0.147276]),
|
154 |
'xaxis': 'x',
|
155 |
+
'y': array(['WizardLM v1.2 (13B)'], dtype=object),
|
156 |
'yaxis': 'y'},
|
157 |
{'alignmentgroup': 'True',
|
158 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
159 |
+
'legendgroup': 'Upstage SOLAR Instruct v1 (11B)',
|
160 |
'marker': {'color': '#EF553B', 'pattern': {'shape': ''}},
|
161 |
+
'name': 'Upstage SOLAR Instruct v1 (11B)',
|
162 |
+
'offsetgroup': 'Upstage SOLAR Instruct v1 (11B)',
|
163 |
'orientation': 'h',
|
164 |
'showlegend': True,
|
165 |
'textposition': 'auto',
|
166 |
'type': 'bar',
|
167 |
+
'x': array([0.117306]),
|
168 |
'xaxis': 'x',
|
169 |
+
'y': array(['Upstage SOLAR Instruct v1 (11B)'], dtype=object),
|
170 |
'yaxis': 'y'},
|
171 |
{'alignmentgroup': 'True',
|
172 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
173 |
+
'legendgroup': 'LLaMA-2 Chat (7B)',
|
174 |
'marker': {'color': '#00cc96', 'pattern': {'shape': ''}},
|
175 |
+
'name': 'LLaMA-2 Chat (7B)',
|
176 |
+
'offsetgroup': 'LLaMA-2 Chat (7B)',
|
177 |
'orientation': 'h',
|
178 |
'showlegend': True,
|
179 |
'textposition': 'auto',
|
180 |
'type': 'bar',
|
181 |
+
'x': array([0.11668]),
|
182 |
'xaxis': 'x',
|
183 |
+
'y': array(['LLaMA-2 Chat (7B)'], dtype=object),
|
184 |
'yaxis': 'y'},
|
185 |
{'alignmentgroup': 'True',
|
186 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
187 |
+
'legendgroup': 'Qwen 1.5 Chat (7B)',
|
188 |
'marker': {'color': '#ab63fa', 'pattern': {'shape': ''}},
|
189 |
+
'name': 'Qwen 1.5 Chat (7B)',
|
190 |
+
'offsetgroup': 'Qwen 1.5 Chat (7B)',
|
191 |
'orientation': 'h',
|
192 |
'showlegend': True,
|
193 |
'textposition': 'auto',
|
194 |
'type': 'bar',
|
195 |
+
'x': array([0.10312]),
|
196 |
'xaxis': 'x',
|
197 |
+
'y': array(['Qwen 1.5 Chat (7B)'], dtype=object),
|
198 |
'yaxis': 'y'},
|
199 |
{'alignmentgroup': 'True',
|
200 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
201 |
+
'legendgroup': 'OpenHermes-2.5-Mistral (7B)',
|
202 |
'marker': {'color': '#FFA15A', 'pattern': {'shape': ''}},
|
203 |
+
'name': 'OpenHermes-2.5-Mistral (7B)',
|
204 |
+
'offsetgroup': 'OpenHermes-2.5-Mistral (7B)',
|
205 |
'orientation': 'h',
|
206 |
'showlegend': True,
|
207 |
'textposition': 'auto',
|
208 |
'type': 'bar',
|
209 |
+
'x': array([0.099956]),
|
210 |
'xaxis': 'x',
|
211 |
+
'y': array(['OpenHermes-2.5-Mistral (7B)'], dtype=object),
|
212 |
'yaxis': 'y'},
|
213 |
{'alignmentgroup': 'True',
|
214 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
215 |
+
'legendgroup': 'Vicuna v1.5 (7B)',
|
216 |
'marker': {'color': '#19d3f3', 'pattern': {'shape': ''}},
|
217 |
+
'name': 'Vicuna v1.5 (7B)',
|
218 |
+
'offsetgroup': 'Vicuna v1.5 (7B)',
|
219 |
'orientation': 'h',
|
220 |
'showlegend': True,
|
221 |
'textposition': 'auto',
|
222 |
'type': 'bar',
|
223 |
+
'x': array([0.085688]),
|
224 |
'xaxis': 'x',
|
225 |
+
'y': array(['Vicuna v1.5 (7B)'], dtype=object),
|
226 |
'yaxis': 'y'},
|
227 |
{'alignmentgroup': 'True',
|
228 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
229 |
+
'legendgroup': 'Falcon Instruct (7B)',
|
230 |
'marker': {'color': '#FF6692', 'pattern': {'shape': ''}},
|
231 |
+
'name': 'Falcon Instruct (7B)',
|
232 |
+
'offsetgroup': 'Falcon Instruct (7B)',
|
233 |
'orientation': 'h',
|
234 |
'showlegend': True,
|
235 |
'textposition': 'auto',
|
236 |
'type': 'bar',
|
237 |
+
'x': array([0.08474]),
|
238 |
'xaxis': 'x',
|
239 |
+
'y': array(['Falcon Instruct (7B)'], dtype=object),
|
240 |
'yaxis': 'y'},
|
241 |
{'alignmentgroup': 'True',
|
242 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
243 |
+
'legendgroup': 'RedPajama-INCITE Chat (7B)',
|
244 |
'marker': {'color': '#B6E880', 'pattern': {'shape': ''}},
|
245 |
+
'name': 'RedPajama-INCITE Chat (7B)',
|
246 |
+
'offsetgroup': 'RedPajama-INCITE Chat (7B)',
|
247 |
'orientation': 'h',
|
248 |
'showlegend': True,
|
249 |
'textposition': 'auto',
|
250 |
'type': 'bar',
|
251 |
+
'x': array([0.082008]),
|
252 |
'xaxis': 'x',
|
253 |
+
'y': array(['RedPajama-INCITE Chat (7B)'], dtype=object),
|
254 |
'yaxis': 'y'},
|
255 |
{'alignmentgroup': 'True',
|
256 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
257 |
+
'legendgroup': 'chat-bison (PaLM 2)',
|
258 |
'marker': {'color': '#FF97FF', 'pattern': {'shape': ''}},
|
259 |
+
'name': 'chat-bison (PaLM 2)',
|
260 |
+
'offsetgroup': 'chat-bison (PaLM 2)',
|
261 |
'orientation': 'h',
|
262 |
'showlegend': True,
|
263 |
'textposition': 'auto',
|
264 |
'type': 'bar',
|
265 |
+
'x': array([0.0787475]),
|
266 |
'xaxis': 'x',
|
267 |
+
'y': array(['chat-bison (PaLM 2)'], dtype=object),
|
268 |
'yaxis': 'y'},
|
269 |
{'alignmentgroup': 'True',
|
270 |
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
271 |
+
'legendgroup': 'chat-bison-32k (PaLM 2 32K)',
|
272 |
'marker': {'color': '#FECB52', 'pattern': {'shape': ''}},
|
273 |
+
'name': 'chat-bison-32k (PaLM 2 32K)',
|
274 |
+
'offsetgroup': 'chat-bison-32k (PaLM 2 32K)',
|
275 |
'orientation': 'h',
|
276 |
'showlegend': True,
|
277 |
'textposition': 'auto',
|
278 |
'type': 'bar',
|
279 |
+
'x': array([0.0786175]),
|
280 |
'xaxis': 'x',
|
281 |
+
'y': array(['chat-bison-32k (PaLM 2 32K)'], dtype=object),
|
282 |
+
'yaxis': 'y'},
|
283 |
+
{'alignmentgroup': 'True',
|
284 |
+
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
285 |
+
'legendgroup': 'gemini-pro',
|
286 |
+
'marker': {'color': '#636efa', 'pattern': {'shape': ''}},
|
287 |
+
'name': 'gemini-pro',
|
288 |
+
'offsetgroup': 'gemini-pro',
|
289 |
+
'orientation': 'h',
|
290 |
+
'showlegend': True,
|
291 |
+
'textposition': 'auto',
|
292 |
+
'type': 'bar',
|
293 |
+
'x': array([0.0775075]),
|
294 |
+
'xaxis': 'x',
|
295 |
+
'y': array(['gemini-pro'], dtype=object),
|
296 |
+
'yaxis': 'y'},
|
297 |
+
{'alignmentgroup': 'True',
|
298 |
+
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
299 |
+
'legendgroup': 'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
|
300 |
+
'marker': {'color': '#EF553B', 'pattern': {'shape': ''}},
|
301 |
+
'name': 'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
|
302 |
+
'offsetgroup': 'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
|
303 |
+
'orientation': 'h',
|
304 |
+
'showlegend': True,
|
305 |
+
'textposition': 'auto',
|
306 |
+
'type': 'bar',
|
307 |
+
'x': array([0.0661168]),
|
308 |
+
'xaxis': 'x',
|
309 |
+
'y': array(['TinyLlama/TinyLlama-1.1B-Chat-v1.0'], dtype=object),
|
310 |
+
'yaxis': 'y'},
|
311 |
+
{'alignmentgroup': 'True',
|
312 |
+
'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
|
313 |
+
'legendgroup': 'Mistral (7B) Instruct v0.2 (Together AI)',
|
314 |
+
'marker': {'color': '#00cc96', 'pattern': {'shape': ''}},
|
315 |
+
'name': 'Mistral (7B) Instruct v0.2 (Together AI)',
|
316 |
+
'offsetgroup': 'Mistral (7B) Instruct v0.2 (Together AI)',
|
317 |
+
'orientation': 'h',
|
318 |
+
'showlegend': True,
|
319 |
+
'textposition': 'auto',
|
320 |
+
'type': 'bar',
|
321 |
+
'x': array([0.059762]),
|
322 |
+
'xaxis': 'x',
|
323 |
+
'y': array(['Mistral (7B) Instruct v0.2 (Together AI)'], dtype=object),
|
324 |
'yaxis': 'y'}],
|
325 |
'layout': {'barmode': 'relative',
|
326 |
'legend': {'title': {'text': 'Model'}, 'tracegroupgap': 0},
|
|
|
328 |
'title': {'text': 'Costs of execution of 6660 test queries per model'},
|
329 |
'xaxis': {'anchor': 'y', 'domain': [0.0, 1.0], 'title': {'text': 'Execution cost ($)'}},
|
330 |
'yaxis': {'anchor': 'x',
|
331 |
+
'categoryarray': [Mistral (7B) Instruct v0.2 (Together
|
332 |
+
AI), TinyLlama/TinyLlama-1.1B-Chat-v1.0,
|
333 |
+
gemini-pro, chat-bison-32k (PaLM 2 32K),
|
334 |
+
chat-bison (PaLM 2), RedPajama-INCITE
|
335 |
+
Chat (7B), Falcon Instruct (7B), Vicuna
|
336 |
+
v1.5 (7B), OpenHermes-2.5-Mistral (7B),
|
337 |
+
Qwen 1.5 Chat (7B), LLaMA-2 Chat (7B),
|
338 |
+
Upstage SOLAR Instruct v1 (11B),
|
339 |
+
WizardLM v1.2 (13B), Chronos Hermes
|
340 |
+
(13B), Snorkel Mistral PairRM DPO (7B),
|
341 |
+
Mistral-7B-Instruct-v0.2, gpt-3.5-turbo,
|
342 |
+
llama-2-70b-chat, 01-ai Yi Chat (34B),
|
343 |
+
zephyr-7b-beta,
|
344 |
+
Mixtral-8x7B-Instruct-v0.1, gpt-4-turbo,
|
345 |
+
gpt-4],
|
346 |
'categoryorder': 'array',
|
347 |
'domain': [0.0, 1.0],
|
348 |
'title': {'text': 'Model'}}}
|
349 |
+
})",Costs of execution of 6660 test queries per model,,"{""data"":[{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-4"",""marker"":{""color"":""#636efa"",""pattern"":{""shape"":""""}},""name"":""gpt-4"",""offsetgroup"":""gpt-4"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[9.1329],""xaxis"":""x"",""y"":[""gpt-4""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-4-turbo"",""marker"":{""color"":""#EF553B"",""pattern"":{""shape"":""""}},""name"":""gpt-4-turbo"",""offsetgroup"":""gpt-4-turbo"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[6.7599],""xaxis"":""x"",""y"":[""gpt-4-turbo""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Mixtral-8x7B-Instruct-v0.1"",""marker"":{""color"":""#00cc96"",""pattern"":{""shape"":""""}},""name"":""Mixtral-8x7B-Instruct-v0.1"",""offsetgroup"":""Mixtral-8x7B-Instruct-v0.1"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.539613],""xaxis"":""x"",""y"":[""Mixtral-8x7B-Instruct-v0.1""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost 
($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""zephyr-7b-beta"",""marker"":{""color"":""#ab63fa"",""pattern"":{""shape"":""""}},""name"":""zephyr-7b-beta"",""offsetgroup"":""zephyr-7b-beta"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.49900072815683155],""xaxis"":""x"",""y"":[""zephyr-7b-beta""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""01-ai Yi Chat (34B)"",""marker"":{""color"":""#FFA15A"",""pattern"":{""shape"":""""}},""name"":""01-ai Yi Chat (34B)"",""offsetgroup"":""01-ai Yi Chat (34B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.45192],""xaxis"":""x"",""y"":[""01-ai Yi Chat (34B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""llama-2-70b-chat"",""marker"":{""color"":""#19d3f3"",""pattern"":{""shape"":""""}},""name"":""llama-2-70b-chat"",""offsetgroup"":""llama-2-70b-chat"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.355275],""xaxis"":""x"",""y"":[""llama-2-70b-chat""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-3.5-turbo"",""marker"":{""color"":""#FF6692"",""pattern"":{""shape"":""""}},""name"":""gpt-3.5-turbo"",""offsetgroup"":""gpt-3.5-turbo"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.33931],""xaxis"":""x"",""y"":[""gpt-3.5-turbo""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost 
($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Mistral-7B-Instruct-v0.2"",""marker"":{""color"":""#B6E880"",""pattern"":{""shape"":""""}},""name"":""Mistral-7B-Instruct-v0.2"",""offsetgroup"":""Mistral-7B-Instruct-v0.2"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.29065088506539666],""xaxis"":""x"",""y"":[""Mistral-7B-Instruct-v0.2""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Snorkel Mistral PairRM DPO (7B)"",""marker"":{""color"":""#FF97FF"",""pattern"":{""shape"":""""}},""name"":""Snorkel Mistral PairRM DPO (7B)"",""offsetgroup"":""Snorkel Mistral PairRM DPO (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.176236],""xaxis"":""x"",""y"":[""Snorkel Mistral PairRM DPO (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Chronos Hermes (13B)"",""marker"":{""color"":""#FECB52"",""pattern"":{""shape"":""""}},""name"":""Chronos Hermes (13B)"",""offsetgroup"":""Chronos Hermes (13B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.158268],""xaxis"":""x"",""y"":[""Chronos Hermes (13B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""WizardLM v1.2 (13B)"",""marker"":{""color"":""#636efa"",""pattern"":{""shape"":""""}},""name"":""WizardLM v1.2 (13B)"",""offsetgroup"":""WizardLM v1.2 (13B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.147276],""xaxis"":""x"",""y"":[""WizardLM v1.2 
(13B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Upstage SOLAR Instruct v1 (11B)"",""marker"":{""color"":""#EF553B"",""pattern"":{""shape"":""""}},""name"":""Upstage SOLAR Instruct v1 (11B)"",""offsetgroup"":""Upstage SOLAR Instruct v1 (11B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.117306],""xaxis"":""x"",""y"":[""Upstage SOLAR Instruct v1 (11B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""LLaMA-2 Chat (7B)"",""marker"":{""color"":""#00cc96"",""pattern"":{""shape"":""""}},""name"":""LLaMA-2 Chat (7B)"",""offsetgroup"":""LLaMA-2 Chat (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.11668],""xaxis"":""x"",""y"":[""LLaMA-2 Chat (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Qwen 1.5 Chat (7B)"",""marker"":{""color"":""#ab63fa"",""pattern"":{""shape"":""""}},""name"":""Qwen 1.5 Chat (7B)"",""offsetgroup"":""Qwen 1.5 Chat (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.10311999999999999],""xaxis"":""x"",""y"":[""Qwen 1.5 Chat (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""OpenHermes-2.5-Mistral (7B)"",""marker"":{""color"":""#FFA15A"",""pattern"":{""shape"":""""}},""name"":""OpenHermes-2.5-Mistral (7B)"",""offsetgroup"":""OpenHermes-2.5-Mistral 
(7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.09995599999999999],""xaxis"":""x"",""y"":[""OpenHermes-2.5-Mistral (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Vicuna v1.5 (7B)"",""marker"":{""color"":""#19d3f3"",""pattern"":{""shape"":""""}},""name"":""Vicuna v1.5 (7B)"",""offsetgroup"":""Vicuna v1.5 (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.085688],""xaxis"":""x"",""y"":[""Vicuna v1.5 (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Falcon Instruct (7B)"",""marker"":{""color"":""#FF6692"",""pattern"":{""shape"":""""}},""name"":""Falcon Instruct (7B)"",""offsetgroup"":""Falcon Instruct (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.08474],""xaxis"":""x"",""y"":[""Falcon Instruct (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""RedPajama-INCITE Chat (7B)"",""marker"":{""color"":""#B6E880"",""pattern"":{""shape"":""""}},""name"":""RedPajama-INCITE Chat (7B)"",""offsetgroup"":""RedPajama-INCITE Chat (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.082008],""xaxis"":""x"",""y"":[""RedPajama-INCITE Chat (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""chat-bison (PaLM 2)"",""marker"":{""color"":""#FF97FF"",""pattern"":{""shape"":""""}},""name"":""chat-bison (PaLM 2)"",""offsetgroup"":""chat-bison (PaLM 
2)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.0787475],""xaxis"":""x"",""y"":[""chat-bison (PaLM 2)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""chat-bison-32k (PaLM 2 32K)"",""marker"":{""color"":""#FECB52"",""pattern"":{""shape"":""""}},""name"":""chat-bison-32k (PaLM 2 32K)"",""offsetgroup"":""chat-bison-32k (PaLM 2 32K)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.07861749999999999],""xaxis"":""x"",""y"":[""chat-bison-32k (PaLM 2 32K)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gemini-pro"",""marker"":{""color"":""#636efa"",""pattern"":{""shape"":""""}},""name"":""gemini-pro"",""offsetgroup"":""gemini-pro"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.0775075],""xaxis"":""x"",""y"":[""gemini-pro""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0"",""marker"":{""color"":""#EF553B"",""pattern"":{""shape"":""""}},""name"":""TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0"",""offsetgroup"":""TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.06611679673194885],""xaxis"":""x"",""y"":[""TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Mistral (7B) Instruct v0.2 (Together 
AI)"",""marker"":{""color"":""#00cc96"",""pattern"":{""shape"":""""}},""name"":""Mistral (7B) Instruct v0.2 (Together AI)"",""offsetgroup"":""Mistral (7B) Instruct v0.2 (Together AI)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.059761999999999996],""xaxis"":""x"",""y"":[""Mistral (7B) Instruct v0.2 (Together AI)""],""yaxis"":""y"",""type"":""bar""}],""layout"":{""template"":{""data"":{""histogram2dcontour"":[{""type"":""histogram2dcontour"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""choropleth"":[{""type"":""choropleth"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""histogram2d"":[{""type"":""histogram2d"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""heatmap"":[{""type"":""heatmap"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""heatmapgl"":[{""type"":""heatmapgl"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3
786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""contourcarpet"":[{""type"":""contourcarpet"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""contour"":[{""type"":""contour"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""surface"":[{""type"":""surface"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""mesh3d"":[{""type"":""mesh3d"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""scatter"":[{""fillpattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2},""type"":""scatter""}],""parcoords"":[{""type"":""parcoords"",""line"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterpolargl"":[{""type"":""scatterpolargl"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""bar"":[{""error_x"":{""color"":""#2a3f5f""},""error_y"":{""color"":""#2a3f5f""},""marker"":{""line"":{""color"":""#E5ECF6"",""width"":0.5},""pattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2}},""type"":""bar""}],""scattergeo"":[{""type"":""scattergeo"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterpolar"":[{""type"":""scatterpolar"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""histogram"":[{""marker"":{""pattern"":{""fillmode"":""overlay"",""size"":10,""solidi
ty"":0.2}},""type"":""histogram""}],""scattergl"":[{""type"":""scattergl"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatter3d"":[{""type"":""scatter3d"",""line"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}},""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scattermapbox"":[{""type"":""scattermapbox"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterternary"":[{""type"":""scatterternary"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scattercarpet"":[{""type"":""scattercarpet"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""carpet"":[{""aaxis"":{""endlinecolor"":""#2a3f5f"",""gridcolor"":""white"",""linecolor"":""white"",""minorgridcolor"":""white"",""startlinecolor"":""#2a3f5f""},""baxis"":{""endlinecolor"":""#2a3f5f"",""gridcolor"":""white"",""linecolor"":""white"",""minorgridcolor"":""white"",""startlinecolor"":""#2a3f5f""},""type"":""carpet""}],""table"":[{""cells"":{""fill"":{""color"":""#EBF0F8""},""line"":{""color"":""white""}},""header"":{""fill"":{""color"":""#C8D4E3""},""line"":{""color"":""white""}},""type"":""table""}],""barpolar"":[{""marker"":{""line"":{""color"":""#E5ECF6"",""width"":0.5},""pattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2}},""type"":""barpolar""}],""pie"":[{""automargin"":true,""type"":""pie""}]},""layout"":{""autotypenumbers"":""strict"",""colorway"":[""#636efa"",""#EF553B"",""#00cc96"",""#ab63fa"",""#FFA15A"",""#19d3f3"",""#FF6692"",""#B6E880"",""#FF97FF"",""#FECB52""],""font"":{""color"":""#2a3f5f""},""hovermode"":""closest"",""hoverlabel"":{""align"":""left""},""paper_bgcolor"":""white"",""plot_bgcolor"":""#E5ECF6"",""polar"":{""bgcolor"":""#E5ECF6"",""angularaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""},""radialaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""}},""ternary"":{""bgcolor"":""#E5ECF6"",""aaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ti
cks"":""""},""baxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""},""caxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""}},""coloraxis"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}},""colorscale"":{""sequential"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]],""sequentialminus"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]],""diverging"":[[0,""#8e0152""],[0.1,""#c51b7d""],[0.2,""#de77ae""],[0.3,""#f1b6da""],[0.4,""#fde0ef""],[0.5,""#f7f7f7""],[0.6,""#e6f5d0""],[0.7,""#b8e186""],[0.8,""#7fbc41""],[0.9,""#4d9221""],[1,""#276419""]]},""xaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":"""",""title"":{""standoff"":15},""zerolinecolor"":""white"",""automargin"":true,""zerolinewidth"":2},""yaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":"""",""title"":{""standoff"":15},""zerolinecolor"":""white"",""automargin"":true,""zerolinewidth"":2},""scene"":{""xaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2},""yaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2},""zaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2}},""shapedefaults"":{""line""
:{""color"":""#2a3f5f""}},""annotationdefaults"":{""arrowcolor"":""#2a3f5f"",""arrowhead"":0,""arrowwidth"":1},""geo"":{""bgcolor"":""white"",""landcolor"":""#E5ECF6"",""subunitcolor"":""white"",""showland"":true,""showlakes"":true,""lakecolor"":""white""},""title"":{""x"":0.05},""mapbox"":{""style"":""light""}}},""xaxis"":{""anchor"":""y"",""domain"":[0.0,1.0],""title"":{""text"":""Execution cost ($)""}},""yaxis"":{""anchor"":""x"",""domain"":[0.0,1.0],""title"":{""text"":""Model""},""categoryorder"":""array"",""categoryarray"":[""Mistral (7B) Instruct v0.2 (Together AI)"",""TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0"",""gemini-pro"",""chat-bison-32k (PaLM 2 32K)"",""chat-bison (PaLM 2)"",""RedPajama-INCITE Chat (7B)"",""Falcon Instruct (7B)"",""Vicuna v1.5 (7B)"",""OpenHermes-2.5-Mistral (7B)"",""Qwen 1.5 Chat (7B)"",""LLaMA-2 Chat (7B)"",""Upstage SOLAR Instruct v1 (11B)"",""WizardLM v1.2 (13B)"",""Chronos Hermes (13B)"",""Snorkel Mistral PairRM DPO (7B)"",""Mistral-7B-Instruct-v0.2"",""gpt-3.5-turbo"",""llama-2-70b-chat"",""01-ai Yi Chat (34B)"",""zephyr-7b-beta"",""Mixtral-8x7B-Instruct-v0.1"",""gpt-4-turbo"",""gpt-4""]},""legend"":{""title"":{""text"":""Model""},""tracegroupgap"":0},""title"":{""text"":""Costs of execution of 6660 test queries per model""},""barmode"":""relative""}}","{""y"": ""model"", ""x"": ""model_query_costs"", ""color"": ""model"", ""orientation"": ""h"", ""title"": ""Costs of execution of 6660 test queries per model"", ""labels"": {""model"": ""Model"", ""model_query_costs"": ""Execution cost ($)""}}",",model_query_costs,model
|
350 |
+
2,9.1329,gpt-4
|
351 |
+
1,6.7599,gpt-4-turbo
|
352 |
+
4,0.539613,Mixtral-8x7B-Instruct-v0.1
|
353 |
+
5,0.49900072815683155,zephyr-7b-beta
|
354 |
+
11,0.45192,01-ai Yi Chat (34B)
|
355 |
+
3,0.355275,llama-2-70b-chat
|
356 |
+
0,0.33931,gpt-3.5-turbo
|
357 |
+
6,0.29065088506539666,Mistral-7B-Instruct-v0.2
|
358 |
+
43,0.176236,Snorkel Mistral PairRM DPO (7B)
|
359 |
+
12,0.158268,Chronos Hermes (13B)
|
360 |
+
55,0.147276,WizardLM v1.2 (13B)
|
361 |
+
56,0.117306,Upstage SOLAR Instruct v1 (11B)
|
362 |
+
24,0.11668,LLaMA-2 Chat (7B)
|
363 |
+
40,0.10311999999999999,Qwen 1.5 Chat (7B)
|
364 |
+
46,0.09995599999999999,OpenHermes-2.5-Mistral (7B)
|
365 |
+
17,0.085688,Vicuna v1.5 (7B)
|
366 |
+
48,0.08474,Falcon Instruct (7B)
|
367 |
+
51,0.082008,RedPajama-INCITE Chat (7B)
|
368 |
+
9,0.0787475,chat-bison (PaLM 2)
|
369 |
+
10,0.07861749999999999,chat-bison-32k (PaLM 2 32K)
|
370 |
+
8,0.0775075,gemini-pro
|
371 |
+
7,0.06611679673194885,TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
372 |
+
26,0.059761999999999996,Mistral (7B) Instruct v0.2 (Together AI)
|
373 |
"
|
374 |
model_sizes,./html/plots/model_sizes.html,"Figure({
|
375 |
'data': [{'alignmentgroup': 'True',
|
data/summary_metrics_plots.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pipeline/config.py
CHANGED
@@ -64,3 +64,12 @@ class GeneralPlotConfig(Config):
|
|
64 |
seconds_per_token: float = 184 / 6
|
65 |
input_size: int = 100
|
66 |
expected_output_size: int = 50
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
seconds_per_token: float = 184 / 6
|
65 |
input_size: int = 100
|
66 |
expected_output_size: int = 50
|
67 |
+
|
68 |
+
|
69 |
+
class CombinedPlotsConfig(Config):
|
70 |
+
plots_dir: str = "./html/plots/"
|
71 |
+
saving_path: str = "data/"
|
72 |
+
scatter_plots: bool = False
|
73 |
+
|
74 |
+
class SummaryMetricsConfig(Config):
|
75 |
+
combined_score: bool = False
|