cyberosa committed
Commit a9bd212 · 1 Parent(s): 967e6fd

disabling temporarily the run_benchmark tab

Files changed (3):
  1. app.py +78 -78
  2. start.py +16 -16
  3. tabs/run_benchmark.py +2 -2
app.py CHANGED
@@ -13,7 +13,7 @@ from tabs.faq import (
 from tabs.howto_benchmark import how_to_run
 
 # disabling temporarily
-from tabs.run_benchmark import run_benchmark_main
+# from tabs.run_benchmark import run_benchmark_main
 
 demo = gr.Blocks()
 
@@ -111,83 +111,83 @@ with demo:
     gr.Markdown(how_to_run)
 
     # fourth tab - run the benchmark
-    with gr.TabItem("🔥 Run the Benchmark"):
-        with gr.Row():
-            tool_name = gr.Dropdown(
-                [
-                    "prediction-offline",
-                    "prediction-online",
-                    # "prediction-online-summarized-info",
-                    # "prediction-offline-sme",
-                    # "prediction-online-sme",
-                    "prediction-request-rag",
-                    "prediction-request-reasoning",
-                    # "prediction-url-cot-claude",
-                    # "prediction-request-rag-cohere",
-                    # "prediction-with-research-conservative",
-                    # "prediction-with-research-bold",
-                ],
-                label="Tool Name",
-                info="Choose the tool to run",
-            )
-            model_name = gr.Dropdown(
-                [
-                    "gpt-3.5-turbo-0125",
-                    "gpt-4-0125-preview",
-                    "claude-3-haiku-20240307",
-                    "claude-3-sonnet-20240229",
-                    "claude-3-opus-20240229",
-                    "databricks/dbrx-instruct:nitro",
-                    "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
-                    # "cohere/command-r-plus",
-                ],
-                label="Model Name",
-                info="Choose the model to use",
-            )
-        with gr.Row():
-            openai_api_key = gr.Textbox(
-                label="OpenAI API Key",
-                placeholder="Enter your OpenAI API key here",
-                type="password",
-            )
-            anthropic_api_key = gr.Textbox(
-                label="Anthropic API Key",
-                placeholder="Enter your Anthropic API key here",
-                type="password",
-            )
-            openrouter_api_key = gr.Textbox(
-                label="OpenRouter API Key",
-                placeholder="Enter your OpenRouter API key here",
-                type="password",
-            )
-        with gr.Row():
-            num_questions = gr.Slider(
-                minimum=1,
-                maximum=340,
-                value=10,
-                label="Number of questions to run the benchmark on",
-            )
-        with gr.Row():
-            run_button = gr.Button("Run Benchmark")
-        with gr.Row():
-            with gr.Accordion("Results", open=True):
-                result = gr.Dataframe()
-        with gr.Row():
-            with gr.Accordion("Summary", open=False):
-                summary = gr.Dataframe()
-
-        run_button.click(
-            run_benchmark_gradio,
-            inputs=[
-                tool_name,
-                model_name,
-                num_questions,
-                openai_api_key,
-                anthropic_api_key,
-                openrouter_api_key,
-            ],
-            outputs=[result, summary],
-        )
+    # with gr.TabItem("🔥 Run the Benchmark"):
+    #     with gr.Row():
+    #         tool_name = gr.Dropdown(
+    #             [
+    #                 "prediction-offline",
+    #                 "prediction-online",
+    #                 # "prediction-online-summarized-info",
+    #                 # "prediction-offline-sme",
+    #                 # "prediction-online-sme",
+    #                 "prediction-request-rag",
+    #                 "prediction-request-reasoning",
+    #                 # "prediction-url-cot-claude",
+    #                 # "prediction-request-rag-cohere",
+    #                 # "prediction-with-research-conservative",
+    #                 # "prediction-with-research-bold",
+    #             ],
+    #             label="Tool Name",
+    #             info="Choose the tool to run",
+    #         )
+    #         model_name = gr.Dropdown(
+    #             [
+    #                 "gpt-3.5-turbo-0125",
+    #                 "gpt-4-0125-preview",
+    #                 "claude-3-haiku-20240307",
+    #                 "claude-3-sonnet-20240229",
+    #                 "claude-3-opus-20240229",
+    #                 "databricks/dbrx-instruct:nitro",
+    #                 "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
+    #                 # "cohere/command-r-plus",
+    #             ],
+    #             label="Model Name",
+    #             info="Choose the model to use",
+    #         )
+    #     with gr.Row():
+    #         openai_api_key = gr.Textbox(
+    #             label="OpenAI API Key",
+    #             placeholder="Enter your OpenAI API key here",
+    #             type="password",
+    #         )
+    #         anthropic_api_key = gr.Textbox(
+    #             label="Anthropic API Key",
+    #             placeholder="Enter your Anthropic API key here",
+    #             type="password",
+    #         )
+    #         openrouter_api_key = gr.Textbox(
+    #             label="OpenRouter API Key",
+    #             placeholder="Enter your OpenRouter API key here",
+    #             type="password",
+    #         )
+    #     with gr.Row():
+    #         num_questions = gr.Slider(
+    #             minimum=1,
+    #             maximum=340,
+    #             value=10,
+    #             label="Number of questions to run the benchmark on",
+    #         )
+    #     with gr.Row():
+    #         run_button = gr.Button("Run Benchmark")
+    #     with gr.Row():
+    #         with gr.Accordion("Results", open=True):
+    #             result = gr.Dataframe()
+    #     with gr.Row():
+    #         with gr.Accordion("Summary", open=False):
+    #             summary = gr.Dataframe()
+
+    #     run_button.click(
+    #         run_benchmark_gradio,
+    #         inputs=[
+    #             tool_name,
+    #             model_name,
+    #             num_questions,
+    #             openai_api_key,
+    #             anthropic_api_key,
+    #             openrouter_api_key,
+    #         ],
+    #         outputs=[result, summary],
+    #     )
 
 
 demo.queue(default_concurrency_limit=40).launch()
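Rather than commenting the whole tab out, the same effect could be achieved by gating the block behind a flag, so re-enabling it later does not mean uncommenting roughly 80 lines. The sketch below only illustrates that idea and is not part of this commit: RUN_BENCHMARK_ENABLED is a hypothetical environment variable, and the tab body is trimmed to a single button.

# sketch.py - hypothetical flag-gated variant of the app.py tab (assumption, not this commit)
import os

import gradio as gr

from tabs.howto_benchmark import how_to_run

# hypothetical flag; the tab stays hidden unless it is explicitly enabled
RUN_BENCHMARK_ENABLED = os.getenv("RUN_BENCHMARK_ENABLED", "false").lower() == "true"

if RUN_BENCHMARK_ENABLED:
    # the import this commit comments out can be guarded the same way
    from tabs.run_benchmark import run_benchmark_main  # noqa: F401

demo = gr.Blocks()

with demo:
    gr.Markdown(how_to_run)
    if RUN_BENCHMARK_ENABLED:
        # the real tab also has dropdowns, API-key fields, a slider and the
        # run_button.click(...) wiring; all omitted here for brevity
        with gr.TabItem("🔥 Run the Benchmark"):
            with gr.Row():
                run_button = gr.Button("Run Benchmark")

demo.queue(default_concurrency_limit=40).launch()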
start.py CHANGED
@@ -45,27 +45,27 @@ def start():
     """Start commands."""
     print("Starting commands...")
     base_dir = os.getcwd()
-    olas_dir = os.path.join(base_dir, "olas-predict-benchmark")
-    benchmark_dir = os.path.join(olas_dir, "benchmark")
-    mech_dir = os.path.join(olas_dir, "benchmark", "mech")
+    # olas_dir = os.path.join(base_dir, "olas-predict-benchmark")
+    # benchmark_dir = os.path.join(olas_dir, "benchmark")
+    # mech_dir = os.path.join(olas_dir, "benchmark", "mech")
 
     commands = [
         ("pip install poetry", base_dir),
         ("git submodule init", base_dir),
         ("git submodule update --init --recursive", base_dir),
         ("git submodule update --remote --recursive", base_dir),
-        (
-            'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
-            olas_dir,
-        ),
-        ("git remote update", olas_dir),
-        ("git fetch --all", olas_dir),
-        ("git checkout main", olas_dir),
-        ("git pull origin main", olas_dir),
-        ("git checkout main", mech_dir),
-        ("git pull origin main", mech_dir),
-        ("poetry install", benchmark_dir),
-        ("pip install -e .", mech_dir),
+        # (
+        #     'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
+        #     olas_dir,
+        # ),
+        # ("git remote update", olas_dir),
+        # ("git fetch --all", olas_dir),
+        # ("git checkout main", olas_dir),
+        # ("git pull origin main", olas_dir),
+        # ("git checkout main", mech_dir),
+        # ("git pull origin main", mech_dir),
+        # ("poetry install", benchmark_dir),
+        # ("pip install -e .", mech_dir),
         ("pip install lxml[html_clean]", base_dir),
         ("pip install --upgrade huggingface_hub", base_dir),
     ]
@@ -74,7 +74,7 @@ def start():
         run_command(command, cwd=cwd)
 
     # add benchmark to the path
-    sys.path.append(os.path.join(olas_dir, "benchmark"))
+    # sys.path.append(os.path.join(olas_dir, "benchmark"))
 
     # Download the dataset
     download_dataset()
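Both hunks leave the loop that calls run_command(command, cwd=cwd) untouched; only the olas-predict-benchmark checkout and editable installs are dropped from the command list. The helper itself is defined elsewhere in start.py and is not shown in this diff, so the following is just a plausible sketch of such a wrapper, assuming it shells out via subprocess and stops on the first failure.

# sketch of a run_command helper (assumption; the real one is not shown in this diff)
import subprocess


def run_command(command: str, cwd: str) -> None:
    """Run a shell command in the given working directory, raising on failure."""
    print(f"Running: {command} (cwd={cwd})")
    subprocess.run(command, shell=True, cwd=cwd, check=True)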
tabs/run_benchmark.py CHANGED
@@ -1,6 +1,6 @@
 import os
 
-from benchmark.run_benchmark import run_benchmark
+# from benchmark.run_benchmark import run_benchmark
 
 
 def run_benchmark_main(
@@ -55,7 +55,7 @@ def run_benchmark_main(
 
     # Run the benchmark
    try:
-        run_benchmark(kwargs=kwargs)
+        # run_benchmark(kwargs=kwargs)
         return "completed"
     except Exception as e:
         return f"Error running benchmark: {e}"