cyberosa committed on
Commit
9b36cb7
·
1 Parent(s): d269dc6

disabling the benchmark feature until we fix it

Browse files
Files changed (5) hide show
  1. .gitmodules +0 -3
  2. app.py +125 -125
  3. automate/automate.py +5 -9
  4. olas-predict-benchmark +0 -1
  5. start.py +16 -17
.gitmodules DELETED
@@ -1,3 +0,0 @@
1
- [submodule "olas-predict-benchmark"]
2
- path = olas-predict-benchmark
3
- url = https://github.com/valory-xyz/olas-predict-benchmark.git
 
 
 
 
app.py CHANGED
@@ -13,69 +13,69 @@ from tabs.faq import (
13
  from tabs.howto_benchmark import how_to_run
14
 
15
  # Feature temporarily disabled til HF support helps us with the Space Error
16
- from tabs.run_benchmark import run_benchmark_main
17
 
18
 
19
  demo = gr.Blocks()
20
 
21
 
22
- def run_benchmark_gradio(
23
- tool_name,
24
- model_name,
25
- num_questions,
26
- openai_api_key,
27
- anthropic_api_key,
28
- openrouter_api_key,
29
- ):
30
- """Run the benchmark using inputs."""
31
- if tool_name is None:
32
- return "Please enter the name of your tool."
33
- if (
34
- openai_api_key is None
35
- and anthropic_api_key is None
36
- and openrouter_api_key is None
37
- ):
38
- return "Please enter either OpenAI or Anthropic or OpenRouter API key."
39
-
40
- result = run_benchmark_main(
41
- tool_name,
42
- model_name,
43
- num_questions,
44
- openai_api_key,
45
- anthropic_api_key,
46
- openrouter_api_key,
47
- )
48
 
49
- if result == "completed":
50
- # get the results file in the results directory
51
- fns = glob("results/*.csv")
52
 
53
- print(f"Number of files in results directory: {len(fns)}")
54
 
55
- # convert to Path
56
- files = [Path(file) for file in fns]
57
 
58
- # get results and summary files
59
- results_files = [file for file in files if "results" in file.name]
60
 
61
- # the other file is the summary file
62
- summary_files = [file for file in files if "summary" in file.name]
63
 
64
- print(results_files, summary_files)
65
 
66
- # get the path with results
67
- results_df = pd.read_csv(results_files[0])
68
- summary_df = pd.read_csv(summary_files[0])
69
 
70
- # make sure all df float values are rounded to 4 decimal places
71
- results_df = results_df.round(4)
72
- summary_df = summary_df.round(4)
73
 
74
- return gr.Dataframe(value=results_df), gr.Dataframe(value=summary_df)
75
 
76
- return gr.Textbox(
77
- label="Benchmark Result", value=result, interactive=False
78
- ), gr.Textbox(label="Summary", value="")
79
 
80
 
81
  with demo:
@@ -112,83 +112,83 @@ with demo:
112
  gr.Markdown(how_to_run)
113
 
114
  # fourth tab - run the benchmark
115
- with gr.TabItem("🔥 Run the Benchmark"):
116
- with gr.Row():
117
- tool_name = gr.Dropdown(
118
- [
119
- "prediction-offline",
120
- "prediction-online",
121
- # "prediction-online-summarized-info",
122
- # "prediction-offline-sme",
123
- # "prediction-online-sme",
124
- "prediction-request-rag",
125
- "prediction-request-reasoning",
126
- # "prediction-url-cot-claude",
127
- # "prediction-request-rag-cohere",
128
- # "prediction-with-research-conservative",
129
- # "prediction-with-research-bold",
130
- ],
131
- label="Tool Name",
132
- info="Choose the tool to run",
133
- )
134
- model_name = gr.Dropdown(
135
- [
136
- "gpt-3.5-turbo-0125",
137
- "gpt-4-0125-preview",
138
- "claude-3-haiku-20240307",
139
- "claude-3-sonnet-20240229",
140
- "claude-3-opus-20240229",
141
- "databricks/dbrx-instruct:nitro",
142
- "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
143
- # "cohere/command-r-plus",
144
- ],
145
- label="Model Name",
146
- info="Choose the model to use",
147
- )
148
- with gr.Row():
149
- openai_api_key = gr.Textbox(
150
- label="OpenAI API Key",
151
- placeholder="Enter your OpenAI API key here",
152
- type="password",
153
- )
154
- anthropic_api_key = gr.Textbox(
155
- label="Anthropic API Key",
156
- placeholder="Enter your Anthropic API key here",
157
- type="password",
158
- )
159
- openrouter_api_key = gr.Textbox(
160
- label="OpenRouter API Key",
161
- placeholder="Enter your OpenRouter API key here",
162
- type="password",
163
- )
164
- with gr.Row():
165
- num_questions = gr.Slider(
166
- minimum=1,
167
- maximum=340,
168
- value=10,
169
- label="Number of questions to run the benchmark on",
170
- )
171
- with gr.Row():
172
- run_button = gr.Button("Run Benchmark")
173
- with gr.Row():
174
- with gr.Accordion("Results", open=True):
175
- result = gr.Dataframe()
176
- with gr.Row():
177
- with gr.Accordion("Summary", open=False):
178
- summary = gr.Dataframe()
179
-
180
- run_button.click(
181
- run_benchmark_gradio,
182
- inputs=[
183
- tool_name,
184
- model_name,
185
- num_questions,
186
- openai_api_key,
187
- anthropic_api_key,
188
- openrouter_api_key,
189
- ],
190
- outputs=[result, summary],
191
- )
192
 
193
 
194
  demo.queue(default_concurrency_limit=40).launch()
 
13
  from tabs.howto_benchmark import how_to_run
14
 
15
  # Feature temporarily disabled til HF support helps us with the Space Error
16
+ # from tabs.run_benchmark import run_benchmark_main
17
 
18
 
19
  demo = gr.Blocks()
20
 
21
 
22
+ # def run_benchmark_gradio(
23
+ # tool_name,
24
+ # model_name,
25
+ # num_questions,
26
+ # openai_api_key,
27
+ # anthropic_api_key,
28
+ # openrouter_api_key,
29
+ # ):
30
+ # """Run the benchmark using inputs."""
31
+ # if tool_name is None:
32
+ # return "Please enter the name of your tool."
33
+ # if (
34
+ # openai_api_key is None
35
+ # and anthropic_api_key is None
36
+ # and openrouter_api_key is None
37
+ # ):
38
+ # return "Please enter either OpenAI or Anthropic or OpenRouter API key."
39
+
40
+ # result = run_benchmark_main(
41
+ # tool_name,
42
+ # model_name,
43
+ # num_questions,
44
+ # openai_api_key,
45
+ # anthropic_api_key,
46
+ # openrouter_api_key,
47
+ # )
48
 
49
+ # if result == "completed":
50
+ # # get the results file in the results directory
51
+ # fns = glob("results/*.csv")
52
 
53
+ # print(f"Number of files in results directory: {len(fns)}")
54
 
55
+ # # convert to Path
56
+ # files = [Path(file) for file in fns]
57
 
58
+ # # get results and summary files
59
+ # results_files = [file for file in files if "results" in file.name]
60
 
61
+ # # the other file is the summary file
62
+ # summary_files = [file for file in files if "summary" in file.name]
63
 
64
+ # print(results_files, summary_files)
65
 
66
+ # # get the path with results
67
+ # results_df = pd.read_csv(results_files[0])
68
+ # summary_df = pd.read_csv(summary_files[0])
69
 
70
+ # # make sure all df float values are rounded to 4 decimal places
71
+ # results_df = results_df.round(4)
72
+ # summary_df = summary_df.round(4)
73
 
74
+ # return gr.Dataframe(value=results_df), gr.Dataframe(value=summary_df)
75
 
76
+ # return gr.Textbox(
77
+ # label="Benchmark Result", value=result, interactive=False
78
+ # ), gr.Textbox(label="Summary", value="")
79
 
80
 
81
  with demo:
 
112
  gr.Markdown(how_to_run)
113
 
114
  # fourth tab - run the benchmark
115
+ # with gr.TabItem("🔥 Run the Benchmark"):
116
+ # with gr.Row():
117
+ # tool_name = gr.Dropdown(
118
+ # [
119
+ # "prediction-offline",
120
+ # "prediction-online",
121
+ # # "prediction-online-summarized-info",
122
+ # # "prediction-offline-sme",
123
+ # # "prediction-online-sme",
124
+ # "prediction-request-rag",
125
+ # "prediction-request-reasoning",
126
+ # # "prediction-url-cot-claude",
127
+ # # "prediction-request-rag-cohere",
128
+ # # "prediction-with-research-conservative",
129
+ # # "prediction-with-research-bold",
130
+ # ],
131
+ # label="Tool Name",
132
+ # info="Choose the tool to run",
133
+ # )
134
+ # model_name = gr.Dropdown(
135
+ # [
136
+ # "gpt-3.5-turbo-0125",
137
+ # "gpt-4-0125-preview",
138
+ # "claude-3-haiku-20240307",
139
+ # "claude-3-sonnet-20240229",
140
+ # "claude-3-opus-20240229",
141
+ # "databricks/dbrx-instruct:nitro",
142
+ # "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
143
+ # # "cohere/command-r-plus",
144
+ # ],
145
+ # label="Model Name",
146
+ # info="Choose the model to use",
147
+ # )
148
+ # with gr.Row():
149
+ # openai_api_key = gr.Textbox(
150
+ # label="OpenAI API Key",
151
+ # placeholder="Enter your OpenAI API key here",
152
+ # type="password",
153
+ # )
154
+ # anthropic_api_key = gr.Textbox(
155
+ # label="Anthropic API Key",
156
+ # placeholder="Enter your Anthropic API key here",
157
+ # type="password",
158
+ # )
159
+ # openrouter_api_key = gr.Textbox(
160
+ # label="OpenRouter API Key",
161
+ # placeholder="Enter your OpenRouter API key here",
162
+ # type="password",
163
+ # )
164
+ # with gr.Row():
165
+ # num_questions = gr.Slider(
166
+ # minimum=1,
167
+ # maximum=340,
168
+ # value=10,
169
+ # label="Number of questions to run the benchmark on",
170
+ # )
171
+ # with gr.Row():
172
+ # run_button = gr.Button("Run Benchmark")
173
+ # with gr.Row():
174
+ # with gr.Accordion("Results", open=True):
175
+ # result = gr.Dataframe()
176
+ # with gr.Row():
177
+ # with gr.Accordion("Summary", open=False):
178
+ # summary = gr.Dataframe()
179
+
180
+ # run_button.click(
181
+ # run_benchmark_gradio,
182
+ # inputs=[
183
+ # tool_name,
184
+ # model_name,
185
+ # num_questions,
186
+ # openai_api_key,
187
+ # anthropic_api_key,
188
+ # openrouter_api_key,
189
+ # ],
190
+ # outputs=[result, summary],
191
+ # )
192
 
193
 
194
  demo.queue(default_concurrency_limit=40).launch()
automate/automate.py CHANGED
@@ -1,10 +1,11 @@
1
- import os
2
  import subprocess
3
  from apscheduler.schedulers.blocking import BackgroundScheduler
4
 
5
 
6
  def run_command(command, shell=True):
7
- process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell)
 
 
8
  stdout, stderr = process.communicate()
9
 
10
  if process.returncode == 0:
@@ -20,10 +21,5 @@ def run_benchmark():
20
 
21
 
22
  scheduler = BackgroundScheduler()
23
- scheduler.add_job(
24
- run_benchmark,
25
- 'cron',
26
- day_of_week='sun',
27
- hour=0,
28
- timezone='UTC')
29
- scheduler.start()
 
 
1
  import subprocess
2
  from apscheduler.schedulers.blocking import BackgroundScheduler
3
 
4
 
5
  def run_command(command, shell=True):
6
+ process = subprocess.Popen(
7
+ command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell
8
+ )
9
  stdout, stderr = process.communicate()
10
 
11
  if process.returncode == 0:
 
21
 
22
 
23
  scheduler = BackgroundScheduler()
24
+ scheduler.add_job(run_benchmark, "cron", day_of_week="sun", hour=0, timezone="UTC")
25
+ scheduler.start()
 
 
 
 
 
olas-predict-benchmark DELETED
@@ -1 +0,0 @@
1
- Subproject commit bac77acc64ed129608e6f428d40e86c0eb2cb4d1
 
 
start.py CHANGED
@@ -45,27 +45,26 @@ def start():
45
  """Start commands."""
46
  print("Starting commands...")
47
  base_dir = os.getcwd()
48
- olas_dir = os.path.join(base_dir, "olas-predict-benchmark")
49
- mech_dir = os.path.join(olas_dir, "benchmark", "mech")
50
 
51
  commands = [
52
- ("git submodule init", base_dir),
53
  # no updates
54
  # ("git submodule update --init --recursive", base_dir),
55
  # ("git submodule update --remote --recursive", base_dir),
56
- (
57
- 'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
58
- olas_dir,
59
- ),
60
- # no updates
61
- ("git remote update", olas_dir),
62
- ("git fetch --all", olas_dir),
63
- ("git checkout main", olas_dir),
64
- ("git pull origin main", olas_dir),
65
- ("git checkout 56ecf18a982c4548feac5efe787690a3ec37c835", mech_dir),
66
- # ("git pull origin main", mech_dir),
67
- ("pip install -e .", os.path.join(olas_dir, "benchmark")),
68
- ("pip install -e .", mech_dir),
69
  ("pip install lxml[html_clean]", base_dir),
70
  ("pip install --upgrade huggingface_hub", base_dir),
71
  ]
@@ -74,7 +73,7 @@ def start():
74
  run_command(command, cwd=cwd)
75
 
76
  # add benchmark to the path
77
- sys.path.append(os.path.join(olas_dir, "benchmark"))
78
 
79
  # Download the dataset
80
  download_dataset()
 
45
  """Start commands."""
46
  print("Starting commands...")
47
  base_dir = os.getcwd()
48
+ # olas_dir = os.path.join(base_dir, "olas-predict-benchmark")
49
+ # mech_dir = os.path.join(olas_dir, "benchmark", "mech")
50
 
51
  commands = [
52
+ # ("git submodule init", base_dir),
53
  # no updates
54
  # ("git submodule update --init --recursive", base_dir),
55
  # ("git submodule update --remote --recursive", base_dir),
56
+ # (
57
+ # 'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
58
+ # olas_dir,
59
+ # ),
60
+ # ("git remote update", olas_dir),
61
+ # ("git fetch --all", olas_dir),
62
+ # ("git checkout main", olas_dir),
63
+ # ("git pull origin main", olas_dir),
64
+ # ("git checkout 56ecf18a982c4548feac5efe787690a3ec37c835", mech_dir),
65
+ # # ("git pull origin main", mech_dir),
66
+ # ("pip install -e .", os.path.join(olas_dir, "benchmark")),
67
+ # ("pip install -e .", mech_dir),
 
68
  ("pip install lxml[html_clean]", base_dir),
69
  ("pip install --upgrade huggingface_hub", base_dir),
70
  ]
 
73
  run_command(command, cwd=cwd)
74
 
75
  # add benchmark to the path
76
+ # sys.path.append(os.path.join(olas_dir, "benchmark"))
77
 
78
  # Download the dataset
79
  download_dataset()