cyberosa committed
Commit a9bd212 · 1 Parent(s): 967e6fd

disabling temporarily the run_benchmark tab

Files changed (3):
  1. app.py +78 -78
  2. start.py +16 -16
  3. tabs/run_benchmark.py +2 -2
app.py CHANGED
@@ -13,7 +13,7 @@ from tabs.faq import (
 from tabs.howto_benchmark import how_to_run
 
 # disabling temporarily
-from tabs.run_benchmark import run_benchmark_main
+# from tabs.run_benchmark import run_benchmark_main
 
 demo = gr.Blocks()
 
@@ -111,83 +111,83 @@ with demo:
     gr.Markdown(how_to_run)
 
     # fourth tab - run the benchmark
-    with gr.TabItem("🔥 Run the Benchmark"):
-        with gr.Row():
-            tool_name = gr.Dropdown(
-                [
-                    "prediction-offline",
-                    "prediction-online",
-                    # "prediction-online-summarized-info",
-                    # "prediction-offline-sme",
-                    # "prediction-online-sme",
-                    "prediction-request-rag",
-                    "prediction-request-reasoning",
-                    # "prediction-url-cot-claude",
-                    # "prediction-request-rag-cohere",
-                    # "prediction-with-research-conservative",
-                    # "prediction-with-research-bold",
-                ],
-                label="Tool Name",
-                info="Choose the tool to run",
-            )
-            model_name = gr.Dropdown(
-                [
-                    "gpt-3.5-turbo-0125",
-                    "gpt-4-0125-preview",
-                    "claude-3-haiku-20240307",
-                    "claude-3-sonnet-20240229",
-                    "claude-3-opus-20240229",
-                    "databricks/dbrx-instruct:nitro",
-                    "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
-                    # "cohere/command-r-plus",
-                ],
-                label="Model Name",
-                info="Choose the model to use",
-            )
-        with gr.Row():
-            openai_api_key = gr.Textbox(
-                label="OpenAI API Key",
-                placeholder="Enter your OpenAI API key here",
-                type="password",
-            )
-            anthropic_api_key = gr.Textbox(
-                label="Anthropic API Key",
-                placeholder="Enter your Anthropic API key here",
-                type="password",
-            )
-            openrouter_api_key = gr.Textbox(
-                label="OpenRouter API Key",
-                placeholder="Enter your OpenRouter API key here",
-                type="password",
-            )
-        with gr.Row():
-            num_questions = gr.Slider(
-                minimum=1,
-                maximum=340,
-                value=10,
-                label="Number of questions to run the benchmark on",
-            )
-        with gr.Row():
-            run_button = gr.Button("Run Benchmark")
-        with gr.Row():
-            with gr.Accordion("Results", open=True):
-                result = gr.Dataframe()
-        with gr.Row():
-            with gr.Accordion("Summary", open=False):
-                summary = gr.Dataframe()
-
-        run_button.click(
-            run_benchmark_gradio,
-            inputs=[
-                tool_name,
-                model_name,
-                num_questions,
-                openai_api_key,
-                anthropic_api_key,
-                openrouter_api_key,
-            ],
-            outputs=[result, summary],
-        )
+    # with gr.TabItem("🔥 Run the Benchmark"):
+    #     with gr.Row():
+    #         tool_name = gr.Dropdown(
+    #             [
+    #                 "prediction-offline",
+    #                 "prediction-online",
+    #                 # "prediction-online-summarized-info",
+    #                 # "prediction-offline-sme",
+    #                 # "prediction-online-sme",
+    #                 "prediction-request-rag",
+    #                 "prediction-request-reasoning",
+    #                 # "prediction-url-cot-claude",
+    #                 # "prediction-request-rag-cohere",
+    #                 # "prediction-with-research-conservative",
+    #                 # "prediction-with-research-bold",
+    #             ],
+    #             label="Tool Name",
+    #             info="Choose the tool to run",
+    #         )
+    #         model_name = gr.Dropdown(
+    #             [
+    #                 "gpt-3.5-turbo-0125",
+    #                 "gpt-4-0125-preview",
+    #                 "claude-3-haiku-20240307",
+    #                 "claude-3-sonnet-20240229",
+    #                 "claude-3-opus-20240229",
+    #                 "databricks/dbrx-instruct:nitro",
+    #                 "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
+    #                 # "cohere/command-r-plus",
+    #             ],
+    #             label="Model Name",
+    #             info="Choose the model to use",
+    #         )
+    #     with gr.Row():
+    #         openai_api_key = gr.Textbox(
+    #             label="OpenAI API Key",
+    #             placeholder="Enter your OpenAI API key here",
+    #             type="password",
+    #         )
+    #         anthropic_api_key = gr.Textbox(
+    #             label="Anthropic API Key",
+    #             placeholder="Enter your Anthropic API key here",
+    #             type="password",
+    #         )
+    #         openrouter_api_key = gr.Textbox(
+    #             label="OpenRouter API Key",
+    #             placeholder="Enter your OpenRouter API key here",
+    #             type="password",
+    #         )
+    #     with gr.Row():
+    #         num_questions = gr.Slider(
+    #             minimum=1,
+    #             maximum=340,
+    #             value=10,
+    #             label="Number of questions to run the benchmark on",
+    #         )
+    #     with gr.Row():
+    #         run_button = gr.Button("Run Benchmark")
+    #     with gr.Row():
+    #         with gr.Accordion("Results", open=True):
+    #             result = gr.Dataframe()
+    #     with gr.Row():
+    #         with gr.Accordion("Summary", open=False):
+    #             summary = gr.Dataframe()
+
+    #     run_button.click(
+    #         run_benchmark_gradio,
+    #         inputs=[
+    #             tool_name,
+    #             model_name,
+    #             num_questions,
+    #             openai_api_key,
+    #             anthropic_api_key,
+    #             openrouter_api_key,
+    #         ],
+    #         outputs=[result, summary],
+    #     )
 
 
 demo.queue(default_concurrency_limit=40).launch()
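Rather than commenting the whole tab out, the same effect could be achieved by gating the block behind a flag, so re-enabling it later does not mean uncommenting roughly 80 lines. The sketch below only illustrates that idea and is not part of this commit: RUN_BENCHMARK_ENABLED is a hypothetical environment variable, and the tab body is trimmed to a single button.

# sketch.py - hypothetical flag-gated variant of the app.py tab (assumption, not this commit)
import os

import gradio as gr

from tabs.howto_benchmark import how_to_run

# hypothetical flag; the tab stays hidden unless it is explicitly enabled
RUN_BENCHMARK_ENABLED = os.getenv("RUN_BENCHMARK_ENABLED", "false").lower() == "true"

if RUN_BENCHMARK_ENABLED:
    # the import this commit comments out can be guarded the same way
    from tabs.run_benchmark import run_benchmark_main  # noqa: F401

demo = gr.Blocks()

with demo:
    gr.Markdown(how_to_run)
    if RUN_BENCHMARK_ENABLED:
        # the real tab also has dropdowns, API-key fields, a slider and the
        # run_button.click(...) wiring; all omitted here for brevity
        with gr.TabItem("🔥 Run the Benchmark"):
            with gr.Row():
                run_button = gr.Button("Run Benchmark")

demo.queue(default_concurrency_limit=40).launch()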
start.py CHANGED
@@ -45,27 +45,27 @@ def start():
     """Start commands."""
     print("Starting commands...")
     base_dir = os.getcwd()
-    olas_dir = os.path.join(base_dir, "olas-predict-benchmark")
-    benchmark_dir = os.path.join(olas_dir, "benchmark")
-    mech_dir = os.path.join(olas_dir, "benchmark", "mech")
+    # olas_dir = os.path.join(base_dir, "olas-predict-benchmark")
+    # benchmark_dir = os.path.join(olas_dir, "benchmark")
+    # mech_dir = os.path.join(olas_dir, "benchmark", "mech")
 
     commands = [
         ("pip install poetry", base_dir),
         ("git submodule init", base_dir),
         ("git submodule update --init --recursive", base_dir),
         ("git submodule update --remote --recursive", base_dir),
-        (
-            'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
-            olas_dir,
-        ),
-        ("git remote update", olas_dir),
-        ("git fetch --all", olas_dir),
-        ("git checkout main", olas_dir),
-        ("git pull origin main", olas_dir),
-        ("git checkout main", mech_dir),
-        ("git pull origin main", mech_dir),
-        ("poetry install", benchmark_dir),
-        ("pip install -e .", mech_dir),
+        # (
+        #     'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
+        #     olas_dir,
+        # ),
+        # ("git remote update", olas_dir),
+        # ("git fetch --all", olas_dir),
+        # ("git checkout main", olas_dir),
+        # ("git pull origin main", olas_dir),
+        # ("git checkout main", mech_dir),
+        # ("git pull origin main", mech_dir),
+        # ("poetry install", benchmark_dir),
+        # ("pip install -e .", mech_dir),
         ("pip install lxml[html_clean]", base_dir),
         ("pip install --upgrade huggingface_hub", base_dir),
     ]
@@ -74,7 +74,7 @@ def start():
         run_command(command, cwd=cwd)
 
     # add benchmark to the path
-    sys.path.append(os.path.join(olas_dir, "benchmark"))
+    # sys.path.append(os.path.join(olas_dir, "benchmark"))
 
     # Download the dataset
     download_dataset()
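Both hunks leave the loop that calls run_command(command, cwd=cwd) untouched; only the olas-predict-benchmark checkout and editable installs are dropped from the command list. The helper itself is defined elsewhere in start.py and is not shown in this diff, so the following is just a plausible sketch of such a wrapper, assuming it shells out via subprocess and stops on the first failure.

# sketch of a run_command helper (assumption; the real one is not shown in this diff)
import subprocess


def run_command(command: str, cwd: str) -> None:
    """Run a shell command in the given working directory, raising on failure."""
    print(f"Running: {command} (cwd={cwd})")
    subprocess.run(command, shell=True, cwd=cwd, check=True)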
tabs/run_benchmark.py CHANGED
@@ -1,6 +1,6 @@
 import os
 
-from benchmark.run_benchmark import run_benchmark
+# from benchmark.run_benchmark import run_benchmark
 
 
 def run_benchmark_main(
@@ -55,7 +55,7 @@ def run_benchmark_main(
 
     # Run the benchmark
    try:
-        run_benchmark(kwargs=kwargs)
+        # run_benchmark(kwargs=kwargs)
         return "completed"
     except Exception as e:
         return f"Error running benchmark: {e}"