"""Gradio front end for the ShaderEval evaluation suite.

Loads the ``Vipitis/ShaderEval`` evaluation suite once at import time and
exposes a small web UI that runs it against a user-supplied model checkpoint.
"""

import gradio as gr
import evaluate

# Loading by hub id downloads the suite definition.
suite = evaluate.EvaluationSuite.load("Vipitis/ShaderEval")

# TODO: can you import it locally instead?
#   from ShaderEval import Suite
#   suite = Suite("Vipitis/ShaderEval")
# TODO: save results to a file?

# Markdown shown at the top of the page. This is user-facing runtime text.
text = """# Welcome to the ShaderEval Suite.

This space hosts the ShaderEval Suite. more to follow soon.
For an interactive Demo and more information see the demo space [ShaderCoder](https://huggingface.co/spaces/Vipitis/ShaderCoder)

# Task1: Return Completion
## Explanation
Modelled after the [CodeXGLUE code_completion_line](https://huggingface.co/datasets/code_x_glue_cc_code_completion_line) task.
Using the "return_completion" subset of the [Shadertoys-fine dataset](https://huggingface.co/datasets/Vipitis/Shadertoys-fine).
All preprocessing and post proessing is done by the custom evaluator for this suite. It should be as easy as just giving it a model checkpoint that can do the "text-generation" task.
Evaluated is currently with just [exact_match](https://huggingface.co/metrics/exact_match).

## Notice
should you find any model that throws an error, please let me know in the issues tab. Several parts of this suite are still missing.

## Instructions
### Run the code yourself:.
```python
import evaluate
suite = evaluate.EvaluationSuite.load("Vipitis/ShaderEval")
model_cp = "gpt2"
suite.run(model_cp, snippet=300)
```

### try the demo below
- Select a **model checkpoint** from the "dropdown"
- Select how many **samples** to run (there us up to 300 from the test set)
- Click **Run** to run the suite
- The results will be displayed in the **Output** box

## Results
![](file/bar.png)
Additionally, you can report results to your models and it should show up on this [leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards?dataset=Vipitis%2FShadertoys-fine)

## Todo (feel free to contribute in a [Pull Request](https://huggingface.co/spaces/Vipitis/ShaderEval/discussions?status=open&type=pull_request))
- [~] leaderboard (via autoevaluate and self reporting)
- [?] supporting batches to speed up inference
- [ ] CER metric (via a custom metric perhaps?)
- [x] removing the pad_token warning
- [ ] adding OpenVINO pipelines for inference, pending on OpenVINO release
- [ ] task1b for "better" featuring a improved testset as well as better metrics. Will allow more generation parameters
- [ ] semantic match by comparing the rendered frames (depending on WGPU implementation?)
- [ ] CLIP match to evaluate rendered images fitting to title/description
"""


def run_suite(model_cp, snippet):
    """Run the ShaderEval suite for one model and return its first result.

    Args:
        model_cp: Model checkpoint identifier passed straight to
            ``suite.run`` (the UI defaults to ``"gpt2"``).
        snippet: Number of samples to evaluate. The UI slider supplies this
            value; it is coerced to ``int`` before being handed to the suite.

    Returns:
        The first element of whatever ``suite.run`` returns.
    """
    # The slider is configured with step=1.0, so the value may arrive as a
    # float; the usage shown in the page text passes an integer sample count.
    num_samples = int(snippet)
    results = suite.run(model_cp, num_samples)
    print(results)  # so they show up in the logs for me.
    return results[0]


with gr.Blocks() as site:
    text_md = gr.Markdown(text)
    model_cp = gr.Textbox(value="gpt2", label="Model Checkpoint", interactive=True)
    # `value` (not `default`) sets the slider's initial position.
    first_n = gr.Slider(minimum=1, maximum=300, value=5, label="num_samples", step=1.0)
    output = gr.Textbox(label="Output")
    # A button's caption is its `value`; Button has no `label` parameter.
    run_button = gr.Button(value="Run")
    run_button.click(fn=run_suite, inputs=[model_cp, first_n], outputs=output)

site.launch()