import argparse
import re

import streamlit as st
from datasets import load_dataset
from langchain_community.llms import OpenAI
from tqdm import tqdm


def load_data(split="test"):
    """Load the HumanEvalPack dataset and print basic statistics for the chosen split."""
    data = load_dataset("bigcode/humanevalpack", trust_remote_code=True)
    print("=========== dataset statistics ===========")
    print(len(data[split]))
    print("==========================================")
    return data[split]


def split_function_header_and_docstring(s):
    """Split a HumanEvalPack prompt into the function header and its docstring."""
    pattern = re.compile(r"(\"\"\"(.*?)\"\"\"|\'\'\'(.*?)\'\'\')", re.DOTALL)
    matches = pattern.findall(s)
    if not matches:
        raise ValueError(f"No docstring found in prompt: {s!r}")
    # findall returns tuples of capture groups; index 0 is the full quoted docstring.
    docstring = matches[-1][0]
    code_without_docstring = s.replace(docstring, "").strip()
    docstring = docstring.replace('"', "")
    return code_without_docstring, docstring


def prepare_model_input(code_data):
    """Build the critic prompt for a single HumanEvalPack example."""
    prompt = """Provide feedback on the errors in the given code and suggest the correct code to address the described problem.
Problem Description: {description}
Incorrect Code: {wrong_code}"""
    description = code_data["prompt"]
    function_header, docstring = split_function_header_and_docstring(description)
    # Keep only the natural-language description; drop the doctest examples after ">>>".
    problem = docstring.split(">>>")[0]
    wrong_code = function_header + code_data["buggy_solution"]
    model_input = prompt.format(description=problem, wrong_code=wrong_code)
    return model_input, problem, function_header


def load_and_prepare_data():
    """Pre-compute the critic prompt for every problem in the dataset."""
    dataset = load_data()
    all_model_inputs = {}
    print("### load and prepare data")
    for data in tqdm(dataset):
        problem_id = data["task_id"]
        buggy_solution = data["buggy_solution"]
        model_input, problem, function_header = prepare_model_input(data)
        new_model_input = (
            "Provide feedback on the errors in the given code and suggest the correct code to address the described problem."
            f"\nProblem Description:{problem}\nIncorrect Code:\n{buggy_solution}\nFeedback:"
        )
        all_model_inputs[problem_id] = {
            "model_input": new_model_input,
            "header": function_header,
            "problem_description": problem,
            "data": data,
        }
    return all_model_inputs


# Use the same split here as in load_and_prepare_data() so indices stay consistent.
dataset = load_dataset("bigcode/humanevalpack", split="test", trust_remote_code=True)
problem_ids = [problem["task_id"] for problem in dataset]
all_model_inputs = load_and_prepare_data()

# Ports for locally served models; the defaults are placeholders.
parser = argparse.ArgumentParser()
parser.add_argument("--editor_port", type=str, default="6000")
parser.add_argument("--critic_port", type=str, default="6001")
args = parser.parse_args()

# LangChain clients for the critic and editor models, served behind OpenAI-compatible
# endpoints (the API key is unused but required by the client). To run against local
# servers instead, set openai_api_base to f"http://localhost:{args.editor_port}/v1"
# and f"http://localhost:{args.critic_port}/v1".
editor_model = OpenAI(
    model="Anonymous-COFFEE/COFFEEPOTS-editor",
    api_key="EMPTY",
    openai_api_base="https://editor.jp.ngrok.io/v1",
)
critic_model = OpenAI(
    model="Anonymous-COFFEE/COFFEEPOTS-critic",
    api_key="EMPTY",
    openai_api_base="https://critic.jp.ngrok.io/v1",
)

st.title("Demo for COFFEEPOTS")

selected_task_id = st.selectbox("Select a problem ID:", problem_ids)

# Retrieve and display the selected problem's details.
problem_details = dataset[problem_ids.index(selected_task_id)]
st.write(f"**Selected Problem ID:** {problem_details['task_id']}")
st.write(f"**Problem Description:**\n{all_model_inputs[selected_task_id]['problem_description']}")

# Display the buggy code with syntax highlighting.
st.code(problem_details["buggy_solution"], language="python")

# Placeholders that are filled in once generation starts.
status_text = st.empty()
code_output = st.code("", language="python")


def generate_feedback():
    """Stream natural-language feedback on the buggy code from the critic model."""
    return critic_model.stream(
        input=all_model_inputs[selected_task_id]["model_input"],
        logit_bias=None,
    )


def generate_corrected_code(feedback):
    """Stream the corrected code from the editor model, wrapped in a Markdown code fence."""
    yield "```python"
    for text_chunk in editor_model.stream(
        input=f"[INST]Buggy Code:\n{problem_details['buggy_solution']}\nFeedback: {feedback}[/INST]",
        logit_bias=None,
    ):
        yield text_chunk
    yield "```"


if st.button("Generate Feedback and Corrected Code"):
    with st.spinner("Generating feedback..."):
        print("model input for critic:")
        print(all_model_inputs[selected_task_id]["model_input"])
        feedback = status_text.write_stream(generate_feedback())

    with st.spinner("Generating corrected code..."):
        corrected_code = code_output.write_stream(generate_corrected_code(feedback))
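
# A minimal launch sketch, assuming this file is saved as app.py (the filename and the
# port values are illustrative; adjust them to your deployment):
#   streamlit run app.py -- --editor_port 6000 --critic_port 6001
# Streamlit forwards everything after "--" to the script, so argparse receives the port flags.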