Spaces:

xu3kev
/

llm_visual_program_sythensis

Running on Zero

App Files Files Community

xu3kev committed on Jun 13, 2024

Commit

2795c26

1 Parent(s): be3d62e

update

Browse files

Files changed (1) hide show

app.py +42 -22

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import argparse
 import json
 import numpy as np
@@ -8,9 +9,17 @@ from openai import OpenAI
 from func_timeout import FunctionTimedOut, func_timeout
 from tqdm import tqdm
-MOCK = True
 TEST_FOLDER = "c4f5"
 INPUT_STRUCTION_TEMPLATE = """Here is a gray scale images representing with integer values 0-9.
 {image_str}
 Please write a Python program that generates the image using our own custom turtle module"""
@@ -208,25 +217,39 @@ def generate_grid_images(folder):
     return image_array
 def llm_call(question_prompt, model_name,
     temperature=1, max_tokens=320,
     top_p=1, n_samples=64, stop=None):
-    client = OpenAI(base_url=f"http://localhost:{PORT}/v1", api_key="empty")
-    response = client.completions.create(
-        prompt=question_prompt,
-        model=model_name,
-        temperature=temperature,
-        max_tokens=max_tokens,
-        top_p=top_p,
-        frequency_penalty=0,
-        presence_penalty=0,
-        n=n_samples,
-        stop=stop
-    )
-    return response
 import cv2
@@ -287,12 +310,9 @@ turtle.save('{fname}')
 def run(img_str):
     prompt = PROMPT_TEMPLATE.format(input_struction=INPUT_STRUCTION_TEMPLATE.format(image_str=img_str))
     if not MOCK:
-        response = llm_call(prompt, MODEL_NAME)
-        print(response)
-        codes = []
-        for i, choice in enumerate(response.choices):
-            print(f"Choice {i}: {choice.text}")
-            codes.append(choice.text)
     else:
         codes = MOCK_RESPONSE
@@ -396,7 +416,7 @@ def main():
         gr.Markdown("""Here we can draw a target image using the sketchpad, and see what kinds of graphics program LLM generates. To allow the LLM to visually perceive the input image, we convert the image to ASCII strings.""")
         gr.Markdown("## Draw logo")
         with gr.Column():
-            canvas = gr.Sketchpad(canvas_size=(512,512), brush=Brush(colors=["black"], default_size=3, color_mode='fixed'))
             submit_button = gr.Button("Submit")
             output_image = gr.Image(label="output")

+import spaces
 import argparse
 import json
 import numpy as np
 from func_timeout import FunctionTimedOut, func_timeout
 from tqdm import tqdm
+HUGGINGFACE=True
+MOCK = False
 TEST_FOLDER = "c4f5"
+if HUGGINGFACE:
+    MODEL_NAME="xu3kev/deepseekcoder-7b-logo-pbe"
+    import torch
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+    hug_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, attn_implementation="flash_attention_2",).to('cuda')
+    hug_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 INPUT_STRUCTION_TEMPLATE = """Here is a gray scale images representing with integer values 0-9.
 {image_str}
 Please write a Python program that generates the image using our own custom turtle module"""
     return image_array
@spaces.GPU
def llm_call(question_prompt, model_name,
    temperature=1, max_tokens=320,
    top_p=1, n_samples=64, stop=None):
    """Sample candidate completions for ``question_prompt`` and return them as strings.

    Two backends are supported, selected by the module-level ``HUGGINGFACE`` flag:

    * ``HUGGINGFACE`` truthy: generate with the module-level ``hug_model`` /
      ``hug_tokenizer`` on CUDA. Only ``temperature`` is forwarded in this
      branch; the total length limit (``max_length=1400``) and the number of
      samples (``num_return_sequences=16``) are fixed here, and ``model_name``,
      ``max_tokens``, ``top_p``, ``n_samples`` and ``stop`` are not used.
      NOTE(review): honoring those parameters would change the sample count
      and length budget of existing callers, so they are left as-is.
    * otherwise: call an OpenAI-compatible completions endpoint at
      ``http://localhost:{PORT}/v1`` with every sampling parameter forwarded.

    Args:
        question_prompt: Full prompt text to complete.
        model_name: Model identifier sent to the completions endpoint.
        temperature: Sampling temperature.
        max_tokens: Completion length limit (endpoint branch only).
        top_p: Nucleus sampling threshold (endpoint branch only).
        n_samples: Number of completions requested (endpoint branch only).
        stop: Optional stop sequence(s) (endpoint branch only).

    Returns:
        A list of completion strings with the prompt removed. In the Hugging
        Face branch each string is stripped of surrounding whitespace and
        terminated with a single newline; in the endpoint branch the raw
        ``choice.text`` values are returned.
    """
    if HUGGINGFACE:
        model_inputs = hug_tokenizer([question_prompt], return_tensors="pt").to('cuda')
        # Remember how many tokens belong to the prompt so it can be removed
        # at the token level after generation.
        prompt_token_count = model_inputs["input_ids"].shape[1]
        generated_ids = hug_model.generate(
            **model_inputs,
            max_length=1400,
            temperature=temperature,
            num_return_sequences=16,
            do_sample=True,
        )
        # Slice off the prompt tokens before decoding. Cutting the decoded
        # text by len(question_prompt) is only correct when decoding
        # reproduces the prompt character-for-character, which a tokenizer
        # round-trip does not guarantee.
        completion_ids = generated_ids[:, prompt_token_count:]
        completions = hug_tokenizer.batch_decode(completion_ids, skip_special_tokens=True)
        return [completion.strip() + '\n' for completion in completions]

    client = OpenAI(base_url=f"http://localhost:{PORT}/v1", api_key="empty")
    response = client.completions.create(
        prompt=question_prompt,
        model=model_name,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        frequency_penalty=0,
        presence_penalty=0,
        n=n_samples,
        stop=stop
    )
    codes = []
    for i, choice in enumerate(response.choices):
        # Echo every sampled completion to stdout for debugging.
        print(f"Choice {i}: {choice.text}")
        codes.append(choice.text)
    return codes
 import cv2
 def run(img_str):
     prompt = PROMPT_TEMPLATE.format(input_struction=INPUT_STRUCTION_TEMPLATE.format(image_str=img_str))
     if not MOCK:
+        responses = llm_call(prompt, MODEL_NAME)
+        print(responses)
+        codes = responses
     else:
         codes = MOCK_RESPONSE
         gr.Markdown("""Here we can draw a target image using the sketchpad, and see what kinds of graphics program LLM generates. To allow the LLM to visually perceive the input image, we convert the image to ASCII strings.""")
         gr.Markdown("## Draw logo")
         with gr.Column():
+            canvas = gr.Sketchpad(canvas_size=(512,512), brush=Brush(colors=["black"], default_size=2, color_mode='fixed'))
             submit_button = gr.Button("Submit")
             output_image = gr.Image(label="output")