mboushaba committed on
Commit 37448c1 · verified · 1 Parent(s): 7f156f1

Create app.py

Files changed (1)
  1. app.py +72 -0
app.py ADDED
@@ -0,0 +1,72 @@
+ import time
+
+ import gradio as gr
+ import torch
+ from transformers import AutoModelForCausalLM
+ from transformers import AutoProcessor
+
+ model_id = "microsoft/Phi-3.5-vision-instruct"
+
+ # Note: set _attn_implementation='eager' if you don't have flash_attn installed
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id,
+     device_map="auto",
+     trust_remote_code=True,
+     torch_dtype=torch.bfloat16,
+     _attn_implementation='eager'
+ )
+ # run inference on CPU
+ device = torch.device("cpu")
+ model.to(device)
+
+ # for best performance, use num_crops=4 for multi-frame, num_crops=16 for single-frame.
+ processor = AutoProcessor.from_pretrained(model_id,
+                                           trust_remote_code=True,
+                                           num_crops=4
+                                           )
+
+ # Phi-3.5 chat template markers
+ user_prompt = '<|user|>\n'
+ assistant_prompt = '<|assistant|>\n'
+ prompt_suffix = "<|end|>\n"
+
+
+ def call_model(raw_image=None, text_input=None):
+     prompt = f"{user_prompt}<|image_1|>\n{text_input}{prompt_suffix}{assistant_prompt}"
+     image = raw_image.convert("RGB")
+
+     inputs = processor(prompt, image, return_tensors="pt").to(device)
+     generate_ids = model.generate(**inputs,
+                                   max_new_tokens=1000,
+                                   eos_token_id=processor.tokenizer.eos_token_id,
+                                   )
+     # drop the prompt tokens so only the newly generated answer is decoded
+     generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
+     response = processor.batch_decode(generate_ids,
+                                       skip_special_tokens=True,
+                                       clean_up_tokenization_spaces=False)[0]
+     return response
+
+
+ def get_model_memory_footprint(model_):
+     footprint = model_.get_memory_footprint()
+     return f"Footprint of the model: {footprint / 1e+6:.2f} MB"
+
+
+ def process(raw_image, prompt):
+     print("start...")
+     start_time = time.time()
+     memory_usage = get_model_memory_footprint(model)
+     model_response = call_model(raw_image=raw_image, text_input=prompt)
+     end_time = time.time()
+     execution_time = end_time - start_time
+     execution_time_min = round((execution_time / 60), 2)
+     print(f"Execution time: {execution_time:.4f} seconds")
+     print(f"Execution time: {execution_time_min:.2f} min")
+     return memory_usage, model_response, execution_time_min
+
+
+ iface = gr.Interface(process,
+                      inputs=[gr.Image(type='pil'), gr.Textbox(label="What do you want to ask?")],
+                      outputs=[gr.Textbox(label="Memory usage"), gr.Textbox(label="Model response"),
+                               gr.Textbox(label="Execution time (min)")])
+
+ if __name__ == '__main__':
+     iface.launch()
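
For reference, a minimal sketch of how the launched interface could be queried programmatically with gradio_client. The local URL, image path, and question are placeholders, and api_name="/predict" is assumed to be the default endpoint that Gradio exposes for a single gr.Interface.

from gradio_client import Client, handle_file

# Assumes the app above is running locally on Gradio's default port.
client = Client("http://127.0.0.1:7860/")

# Inputs mirror the Interface: an image plus a free-text question;
# outputs come back as (memory usage, model response, execution time in min).
memory_usage, model_response, execution_time_min = client.predict(
    handle_file("example.jpg"),   # placeholder image path
    "Describe this image.",       # placeholder question
    api_name="/predict",
)
print(model_response)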