miaw1419 committed on
Commit
3156d96
·
verified ·
1 Parent(s): c46710e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -35
app.py CHANGED
@@ -13,6 +13,8 @@ from peft import PeftModel
13
  import re
14
  import spaces
15
  from diffusers import StableDiffusionXLPipeline, DiffusionPipeline
 
 
16
 
17
  device = "cuda" if torch.cuda.is_available() else "cpu"
18
  print(device)
@@ -55,17 +57,14 @@ css="""
55
  margin: 0 auto;
56
  max-width: 520px;
57
  }
58
-
59
  #gen-container {
60
  margin: 0 auto;
61
  max-width: 640px;
62
  }
63
-
64
  #title-container {
65
  margin: 0 auto;
66
  max-width: 1340px;
67
  }
68
-
69
  #main-container {
70
  margin: 0 auto;
71
  max-width: 1340px;
@@ -141,7 +140,7 @@ comment_images = [
141
  ]
142
 
143
  comments = {'test.png': "Not sure about the concept, it's too straightforward. Though the boy looks kinda creepy which makes it exciting. the art style is pretty to look at. I like that the colors are muted, but wish they were a bit darker to make it more eerie and add depth.", 'comment_images/0.png': "Hate this with a passion. The colors are too vibrant and don't match at all. I hate these colors in general. The patterns are too abstract and contemporary. a 5-year-old could draw this. pass.", 'comment_images/1.png': "Woah I love the art style. The texture feels like old paper which is oh so beautiful. There are so many details to focus on. I love the expressive lines and how busy the composition is. Even though orange isn't my favorite, the greenish blue color of the water is so gorgeous.", 'comment_images/2.png': "I don't like how monochromatic and muted this one is. but the paperish texture is nice and the details are so intricate.", 'comment_images/3.png': "Oh super pretty! Looks so smooth and wet. Love the details and loose lines too. Feels mystical and magical and eerie. Also dark purples and blues? deep indigo? My fav ever. I'm here for it.", 'comment_images/4.png': "Love the art style. The uncanny vibe and nightmarish horror is so cool. Like its horror but if you squint you can't tell? Love the strange. wish it had more colors though. not a fan of greyscale.", 'comment_images/5.png': 'omg I hate this haha. what the hell. everything about it disgusts me so boring and childish ew.', 'comment_images/6.png': 'yessss. give it to the texture give it to the brushstrokes give it to the style. perfect. just wish the colors were less beige and more bold. I want an active nightmare. but kisses to the surrealism.'}
144
- comments = dict()
145
 
146
  image_index = 0
147
 
@@ -205,23 +204,90 @@ def clear_comments():
205
  extract_vp_botton = gr.Button(f"Extract visual preference from {len(comments)} comments", interactive=len(comments) != 0)
206
  clear_botton = gr.Button("Clear comments", interactive=len(comments) != 0)
207
  return extract_vp_botton, clear_botton
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
- @spaces.GPU(duration=200)
211
  def extract_vp():
212
  if valid_api == "":
213
-
214
- global comments
215
 
216
- prompt = """I will provide a set of artworks along with accompanying comments from a person. Analyze these artworks and the comments on them and identify artistic features such as present or mentioned colors, style, composition, mood, medium, texture, brushwork, lighting, shadow effects, perspective, and other noteworthy elements.
217
 
218
- Your task is to extract the artistic features the person likes and dislikes based on both the artworks' features and the person's comments. Focus solely on artistic aspects and refrain from considering subject matter.
 
 
 
219
 
220
- If the person expresses a preference for a specific aspect without clearly stating its category (e.g., appreciating the colors without specifying which colors), identify these specific features from the images directly to make the person's preference understandable without needing to see the artwork.
 
221
 
222
- Your output should consist of two concise lists of keywords: one listing the specific art features the person likes and another listing the specific features they dislike (specified in keyword format without using sentences).
223
 
224
- Here are the images and their corresponding comments:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  """
226
  messages = [
227
  {
@@ -236,11 +302,30 @@ Here are the images and their corresponding comments:
236
  comment_number = 1
237
  for image in comments:
238
  comment = comments[image]
239
- image = Image.open(image)
240
- images.append(image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
  messages[0]["content"].append(
243
- {"type": "image"}
 
 
 
 
 
244
  )
245
 
246
  messages[0]["content"].append(
@@ -248,17 +333,15 @@ Here are the images and their corresponding comments:
248
  "text": f"Comment {comment_number}: {comment}"}
249
  )
250
  comment_number = comment_number + 1
251
-
252
- prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
253
- inputs = processor(text=prompt, images=images, return_tensors="pt")
254
- inputs = {k: v.to(device) for k, v in inputs.items()}
255
-
256
- generated_ids = vpe_model.generate(**inputs, max_new_tokens=2000, repetition_penalty=0.99, do_sample=False)
257
- generated_texts = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
258
- positive_vp, negative_vp = re.search('.* \nAssistant: Liked Art Features: (.*)\nDisliked Art Features: (.*)', generated_texts).groups()
259
 
260
- else:
261
- PRINT(valid_api)
 
 
 
 
 
 
262
 
263
  gr.Info("Visual preference successfully extracted.")
264
 
@@ -266,13 +349,27 @@ Here are the images and their corresponding comments:
266
 
267
  def api_fn(api):
268
  global valid_api
269
- if api != "correct":
270
- gr.Warning("Invalid API!")
271
- valid_api = ""
272
- else:
 
 
 
 
 
 
 
273
  gr.Info("Valid API")
 
274
  valid_api = api
275
 
 
 
 
 
 
 
276
  @spaces.GPU(duration=45)
277
  def generate(prompt, vp_pos, vp_neg, slider):
278
  if vp_pos == "" and vp_neg == "":
@@ -306,7 +403,6 @@ with gr.Blocks(css=css, title="ViPer Demo", theme=gr.themes.Base()) as demo:
306
  \n
307
  \n
308
  \n
309
-
310
  """)
311
  with gr.Row(elem_id="main-container"):
312
 
@@ -355,7 +451,7 @@ with gr.Blocks(css=css, title="ViPer Demo", theme=gr.themes.Base()) as demo:
355
 
356
  clear_botton = gr.Button("Clear comments", interactive=len(comments) != 0)
357
 
358
- with gr.Accordion("Enter GPT API for Better Results (optional)", open=False):
359
  with gr.Row():
360
  api = gr.Text(
361
  max_lines=1,
@@ -490,7 +586,4 @@ with gr.Blocks(css=css, title="ViPer Demo", theme=gr.themes.Base()) as demo:
490
  [comment_image, comment]
491
  )
492
 
493
- demo.launch(share=True)
494
-
495
-
496
-
 
13
  import re
14
  import spaces
15
  from diffusers import StableDiffusionXLPipeline, DiffusionPipeline
16
+ import anthropic
17
+ import base64
18
 
19
  device = "cuda" if torch.cuda.is_available() else "cpu"
20
  print(device)
 
57
  margin: 0 auto;
58
  max-width: 520px;
59
  }
 
60
  #gen-container {
61
  margin: 0 auto;
62
  max-width: 640px;
63
  }
 
64
  #title-container {
65
  margin: 0 auto;
66
  max-width: 1340px;
67
  }
 
68
  #main-container {
69
  margin: 0 auto;
70
  max-width: 1340px;
 
140
  ]
141
 
142
  comments = {'test.png': "Not sure about the concept, it's too straightforward. Though the boy looks kinda creepy which makes it exciting. the art style is pretty to look at. I like that the colors are muted, but wish they were a bit darker to make it more eerie and add depth.", 'comment_images/0.png': "Hate this with a passion. The colors are too vibrant and don't match at all. I hate these colors in general. The patterns are too abstract and contemporary. a 5-year-old could draw this. pass.", 'comment_images/1.png': "Woah I love the art style. The texture feels like old paper which is oh so beautiful. There are so many details to focus on. I love the expressive lines and how busy the composition is. Even though orange isn't my favorite, the greenish blue color of the water is so gorgeous.", 'comment_images/2.png': "I don't like how monochromatic and muted this one is. but the paperish texture is nice and the details are so intricate.", 'comment_images/3.png': "Oh super pretty! Looks so smooth and wet. Love the details and loose lines too. Feels mystical and magical and eerie. Also dark purples and blues? deep indigo? My fav ever. I'm here for it.", 'comment_images/4.png': "Love the art style. The uncanny vibe and nightmarish horror is so cool. Like its horror but if you squint you can't tell? Love the strange. wish it had more colors though. not a fan of greyscale.", 'comment_images/5.png': 'omg I hate this haha. what the hell. everything about it disgusts me so boring and childish ew.', 'comment_images/6.png': 'yessss. give it to the texture give it to the brushstrokes give it to the style. perfect. just wish the colors were less beige and more bold. I want an active nightmare. but kisses to the surrealism.'}
143
+ #comments = dict()
144
 
145
  image_index = 0
146
 
 
204
  extract_vp_botton = gr.Button(f"Extract visual preference from {len(comments)} comments", interactive=len(comments) != 0)
205
  clear_botton = gr.Button("Clear comments", interactive=len(comments) != 0)
206
  return extract_vp_botton, clear_botton
207
+
208
+ @spaces.GPU(duration=120)
209
+ def extract_vp_from_vpe():
210
+ global comments
211
+
212
+ prompt = """I will provide a set of artworks along with accompanying comments from a person. Analyze these artworks and the comments on them and identify artistic features such as present or mentioned colors, style, composition, mood, medium, texture, brushwork, lighting, shadow effects, perspective, and other noteworthy elements.
213
+ Your task is to extract the artistic features the person likes and dislikes based on both the artworks' features and the person's comments. Focus solely on artistic aspects and refrain from considering subject matter.
214
+ If the person expresses a preference for a specific aspect without clearly stating its category (e.g., appreciating the colors without specifying which colors), identify these specific features from the images directly to make the person's preference understandable without needing to see the artwork.
215
+ Your output should consist of two concise lists of keywords: one listing the specific art features the person likes and another listing the specific features they dislike (specified in keyword format without using sentences).
216
+ Here are the images and their corresponding comments:
217
+ """
218
+ messages = [
219
+ {
220
+ "role": "user",
221
+ "content": [
222
+ {"type": "text",
223
+ "text": prompt},
224
+ ]
225
+ }
226
+ ]
227
+ images = []
228
+ comment_number = 1
229
+ for image in comments:
230
+ comment = comments[image]
231
+ image = Image.open(image)
232
+ images.append(image)
233
 
234
+ messages[0]["content"].append(
235
+ {"type": "image"}
236
+ )
237
+
238
+ messages[0]["content"].append(
239
+ {"type": "text",
240
+ "text": f"Comment {comment_number}: {comment}"}
241
+ )
242
+ comment_number = comment_number + 1
243
+
244
+ prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
245
+ inputs = processor(text=prompt, images=images, return_tensors="pt")
246
+ inputs = {k: v.to(device) for k, v in inputs.items()}
247
+
248
+ generated_ids = vpe_model.generate(**inputs, max_new_tokens=2000, repetition_penalty=0.99, do_sample=False)
249
+ generated_texts = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
250
+ positive_vp, negative_vp = re.search('.* \nAssistant: Liked Art Features: (.*)\nDisliked Art Features: (.*)', generated_texts).groups()
251
+ return positive_vp, negative_vp
252
 
 
253
  def extract_vp():
254
  if valid_api == "":
255
+ positive_vp, negative_vp = extract_vp_from_vpe()
 
256
 
 
257
 
258
+ else:
259
+ client = anthropic.Anthropic(
260
+ api_key=valid_api,
261
+ )
262
 
263
+ prompt = """**Objective:**
264
+ Analyze a set of artworks and accompanying comments from a person to identify artistic features they like and dislike.
265
 
266
+ **Steps:**
267
 
268
+ 1. **Analyze Artworks and Comments:**
269
+ - Examine each artwork for artistic features such as colors, style, composition, mood, medium, texture, brushwork, lighting, shadow effects, perspective, and other noteworthy elements.
270
+ - Review the accompanying comments to understand the person's preferences and opinions on these features.
271
+
272
+ 2. **Identify Preferences:**
273
+ - Extract artistic features that the person likes and dislikes based on the artworks' features and the comments.
274
+ - Focus solely on artistic aspects and ignore the subject matter.
275
+ - Convert the art features mentioned in the comments to well-known synonyms if needed.
276
+
277
+ 3. **Resolve Ambiguous Preferences:**
278
+ - If the person expresses a preference without clearly stating its category (e.g., "I like the style" without specifying which style), identify these specific features from the images directly.
279
+ - Make the person's preference understandable and independednt of the artworks.
280
+
281
+ 4. **Output Format:**
282
+ - Create two concise lists of keywords: one for features the person likes and another for features they dislike.
283
+ - Ensure the lists are in keyword format, divided by commas, without using sentences.
284
+ - Maintain detail and accuracy for all comments and images.
285
+
286
+ **Your Task:**
287
+
288
+ Follow the example format and ensure that your output consists of two lists of keywords summarizing the person's preferences based on the artworks and comments provided. Consider all comments and images comprehensively.
289
+
290
+ **Example**: example START:
291
  """
292
  messages = [
293
  {
 
302
  comment_number = 1
303
  for image in comments:
304
  comment = comments[image]
305
+ if not image.lower().endswith(".jpg"):
306
+ jpg_image_path = image.replace("png", "jpg")
307
+ image = Image.open(image)
308
+ rgb_img = image.convert("RGB")
309
+ rgb_img.save(jpg_image_path, format="JPEG")
310
+ with open(jpg_image_path, "rb") as image_file:
311
+ image = base64.b64encode(image_file.read()).decode("utf-8")
312
+
313
+ else:
314
+ with open(image_path, "rb") as image_file:
315
+ image = base64.b64encode(image_file.read()).decode("utf-8")
316
+
317
+ messages[0]["content"].append(
318
+ {"type": "text",
319
+ "text": f"Image {comment_number}:"}
320
+ )
321
 
322
  messages[0]["content"].append(
323
+ {"type": "image",
324
+ "source": {
325
+ "type": "base64",
326
+ "media_type": "image/jpeg",
327
+ "data": image,
328
+ },}
329
  )
330
 
331
  messages[0]["content"].append(
 
333
  "text": f"Comment {comment_number}: {comment}"}
334
  )
335
  comment_number = comment_number + 1
 
 
 
 
 
 
 
 
336
 
337
+ message = client.messages.create(
338
+ model="claude-3-5-sonnet-20240620",
339
+ max_tokens=1024,
340
+ messages=messages
341
+ )
342
+
343
+ generated_text = message.content[0].text
344
+ positive_vp, negative_vp = re.search('.*Like.*:\n(.*)\n*Dislike.*:\n(.*)', generated_text).groups()
345
 
346
  gr.Info("Visual preference successfully extracted.")
347
 
 
349
 
350
  def api_fn(api):
351
  global valid_api
352
+ client = anthropic.Anthropic(
353
+ api_key=api,
354
+ )
355
+ try:
356
+ message = client.messages.create(
357
+ model="claude-3-5-sonnet-20240620",
358
+ max_tokens=1024,
359
+ messages=[
360
+ {"role": "user", "content": "Hello, Claude"}
361
+ ]
362
+ )
363
  gr.Info("Valid API")
364
+ print("correct")
365
  valid_api = api
366
 
367
+ except anthropic.AuthenticationError:
368
+ gr.Warning("Invalid API!")
369
+ valid_api = ""
370
+
371
+
372
+
373
  @spaces.GPU(duration=45)
374
  def generate(prompt, vp_pos, vp_neg, slider):
375
  if vp_pos == "" and vp_neg == "":
 
403
  \n
404
  \n
405
  \n
 
406
  """)
407
  with gr.Row(elem_id="main-container"):
408
 
 
451
 
452
  clear_botton = gr.Button("Clear comments", interactive=len(comments) != 0)
453
 
454
+ with gr.Accordion("Enter Cluade API for Better Results (optional)", open=False):
455
  with gr.Row():
456
  api = gr.Text(
457
  max_lines=1,
 
586
  [comment_image, comment]
587
  )
588
 
589
+ demo.launch(share=True)