Awiny committed on
Commit
8381241
·
1 Parent(s): e0a0001

make the pipeline simple

Browse files
app.py CHANGED
@@ -49,7 +49,8 @@ def process_image(image_src, options=None, processor=None):
49
  print(options)
50
  if options is None:
51
  options = []
52
- processor.args.semantic_segment = "Semantic Segment" in options
 
53
  image_generation_status = "Image Generation" in options
54
  image_caption, dense_caption, region_semantic, gen_text = processor.image_to_text(image_src)
55
  if image_generation_status:
@@ -93,7 +94,7 @@ processor = ImageTextTransformation(args)
93
 
94
  # Create Gradio input and output components
95
  image_input = gr.inputs.Image(type='filepath', label="Input Image")
96
- semantic_segment_checkbox = gr.inputs.Checkbox(label="Semantic Segment", default=False)
97
  image_generation_checkbox = gr.inputs.Checkbox(label="Image Generation", default=False)
98
 
99
  logo_base64 = add_logo()
@@ -101,7 +102,7 @@ logo_base64 = add_logo()
101
  title_with_logo = f'<img src="data:image/jpeg;base64,{logo_base64}" width="400" style="vertical-align: middle;"> Understanding Image with Text'
102
 
103
  examples = [
104
- ["examples/test_3.jpg"],
105
  ]
106
 
107
  # Create Gradio interface
@@ -110,17 +111,18 @@ interface = gr.Interface(
110
  inputs=[image_input,
111
  gr.CheckboxGroup(
112
  label="Options",
113
- choices=["Semantic Segment", "Image Generation"],
114
  ),
115
  ],
116
  outputs=gr.outputs.HTML(),
117
  title=title_with_logo,
118
- # examples=examples,
119
  description="""
120
  This code support image to text transformation. Then the generated text can do retrieval, question answering et al to conduct zero-shot.
121
- \n Since GPU is expensive, we use CPU for demo. Run code local with gpu or google colab we provided for fast speed.
122
- \n Semantic segment is very slow in cpu(~8m).
123
- \n Ttext2image model is controlnet is also very slow in cpu(~2m), which used canny edge as reference.
 
124
  """
125
  )
126
 
 
49
  print(options)
50
  if options is None:
51
  options = []
52
+ # processor.args.semantic_segment = "Semantic Segment" in options
53
+ processor.args.semantic_segment = False
54
  image_generation_status = "Image Generation" in options
55
  image_caption, dense_caption, region_semantic, gen_text = processor.image_to_text(image_src)
56
  if image_generation_status:
 
94
 
95
  # Create Gradio input and output components
96
  image_input = gr.inputs.Image(type='filepath', label="Input Image")
97
+ # semantic_segment_checkbox = gr.inputs.Checkbox(label="Semantic Segment", default=False)
98
  image_generation_checkbox = gr.inputs.Checkbox(label="Image Generation", default=False)
99
 
100
  logo_base64 = add_logo()
 
102
  title_with_logo = f'<img src="data:image/jpeg;base64,{logo_base64}" width="400" style="vertical-align: middle;"> Understanding Image with Text'
103
 
104
  examples = [
105
+ ["examples/test_4.jpg"],
106
  ]
107
 
108
  # Create Gradio interface
 
111
  inputs=[image_input,
112
  gr.CheckboxGroup(
113
  label="Options",
114
+ choices=["Image Generation"],
115
  ),
116
  ],
117
  outputs=gr.outputs.HTML(),
118
  title=title_with_logo,
119
+ examples=examples,
120
  description="""
121
  This code support image to text transformation. Then the generated text can do retrieval, question answering et al to conduct zero-shot.
122
+ \n Github: https://github.com/showlab/Image2Paragraph
123
+ \n Twitter: https://twitter.com/awinyimgprocess/status/1646225454599372800?s=46&t=HvOe9T2n35iFuCHP5aIHpQ
124
+ \n Since GPU is expensive, we use CPU for demo and not include semantic segment anything. Run code local with gpu or google colab we provided for fast speed.
125
+ \n Ttext2image model is controlnet ( very slow in cpu(~2m)), which used canny edge as reference.
126
  """
127
  )
128
 
models/__pycache__/controlnet_model.cpython-38.pyc CHANGED
Binary files a/models/__pycache__/controlnet_model.cpython-38.pyc and b/models/__pycache__/controlnet_model.cpython-38.pyc differ
 
models/__pycache__/image_text_transformation.cpython-38.pyc CHANGED
Binary files a/models/__pycache__/image_text_transformation.cpython-38.pyc and b/models/__pycache__/image_text_transformation.cpython-38.pyc differ
 
models/image_text_transformation.py CHANGED
@@ -33,7 +33,8 @@ class ImageTextTransformation:
33
  self.dense_caption_model = DenseCaptioning(device=self.args.dense_caption_device)
34
  self.gpt_model = ImageToText(openai_key)
35
  self.controlnet_model = TextToImage(device=self.args.contolnet_device)
36
- self.region_semantic_model = RegionSemantic(device=self.args.semantic_segment_device)
 
37
  print('\033[1;32m' + "Model initialization finished!".center(50, '-') + '\033[0m')
38
 
39
 
 
33
  self.dense_caption_model = DenseCaptioning(device=self.args.dense_caption_device)
34
  self.gpt_model = ImageToText(openai_key)
35
  self.controlnet_model = TextToImage(device=self.args.contolnet_device)
36
+ # time-consuming on CPU; run locally instead
37
+ # self.region_semantic_model = RegionSemantic(device=self.args.semantic_segment_device)
38
  print('\033[1;32m' + "Model initialization finished!".center(50, '-') + '\033[0m')
39
 
40
 
models/segment_models/__pycache__/semantic_segment_anything_model.cpython-38.pyc CHANGED
Binary files a/models/segment_models/__pycache__/semantic_segment_anything_model.cpython-38.pyc and b/models/segment_models/__pycache__/semantic_segment_anything_model.cpython-38.pyc differ
 
pretrained_models/blip-image-captioning-large DELETED
@@ -1 +0,0 @@
1
- Subproject commit 293ab01f2dc41c1c214299314f11de635d0937dc
 
 
pretrained_models/blip2-opt-2.7b DELETED
@@ -1 +0,0 @@
1
- Subproject commit 56e1fe81e7e7c346e95e196ace7b442b3f8ff483
 
 
pretrained_models/clip-vit-large-patch14 DELETED
@@ -1 +0,0 @@
1
- Subproject commit 8d052a0f05efbaefbc9e8786ba291cfdf93e5bff
 
 
pretrained_models/clipseg-rd64-refined DELETED
@@ -1 +0,0 @@
1
- Subproject commit 583b388deb98a04feb3e1f816dcdb8f3062ee205
 
 
pretrained_models/oneformer_ade20k_swin_large DELETED
@@ -1 +0,0 @@
1
- Subproject commit 4a5bac8e64f82681a12db2e151a4c2f4ce6092b2
 
 
pretrained_models/oneformer_coco_swin_large DELETED
@@ -1 +0,0 @@
1
- Subproject commit 3a263017ca5c75adbea145f25f81b118243d4394
 
 
pretrained_models/stable-diffusion-v1-5 DELETED
@@ -1 +0,0 @@
1
- Subproject commit 39593d5650112b4cc580433f6b0435385882d819