Update app.py
app.py
CHANGED
@@ -164,9 +164,9 @@ def predict(upload_image):
     # Load the test data
     # Load the image
 
-
-
-
+    img2 = cv2.imread(test_image_path)
+    print("cv2: ", img2)
+    print("cv2 shape: ", img2.shape)
     # img = upload_image
     # img = cv2.cvtColor((upload_image * 255).astype(np.uint8), cv2.COLOR_RGB2BGR)
     pil_image = upload_image.convert('RGB')
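The two debug prints added here compare the test image loaded with cv2.imread (BGR channel order) against the Gradio upload converted from PIL (RGB). A minimal sketch of why the two dumps disagree on channel order; red.png is a throwaway file written just for the illustration:

import cv2
import numpy as np
from PIL import Image

# Write a solid-red image, then read it back both ways.
Image.new("RGB", (2, 2), (255, 0, 0)).save("red.png")

bgr = cv2.imread("red.png")            # OpenCV loads as BGR
rgb = np.array(Image.open("red.png"))  # PIL/numpy keeps RGB

print(bgr[0, 0])  # [  0   0 255] -> blue channel first
print(rgb[0, 0])  # [255   0   0] -> red channel first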
@@ -174,8 +174,8 @@ def predict(upload_image):
     # Convert RGB to BGR
     img = open_cv_image[:, :, ::-1].copy()
     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-
-
+    print("gradio: ", img)
+    print("gradio shape: ", img.shape)
 
 
     # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
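One thing worth flagging about the values printed here: the slice open_cv_image[:, :, ::-1] flips RGB to BGR, and cv2.cvtColor(..., cv2.COLOR_BGR2RGB) flips it straight back, so img ends up identical to the original RGB array. A minimal check of that round-trip:

import cv2
import numpy as np
from PIL import Image

pil_image = Image.new("RGB", (4, 4), (10, 20, 30))
open_cv_image = np.array(pil_image)               # RGB
bgr = open_cv_image[:, :, ::-1].copy()            # RGB -> BGR
rgb_again = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)  # BGR -> RGB

assert np.array_equal(rgb_again, open_cv_image)   # the two flips cancel out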
@@ -195,7 +195,7 @@ def predict(upload_image):
 
     return label_list[predicted_class_idx] if probabilities.max().item() > 0.90 else '不是校狗'
 
-def captioning():
+def captioning(upload_image):
 
     model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
     feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
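For context, the unchanged return line above keeps a 0.90 confidence gate: the predicted label is reported only when the top softmax probability clears 90%, and the fallback string '不是校狗' ("not a campus dog") is returned otherwise. A self-contained sketch of that pattern with made-up logits and placeholder labels:

import torch

label_list = ['dog_a', 'dog_b', 'dog_c']  # placeholder labels
logits = torch.tensor([[2.0, 0.5, 0.1]])  # made-up model output

probabilities = torch.softmax(logits, dim=-1)
predicted_class_idx = probabilities.argmax(-1).item()

# Below the 90% gate the classifier refuses to name a dog.
result = (label_list[predicted_class_idx]
          if probabilities.max().item() > 0.90 else '不是校狗')
print(result)  # prints '不是校狗' here, since max prob is ~0.73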
@@ -209,12 +209,18 @@ def captioning():
     gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
 
     images = []
-    for image_path in [test_image_path]:
-        i_image = Image.open(image_path)
-        if i_image.mode != "RGB":
-            i_image = i_image.convert(mode="RGB")
+    # for image_path in [test_image_path]:
+    #     i_image = Image.open(image_path)
+    #     if i_image.mode != "RGB":
+    #         i_image = i_image.convert(mode="RGB")
 
-        images.append(i_image)
+    #     images.append(i_image)
+    pil_image = upload_image.convert('RGB')
+    open_cv_image = np.array(pil_image)
+    # Convert RGB to BGR
+    img = open_cv_image[:, :, ::-1].copy()
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    images.append(img)
 
     pixel_values = feature_extractor(images=images, return_tensors="pt").pixel_values
     pixel_values = pixel_values.to(device)
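Since upload_image is already a PIL image here, the numpy/BGR detour is probably avoidable: Hugging Face image processors such as ViTImageProcessor accept PIL images directly. A sketch of the shorter path, using a blank stand-in for the Gradio upload:

from PIL import Image
from transformers import ViTImageProcessor

feature_extractor = ViTImageProcessor.from_pretrained(
    "nlpconnect/vit-gpt2-image-captioning")
upload_image = Image.new("RGB", (640, 480))  # stand-in for the Gradio upload

# Processors take PIL images as-is; no numpy/cv2 conversion needed.
pixel_values = feature_extractor(images=[upload_image.convert("RGB")],
                                 return_tensors="pt").pixel_values
print(pixel_values.shape)  # torch.Size([1, 3, 224, 224]) for this checkpoint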
@@ -246,11 +252,8 @@ if __name__ == '__main__':
     train_model()
     # output(predict(), captioning())
 
-
-# def greet(name):
-#     return "Hello " + name + "!!"
 def get_result(upload_image):
-    result = output(predict(upload_image), captioning())
+    result = output(predict(upload_image), captioning(upload_image))
    return result
 
 iface = gr.Interface(fn=get_result, inputs=gr.Image(type="pil"), outputs="text")
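The gr.Image(type="pil") input is what guarantees get_result receives a PIL image, which is the assumption the whole commit leans on. A minimal stand-alone sketch of that wiring; the launch call is not shown in this hunk and is assumed to live elsewhere in app.py:

import gradio as gr
from PIL import Image

def get_result(upload_image: Image.Image) -> str:
    # Stand-in for output(predict(upload_image), captioning(upload_image)).
    return f"got a {upload_image.size[0]}x{upload_image.size[1]} image"

iface = gr.Interface(fn=get_result, inputs=gr.Image(type="pil"), outputs="text")
iface.launch()  # assumed; not part of the diff shown above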