Update app.py
app.py
CHANGED
@@ -164,9 +164,9 @@ def predict(upload_image):
     # Load the test data
     # Load the image
 
-
-
-
+    img2 = cv2.imread(test_image_path)
+    print("cv2: ", img2)
+    print("cv2 shape: ", img2.shape)
     # img = upload_image
     # img = cv2.cvtColor((upload_image * 255).astype(np.uint8), cv2.COLOR_RGB2BGR)
     pil_image = upload_image.convert('RGB')
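The two debug prints added here compare the test image loaded with cv2.imread (BGR channel order) against the Gradio upload converted from PIL (RGB). A minimal sketch of why the two dumps disagree on channel order; red.png is a throwaway file written just for the illustration:

import cv2
import numpy as np
from PIL import Image

# Write a solid-red image, then read it back both ways.
Image.new("RGB", (2, 2), (255, 0, 0)).save("red.png")

bgr = cv2.imread("red.png")            # OpenCV loads as BGR
rgb = np.array(Image.open("red.png"))  # PIL/numpy keeps RGB

print(bgr[0, 0])  # [  0   0 255] -> blue channel first
print(rgb[0, 0])  # [255   0   0] -> red channel first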
@@ -174,8 +174,8 @@ def predict(upload_image):
     # Convert RGB to BGR
     img = open_cv_image[:, :, ::-1].copy()
     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-
-
+    print("gradio: ", img)
+    print("gradio shape: ", img.shape)
 
 
     # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
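One thing worth flagging about the values printed here: the slice open_cv_image[:, :, ::-1] flips RGB to BGR, and cv2.cvtColor(..., cv2.COLOR_BGR2RGB) flips it straight back, so img ends up identical to the original RGB array. A minimal check of that round-trip:

import cv2
import numpy as np
from PIL import Image

pil_image = Image.new("RGB", (4, 4), (10, 20, 30))
open_cv_image = np.array(pil_image)               # RGB
bgr = open_cv_image[:, :, ::-1].copy()            # RGB -> BGR
rgb_again = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)  # BGR -> RGB

assert np.array_equal(rgb_again, open_cv_image)   # the two flips cancel out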
@@ -195,7 +195,7 @@ def predict(upload_image):
 
     return label_list[predicted_class_idx] if probabilities.max().item() > 0.90 else '不是校狗'
 
-def captioning():
+def captioning(upload_image):
 
     model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
     feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
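For context, the unchanged return line above keeps a 0.90 confidence gate: the predicted label is reported only when the top softmax probability clears 90%, and the fallback string '不是校狗' ("not a campus dog") is returned otherwise. A self-contained sketch of that pattern with made-up logits and placeholder labels:

import torch

label_list = ['dog_a', 'dog_b', 'dog_c']  # placeholder labels
logits = torch.tensor([[2.0, 0.5, 0.1]])  # made-up model output

probabilities = torch.softmax(logits, dim=-1)
predicted_class_idx = probabilities.argmax(-1).item()

# Below the 90% gate the classifier refuses to name a dog.
result = (label_list[predicted_class_idx]
          if probabilities.max().item() > 0.90 else '不是校狗')
print(result)  # prints '不是校狗' here, since max prob is ~0.73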
@@ -209,12 +209,18 @@ def captioning():
     gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
 
     images = []
-    for image_path in [test_image_path]:
-        i_image = Image.open(image_path)
-        if i_image.mode != "RGB":
-            i_image = i_image.convert(mode="RGB")
+    # for image_path in [test_image_path]:
+    #     i_image = Image.open(image_path)
+    #     if i_image.mode != "RGB":
+    #         i_image = i_image.convert(mode="RGB")
 
-        images.append(i_image)
+    #     images.append(i_image)
+    pil_image = upload_image.convert('RGB')
+    open_cv_image = np.array(pil_image)
+    # Convert RGB to BGR
+    img = open_cv_image[:, :, ::-1].copy()
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    images.append(img)
 
     pixel_values = feature_extractor(images=images, return_tensors="pt").pixel_values
     pixel_values = pixel_values.to(device)
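Since upload_image is already a PIL image here, the numpy/BGR detour is probably avoidable: Hugging Face image processors such as ViTImageProcessor accept PIL images directly. A sketch of the shorter path, using a blank stand-in for the Gradio upload:

from PIL import Image
from transformers import ViTImageProcessor

feature_extractor = ViTImageProcessor.from_pretrained(
    "nlpconnect/vit-gpt2-image-captioning")
upload_image = Image.new("RGB", (640, 480))  # stand-in for the Gradio upload

# Processors take PIL images as-is; no numpy/cv2 conversion needed.
pixel_values = feature_extractor(images=[upload_image.convert("RGB")],
                                 return_tensors="pt").pixel_values
print(pixel_values.shape)  # torch.Size([1, 3, 224, 224]) for this checkpoint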
@@ -246,11 +252,8 @@ if __name__ == '__main__':
     train_model()
     # output(predict(), captioning())
 
-
-# def greet(name):
-#     return "Hello " + name + "!!"
 def get_result(upload_image):
-    result = output(predict(upload_image), captioning())
+    result = output(predict(upload_image), captioning(upload_image))
    return result
 
 iface = gr.Interface(fn=get_result, inputs=gr.Image(type="pil"), outputs="text")
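The gr.Image(type="pil") input is what guarantees get_result receives a PIL image, which is the assumption the whole commit leans on. A minimal stand-alone sketch of that wiring; the launch call is not shown in this hunk and is assumed to live elsewhere in app.py:

import gradio as gr
from PIL import Image

def get_result(upload_image: Image.Image) -> str:
    # Stand-in for output(predict(upload_image), captioning(upload_image)).
    return f"got a {upload_image.size[0]}x{upload_image.size[1]} image"

iface = gr.Interface(fn=get_result, inputs=gr.Image(type="pil"), outputs="text")
iface.launch()  # assumed; not part of the diff shown above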