import gradio as gr
import numpy as np
from PIL import Image
import tensorflow as tf
from transformers import SegformerFeatureExtractor, TFSegformerForSemanticSegmentation

# Checkpoint shared by the preprocessor and the model.
CHECKPOINT = "nvidia/segformer-b1-finetuned-cityscapes-1024-1024"

# Preprocessor (resize / normalise) that belongs to the checkpoint.
feature_extractor = SegformerFeatureExtractor.from_pretrained(CHECKPOINT)

# Semantic-segmentation model loaded from the same checkpoint.
model = TFSegformerForSemanticSegmentation.from_pretrained(CHECKPOINT)

# Nominal input size as (width, height).
# `model.input_shape[1:3]` is not usable here: the TF SegFormer model is a
# subclassed Keras model that is fed channels-first `pixel_values`, so the
# attribute is either unavailable or does not slice to a spatial size.
# NOTE(review): 1024x1024 is assumed from the checkpoint name - confirm
# against the checkpoint's preprocessor configuration.
input_size = (1024, 1024)

# Define the model prediction function.
def classify_image(img):
    """Return the id of the class that covers the most pixels of ``img``.

    The image is preprocessed with the module-level ``feature_extractor``
    and segmented with the module-level ``model``; the per-pixel class map
    is then reduced to the single most frequent class id.

    Args:
        img: The input image, as a NumPy array (what ``gr.Image`` passes by
            default) or a PIL image. ``None`` is accepted and means "no
            image", which a live interface sends when the input is cleared.

    Returns:
        The dominant class id as a plain ``int``, or ``None`` when ``img``
        is ``None``.
    """
    if img is None:
        return None

    # Let the checkpoint's preprocessor handle resizing, normalisation and
    # the channels-first layout instead of scaling by hand.
    inputs = feature_extractor(images=img, return_tensors="tf")
    outputs = model(**inputs, training=False)

    # Per the Transformers documentation the logits are laid out as
    # (batch, num_labels, height / 4, width / 4); drop the batch axis.
    logits = outputs.logits.numpy()[0]

    # Pick the best class for every pixel, then the class seen most often.
    class_map = logits.argmax(axis=0)
    pixel_counts = np.bincount(class_map.ravel(), minlength=logits.shape[0])

    # Convert the NumPy scalar to a built-in int for the output component.
    return int(pixel_counts.argmax())

# Build the Gradio UI: an image input wired to `classify_image`, with the
# result shown in a label component and re-evaluated live on every change.
iface = gr.Interface(
    fn=classify_image,
    # The closing parenthesis of gr.Image(...) was missing, which made
    # `outputs` and `live` arguments of gr.Image and left gr.Interface( open.
    inputs=gr.Image(shape=(800, 600)),
    outputs="label",
    live=True,
)

# Launch the Gradio UI.
iface.launch()