|
import streamlit as st |
|
import numpy as np |
|
from keras.models import load_model |
|
import cv2 |
|
from io import BytesIO |
|
import mediapipe as mp |
|
|
|
|
|
# Load the pre-trained sign-language CNN (trained for 30 epochs).
# NOTE: loading happens at import time, so the app blocks until the
# .h5 file is read from the working directory.
model = load_model('sign_asl_cnn_30_epochs.h5')

# Map model output indices to labels: 0-9 -> digits '0'-'9',
# 10-35 -> uppercase letters 'A'-'Z' (chr(65) == 'A').
class_labels = {i: str(i) if i < 10 else chr(65 + i - 10) for i in range(36)}
|
|
|
|
|
def preprocess_image(image):
    """Prepare a BGR frame for the CNN.

    Resizes to the model's 200x200 input, scales pixel values to
    [0, 1], and adds a leading batch axis of size 1.
    """
    resized = cv2.resize(image, (200, 200))
    normalized = resized / 255.0
    return normalized.reshape(1, 200, 200, 3)
|
|
|
|
|
def predict_letter(image):
    """Classify a single frame and return its sign label.

    Preprocesses the image, runs the global CNN, and maps the
    argmax class index to a digit/letter via ``class_labels``.
    """
    batch = preprocess_image(image)
    scores = model.predict(batch)
    best_class = np.argmax(scores, axis=1)[0]
    return class_labels[best_class]
|
|
|
|
|
def detect_hands(image):
    """Detect hands in a BGR frame, classify each, and annotate the frame.

    For every detected hand: crops a margin-padded ROI around the hand
    landmarks, applies a color-band mask, runs the classifier, and draws
    the predicted label plus a bounding box onto ``image`` (modified in
    place).

    Returns the annotated image.
    """
    mp_hands = mp.solutions.hands
    margin = 15  # extra pixels around the landmark bounding box

    # MediaPipe expects RGB input; OpenCV frames arrive as BGR.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Use the detector as a context manager so its native resources are
    # released on every call (the original leaked one Hands instance per
    # frame, which is expensive in the webcam loop).
    with mp_hands.Hands() as hands:
        results = hands.process(image_rgb)

    if not results.multi_hand_landmarks:
        return image

    for landmarks in results.multi_hand_landmarks:
        # Convert normalized landmark coordinates to pixel positions.
        landmarks_xy = [(int(landmark.x * image.shape[1]), int(landmark.y * image.shape[0]))
                        for landmark in landmarks.landmark]
        xs = [p[0] for p in landmarks_xy]
        ys = [p[1] for p in landmarks_xy]

        # Margin-padded bounding box, clamped to the frame edges.
        x_min = max(0, min(xs) - margin)
        y_min = max(0, min(ys) - margin)
        x_max = min(image.shape[1], max(xs) + margin)
        y_max = min(image.shape[0], max(ys) + margin)

        roi = image[y_min:y_max, x_min:x_max]
        if roi.size == 0:
            continue  # degenerate crop (hand right at the frame border)

        roi = cv2.resize(roi, (200, 200), interpolation=cv2.INTER_AREA)

        # NOTE(review): the original named this 'hsv' but it actually
        # converts BGR->RGB, not to HSV. The inRange bounds below were
        # evidently tuned against this RGB output, so the conversion is
        # kept as-is; only the misleading name is fixed.
        rgb_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)

        # Keep only pixels inside the tuned color band (skin segmentation).
        lower_yellow = np.array([93, 72, 51])
        upper_yellow = np.array([224, 194, 183])
        mask = cv2.inRange(rgb_roi, lower_yellow, upper_yellow)
        roi = cv2.bitwise_and(roi, roi, mask=mask)

        # NOTE(review): the ROI is fed to the model as raw uint8, unlike
        # preprocess_image() which scales to [0, 1] — confirm which input
        # range the network was trained on before "fixing" either path.
        roi = roi.reshape(1, 200, 200, 3)

        predictions = model.predict(roi)
        predicted_class = int(np.argmax(predictions, axis=1)[0])
        result = class_labels[predicted_class]

        # Draw the predicted label just above the box, then the box itself.
        cv2.putText(image, str(result), (x_min, y_min - 10),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

    return image
|
|
|
|
|
# Page layout: title plus a sidebar toggle between the two input modes
# (single-image upload vs. live webcam). "Upload" is the default.
st.title('Sign Language Recognition')




selected_option = st.sidebar.radio("Select Option", ["Upload", "Webcam"], index=0)
|
|
|
if selected_option == "Upload":
    # --- Single-image mode: decode an uploaded file and classify it. ---
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png"])

    if uploaded_file is not None:
        if st.button('Predict'):
            # Decode the raw upload bytes into a BGR OpenCV image.
            contents = uploaded_file.read()
            nparr = np.frombuffer(contents, np.uint8)
            image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

            if image is None:
                # imdecode returns None for corrupt/unsupported data;
                # the original would have crashed inside cv2.resize.
                st.write('Could not decode the uploaded image.')
            else:
                predicted_letter = predict_letter(image)
                st.write('Predicted Letter:', predicted_letter)

elif selected_option == "Webcam":
    # --- Live mode: stream frames, annotate detections, show prediction. ---
    webcam_frame = st.empty()
    predicted_letter_webcam = st.empty()
    webcam_capture_status = st.empty()
    webcam_stop_button = st.empty()
    webcam_status = st.empty()

    webcam_button = st.button("Start Webcam")

    if webcam_button:
        webcam_status.text("Webcam is on.")
        webcam_stop_button = st.button("Stop Webcam")

        cap = cv2.VideoCapture(0)
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    # Camera unavailable or stream ended: stop cleanly
                    # instead of passing a None frame downstream.
                    webcam_status.text("Webcam frame could not be read.")
                    break

                # Annotate first so the displayed frame actually shows the
                # detections (the original displayed the raw frame and the
                # drawn boxes/labels were never visible). The unused
                # cv2.imencode call was also dropped.
                frame = detect_hands(frame)
                webcam_frame.image(frame, channels="BGR")

                predicted_letter = predict_letter(frame)
                predicted_letter_webcam.text(f"Predicted Letter: {predicted_letter}")

                if webcam_stop_button:
                    webcam_status.text("Webcam is off.")
                    break
        finally:
            # Release the camera even if the loop exits via an exception
            # or a Streamlit rerun interrupt.
            cap.release()
|
|