import pickle
from io import BytesIO

import gradio as gr
import mediapipe as mp
import numpy as np
import requests
from huggingface_hub import hf_hub_download
from PIL import Image

# Static ASL fingerspelling alphabet. 'J' and 'Z' are omitted because they
# involve motion and cannot be recognized from a single frame.
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N',
          'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y']

model_path = hf_hub_download(repo_id="Paulie-Aditya/SignSense-Model",
                             filename="stacked_model_new.p")
with open(model_path, "rb") as f:
    model_dict = pickle.load(f)
model = model_dict["model"]

# Build the hand detector once at module load instead of on every request.
# static_image_mode=True because each request is an independent photo, not a
# video stream. max_num_hands must be passed to the constructor (assigning
# hands.maxHands after construction has no effect); it is left at 2 so a
# second hand can still be detected and reported as an error below.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True,
                       max_num_hands=2,
                       min_detection_confidence=0.3)


def predict(url):
    # The backend sends an image URL; fetch it and decode it in memory
    # instead of round-tripping through a file on disk.
    response = requests.get(url)
    img = Image.open(BytesIO(response.content)).convert("RGB")

    # MediaPipe expects an RGB array, which PIL already provides.
    frame_rgb = np.asarray(img)
    results = hands.process(frame_rgb)

    if not results.multi_hand_landmarks:
        return {"prediction": "No hand detected"}
    if len(results.multi_hand_landmarks) != 1:
        return {"prediction": "Too many Hands"}

    hand_landmarks = results.multi_hand_landmarks[0]
    xs = [lm.x for lm in hand_landmarks.landmark]
    ys = [lm.y for lm in hand_landmarks.landmark]

    # Shift every landmark so the hand's bounding box starts at the origin,
    # making the 42 features (21 landmarks x 2 coords) translation-invariant.
    data_aux = []
    for lm in hand_landmarks.landmark:
        data_aux.append(lm.x - min(xs))
        data_aux.append(lm.y - min(ys))

    if len(data_aux) != 42:
        return {"prediction": "Could not extract landmarks"}

    prediction = model.predict([np.asarray(data_aux)])
    return {"prediction": labels[prediction[0]]}


# The backend supplies the image URL as text; the dict result renders as JSON.
iface = gr.Interface(fn=predict, inputs="text", outputs="json",
                     title="Image to Text Model")
iface.launch()
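
# A minimal client-side sketch of how the running app could be queried.
# Everything here is illustrative, not part of the app itself: the local URL
# assumes Gradio's default port, and the image URL is a placeholder for any
# publicly hosted photo of a hand sign.
#
#     from gradio_client import Client
#
#     client = Client("http://127.0.0.1:7860")
#     result = client.predict("https://example.com/hand_sign.jpg",
#                             api_name="/predict")
#     print(result)  # e.g. {"prediction": "A"}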