Spaces:

aje6
/

ASL-Final-Project-ONNX

Sleeping

App Files Files Community

ASL-Final-Project-ONNX / app.py

aje6

Update app.py

6774a59 verified 2 months ago

raw

history blame

4.94 kB

	# import gradio as gr
	# import cv2
	# import numpy as np
	# import onnxruntime as ort

	# # Load the ONNX model using onnxruntime
	# onnx_model_path = "Model_IV.onnx" # Update with your ONNX model path
	# session = ort.InferenceSession(onnx_model_path)

	# # Function to perform object detection with the ONNX model
	# def detect_objects(frame, confidence_threshold=0.5):
	# # Convert the frame from BGR (OpenCV) to RGB
	# image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

	# # Preprocessing: Resize and normalize the image
	# # Assuming YOLO model input is 640x640, update according to your model's input size
	# input_size = (640, 640)
	# image_resized = cv2.resize(image, input_size)
	# image_normalized = image_resized / 255.0 # Normalize to [0, 1]
	# image_input = np.transpose(image_normalized, (2, 0, 1)) # Change to CHW format
	# image_input = np.expand_dims(image_input, axis=0).astype(np.float32) # Add batch dimension

	# # Perform inference
	# inputs = {session.get_inputs()[0].name: image_input}
	# outputs = session.run(None, inputs)

	# # # Assuming YOLO model outputs are in the form of [boxes, confidences, class_probs]
	# # boxes, confidences, class_probs = outputs

	# # # Post-processing: Filter boxes by confidence threshold
	# # detections = []
	# # for i, confidence in enumerate(confidences[0]):
	# # if confidence >= confidence_threshold:
	# # x1, y1, x2, y2 = boxes[0][i]
	# # class_id = np.argmax(class_probs[0][i]) # Get class with highest probability
	# # detections.append((x1, y1, x2, y2, confidence, class_id))

	# # # Draw bounding boxes and labels on the image
	# # for (x1, y1, x2, y2, confidence, class_id) in detections:
	# # color = (0, 255, 0) # Green color for bounding boxes
	# # cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
	# # label = f"Class {class_id}: {confidence:.2f}"
	# # cv2.putText(image, label, (int(x1), int(y1)-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

	# # # Convert the image back to BGR for displaying in Gradio
	# # image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

	# return outputs

	# # Gradio interface to use the webcam for real-time object detection
	# # Added a slider for the confidence threshold
	# iface = gr.Interface(fn=detect_objects,
	# #inputs=[
	# # gr.Video(sources="webcam", type="numpy"), # Webcam input
	# inputs = gr.Image(sources=["webcam"], type="numpy"),
	# # gr.Slider(minimum=0.0, maximum=1.0, default=0.5, label="Confidence Threshold") # Confidence slider
	# # ],
	# outputs="image") # Show output image with bounding boxes

	# iface.launch()

	import gradio as gr
	import cv2
	from huggingface_hub import hf_hub_download
	from gradio_webrtc import WebRTC
	from twilio.rest import Client
	import os
	from inference import YOLOv8

	# Fetch the ONNX weights from the Hugging Face Hub; hf_hub_download returns
	# the path of the downloaded file, which is handed to the detector wrapper.
	model_file = hf_hub_download(
	    filename="onnx/Model_IV.onnx",
	    repo_id="aje6/ASL-Fingerspelling-Detection",
	)

	# Module-level detector instance used by detection() for every frame.
	model = YOLOv8(model_file)

	# Twilio credentials are optional and come from the environment.
	account_sid = os.getenv("TWILIO_ACCOUNT_SID")
	auth_token = os.getenv("TWILIO_AUTH_TOKEN")

	if not (account_sid and auth_token):
	    # Either credential is missing: pass no explicit RTC configuration.
	    rtc_configuration = None
	else:
	    # Both credentials present: request a Twilio token and use the ICE
	    # servers it carries, restricting the transport policy to "relay".
	    client = Client(account_sid, auth_token)
	    token = client.tokens.create()
	    rtc_configuration = dict(
	        iceServers=token.ice_servers,
	        iceTransportPolicy="relay",
	    )


	def detection(image, conf_threshold=0.3):
	    """Run the detector on one frame and return a 500x500 result image.

	    The frame is first resized to the model's ``input_width`` x
	    ``input_height``, then passed to ``model.detect_objects`` together with
	    ``conf_threshold``. Whatever image that call returns (presumably the
	    frame with detections drawn on it -- confirm against ``inference.py``)
	    is resized to 500x500 before being returned.

	    Args:
	        image: Frame to process; must be accepted by ``cv2.resize``.
	        conf_threshold: Confidence threshold forwarded to the model.

	    Returns:
	        The model's output image resized to 500x500.
	    """
	    model_size = (model.input_width, model.input_height)
	    result_frame = model.detect_objects(
	        cv2.resize(image, model_size), conf_threshold
	    )
	    return cv2.resize(result_frame, (500, 500))


	# Custom styles for the containers tagged with elem_classes "my-group" and
	# "my-column" in the Blocks layout. Fixes two CSS syntax errors: max-height
	# needs a unit ("600px", a bare "600" is an invalid length), and the final
	# semicolon belongs inside the closing brace of the .my-column rule.
	css = """.my-group {max-width: 600px !important; max-height: 600px !important;}
	.my-column {display: flex !important; justify-content: center !important; align-items: center !important;}"""


	# Build the page: two HTML headers, then a centered column holding the
	# WebRTC stream component and a confidence-threshold slider.
	with gr.Blocks(css=css) as demo:
	    # NOTE(review): the heading and links below mention YOLOv10 while the
	    # detector is constructed from a class named YOLOv8 -- confirm which
	    # is intended before changing the user-facing text.
	    gr.HTML(
	        """
	        <h1 style='text-align: center'>
	        YOLOv10 Webcam Stream (Powered by WebRTC ⚡️)
	        </h1>
	        """
	    )
	    # The link separator is a plain "|"; the previous "\|" was an invalid
	    # Python escape sequence and rendered a stray backslash in the page.
	    gr.HTML(
	        """
	        <h3 style='text-align: center'>
	        <a href='https://arxiv.org/abs/2405.14458' target='_blank'>arXiv</a> | <a href='https://github.com/THU-MIG/yolov10' target='_blank'>github</a>
	        </h3>
	        """
	    )
	    with gr.Column(elem_classes=["my-column"]):
	        with gr.Group(elem_classes=["my-group"]):
	            # The stream component is both the input and the output of the
	            # detection callback wired up below.
	            image = WebRTC(label="Stream", rtc_configuration=rtc_configuration)
	            conf_threshold = gr.Slider(
	                label="Confidence Threshold",
	                minimum=0.0,
	                maximum=1.0,
	                step=0.05,
	                value=0.30,
	            )

	        # Each streamed frame goes through detection() with the current
	        # slider value; the returned image is sent back to the same
	        # component. time_limit presumably caps the stream duration in
	        # seconds -- confirm against the gradio_webrtc documentation.
	        image.stream(
	            fn=detection,
	            inputs=[image, conf_threshold],
	            outputs=[image],
	            time_limit=10,
	        )

	# Start the Gradio server only when this file is run as a script, not when
	# it is imported as a module.
	if __name__ == "__main__":
	    demo.launch()