"""Gradio app that extracts a subset of frames from an uploaded video.

Every sampled frame is encoded with a pre-trained model, a subset of the
resulting feature vectors is chosen with ``furthest_neighbours``, and the
matching full-resolution frames are returned to the user as a zip archive.
"""
import os
import shutil
import zipfile

import cv2
import gradio as gr
import numpy as np
import requests
import torch
from torch import nn
from torchvision import transforms

from sampling_util import furthest_neighbours
from video_reader import video_reader

# Per-channel statistics used to normalise frames after scaling to [0, 1].
CHANNEL_MEAN = np.asarray([0.3156024, 0.33569682, 0.34337464])
CHANNEL_STD = np.asarray([0.16568947, 0.17827448, 0.18925823])

# Both passes over the video must use the same frame rate so that frame
# indices from the encoding pass line up with the export pass.
TARGET_FPS = 9
# Frame width requested from video_reader for the encoding pass.
ENCODER_FRAME_WIDTH = 100

# NOTE(review): the original script referenced an undefined name
# ``downsample_size``; this default is a placeholder -- confirm the intended
# value against sampling_util.furthest_neighbours.
DEFAULT_DOWNSAMPLE_SIZE = 50

SELECTED_DIR_NAME = "selected_images"

# NOTE(review): torch.load unpickles arbitrary Python objects; the "model"
# file must come from a trusted source.
model = torch.load("model").eval()
avg_pool = nn.AdaptiveAvgPool2d((1, 1))


def _model_dtype(module):
    """Return the dtype of *module*'s first parameter (float32 if it has none)."""
    first_param = next(module.parameters(), None)
    return torch.float32 if first_param is None else first_param.dtype


def _encode_frames(video_path):
    """Return one pooled feature vector per frame read from *video_path*."""
    dtype = _model_dtype(model)
    vectors = []
    with torch.no_grad():
        frames = video_reader(
            video_path,
            targetFPS=TARGET_FPS,
            targetWidth=ENCODER_FRAME_WIDTH,
            to_rgb=True,
        )
        for frame in frames:
            frame = frame.astype(np.float32) / 255.0
            frame = (frame - CHANNEL_MEAN) / CHANNEL_STD
            # Move the channel axis from last to second position for the model.
            frame = np.transpose(frame, (0, 3, 1, 2))
            # The normalisation above promotes to float64; cast to the
            # model's own dtype so the forward pass cannot fail on a
            # float/double mismatch.
            batch = torch.from_numpy(frame).to(dtype)
            pooled = avg_pool(model(batch))[0, :, 0, 0]
            vectors.append(pooled.cpu().numpy())
    return vectors


def _selection_mask(vectors, downsample_size):
    """Return a boolean array marking which frame indices were selected."""
    mask = np.zeros((len(vectors),), dtype=bool)
    if not vectors:
        # Nothing to sample from; avoid handing an empty array to the sampler.
        return mask
    # NOTE(review): assumes downsample_size is a number of frames to keep, so
    # it is capped at the number of available frames -- confirm.
    wanted = min(downsample_size, len(vectors))
    chosen, _ = furthest_neighbours(np.asarray(vectors), wanted, seed=0)
    mask[np.asarray(chosen)] = True
    return mask


def _write_selected_frames(video_path, keep_mask, output_dir):
    """Write the frames flagged in *keep_mask* to *output_dir* as JPEG files."""
    frames = video_reader(
        video_path, targetFPS=TARGET_FPS, targetWidth=None, to_rgb=False
    )
    for frame_idx, frame in enumerate(frames):
        if frame_idx >= keep_mask.size:
            # The second pass yielded more frames than were encoded; there is
            # no selection information for them.
            break
        if keep_mask[frame_idx]:
            cv2.imwrite(os.path.join(output_dir, f"{frame_idx}.jpg"), frame)


def _zip_directory(source_dir, zip_path):
    """Store every regular file directly inside *source_dir* in *zip_path*."""
    candidates = (os.path.join(source_dir, name) for name in os.listdir(source_dir))
    file_paths = sorted(path for path in candidates if os.path.isfile(path))
    # Always write the archive (possibly empty) so the returned path exists.
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as archive:
        for path in file_paths:
            archive.write(path, os.path.basename(path))


def predict(input_file, downsample_size=DEFAULT_DOWNSAMPLE_SIZE):
    """Select frames from a video and return them as a zip archive.

    Args:
        input_file: Path of the uploaded video file.
        downsample_size: Value forwarded to ``furthest_neighbours`` as the
            sample size (capped at the number of encoded frames).

    Returns:
        Path of the zip archive, named after the video file and created in
        the current working directory. The archive is empty when the video
        yields no frames.
    """
    # NOTE(review): this directory is shared by all calls, so concurrent
    # requests would overwrite each other's frames.
    selected_directory = os.path.join(os.getcwd(), SELECTED_DIR_NAME)
    if os.path.isdir(selected_directory):
        shutil.rmtree(selected_directory)
    os.mkdir(selected_directory)

    # splitext handles extensions of any length (".mp4", ".webm", ...).
    video_stem = os.path.splitext(os.path.basename(input_file))[0]
    zip_path = video_stem + ".zip"

    img_vecs = _encode_frames(input_file)
    keep_mask = _selection_mask(img_vecs, downsample_size)
    if keep_mask.any():
        _write_selected_frames(input_file, keep_mask, selected_directory)

    _zip_directory(selected_directory, zip_path)
    return zip_path


demo = gr.Interface(
    fn=predict,
    inputs=gr.inputs.Video(label="Upload Video File"),
    outputs=gr.outputs.File(label="Zip"),
)

demo.launch()