fcakyon committed on
Commit
09295f0
•
1 Parent(s): 80ae000

initial upload

Browse files
Files changed (4) hide show
  1. README.md +7 -9
  2. app.py +98 -0
  3. requirements.txt +7 -0
  4. utils.py +37 -0
README.md CHANGED
@@ -1,13 +1,11 @@
1
  ---
2
- title: Timesformer
3
- emoji: 🐨
4
- colorFrom: gray
5
- colorTo: purple
6
  sdk: gradio
7
- sdk_version: 3.12.0
8
  app_file: app.py
9
  pinned: false
10
- license: openrail
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Zero Shot Video Classification
3
+ emoji: 👀
4
+ colorFrom: blue
5
+ colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 2.9.1
8
  app_file: app.py
9
  pinned: false
10
+ license: apache-2.0
11
+ ---
 
 
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import gradio as gr
4
+ from video_transformers import VideoModel
5
+ from utils import (
6
+ convert_frames_to_gif,
7
+ download_youtube_video,
8
+ sample_frames_from_video_file,
9
+ )
10
+
11
+ video_model = VideoModel.from_transformers("facebook/timesformer-base-finetuned-k400")
12
+
13
+
14
+ examples = [
15
+ ["https://www.youtube.com/watch?v=huAJ9dC5lmI"],
16
+ ["https://www.youtube.com/watch?v=wvcWt6u5HTg"],
17
+ ["https://www.youtube.com/watch?v=-3kZSi5qjRM"],
18
+ ["https://www.youtube.com/watch?v=-6usjfP8hys"],
19
+ ["https://www.youtube.com/watch?v=B8OdMwVwyXc"],
20
+ ["https://www.youtube.com/watch?v=B9ea7YyCP6E"],
21
+ ["https://www.youtube.com/watch?v=BBkpaeJBKmk"],
22
+ ["https://www.youtube.com/watch?v=BBqU8Apee_g"],
23
+ ["https://www.youtube.com/watch?v=BDHub0gBGtc"],
24
+ ["https://www.youtube.com/watch?v=I7cwq6_4QtM"],
25
+ ["https://www.youtube.com/watch?v=Z0mJDXpNhYA"],
26
+ ["https://www.youtube.com/watch?v=QkQQjFGnZlg"],
27
+ ["https://www.youtube.com/watch?v=IQaoRUQif14"],
28
+ ]
29
+
30
+
31
def predict(youtube_url):
    """Classify a YouTube video and build a GIF preview of the sampled clip.

    Downloads the video, samples 16 frames from it for the preview GIF,
    runs ``video_model.predict`` on the downloaded file, and finally deletes
    the download.

    Args:
        youtube_url: URL of the YouTube video to classify.

    Returns:
        A tuple ``(predictions, gif_path)``: the ``"predictions"`` entry of
        the model output and the path of the preview GIF.
    """
    video_path = download_youtube_video(youtube_url)
    try:
        frames = sample_frames_from_video_file(video_path, num_frames=16)
        gif_path = convert_frames_to_gif(frames)
        result = video_model.predict(video_or_folder_path=video_path)
    finally:
        # Remove the download even when sampling or inference fails, so that
        # failed requests do not leave video files behind on disk.
        os.remove(video_path)

    return result["predictions"], gif_path
43
+
44
+
45
+ app = gr.Blocks()
46
+ with app:
47
+ gr.Markdown("# **<p align='center'>Video Classification with Timesformer</p>**")
48
+ gr.Markdown(
49
+ """
50
+ <p style='text-align: center'>
51
+ Timesformer is a video model that uses a Transformer architecture to process video frames.
52
+ <br>It is released by Facebook AI Research in ICML 2021.
53
+ <br>This version is trained on Kinetics-400 dataset and can classify videos into 400 classes.
54
+ </p>
55
+ """
56
+ )
57
+ gr.Markdown(
58
+ """
59
+ <p style='text-align: center'>
60
+ Follow me for more!
61
+ <br> <a href='https://twitter.com/fcakyon' target='_blank'>twitter</a> | <a href='https://github.com/fcakyon' target='_blank'>github</a> | <a href='https://www.linkedin.com/in/fcakyon/' target='_blank'>linkedin</a> | <a href='https://fcakyon.medium.com/' target='_blank'>medium</a>
62
+ </p>
63
+ """
64
+ )
65
+
66
+ with gr.Row():
67
+ with gr.Column():
68
+ gr.Markdown("Provide a Youtube video URL.")
69
+ youtube_url = gr.Textbox(label="Youtube URL:", show_label=True)
70
+ predict_btn = gr.Button(value="Predict")
71
+ with gr.Column():
72
+ video_gif = gr.Image(
73
+ label="Input Clip",
74
+ show_label=True,
75
+ )
76
+ with gr.Column():
77
+ predictions = gr.Label(
78
+ label="Predictions:", show_label=True, num_top_classes=5
79
+ )
80
+
81
+ gr.Markdown("**Examples:**")
82
+ gr.Examples(
83
+ examples,
84
+ youtube_url,
85
+ [predictions, video_gif],
86
+ fn=predict,
87
+ cache_examples=True,
88
+ )
89
+
90
+ predict_btn.click(predict, inputs=youtube_url, outputs=[predictions, video_gif])
91
+ gr.Markdown(
92
+ """
93
+ \n Demo created by: <a href=\"https://github.com/fcakyon\">fcakyon</a>
94
+ <br> Based on this <a href=\"https://huggingface.co/facebook/timesformer-base-finetuned-k400\">HuggingFace model</a>
95
+ """
96
+ )
97
+
98
+ app.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ decord
4
+ pytube
5
+ imageio
6
+ transformers @ git+https://github.com/huggingface/transformers.git@c54646b13d468b7a21fd6ee18f943ad69daab48e
7
+ video_transformers == 0.0.8
utils.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pytube import YouTube
2
+ import numpy as np
3
+ from decord import VideoReader, cpu
4
+ import imageio
5
+
6
+
7
def download_youtube_video(url: str):
    """Download the first stream of a YouTube video that has an mp4 extension.

    Args:
        url: URL of the YouTube video.

    Returns:
        The file path returned by the selected stream's ``download()`` call.

    Raises:
        ValueError: If the video exposes no stream with an ``mp4`` extension.
    """
    yt = YouTube(url)

    # first() returns None on an empty query, which lets us report a clear
    # error instead of the bare IndexError that streams[0] would raise.
    stream = yt.streams.filter(file_extension="mp4").first()
    if stream is None:
        raise ValueError(f"No mp4 stream available for {url!r}")
    return stream.download()
13
+
14
+
15
def sample_frame_indices(clip_len, frame_sample_rate, seg_len=None):
    """Return ``clip_len`` evenly spaced frame indices starting at frame 0.

    The indices are spread over a window of ``clip_len * frame_sample_rate``
    frames. When ``seg_len`` is given, the window is shortened to at most
    ``seg_len`` frames so that no index points past the available frames.

    Args:
        clip_len: Number of indices to return.
        frame_sample_rate: Multiplier that sets the window length
            (``clip_len * frame_sample_rate`` frames).
        seg_len: Optional number of frames actually available. ``None``
            (the default) leaves the window unbounded.

    Returns:
        A NumPy ``int64`` array of ``clip_len`` non-decreasing indices.

    Raises:
        ValueError: If ``seg_len`` is given and is smaller than 1.
    """
    converted_len = int(clip_len * frame_sample_rate)
    if seg_len is not None:
        if seg_len < 1:
            raise ValueError(f"seg_len must be at least 1, got {seg_len}")
        converted_len = min(converted_len, int(seg_len))

    start_idx = 0
    end_idx = converted_len
    indices = np.linspace(start_idx, end_idx, num=clip_len)
    # linspace includes end_idx itself, which is one past the last frame of
    # the window, so clamp to end_idx - 1 before truncating to integers.
    indices = np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
    return indices
22
+
23
+
24
def sample_frames_from_video_file(file_path: str, num_frames: int = 16):
    """Decode ``num_frames`` sampled frames from the start of a video file.

    Frame indices come from ``sample_frame_indices`` with a sample rate of 4
    and are clamped to the last frame of the video, so clips shorter than
    ``num_frames * 4`` frames repeat their final frame.

    Args:
        file_path: Path of the video file to read.
        num_frames: Number of frames to sample.

    Returns:
        The array produced by ``get_batch(indices).asnumpy()``.

    Raises:
        ValueError: If the video reports zero frames.
    """
    videoreader = VideoReader(file_path, num_threads=1, ctx=cpu(0))
    total_frames = len(videoreader)
    if total_frames == 0:
        raise ValueError(f"Video contains no frames: {file_path}")

    # sample frames
    videoreader.seek(0)
    indices = sample_frame_indices(clip_len=num_frames, frame_sample_rate=4)
    # The sampling window may be longer than the video; clamp the indices so
    # short clips do not request frames past the end of the file.
    indices = np.clip(indices, 0, total_frames - 1)
    frames = videoreader.get_batch(indices).asnumpy()
    return frames
32
+
33
+
34
def convert_frames_to_gif(frames, output_path="frames.gif", fps=8):
    """Write frames to an animated GIF file and return the file's path.

    Args:
        frames: Frames to encode; converted to a ``uint8`` NumPy array before
            being handed to ``imageio.mimsave``.
        output_path: Destination path of the GIF. Defaults to ``"frames.gif"``;
            pass a unique path when several GIFs must coexist.
        fps: Value forwarded to ``imageio.mimsave`` as ``fps``. Defaults to 8.

    Returns:
        ``output_path``, unchanged.
    """
    # asarray lets callers pass a list of frames as well as an ndarray.
    converted_frames = np.asarray(frames).astype(np.uint8)
    imageio.mimsave(output_path, converted_frames, fps=fps)
    return output_path