Spaces:
Runtime error
Runtime error
File size: 3,813 Bytes
24611b8 3d771a2 69d29a2 53628ab c8b45a2 69d29a2 24611b8 7586903 24611b8 69d29a2 24611b8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import os
from typing import List, Tuple
import multiprocessing
import numpy as np
import pandas as pd
import streamlit as st
import torch
from torch import Tensor
from decord import VideoReader, cpu
from transformers import AutoFeatureExtractor, TimesformerForVideoClassification
np.random.seed(0)
st.set_page_config(
page_title="TimeSFormer",
page_icon="🧊",
layout="wide",
initial_sidebar_state="expanded",
menu_items={
"Get Help": "https://www.extremelycoolapp.com/help",
"Report a bug": "https://www.extremelycoolapp.com/bug",
"About": "# This is a header. This is an *extremely* cool app!",
},
)
def sample_frame_indices(
clip_len: int, frame_sample_rate: float, seg_len: int
) -> np.ndarray:
converted_len = int(clip_len * frame_sample_rate)
end_idx = np.random.randint(converted_len, seg_len)
start_idx = end_idx - converted_len
indices = np.linspace(start_idx, end_idx, num=clip_len)
indices = np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
return indices
# @st.cache_resource
@st.experimental_singleton
def load_model(model_name: str):
if "base-finetuned-k400" in model_name or "base-finetuned-k600" in model_name:
feature_extractor = AutoFeatureExtractor.from_pretrained(
"MCG-NJU/videomae-base-finetuned-kinetics"
)
else:
feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
model = TimesformerForVideoClassification.from_pretrained(model_name)
return feature_extractor, model
def inference(file_path: str):
videoreader = VideoReader(VIDEO_TMP_PATH, num_threads=1, ctx=cpu(0))
# sample 8 frames
videoreader.seek(0)
indices = sample_frame_indices(
clip_len=8, frame_sample_rate=4, seg_len=len(videoreader)
)
video = videoreader.get_batch(indices).asnumpy()
inputs = feature_extractor(list(video), return_tensors="pt")
with torch.no_grad():
outputs = model(**inputs)
logits: Tensor = outputs.logits
# model predicts one of the 400 Kinetics-400 classes
predicted_label = logits.argmax(-1).item()
print(model.config.id2label[predicted_label])
TOP_K = 12
# logits = np.squeeze(logits)
logits = logits.squeeze().numpy()
indices = np.argsort(logits)[::-1][:TOP_K]
values = logits[indices]
results: List[Tuple[str, float]] = []
for index, value in zip(indices, values):
predicted_label = model.config.id2label[index]
print(f"Label: {predicted_label} - {value:.2f}%")
results.append((predicted_label, value))
return pd.DataFrame(results, columns=("Label", "Confidence"))
st.title("TimeSFormer")
with st.expander("INTRODUCTION"):
st.text(
f"""Streamlit demo for TimeSFormer.
Author: Hiep Phuoc Secondary High School
Number of CPU(s): {multiprocessing.cpu_count()}
"""
)
model_name = st.selectbox(
"model_name",
(
"facebook/timesformer-base-finetuned-k400",
"facebook/timesformer-base-finetuned-k600",
"facebook/timesformer-base-finetuned-ssv2",
"facebook/timesformer-hr-finetuned-k600",
"facebook/timesformer-hr-finetuned-k400",
"facebook/timesformer-hr-finetuned-ssv2",
"fcakyon/timesformer-large-finetuned-k400",
"fcakyon/timesformer-large-finetuned-k600",
),
)
feature_extractor, model = load_model(model_name)
VIDEO_TMP_PATH = os.path.join("tmp", "tmp.mp4")
uploadedfile = st.file_uploader("Upload file", type=["mp4"])
if uploadedfile is not None:
with st.spinner():
with open(VIDEO_TMP_PATH, "wb") as f:
f.write(uploadedfile.getbuffer())
with st.spinner("Processing..."):
df = inference(VIDEO_TMP_PATH)
st.dataframe(df)
st.video(VIDEO_TMP_PATH)
|