RamAnanth1's picture
Update app.py
cd717c0
import gradio as gr
from dataset import TranscriptDataset
from downloader import WhisperPP, YoutubeDownloader
from interpreter import WhisperInterpreter
# Module-level defaults read by dataset() below.

# Model size forwarded in the generation params.
model_size: str = "base"
# Task mode; not referenced by any live code in this view of the file.
mode: str = "transcribe"
# `write` flag forwarded in the generation params.
write: bool = False
# Directory handed to generate_dataset as the download location.
download_path: str = "tmp/"
def dataset(url, name, token):
    """Generate a transcript dataset for a Youtube link and upload it.

    Args:
        url: Youtube link, forwarded to ``TranscriptDataset.generate_dataset``.
        name: Target dataset repository name; the UI asks for the
            ``username/repo_name`` format.
        token: Hugging Face token handed to ``TranscriptDataset.upload``;
            the UI asks for one with write access.

    Returns:
        A message for the "Outcome" textbox: the URL of the created dataset,
        or a list of the inputs that were left empty.
    """
    # Treat None like an empty box and drop stray whitespace picked up when
    # pasting a link or token.
    url, name, token = ((value or "").strip() for value in (url, name, token))

    # Fail fast: without this check a blank field only surfaces after the
    # generation step has already been started.
    supplied = (("Youtube Link", url), ("Dataset Name", name), ("HF Token", token))
    missing = [label for label, value in supplied if not value]
    if missing:
        return "Missing required input(s): " + ", ".join(missing)

    ds = TranscriptDataset(name)
    # number_videos is passed through as-is; presumably an upper bound on the
    # videos processed -- confirm against TranscriptDataset.
    params = dict(model_size=model_size, write=write, number_videos=500)
    overwrite = True
    ds.generate_dataset(url, download_path, overwrite, params)
    ds.upload(token)
    return f"Dataset created at : https://huggingface.co/datasets/{name}"
# Input widgets, listed in the positional order of dataset(url, name, token).
yt_input = gr.Textbox(label="Youtube Link")
name_input = gr.Textbox(
    label="Dataset Name",
    placeholder="Enter in the format username/repo_name",
)
# NOTE(review): the token is displayed in clear text. Consider a masked
# textbox if the Gradio version pinned for this Space supports it -- confirm
# before changing.
token_input = gr.Textbox(label="HF Token", placeholder="Write access token")

# Single text field that shows whatever string dataset() returns.
repo_output = gr.Textbox(label="Outcome")

iface = gr.Interface(
    fn=dataset,
    inputs=[yt_input, name_input, token_input],
    outputs=repo_output,
    title="Create Transcription Dataset for Youtube using OpenAI Whisper !",
    description=(
        "Create a HuggingFace repository for Youtube Transcripts! "
        "You need to specify a write token obtained in https://hf.co/settings/token. "
        "This Space is an experimental demo."
    ),
    article="<p>Find your write token at <a href='https://huggingface.co/settings/token' target='_blank'>token settings</a></p>",
)

# Started at import time, exactly as before.
iface.launch()