import os
import shutil
import tempfile
import spaces
import gradio as gr
import torch
# ---------------------------------------------------------------------------
# Static page content
# ---------------------------------------------------------------------------

# Markdown shown by the first gr.Markdown call inside the Blocks layout.
title_markdown = """
ShareGPT4Video: Improving Video Understanding and Generation with Better Captions
If you like our project, please give us a star ✨ on Github for the latest update.
"""

# Custom CSS passed to gr.Blocks(css=...).
block_css = """
#buttons button {
min-width: min(120px,100%);
}
"""

# License notice shown by the last gr.Markdown call inside the Blocks layout.
learn_more_markdown = """
### License
The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
"""

# ---------------------------------------------------------------------------
# Model settings
# ---------------------------------------------------------------------------
# None of these values are read by the code visible in this file; the
# captioning callbacks are currently placeholders.
model_path = ""
device = "cuda"
load_8bit = False
load_4bit = False
dtype = torch.float16
@spaces.GPU(duration=60)
def generate_slidingcaptioning(video):
    """Callback for the "Sliding Captioning" button (placeholder).

    Args:
        video: Value of the input video component; currently ignored.

    Returns:
        The fixed string ``'text'``.
    """
    placeholder_caption = 'text'
    return placeholder_caption
@spaces.GPU(duration=60)
def generate_fastcaptioning(video):
    """Callback for the "Fast Captioning" button (placeholder).

    Args:
        video: Value of the input video component; currently ignored.

    Returns:
        The fixed string ``'text'``.
    """
    placeholder_caption = 'text'
    return placeholder_caption
@spaces.GPU(duration=60)
def generate_promptrecaptioning(text):
    """Callback for the "Prompt Re-captioning" button (placeholder).

    Args:
        text: Value of the input text box.

    Returns:
        ``text`` itself, unchanged.
    """
    recaptioned = text  # no rewriting happens yet; the input is echoed back
    return recaptioned
def save_video_to_local(video_path):
    """Copy a video into the local ``temp`` directory under a random name.

    Args:
        video_path: Path of the file to copy.

    Returns:
        Path of the copy, of the form ``temp/<random>.mp4`` (relative to the
        current working directory).

    Raises:
        OSError: If the source cannot be read or the copy cannot be written.
    """
    target_dir = 'temp'
    # The directory is not guaranteed to exist (e.g. on a fresh checkout);
    # without it shutil.copyfile fails with FileNotFoundError.
    os.makedirs(target_dir, exist_ok=True)

    # Random 16-hex-digit stem. Uses only public API instead of the private
    # tempfile._get_candidate_names() helper.
    random_stem = os.urandom(8).hex()
    filename = os.path.join(target_dir, random_stem + '.mp4')

    shutil.copyfile(video_path, filename)
    return filename
# Build the Gradio page: header text, an input column (video, prompt box,
# three action buttons), an output column, and the license notice.
# NOTE(review): the row/column nesting below was reconstructed from a source
# whose indentation had been flattened — confirm it matches the intended layout.
with gr.Blocks(
    title='ShareCaptioner-Video',
    theme=gr.themes.Default(),
    css=block_css,
) as demo:
    gr.Markdown(title_markdown)

    # gr.State components; none of the handlers wired below use them.
    state = gr.State()
    state_ = gr.State()
    first_run = gr.State()

    # Capability overview and "coming soon" notice.
    with gr.Row():
        gr.Markdown(
            '### The ShareCaptioner-Video is a Four-in-One exceptional video captioning model with the following capabilities:\n1. Fast captioning, 2. Sliding Captioning, 3. Clip Summarizing, 4. Prompt Re-Captioning'
        )
    with gr.Row():
        gr.Markdown('(THE DEMO OF "Clip Summarizing" IS COMING SOON...)')

    with gr.Row():
        # Left column: inputs and action buttons.
        with gr.Column(scale=6):
            with gr.Row():
                video = gr.Video(label='Input Video')
            # Directory containing this script; not used elsewhere in the
            # code visible here.
            cur_dir = os.path.dirname(os.path.abspath(__file__))

            with gr.Row():
                textbox = gr.Textbox(
                    show_label=False,
                    placeholder='Input Text',
                    container=False,
                )

            with gr.Row():
                with gr.Column(scale=2, min_width=50):
                    submit_btn_sc = gr.Button(
                        value='Sliding Captioning',
                        variant='primary',
                        interactive=True,
                    )
                with gr.Column(scale=2, min_width=50):
                    submit_btn_fc = gr.Button(
                        value='Fast Captioning',
                        variant='primary',
                        interactive=True,
                    )
                with gr.Column(scale=2, min_width=50):
                    submit_btn_pr = gr.Button(
                        value='Prompt Re-captioning',
                        variant='primary',
                        interactive=True,
                    )

        # Right column: caption output.
        with gr.Column(scale=4, min_width=200):
            with gr.Row():
                textbox_out = gr.Textbox(
                    show_label=False,
                    placeholder='Output',
                    container=False,
                )

    gr.Markdown(learn_more_markdown)

    # Wire each button to its callback; every callback writes to the shared
    # output box. The two video modes read the video, the third the text box.
    submit_btn_sc.click(
        fn=generate_slidingcaptioning,
        inputs=[video],
        outputs=[textbox_out],
    )
    submit_btn_fc.click(
        fn=generate_fastcaptioning,
        inputs=[video],
        outputs=[textbox_out],
    )
    submit_btn_pr.click(
        fn=generate_promptrecaptioning,
        inputs=[textbox],
        outputs=[textbox_out],
    )

# Hugging Face launch: default settings.
# For a local launch, use instead:
#     demo.launch(server_name="0.0.0.0", server_port=28358, share=True)
demo.launch()