|
import gradio as gr |
|
import requests |
|
import os |
|
from huggingface_hub import InferenceClient,HfApi |
|
import random |
|
import json |
|
import datetime |
|
import uuid |
|
import yt_dlp |
|
import cv2 |
|
import whisper |
|
|
|
from agent import ( |
|
PREFIX, |
|
COMPRESS_DATA_PROMPT, |
|
COMPRESS_DATA_PROMPT_SMALL, |
|
LOG_PROMPT, |
|
LOG_RESPONSE, |
|
) |
|
# Shared Hugging Face inference client; run_gpt streams all completions through it.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
|
|
|
|
|
|
|
|
|
|
# Choices offered by the "Model Size" dropdown: every model name Whisper knows.
sizes = list(whisper._MODELS)
# Choices offered by the language dropdown: "none" first, then Whisper's
# language names in alphabetical order.
langs = ["none", *sorted(whisper.tokenizer.LANGUAGES.values())]
# Model loaded once at import time.
current_size = "base"
loaded_model = whisper.load_model(current_size)
|
|
|
# When true, run_gpt prints the rendered prompt and the model's full reply.
VERBOSE = True
# NOTE(review): not referenced anywhere in the visible part of this file.
MAX_HISTORY = 100
# Default size of each slice of input handed to the model; also the default
# position of the size slider in the UI.
MAX_DATA = 16_000
|
|
|
def dl(inp, img):
    """Return subtitle text for a video URL, or the path of an uploaded video.

    Two mutually exclusive modes are supported:

    * ``img`` is ``None`` and ``inp`` is not ``""``: ``yt-dlp`` is run with
      ``--skip-download`` to fetch English (auto-)subtitles converted to
      ``.srt`` into a directory named after a fresh UUID. The subtitle file
      is then flattened: lines made up only of digits (cue indices) are
      dropped, and lines containing ``<`` are reduced to the text between the
      first ``>`` and the next ``<``.
    * ``img`` is set and ``inp`` is ``""``: the video is opened once with
      OpenCV and ``str(img)`` is returned.

    Args:
        inp: Video URL typed by the user (``""`` when unused).
        img: Path of an uploaded video, or ``None``.

    Returns:
        The subtitle text or video path. ``None`` when neither mode applies
        or when fetching/parsing the subtitles fails (the error is printed).
    """
    # Local import keeps this block self-contained.
    import subprocess

    out = None
    if img is None and inp != "":
        try:
            uid = uuid.uuid4()
            # Build a filesystem-friendly name out of the URL.
            inp_out = inp.replace("https://", "").translate(
                str.maketrans("/.=?", "____")
            )
            # The URL is user input: pass it as a single argv element (no
            # shell) and after "--" so it can be neither interpreted by a
            # shell nor parsed as a yt-dlp option.
            subprocess.run(
                [
                    "yt-dlp",
                    "--skip-download",
                    "--write-subs",
                    "--write-auto-subs",
                    "--sub-lang", "en",
                    "--sub-format", "ttml",
                    "--convert-subs", "srt",
                    "-o", f"{uid}/{inp_out}",
                    "--",
                    inp,
                ],
                check=False,  # a failed download surfaces below as a missing file
            )
            kept = []
            with open(f"{uid}/{inp_out}.en.srt") as srt_file:
                for line in srt_file:
                    if "<" in line:
                        text = line.split(">", 1)[1].split("<", 1)[0]
                    else:
                        text = line
                    if not line.strip("\n").isnumeric():
                        kept.append(text)
            out = "".join(kept)
        except Exception as e:
            # Best effort: any failure (yt-dlp missing, no subtitles,
            # malformed markup) is reported on stdout and yields None.
            print(e)
            out = None
    elif img is not None and inp == "":
        capture = cv2.VideoCapture(img)
        # NOTE(review): the frame rate is read but never used or returned.
        fps = capture.get(cv2.CAP_PROP_FPS)
        capture.release()
        out = f"{img}"
    return out
|
|
|
def csv(segments):
    """Render transcription segments as ``start,end,text`` lines.

    Args:
        segments: Iterable of mappings with ``'start'``, ``'end'`` and
            ``'text'`` keys.

    Returns:
        One newline-terminated line per segment, concatenated; ``""`` for no
        segments. Values are interpolated as-is (no quoting or escaping).
    """
    rows = [
        f"{seg['start']},{seg['end']},{seg['text']}\n"
        for seg in segments
    ]
    return "".join(rows)
|
def transcribe(path, lang, size):
    """Transcribe a media file with Whisper, yielding progress for the UI.

    Args:
        path: Path of the media file to transcribe.
        lang: Language name as offered by ``langs``; ``"none"``, ``""`` or
            ``None`` mean "no language selected".
        size: Whisper model name to load (one of ``sizes``).

    Yields:
        ``(None, [("", "Transcribing Video...")])`` immediately, then
        ``(csv_text, [("", "Transcription Complete...")])`` where
        ``csv_text`` is the segment list rendered by ``csv``.
    """
    yield (None, [("", "Transcribing Video...")])

    # "none" is the UI placeholder for "no language chosen"; it is not a
    # language Whisper knows, so hand it over as None instead.
    language = None if lang in (None, "", "none") else lang

    model = whisper.load_model(size)
    results = model.transcribe(path, language=language)
    yield csv(results["segments"]), [("", "Transcription Complete...")]
|
|
|
|
|
def format_prompt(message, history):
    """Build an ``[INST]``-style prompt from prior turns plus a new message.

    Args:
        message: The new user message.
        history: Iterable of ``(user_prompt, bot_response)`` pairs.

    Returns:
        ``"<s>"`` followed by one ``[INST] ... [/INST] ...</s> `` section per
        past turn and a final ``[INST] message [/INST]``.
    """
    past_turns = "".join(
        f"[INST] {asked} [/INST] {answered}</s> "
        for asked, answered in history
    )
    return "<s>" + past_turns + f"[INST] {message} [/INST]"
|
|
|
|
|
|
|
def run_gpt(
    prompt_template,
    stop_tokens,
    max_tokens,
    seed,
    **prompt_kwargs,
):
    """Render a prompt, stream a completion from ``client`` and return it.

    The prompt is ``PREFIX`` (formatted with the current timestamp and a
    fixed purpose string) followed by ``prompt_template`` formatted with
    ``prompt_kwargs``.

    Args:
        prompt_template: Format string for the task-specific prompt part.
        stop_tokens: Accepted for interface compatibility; not forwarded to
            the client by this function.
        max_tokens: Passed to the client as ``max_new_tokens``.
        seed: Sampling seed; also printed.
        **prompt_kwargs: Values substituted into ``prompt_template``.

    Returns:
        The concatenated text of every streamed token.
    """
    print(seed)
    now = datetime.datetime.now()

    sampling = {
        "temperature": 0.9,
        "max_new_tokens": max_tokens,
        "top_p": 0.95,
        "repetition_penalty": 1.0,
        "do_sample": True,
        "seed": seed,
    }

    header = PREFIX.format(
        timestamp=now,
        purpose="Compile the provided data and complete the users task",
    )
    content = header + prompt_template.format(**prompt_kwargs)
    if VERBOSE:
        print(LOG_PROMPT.format(content))

    stream = client.text_generation(
        content,
        stream=True,
        details=True,
        return_full_text=False,
        **sampling,
    )
    # Each streamed item carries one generated token; stitch them together.
    resp = "".join(piece.token.text for piece in stream)

    if VERBOSE:
        print(LOG_RESPONSE.format(resp))
    return resp
|
|
|
|
|
def compress_data(c, instruct, history, seed):
    """Summarize ``history`` in independent, non-overlapping slices.

    ``history`` is cut into consecutive slices of ``MAX_DATA`` items and each
    slice is sent to ``run_gpt`` with ``COMPRESS_DATA_PROMPT_SMALL``.

    Args:
        c: Caller's estimate of the data size. The number of slices is based
            on the larger of this value and ``len(history)``, so an estimate
            that is too small can no longer cause the tail to be skipped.
        instruct: Instruction text passed to the prompt as ``direction``.
        history: Sliceable data (e.g. a string) to summarize.
        seed: Sampling seed forwarded to ``run_gpt``.

    Returns:
        List with one model response per slice; ``[]`` when there is nothing
        to process (previously a ``ZeroDivisionError`` for ``c == 0``).
    """
    print (f'c:: {c}')

    total = max(int(c), len(history))
    if total <= 0:
        return []

    chunk = max(int(MAX_DATA), 1)
    divr = total / chunk
    # Ceiling division: number of slices needed to cover `total` items.
    divi = (total + chunk - 1) // chunk
    print(f'chunk:: {chunk}')
    print(f'divr:: {divr}')
    print (f'divi:: {divi}')

    out = []
    s = 0
    e = chunk
    print(f'e:: {e}')

    for _ in range(divi):
        print(f's:e :: {s}:{e}')
        resp = run_gpt(
            COMPRESS_DATA_PROMPT_SMALL,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=16000,
            seed=seed,
            direction=instruct,
            knowledge="",
            history=history[s:e],
        )
        out.append(resp)
        # Advance to the next adjacent window.
        s, e = e, e + chunk
    return out
|
|
|
|
|
def compress_data_og(c, instruct, history, seed, MAX_DATA=MAX_DATA):
    """Summarize ``history`` progressively, carrying the summary forward.

    ``history`` is processed in windows of ``MAX_DATA`` items. Each window is
    sent to ``run_gpt`` with ``COMPRESS_DATA_PROMPT`` together with the
    previous response (as ``knowledge``), and the last response is returned.
    Every window after the first starts 1000 items before the end of the
    previous one so context is not cut exactly at a window boundary.

    Args:
        c: Caller's estimate of the data size. The number of windows is based
            on the larger of this value and ``len(history)``, so an estimate
            that is too small can no longer cause the tail to be skipped.
        instruct: Instruction text passed to the prompt as ``direction``.
        history: Sliceable data (e.g. a string) to summarize.
        seed: Sampling seed forwarded to ``run_gpt``.
        MAX_DATA: Window size.

    Returns:
        The model's final response; ``""`` when there is nothing to process
        (previously a ``ZeroDivisionError`` for ``c == 0``).
    """
    print (c)

    total = max(int(c), len(history))
    if total <= 0:
        return ""

    chunk = max(int(MAX_DATA), 1)
    divr = total / chunk
    # Ceiling division: number of windows needed to cover `total` items.
    divi = (total + chunk - 1) // chunk
    print(f'chunk:: {chunk}')
    print(f'divr:: {divr}')
    print (f'divi:: {divi}')

    overlap = 1000
    s = 0
    e = chunk
    new_history = ""
    resp = ""

    for _ in range(divi):
        print(f's:e :: {s}:{e}')
        resp = run_gpt(
            COMPRESS_DATA_PROMPT,
            stop_tokens=[],
            max_tokens=16000,
            seed=seed,
            direction=instruct,
            knowledge=new_history,
            history=history[s:e],
        )
        new_history = resp
        print (resp)

        # Start the next window a fixed `overlap` before the end of this one.
        # The previous `s + chunk - 1000` let the overlap grow by 1000 on
        # every pass, never advanced when chunk == 1000, and went negative
        # (slicing from the end) for smaller chunks.
        s = max(e - overlap, 0)
        e = e + chunk

    print ("final" + resp)
    return resp
|
|
|
|
|
|
|
def summarize(inp, history, mem_check, seed=None, data=None, MAX_DATA=MAX_DATA):
    """Summarize ``data`` according to ``inp`` and stream UI updates.

    Args:
        inp: User instructions; ``""`` is replaced by "Process this data".
        history: Current chat history; it is cleared (when not ``None``) and
            replaced, never extended.
        mem_check: Value of the "Memory" checkbox; currently unused.
        seed: Sampling seed; a random one is drawn when ``None`` or ``""``.
        data: Text to summarize. ``None``, ``""`` and ``"Error"`` are treated
            as "no data".
        MAX_DATA: Window size forwarded to ``compress_data_og``.

    Yields:
        4-tuples ``(prompt_text, chat_history, error_text, json_value)``:
        first a "Summarizing Transcription..." placeholder, then the final
        result (or "Provide a valid data source").

    Raises:
        ValueError: If ``seed`` is a non-empty value ``int()`` cannot parse.
    """
    if seed is None or seed == "":
        seed = random.randint(1, 1000000000)
    seed = int(seed)
    json_box = []
    error_box = ""
    json_out = {}
    if inp == "":
        inp = "Process this data"

    # The incoming history may be None; only clear it when there is a list.
    if history is not None:
        history.clear()
    history = [(inp, "Summarizing Transcription...")]
    yield "", history, error_box, json_box

    if data != "Error" and data != "" and data is not None:
        print(inp)
        out = str(data)
        print (out)
        rl = len(out)
        print(f'rl:: {rl}')
        # compress_data_og slices `out` by index, so it must be told the real
        # length. The earlier count skipped punctuation and could therefore
        # stop short of the end of the text.
        c = rl
        print (f'c2:: {c}')
        rawp = compress_data_og(c, inp, out, seed, MAX_DATA)
    else:
        rawp = "Provide a valid data source"

    # Fresh list so the value already yielded above is not mutated afterwards.
    history = [(inp, rawp)]
    yield "", history, error_box, json_out
|
|
|
|
|
|
|
def clear_fn():
    """Return the values that reset the prompt box and the chatbot.

    Returns:
        ``("", [(None, None)])`` — an empty prompt and a chat history holding
        a single empty turn.
    """
    empty_prompt = ""
    blank_chat = [(None, None)]
    return empty_prompt, blank_chat
|
|
|
# Gradio UI: load a video (URL or upload), show its transcript, and summarize
# the transcript with the Mixtral client.
# NOTE(review): the container nesting below is a best-effort reading of the
# layout; confirm against the rendered page.
with gr.Blocks() as app:
    gr.HTML("""<center><h1>Video Summarizer</h1>""")
    with gr.Row():
        with gr.Column():
            with gr.Row():
                inp_url = gr.Textbox(label="Video URL")
                url_btn = gr.Button("Load Video")
            vid = gr.Video()
            # Receives the output of dl(); editable before summarizing.
            trans = gr.Textbox(interactive=True)
    chatbot = gr.Chatbot(label="Mixtral 8x7B Chatbot", show_copy_button=True)

    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.Textbox(label="Instructions (optional)")
        with gr.Column(scale=1):
            mem_check = gr.Checkbox(label="Memory", value=False)
            button = gr.Button()
    with gr.Row():
        # Passed to summarize() as its MAX_DATA argument.
        out_slider = gr.Slider(minimum=1000, maximum=100000, step=1, value=MAX_DATA)

    with gr.Row():
        stop_button = gr.Button("Stop")
        clear_btn = gr.Button("Clear")
    with gr.Row():
        sz = gr.Dropdown(label="Model Size", choices=sizes, value='base')
        # Whisper's language names are lowercase, so the default must be
        # "english" to be one of the offered choices.
        lang = gr.Dropdown(label="Language (Optional)", choices=langs, value="english")
    json_out = gr.JSON()
    e_box = gr.Textbox()

    # Supplies summarize()'s `seed` argument; left empty -> random seed.
    blank_text = gr.Textbox()

    url_btn.click(dl, [inp_url, vid], trans)

    clear_btn.click(clear_fn, None, [prompt, chatbot])

    go = button.click(
        summarize,
        [prompt, chatbot, mem_check, blank_text, trans, out_slider],
        [prompt, chatbot, e_box, json_out],
    )
    # "Stop" cancels a running summarize event.
    stop_button.click(None, None, None, cancels=[go])
|
# Turn on the request queue, then start the server with the API page and
# in-browser error reporting enabled.
app.queue(default_concurrency_limit=20)
app.launch(show_api=True, show_error=True)