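"""Video Summarizer - a Gradio app that pulls English subtitles from a video
URL (via the yt-dlp CLI) or accepts an uploaded video for Whisper
transcription, then summarizes the transcript in chunks with
Mixtral-8x7B-Instruct through the Hugging Face Inference API.

Assumed (not pinned here) dependencies: gradio, huggingface_hub, yt-dlp,
opencv-python, openai-whisper.
"""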
import gradio as gr
import requests
import os
from huggingface_hub import InferenceClient, HfApi
import random
import json
import datetime
import uuid
import yt_dlp
import cv2
import whisper

from agent import (
    PREFIX,
    COMPRESS_DATA_PROMPT,
    COMPRESS_DATA_PROMPT_SMALL,
    LOG_PROMPT,
    LOG_RESPONSE,
)
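# Mixtral-8x7B-Instruct served through the Hugging Face Inference API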
client = InferenceClient(
    "mistralai/Mixtral-8x7B-Instruct-v0.1"
)


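# Whisper model sizes and language names for the UI dropdowns; the "base"
# checkpoint is preloaded at startup.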
sizes = list(whisper._MODELS.keys())
langs = ["none"] + sorted(list(whisper.tokenizer.LANGUAGES.values()))
current_size = "base"
loaded_model = whisper.load_model(current_size)

VERBOSE = True
MAX_HISTORY = 100
MAX_DATA = 16000

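# dl() produces the transcript source: for a URL it grabs English subtitles
# with the yt-dlp CLI and strips SRT numbering/markup; for an uploaded file it
# returns the path unchanged so Whisper can transcribe it.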
def dl(inp, img):
    uid = uuid.uuid4()
    out = None
    if img is None and inp != "":
        try:
            # Build a filesystem-safe name from the URL
            inp_out = inp.replace("https://", "")
            inp_out = inp_out.replace("/", "_").replace(".", "_").replace("=", "_").replace("?", "_")
            # Alternative: download the full video instead of just subtitles
            #os.system(f'yt-dlp "{inp}" --trim-filenames 160 -o "{uid}/{inp_out}.mp4" -S res,mp4 --recode mp4')
            # Fetch only the English subtitles (manual or auto-generated) as SRT
            os.system(f'yt-dlp --skip-download --write-subs --write-auto-subs --sub-lang en --sub-format ttml --convert-subs srt "{inp}" -o "{uid}/{inp_out}"')
            line_fin = ""
            with open(f"{uid}/{inp_out}.en.srt") as f:
                for line in f:
                    # Keep only the text between inline markup tags
                    if "<" in line:
                        line_out = line.split(">", 1)[1].split("<", 1)[0]
                    else:
                        line_out = line
                    # Skip SRT cue numbers and timestamp lines
                    if line.strip("\n").isnumeric() or "-->" in line:
                        continue
                    line_fin += line_out
            out = line_fin
        except Exception as e:
            print(e)
            out = None
    elif img is not None and inp == "":
        # Probe the uploaded file with OpenCV (fps is read but currently unused)
        capture = cv2.VideoCapture(img)
        fps = capture.get(cv2.CAP_PROP_FPS)
        capture.release()
        out = img
    return out
  
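# Flatten Whisper segments into "start,end,text" CSV rows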
def csv(segments):
    output = ""
    for segment in segments:
        output += f"{segment['start']},{segment['end']},{segment['text']}\n"
    return output


def transcribe(path, lang, size):
    global loaded_model, current_size
    yield None, [("", "Transcribing Video...")]
    # Reload the Whisper model only when the requested size changes
    if size != current_size:
        loaded_model = whisper.load_model(size)
        current_size = size
    results = loaded_model.transcribe(path, language=None if lang == "none" else lang)
    yield csv(results["segments"]), [("", "Transcription Complete...")]


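# Mixtral instruct chat format: user turns are wrapped in [INST] ... [/INST]
# and assistant replies are closed with </s>. Kept for reference; run_gpt()
# below formats its prompt from the agent templates instead.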
def format_prompt(message, history):
    prompt = "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt



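# Compose PREFIX plus a task-specific template into one prompt and stream a
# completion from Mixtral, accumulating the tokens into a single string.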
def run_gpt(
    prompt_template,
    stop_tokens,
    max_tokens,
    seed,
    **prompt_kwargs,
):
    # Note: stop_tokens is accepted from callers but not forwarded to the endpoint
    print(seed)
    timestamp = datetime.datetime.now()

    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=max_tokens,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
    )

    content = PREFIX.format(
        timestamp=timestamp,
        purpose="Compile the provided data and complete the user's task"
    ) + prompt_template.format(**prompt_kwargs)
    if VERBOSE:
        print(LOG_PROMPT.format(content))

    # Stream the generation and accumulate tokens into a single response string
    stream = client.text_generation(content, **generate_kwargs, stream=True, details=True, return_full_text=False)
    resp = ""
    for response in stream:
        resp += response.token.text

    if VERBOSE:
        print(LOG_RESPONSE.format(resp))
    return resp

def compress_data(c, instruct, history, seed):
    # c is the approximate character count of the data; summarize it in
    # independent MAX_DATA-sized windows.
    print(f'c:: {c}')
    chunk = MAX_DATA
    divi = -(-int(c) // chunk)  # ceiling division: number of windows
    print(f'chunk:: {chunk}')
    print(f'divi:: {divi}')
    out = []
    s = 0
    e = chunk
    print(f'e:: {e}')
    for z in range(divi):
        print(f's:e :: {s}:{e}')
        hist = history[s:e]
        resp = run_gpt(
            COMPRESS_DATA_PROMPT_SMALL,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=16000,
            seed=seed,
            direction=instruct,
            knowledge="",
            history=hist,
        )
        out.append(resp)
        e = e + chunk
        s = s + chunk
    return out


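# Sequential variant of compress_data(): each window's summary is fed back in
# as "knowledge" for the next call, and windows overlap by 1000 characters so
# text split at a boundary is not lost. E.g. c=40000, MAX_DATA=16000 gives
# three windows: [0:16000], [15000:32000], [30000:48000].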
def compress_data_og(c, instruct, history, seed, MAX_DATA=MAX_DATA):
    print(c)
    chunk = MAX_DATA
    divi = -(-int(c) // chunk)  # ceiling division: number of windows
    print(f'chunk:: {chunk}')
    print(f'divi:: {divi}')
    s = 0
    e = chunk
    new_history = ""
    for z in range(divi):
        print(f's:e :: {s}:{e}')
        hist = history[s:e]
        resp = run_gpt(
            COMPRESS_DATA_PROMPT,
            stop_tokens=[],
            max_tokens=16000,
            seed=seed,
            direction=instruct,
            knowledge=new_history,
            history=hist,
        )
        # Carry the running summary forward as context for the next window
        new_history = resp
        print(resp)
        e = e + chunk
        s = s + chunk - 1000  # overlap windows by 1000 characters
    print("final " + resp)
    return resp



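# Gradio event handler: a generator that yields (prompt, chat history, error
# text, json) tuples so the UI can update while the summary is produced.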
def summarize(inp, history, mem_check, seed=None, data=None, MAX_DATA=MAX_DATA):
    # mem_check is wired from the UI checkbox but is not currently used
    if seed is None or seed == "":
        seed = random.randint(1, 1000000000)
    seed = int(seed)
    json_box = []
    error_box = ""
    json_out = {}
    rawp = "Error"
    if inp == "":
        inp = "Process this data"
    history = [(inp, "Summarizing Transcription...")]
    yield "", history, error_box, json_box

    if data != "Error" and data != "" and data is not None:
        print(inp)
        out = str(data)
        print(out)
        rl = len(out)
        print(f'rl:: {rl}')
        # Rough character count (spaces, commas, newlines, alphanumerics) used
        # to size the compression windows
        c = 1
        for i in out:
            if i == " " or i == "," or i == "\n" or i.isalpha() or i.isnumeric():
                c += 1
        print(f'c2:: {c}')
        rawp = compress_data_og(c, inp, out, seed, MAX_DATA)
        history.clear()
        history.append((inp, rawp))
        yield "", history, error_box, json_out
    else:
        rawp = "Provide a valid data source"
        history.clear()
        history.append((inp, rawp))
        yield "", history, error_box, json_out

#################################
def clear_fn():
    return "", [(None, None)]

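# UI: video/URL input and transcript on top, chatbot and summarization
# controls below.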
with gr.Blocks() as app:
    gr.HTML("""<center><h1>Video Summarizer</h1></center>""")
    with gr.Row():
        with gr.Column():
            with gr.Row():
                inp_url = gr.Textbox(label="Video URL")
                url_btn = gr.Button("Load Video")
            vid = gr.Video()
        trans = gr.Textbox(interactive=True)
    chatbot = gr.Chatbot(label="Mixtral 8x7B Chatbot", show_copy_button=True)

    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.Textbox(label="Instructions (optional)")
        with gr.Column(scale=1):
            mem_check = gr.Checkbox(label="Memory", value=False)
            button = gr.Button()
    with gr.Row():
        out_slider = gr.Slider(label="Chunk Size (characters)", minimum=1000, maximum=100000, step=1, value=MAX_DATA)
    with gr.Row():
        stop_button = gr.Button("Stop")
        clear_btn = gr.Button("Clear")
    with gr.Row():
        sz = gr.Dropdown(label="Model Size", choices=sizes, value="base")
        lang = gr.Dropdown(label="Language (Optional)", choices=langs, value="english")  # Whisper language names are lowercase
    json_out = gr.JSON()
    e_box = gr.Textbox()
    # Unlabeled placeholder textbox wired into summarize() as the optional seed
    blank_text = gr.Textbox()

    url_btn.click(dl, [inp_url, vid], trans)
    clear_btn.click(clear_fn, None, [prompt, chatbot])
    # Alternative flow: transcribe with Whisper first, then summarize
    #go = button.click(transcribe, [vid, lang, sz], [trans, chatbot]).then(summarize, [prompt, chatbot, mem_check, trans], [prompt, chatbot, e_box, json_out])
    go = button.click(summarize, [prompt, chatbot, mem_check, blank_text, trans, out_slider], [prompt, chatbot, e_box, json_out])
    stop_button.click(None, None, None, cancels=[go])

app.queue(default_concurrency_limit=20).launch(show_api=True, show_error=True)