import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import torch


# input - video link, output - full transcript
def get_transcript(link):
    print("******** Inside get_transcript ********")
    print(f"link to be extracted is : {link}")
    # assumes a standard "watch?v=<id>" URL; drop any trailing parameters such as &t=425s
    video_id = link.split("=")[1].split("&")[0]
    print(f"video id extracted is : {video_id}")
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    final_transcript = ' '.join([i['text'] for i in transcript])
    return final_transcript, transcript, video_id


# input - question and transcript, output - answer timestamp
def get_answers_timestamp(question, final_transcript, transcript):
    print("******** Inside get_answers_timestamp ********")

    model_ckpt = "deepset/minilm-uncased-squad2"
    tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
    context = final_transcript
    print(f"Input question is : {question}")
    print(f"Type of transcript is : {type(context)}, length of transcript is : {len(context)}")

    # split the long transcript into overlapping 512-token windows
    # (truncation="only_second" is required for the stride/overflow windows to be produced)
    inputs = tokenizer(question, context,
                       return_overflowing_tokens=True,
                       truncation="only_second",
                       max_length=512,
                       stride=25)

    # collect the context portion (text after [SEP]) of every overflowed window
    contx = []
    for window in inputs["input_ids"]:
        contx.append(tokenizer.decode(window).split('[SEP]')[1].strip())

    model = AutoModelForQuestionAnswering.from_pretrained(model_ckpt)
    pipe = pipeline("question-answering", model=model, tokenizer=tokenizer)

    # run extractive QA on every window and keep all candidate answers
    lst = []
    for contexts in contx:
        lst.append(pipe(question=question, context=contexts))

    lst_scores = [dicts['score'] for dicts in lst]

    # indices of the highest and second-highest scoring windows
    idxmax = lst_scores.index(max(lst_scores))
    lst_scores.remove(max(lst_scores))
    idxmax2 = lst_scores.index(max(lst_scores))

    # character offset of the chosen answer inside its window
    # (the -135 shift is a heuristic carried over from the original code)
    idxcont = lst[idxmax2]['start']
    answer = final_transcript[len(contx[0]) - 135 + idxcont:]
    sentence_keyword = answer[:50]

    dftranscript = pd.DataFrame(transcript)

    # use sentence embeddings to find the transcript line closest to the answer snippet
    modelST = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    embedding_1 = modelST.encode(dftranscript.text.tolist(), convert_to_tensor=True)
    embedding_2 = modelST.encode(sentence_keyword, convert_to_tensor=True)

    similarity_tensor = util.pytorch_cos_sim(embedding_1, embedding_2)
    idx = torch.argmax(similarity_tensor)

    start_timestamp = dftranscript.iloc[[int(idx) + 1]].start.values[0]
    start_timestamp = round(start_timestamp)

    return start_timestamp


# input - video url and question, output - embedded video starting at the answer timestamp
def display_vid(url, question):
    print("******** display_vid ********")
    # example input - https://www.youtube.com/watch?v=smUHQndcmOY&t=425s

    # get transcript
    final_transcript, transcript, video_id = get_transcript(url)

    # get answer timestamp
    ans_timestamp = get_answers_timestamp(question, final_transcript, transcript)

    # build the YouTube embed, e.g. smUHQndcmOY?start=234
    # NOTE: the original iframe markup was stripped from the source; this is a reconstruction
    html_out = ("<iframe width='560' height='315' "
                f"src='https://www.youtube.com/embed/{video_id}?start={ans_timestamp}' "
                "title='YouTube video player' frameborder='0' allowfullscreen></iframe>")
    print(f"html output is : {html_out}")
    return html_out


def set_example_question(example):
    return gr.Radio.update(value=example[0])


demo = gr.Blocks()

with demo:
    # NOTE: the original Markdown text was stripped from the source; a short title stands in for it
    gr.Markdown("## Ask a question about a YouTube video and jump to the answer timestamp")
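    # NOTE: everything below this comment is a reconstruction -- the original UI definition is
    # truncated in the source. Component labels, the sample questions, and the layout are
    # assumptions; only the wiring to display_vid and set_example_question follows from the
    # functions defined above (set_example_question returns a Radio update and indexes into a
    # Dataset sample, so a gr.Dataset feeding a gr.Radio is assumed here).
    sample_questions = [["Any funny examples in the video?"],
                        ["What is the main topic of the video?"]]

    with gr.Row():
        input_url = gr.Textbox(label="YouTube video URL")
        input_question = gr.Radio(choices=[q[0] for q in sample_questions],
                                  label="Question about the video")
    example_set = gr.Dataset(components=[gr.Textbox(visible=False)],
                             samples=sample_questions)
    output_html = gr.HTML(label="Video cued to the answer timestamp")
    btn = gr.Button("Find the answer in the video")

    # clicking an example copies it into the question Radio; the button runs the full pipeline
    example_set.click(set_example_question, inputs=example_set, outputs=input_question)
    btn.click(display_vid, inputs=[input_url, input_question], outputs=output_html)

demo.launch(debug=True)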