import streamlit as st
from llama_index import VectorStoreIndex, ServiceContext, Document
from llama_index.llms import OpenAI
import openai
from llama_hub.youtube_transcript import YoutubeTranscriptReader
from llama_index import VectorStoreIndex
from langchain.embeddings import HuggingFaceEmbeddings
from llama_index import LangchainEmbedding, ServiceContext
from llama_index.llm_predictor import LLMPredictor
from langchain.llms import LlamaCpp

## For embedding the video transcript we use a Hugging Face Sentence Transformers model.
model_name = "sentence-transformers/all-mpnet-base-v2"
hf = HuggingFaceEmbeddings(model_name=model_name)

### Load the Llama-2-13B chat model (8-bit quantised, GGUF format) through LlamaCpp.
llm = LlamaCpp(
    model_path="codeup-llama-2-13b-chat-hf.Q8_0.gguf",
    n_gpu_layers=-1,      # offload all layers to GPU when available
    n_batch=512,
    temperature=0.1,
    max_tokens=256,
    top_p=1,
    verbose=True,
    f16_kv=True,
    n_ctx=4096,
    use_mlock=True,
    n_threads=4,
    stop=["Human:", "User:"],
)

## Service context wires the Hugging Face embeddings and the Llama 2 model
## into llama_index so both are used when building/querying the index.
llm_predictor = LLMPredictor(llm=llm)
embed_model = LangchainEmbedding(hf)
service_context = ServiceContext.from_defaults(
    embed_model=embed_model, llm_predictor=llm_predictor
)

index = None


def load_data(youtube_url):
    """Fetch the transcript of *youtube_url* and build a vector index over it.

    Parameters
    ----------
    youtube_url : str
        The URL entered by the user; may be empty or invalid.

    Returns
    -------
    VectorStoreIndex or None
        The index built from the video transcript, or ``None`` (with a
        Streamlit error message shown) when the URL is empty or the
        transcript cannot be loaded.
    """
    print("In Load Data")
    if youtube_url.strip() == "":
        st.error("Enter A youtube URL")
        return None
    try:
        loader = YoutubeTranscriptReader()
        documents = loader.load_data(ytlinks=[youtube_url])
        return VectorStoreIndex.from_documents(
            documents, service_context=service_context
        )
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are not swallowed.
    except Exception:
        print("Enter a valid youtube URL")
        st.error("Enter a valid youtube URL")
        return None


#### The user enters the youtube_url and presses Submit => which loads the index.
index = None
chat_engine = None
### We initialise two session_state objects: clicked and index.
### Clicked: set to True when the Submit button is clicked.
### Index: stores the vector index. Keeping it in session state makes the
### index persistent until a new youtube url is entered.
if 'clicked' not in st.session_state:
    st.session_state.clicked = False
if 'index' not in st.session_state:
    st.session_state.index = None


### click_button -> changes state to True when the button is clicked.
def click_button():
    """Streamlit callback: record that Submit was pressed."""
    st.session_state.clicked = True


with st.sidebar:
    st.title("Youtube QA with Llama 2 Bot")
    st.subheader("Upload Documents/URL")
    youtube_url = st.sidebar.text_input('Enter Youtube URL', '')
    submit_btn = st.sidebar.button('Submit', on_click=click_button)

## When the submit button is clicked, load the data and cache the index in session state.
if st.session_state.clicked:
    print("Going to Load Data")
    index = load_data(youtube_url)
    st.session_state.index = index
    print("Index ", index)
    # Reset the flag so load_data is not called again for every user message.
    st.session_state.clicked = False

print("Index State ", st.session_state.index)

### If the index has been loaded, create the chat_engine object.
# `is not None` instead of `!= None`: identity check is the correct idiom.
if st.session_state.index is not None:
    chat_engine = st.session_state.index.as_chat_engine(
        verbose=True, chat_mode="context", service_context=service_context
    )
    print("CHat engine", chat_engine)

if "messages" not in st.session_state.keys():
    st.session_state.messages = [
        {"role": "assistant", "content": "How may I assist you today?"}
    ]

# Replay the conversation so far.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])


def clear_chat_history():
    """Reset the conversation to the initial greeting."""
    st.session_state.messages = [
        {"role": "assistant", "content": "How may I assist you today?"}
    ]


st.sidebar.button('Clear Chat History', on_click=clear_chat_history)

if prompt := st.chat_input():
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

# Generate a new response if last message is not from assistant
if st.session_state.messages[-1]["role"] != "assistant":
    full_response = ''
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            print("Calling CHat Engine")
            if chat_engine is not None:
                response = chat_engine.stream_chat(prompt)
                placeholder = st.empty()
                for item in response.response_gen:
                    full_response += item
                    # Show the partial answer without a leading "Assistant:" label.
                    # NOTE: the original used str.strip("Assistant:"), which strips
                    # *characters from that set* off both ends (it could eat real
                    # text); an explicit prefix check removes only the label.
                    display = full_response
                    if display.startswith("Assistant:"):
                        display = display[len("Assistant:"):]
                    placeholder.markdown(display)
                placeholder.markdown(full_response)
    if full_response != "":
        message = {"role": "assistant", "content": full_response}
        st.session_state.messages.append(message)