import streamlit as st
from llama_index import VectorStoreIndex, ServiceContext, LangchainEmbedding
from llama_index.llm_predictor import LLMPredictor
from llama_hub.youtube_transcript import YoutubeTranscriptReader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import LlamaCpp
## For embedding the video transcript, we use the Hugging Face sentence-transformers model all-mpnet-base-v2
model_name = "sentence-transformers/all-mpnet-base-v2"
hf = HuggingFaceEmbeddings(model_name=model_name)
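# Optional sanity check (a sketch, not part of the original app; uncomment to run).
# HuggingFaceEmbeddings exposes embed_query, and all-mpnet-base-v2 produces
# 768-dimensional vectors:
# vec = hf.embed_query("test sentence")
# print(len(vec))  # expected: 768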
### We use LlamaCpp to load the CodeUp Llama-2-13B chat model, 8-bit quantised, in GGUF format
llm = LlamaCpp(
    model_path="codeup-llama-2-13b-chat-hf.Q8_0.gguf",
    n_gpu_layers=-1,  # offload all layers to the GPU if one is available
    n_batch=512,
    temperature=0.1,
    max_tokens=256,
    top_p=1,
    verbose=True,
    f16_kv=True,
    n_ctx=4096,  # context window size
    use_mlock=True,
    n_threads=4,
    stop=["Human:", "User:"],
)
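# Optional sanity check (a sketch, not from the original app; the prompt text is
# illustrative). A LangChain LLM can be called directly with a string, which is a
# quick way to confirm the GGUF file loads before wiring it into llama_index:
# print(llm("Q: What is retrieval-augmented generation? A:"))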
## Create a service context that wires in the Hugging Face embeddings and the Llama 2 model as our embedding model and LLM
llm_predictor = LLMPredictor(llm=llm)
embed_model = LangchainEmbedding(hf)
service_context = ServiceContext.from_defaults(embed_model=embed_model, llm_predictor=llm_predictor)
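# Note: service_context is later passed to VectorStoreIndex.from_documents and
# as_chat_engine, so both indexing (embeddings) and answering (Llama 2) use the
# components configured above rather than llama_index's OpenAI defaults.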
### The load_data function takes a YouTube URL and builds a vector index over the video's transcript
def load_data(youtube_url):
    print("In Load Data")
    if youtube_url.strip() == "":
        st.error("Enter a YouTube URL")
        return None
    try:
        loader = YoutubeTranscriptReader()
        documents = loader.load_data(ytlinks=[youtube_url])
        index = VectorStoreIndex.from_documents(documents, service_context=service_context)
        return index
    except Exception:
        # catch Exception rather than a bare except, which would also swallow KeyboardInterrupt
        print("Enter a valid YouTube URL")
        st.error("Enter a valid YouTube URL")
        return None
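# Standalone usage sketch (hypothetical URL; not part of the Streamlit flow). A
# one-shot query engine is an alternative to the chat engine created below:
# idx = load_data("https://www.youtube.com/watch?v=<video_id>")
# if idx is not None:
#     query_engine = idx.as_query_engine(service_context=service_context)
#     print(query_engine.query("What is this video about?"))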
#### The user enters a YouTube URL and presses Submit, which loads the index
index = None
chat_engine = None
### We initialise two session_state objects: clicked and index.
### clicked: set to True when the Submit button is clicked.
### index: stores the vector index. Keeping it in session state lets the index persist until a new YouTube URL is entered.
if 'clicked' not in st.session_state:
    st.session_state.clicked = False
if 'index' not in st.session_state:
    st.session_state.index = None
### click_button -> sets clicked to True when the Submit button is pressed
def click_button():
    st.session_state.clicked = True
with st.sidebar:
    st.title("YouTube QA with Llama 2 Bot")
    st.subheader("Enter a YouTube URL")
    youtube_url = st.text_input('Enter Youtube URL', '')
    submit_btn = st.button('Submit', on_click=click_button)
## When Submit is clicked, load the data and store the resulting index in session state
if st.session_state.clicked:
    print("Going to Load Data")
    index = load_data(youtube_url)
    st.session_state.index = index
    print("Index ", index)
    # reset clicked so load_data is not called again for every subsequent user message
    st.session_state.clicked = False
    print("Index State ", st.session_state.index)
### If the index has been loaded, create the chat_engine object
if st.session_state.index is not None:
    chat_engine = st.session_state.index.as_chat_engine(verbose=True, chat_mode="context", service_context=service_context)
    print("Chat engine", chat_engine)
if "messages" not in st.session_state.keys():
st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]
for message in st.session_state.messages:
with st.chat_message(message["role"]):
st.write(message["content"])
def clear_chat_history():
    st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]
st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
if prompt := st.chat_input():
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)
# Generate a new response if the last message is not from the assistant
if st.session_state.messages[-1]["role"] != "assistant":
    full_response = ''
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            print("Calling Chat Engine")
            if chat_engine is not None:
                response = chat_engine.stream_chat(prompt)
                placeholder = st.empty()
                for item in response.response_gen:
                    full_response += item
                    # removeprefix drops a leading "Assistant:" label; the original
                    # str.strip("Assistant:") stripped those *characters* from both ends
                    placeholder.markdown(full_response.removeprefix("Assistant:"))
                placeholder.markdown(full_response)
    if full_response != "":
        message = {"role": "assistant", "content": full_response}
        st.session_state.messages.append(message)