import streamlit as st

from llama_index import VectorStoreIndex, ServiceContext, LangchainEmbedding
from llama_index.llm_predictor import LLMPredictor
from llama_hub.youtube_transcript import YoutubeTranscriptReader

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import LlamaCpp


## For embedding the video transcript, we use a Hugging Face sentence-transformers model
model_name = "sentence-transformers/all-mpnet-base-v2"
hf = HuggingFaceEmbeddings(model_name=model_name)
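
## Optional sanity check: all-mpnet-base-v2 returns 768-dimensional vectors
# vec = hf.embed_query("hello world")
# assert len(vec) == 768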

### We use LlamaCpp to load the CodeUp Llama-2-13B chat model, 8-bit quantised in GGUF format
llm = LlamaCpp(
    model_path="codeup-llama-2-13b-chat-hf.Q8_0.gguf",
    n_gpu_layers=-1,        # offload all layers to the GPU
    n_batch=512,
    temperature=0.1,
    max_tokens=256,
    top_p=1,
    verbose=True,
    f16_kv=True,            # half-precision key/value cache
    n_ctx=4096,             # context window size
    use_mlock=True,         # keep the model resident in RAM
    n_threads=4,
    stop=["Human:", "User:"],
)
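
## Optional smoke test, assuming the GGUF file is in the working directory:
# print(llm("Q: In one sentence, what is a vector index?\nA:"))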

## Create a service context that wires in the Hugging Face embeddings and the Llama 2 model as our language model
llm_predictor = LLMPredictor(llm=llm)
embed_model = LangchainEmbedding(hf)
service_context = ServiceContext.from_defaults(embed_model=embed_model, llm_predictor=llm_predictor)




### load_data takes a YouTube URL and builds a vector index over the video transcript


def load_data(youtube_url):
    print("In Load Data")

    if youtube_url.strip() == "":
        st.error("Enter a YouTube URL")
        return None
    try:
        loader = YoutubeTranscriptReader()
        documents = loader.load_data(ytlinks=[youtube_url])
        index = VectorStoreIndex.from_documents(documents, service_context=service_context)
        return index
    except Exception:
        print("Enter a valid YouTube URL")
        st.error("Enter a valid YouTube URL")
        return None
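
## Note: YoutubeTranscriptReader pulls captions via the youtube-transcript-api package,
## so videos without transcripts raise an exception and land in the except branch above.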

#### The user enters the YouTube URL and presses Submit, which loads the index
index = None


chat_engine = None

### We initialise two session_state entries: clicked and index.
### clicked: set to True when the Submit button is clicked.
### index: stores the vector index. Keeping it in session state lets the index persist until a new YouTube URL is entered.

if 'clicked' not in st.session_state:
    st.session_state.clicked = False
if 'index' not in st.session_state:
    st.session_state.index = None

### click_button -> sets clicked to True when the Submit button is pressed
def click_button():
    st.session_state.clicked = True
with st.sidebar:
    st.title("YouTube QA with Llama 2 Bot")

    st.subheader("Upload Documents/URL")
    youtube_url = st.sidebar.text_input('Enter YouTube URL', '')
    submit_btn = st.sidebar.button('Submit', on_click=click_button)
    ## When the Submit button is clicked, load the data and store the index in session state
    if st.session_state.clicked:
        print("Going to Load Data")
        index = load_data(youtube_url)
        st.session_state.index = index
        print("Index ", index)

        st.session_state.clicked = False  # reset, so that load_data is not called for every user message
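
## Streamlit reruns this script from top to bottom on every interaction; keeping the
## index in st.session_state is what lets it survive those reruns.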



print("Index State ", st.session_state.index)
### If the index has been loaded, create the chat_engine object.
### chat_mode="context" retrieves the transcript chunks most relevant to each message
### and passes them to the LLM as context, rather than running an agent loop.
if st.session_state.index is not None:
    chat_engine = st.session_state.index.as_chat_engine(verbose=True, chat_mode="context", service_context=service_context)
    print("Chat engine", chat_engine)
if "messages" not in st.session_state.keys():
    st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]

for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])

def clear_chat_history():
    st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]
st.sidebar.button('Clear Chat History', on_click=clear_chat_history)

if prompt := st.chat_input():
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

# Generate a new response if the last message is not from the assistant
if st.session_state.messages[-1]["role"] != "assistant":
    full_response = ''
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            print("Calling Chat Engine")
            if chat_engine is not None:
                # Use the last stored user message: `prompt` is unbound on reruns where chat_input returned nothing
                response = chat_engine.stream_chat(st.session_state.messages[-1]["content"])
                placeholder = st.empty()
                for item in response.response_gen:
                    full_response += item
                    # strip("Assistant:") would trim any of those characters from both ends;
                    # removeprefix drops only the literal leading label
                    placeholder.markdown(full_response.removeprefix("Assistant:"))
                placeholder.markdown(full_response)
    if full_response != "":
        message = {"role": "assistant", "content": full_response}
        st.session_state.messages.append(message)
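
## To launch (assumes Streamlit is installed and the GGUF model file is in the working
## directory; <this_script> is a placeholder for the actual filename):
##   streamlit run <this_script>.py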