krishanusinha20 committed
Commit d535483 · verified · 1 Parent(s): f38b3c4

Create app.py

Files changed (1)
  1. app.py +127 -0
app.py ADDED
@@ -0,0 +1,127 @@
+
+ import os
+ from dotenv import load_dotenv
+ load_dotenv()  # pull API keys in from a local .env instead of hardcoding them
+
+ import chainlit as cl
+
+ from langchain.chat_models import ChatOpenAI
+ from langchain.prompts import ChatPromptTemplate
+ from langchain.schema.runnable import RunnablePassthrough
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.embeddings import CacheBackedEmbeddings
+ from langchain.storage import LocalFileStore
+ from langchain.vectorstores import Pinecone
+
+ from operator import itemgetter
+ import pinecone
+
+ # =============================================================================
+ # Retrieval Chain
+ # =============================================================================
+ def load_llm():
+     # Credentials come from the environment; secrets must never be
+     # committed in source.
+     llm = ChatOpenAI(
+         model='gpt-3.5-turbo',
+         temperature=0.0,
+         openai_api_key=os.environ["OPENAI_API_KEY"]
+     )
+     return llm
+
+
+ def load_vectorstore():
+
+     pinecone.init(
+         api_key=os.environ["PINECONE_API_KEY"],
+         environment='gcp-starter'
+     )
+
+     index = pinecone.Index("youtube-index")
+     store = LocalFileStore("./cache/")
+     core_embeddings_model = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"])
+
+     # Cache embeddings on local disk so repeated questions are not re-embedded.
+     embedder = CacheBackedEmbeddings.from_bytes_store(
+         core_embeddings_model,
+         store,
+         namespace=core_embeddings_model.model
+     )
+
+     text_field = "text"
+
+     vectorstore = Pinecone(
+         index,
+         embedder,
+         text_field
+     )
+
+     return vectorstore
+
+
+ def qa_chain():
+
+     vectorstore = load_vectorstore()
+     llm = load_llm()
+     retriever = vectorstore.as_retriever()
+
+     template = """You are a helpful assistant that answers questions based on the provided context. If the question is not answered within the context, respond with "This query is not directly mentioned by David Bradford in his videos", then answer to the best of your ability.
+ Additionally, the context includes a specific integer formatted as <int>, representing a timestamp.
+ In your response, include this integer as a citation, formatted as a YouTube video link: "https://www.youtube.com/watch?v=[video_id]&t=<int>s", and make the hyperlink text the title of the video.
+
+ ### CONTEXT
+ {context}
+
+ ### QUESTION
+ {question}
+ """
+
+     prompt = ChatPromptTemplate.from_template(template)
+
+     # LCEL pipeline: retrieve context for the question, carry both through,
+     # then return the LLM response alongside the retrieved documents.
+     retrieval_augmented_qa_chain = (
+         {"context": itemgetter("question") | retriever,
+          "question": itemgetter("question")
+          }
+         | RunnablePassthrough.assign(
+             context=itemgetter("context")
+         )
+         | {
+             "response": prompt | llm,
+             "context": itemgetter("context"),
+         }
+     )
+
+     return retrieval_augmented_qa_chain
+
+ # =============================================================================
+ # Chainlit
+ # =============================================================================
+ @cl.on_chat_start
+ async def on_chat_start():
+     chain = qa_chain()
+     cl.user_session.set("chain", chain)
+     msg = cl.Message(content="What is your question for David Bradford?")
+     await msg.send()
+
+ @cl.on_message
+ async def on_message(message: cl.Message):
+     chain = cl.user_session.get("chain")
+     res = chain.invoke({"question": message.content})
+
+     answer = res['response'].content
+     await cl.Message(content=answer).send()
+
+     # Use to show all source documents used
+     '''
+     source_documents = set()
+
+     for document in res['context']:
+         source_url = document.metadata['source_document']
+         source_documents.add(source_url)
+
+     combined_message = answer + "\n\nSource Documents:\n" + "\n".join(source_documents)
+
+     await cl.Message(content=combined_message).send()
+     '''
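
Usage note: app.py reads its keys from a local .env via load_dotenv(). A minimal sketch of that file, using the OPENAI_API_KEY and PINECONE_API_KEY variable names assumed in the code above (the original commit does not pin these down):

OPENAI_API_KEY=sk-...
PINECONE_API_KEY=...

With the .env in place and the "youtube-index" Pinecone index already populated, the app starts with Chainlit's CLI:

chainlit run app.py

For a quick check outside Chainlit, the chain can also be invoked directly; it returns a dict with the model reply under "response" and the retrieved documents under "context":

res = qa_chain().invoke({"question": "What does David Bradford say about hiring?"})
print(res["response"].content)  # answer text, including the timestamped YouTube citation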