cmagganas committed on
Commit
870a04d
1 Parent(s): caddd8c

Create rag.py

Browse files
Files changed (1) hide show
  1. rag.py +44 -0
rag.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Minimal retrieval-augmented generation (RAG) demo over a single PDF.

Running this module:

1. reads the OpenAI API key from the ``OPENAI_API_KEY`` environment variable,
2. loads the PDF at ``PDF_PATH`` and splits it into overlapping text chunks,
3. embeds the chunks into a Chroma vector store and exposes it as a retriever,
4. pulls the ``rlm/rag-prompt`` template from the LangChain Hub,
5. wires retriever -> prompt -> chat model -> string parser into ``rag_chain``,
6. streams an answer to ``QUESTION`` to stdout, then runs the chain once more
   with ``invoke`` and keeps that result in ``answer``.
"""

import os

import openai
from langchain import hub
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

PDF_PATH = "data/The Goal - A Process of Ongoing Improvement (Third Revised Edition).pdf"
QUESTION = "What is a Bottleneck Constraint?"

# OpenAI API Key Setup
# Raises KeyError immediately if the variable is not set, before any work is done.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Load The Goal PDF
# NOTE: UnstructuredPDFLoader also accepts mode="elements"; the default mode is used here.
loader = UnstructuredPDFLoader(PDF_PATH)
docs = loader.load()

# Split Text Chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Embed Chunks into Chroma Vector Store
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever()

# Use RAG Prompt Template
prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI(model_name="gpt-4-1106-preview", temperature=0)  # or gpt-3.5-turbo


def format_docs(docs) -> str:
    """Join the ``page_content`` of each document in *docs* with blank lines.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string with the page contents separated by two newlines;
        the empty string when *docs* is empty.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The dict fans the incoming question out to two branches: the retriever
# (whose documents are flattened to text by format_docs) fills "context",
# while RunnablePassthrough forwards the question unchanged.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Stream the answer piece by piece as the model produces it.
for chunk in rag_chain.stream(QUESTION):
    print(chunk, end="", flush=True)

# Second, non-streaming run; the result is kept in `answer` instead of being
# discarded so it can be inspected after the script (or an import) completes.
answer = rag_chain.invoke(QUESTION)