cmagganas committed on
Commit
18b3349
1 Parent(s): b0da961

Delete rag.py

Browse files
Files changed (1) hide show
  1. rag.py +0 -44
rag.py DELETED
@@ -1,44 +0,0 @@
1
- import os
2
- import openai
3
- from langchain.chat_models import ChatOpenAI
4
- from langchain.embeddings.openai import OpenAIEmbeddings
5
- from langchain.vectorstores import Chroma
6
- from langchain.chains.question_answering import load_qa_chain
7
- from langchain.text_splitter import RecursiveCharacterTextSplitter
8
- from langchain.document_loaders import UnstructuredPDFLoader
9
-
10
- # OpenAI API Key Setup
11
- openai.api_key = os.environ["OPENAI_API_KEY"]
12
-
13
- # Load The Goal PDF
14
- loader = UnstructuredPDFLoader("data/The Goal - A Process of Ongoing Improvement (Third Revised Edition).pdf") # , mode="elements"
15
- docs = loader.load()
16
-
17
- # Split Text Chunks
18
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
19
- splits = text_splitter.split_documents(docs)
20
-
21
- # Embed Chunks into Chroma Vector Store
22
- vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
23
- retriever = vectorstore.as_retriever()
24
-
25
- # Use RAG Prompt Template
26
- prompt = hub.pull("rlm/rag-prompt")
27
- llm = ChatOpenAI(model_name="gpt-4-1106-preview", temperature=0) # or gpt-3.5-turbo
28
-
29
-
30
- def format_docs(docs):
31
- return "\n\n".join(doc.page_content for doc in docs)
32
-
33
-
34
- rag_chain = (
35
- {"context": retriever | format_docs, "question": RunnablePassthrough()}
36
- | prompt
37
- | llm
38
- | StrOutputParser()
39
- )
40
-
41
- for chunk in rag_chain.stream("What is a Bottleneck Constraint?"):
42
- print(chunk, end="", flush=True)
43
-
44
- rag_chain.invoke("What is a Bottleneck Constraint?")