midrees2806 committed on
Commit
48322d5
·
verified ·
1 Parent(s): 95c157a

Create pdf_bot.py

Browse files
Files changed (1) hide show
  1. pdf_bot.py +36 -0
pdf_bot.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from langchain.document_loaders import PyPDFLoader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain.embeddings import HuggingFaceEmbeddings
6
+ from langchain.vectorstores import FAISS
7
+ from langchain.chains import RetrievalQA
8
+ from langchain_community.llms import ChatGroq
9
+
10
+ load_dotenv()
11
+ groq_api_key = os.getenv("GROQ_API_KEY")
12
+
13
+ # Load PDF and prepare QA chain
14
def create_qa_chain_from_pdf(
    pdf_path: str,
    *,
    chunk_size: int = 1000,
    chunk_overlap: int = 200,
    embedding_model: str = "BAAI/bge-m3",
    llm_model: str = "llama3-8b-8192",
    temperature: float = 0.3,
    k: int = 1,
) -> "RetrievalQA":
    """Build a retrieval question-answering chain over a single PDF.

    The PDF is loaded with ``PyPDFLoader``, split into overlapping chunks,
    embedded with a HuggingFace model into a FAISS vector store, and wired
    to a Groq chat model through ``RetrievalQA`` using the "stuff" chain
    type. All keyword defaults reproduce the previously hard-coded values.

    Args:
        pdf_path: Path of the PDF file handed to ``PyPDFLoader``.
        chunk_size: ``chunk_size`` passed to the text splitter.
        chunk_overlap: ``chunk_overlap`` passed to the text splitter.
        embedding_model: Model name passed to ``HuggingFaceEmbeddings``.
        llm_model: Model name passed to ``ChatGroq``.
        temperature: Sampling temperature passed to ``ChatGroq``.
        k: ``search_kwargs["k"]`` given to the FAISS retriever.

    Returns:
        The chain created by ``RetrievalQA.from_chain_type`` with
        ``return_source_documents=True``.

    Raises:
        ValueError: If splitting the PDF produces no chunks, i.e. no text
            could be extracted from it.
    """
    documents = PyPDFLoader(pdf_path).load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    texts = splitter.split_documents(documents)

    # Fail early with a clear message: an empty chunk list would otherwise
    # only surface later as an opaque error while building the vector store,
    # after the embedding model has already been loaded.
    if not texts:
        raise ValueError(f"No extractable text found in PDF: {pdf_path!r}")

    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
    vectorstore = FAISS.from_documents(texts, embeddings)

    # NOTE(review): this file imports ChatGroq from langchain_community.llms;
    # the class is normally provided by the langchain_groq package -- confirm
    # that the import at the top of the file actually resolves.
    # groq_api_key is read from the GROQ_API_KEY environment variable at
    # module import time and may be None if the variable is unset.
    llm = ChatGroq(
        model=llm_model,
        temperature=temperature,
        api_key=groq_api_key,
    )

    retriever = vectorstore.as_retriever(search_kwargs={"k": k})
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )