zamal committed
Commit 04707cf · verified · 1 Parent(s): 9132a31

Upload 3 files

Files changed (3)
  1. app.py +37 -0
  2. rag_utility.py +71 -0
  3. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,37 @@
+ import os
+ import shutil
+ import gradio as gr
+ from rag_utility import process_document_to_chromadb, answer_question
+
+ def process_and_store_file(file):
+     if file is not None:
+         working_dir = os.getcwd()
+         save_path = os.path.join(working_dir, os.path.basename(file))
+         # gr.File(type="filepath") hands the handler a path string, not a
+         # file object, so copy the file rather than calling file.read()
+         shutil.copy(file, save_path)
+         process_document_to_chromadb(save_path)
+         return "Document Processed Successfully"
+     return "No file uploaded."
+
+ def get_answers(question):
+     if not question.strip():
+         return "Please enter a question.", "Please enter a question."
+     answer = answer_question(question)
+     return answer["answer_deepseek"], answer["answer_llama3"]
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# 🐋 DeepSeek-R1 vs 🦙 Llama-3")
+     with gr.Row():
+         file_input = gr.File(label="Upload a PDF file", file_types=[".pdf"], type="filepath")
+         process_button = gr.Button("Process Document")
+         status_output = gr.Textbox(label="Status", interactive=False)
+     process_button.click(process_and_store_file, inputs=file_input, outputs=status_output)
+
+     question_input = gr.Textbox(label="Ask your question from the document")
+     answer_button = gr.Button("Answer")
+     with gr.Row():
+         deepseek_output = gr.Markdown("### DeepSeek-R1 Response")
+         llama3_output = gr.Markdown("### Llama-3 Response")
+
+     answer_button.click(get_answers, inputs=question_input, outputs=[deepseek_output, llama3_output])
+
+ demo.launch()
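One presentation detail the app leaves as-is: deepseek-r1-distill-llama-70b returns its chain-of-thought inside <think>...</think> tags, so that text shows up verbatim in the DeepSeek Markdown panel. A minimal sketch of a filter get_answers could apply before returning (strip_think_tags is a hypothetical helper, not part of this commit):

import re

def strip_think_tags(text: str) -> str:
    # drop the <think>...</think> reasoning block that DeepSeek-R1 models emit
    return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()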
rag_utility.py ADDED
@@ -0,0 +1,71 @@
+ import os
+ import json
+
+ from langchain_community.document_loaders import UnstructuredPDFLoader
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_chroma import Chroma
+ from langchain_groq import ChatGroq
+ from langchain.chains import RetrievalQA
+ from dotenv import load_dotenv
+
+ working_dir = os.path.dirname(os.path.abspath(__file__))
+ # config.json is not part of this upload; load it only if present
+ config_path = os.path.join(working_dir, "config.json")
+ config_data = {}
+ if os.path.exists(config_path):
+     with open(config_path) as f:
+         config_data = json.load(f)
+ load_dotenv()
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+ if GROQ_API_KEY:
+     os.environ["GROQ_API_KEY"] = GROQ_API_KEY
+
+
+ # loading the embedding model
+ embedding = HuggingFaceEmbeddings()
+
+ # loading the DeepSeek-R1 distill 70B model
+ deepseek_llm = ChatGroq(
+     model="deepseek-r1-distill-llama-70b",
+     temperature=0
+ )
+
+ # loading the Llama-3.3 70B model
+ llama3_llm = ChatGroq(
+     model="llama-3.3-70b-versatile",
+     temperature=0
+ )
+
+
+ def process_document_to_chromadb(file_name):
+     # single-PDF loader; os.path.join leaves file_name unchanged when it
+     # is already an absolute path (as passed from app.py)
+     loader = UnstructuredPDFLoader(os.path.join(working_dir, file_name))
+     # loading the documents
+     documents = loader.load()
+     # splitting the text into overlapping chunks
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
+     texts = text_splitter.split_documents(documents)
+     vectordb = Chroma.from_documents(documents=texts,
+                                      embedding=embedding,
+                                      persist_directory=f"{working_dir}/doc_vectorstore")
+     return 0
+
+
+ def answer_question(user_question):
+     # load the persisted database from disk and use it as normal
+     vectordb = Chroma(persist_directory=f"{working_dir}/doc_vectorstore",
+                       embedding_function=embedding)
+     # retriever
+     retriever = vectordb.as_retriever()
+
+     # create the chain to answer questions - DeepSeek-R1
+     qa_chain_deepseek = RetrievalQA.from_chain_type(llm=deepseek_llm,
+                                                     chain_type="stuff",
+                                                     retriever=retriever,
+                                                     return_source_documents=True)
+     response_deepseek = qa_chain_deepseek.invoke({"query": user_question})
+     answer_deepseek = response_deepseek["result"]
+
+     # create the chain to answer questions - Llama-3
+     qa_chain_llama3 = RetrievalQA.from_chain_type(llm=llama3_llm,
+                                                   chain_type="stuff",
+                                                   retriever=retriever,
+                                                   return_source_documents=True)
+     response_llama3 = qa_chain_llama3.invoke({"query": user_question})
+     answer_llama3 = response_llama3["result"]
+
+     return {"answer_deepseek": answer_deepseek, "answer_llama3": answer_llama3}
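For a quick smoke test of the retrieval pipeline without the Gradio UI, the two functions can be called directly. A minimal sketch, assuming GROQ_API_KEY is set in a .env file and sample.pdf (a hypothetical file name) sits next to rag_utility.py:

from rag_utility import process_document_to_chromadb, answer_question

process_document_to_chromadb("sample.pdf")  # embeds and persists to doc_vectorstore/
answers = answer_question("What is this document about?")
print("DeepSeek-R1:", answers["answer_deepseek"])
print("Llama-3:", answers["answer_llama3"])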
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ langchain-community==0.3.16
+ langchain==0.3.16
+ langchain-huggingface==0.1.2
+ langchain-text-splitters==0.3.5
+ unstructured==0.16.16
+ unstructured[pdf]==0.16.16
+ langchain-unstructured==0.1.6
+ langchain-chroma==0.2.1
+ langchain-groq==0.2.3
+ gradio>=4.0.0
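One gap worth flagging: rag_utility.py imports load_dotenv from python-dotenv, which is not pinned above and does not appear to be pulled in by any of the listed packages, so it must be installed separately. The .env file it reads would contain a single line (placeholder value):

GROQ_API_KEY=<your-groq-api-key>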