Spaces:

zamal
/

Deepseek-R1-vs-LLama3

Running

App Files Files Community

zamal committed on Jan 31

Commit

04707cf

verified ·

1 Parent(s): 9132a31

Upload 3 files

Browse files

Files changed (3) hide show

app.py +37 -0
rag_utility.py +71 -0
requirements.txt +10 -0

app.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import os
+import gradio as gr
+from rag_utility import process_document_to_chromadb, answer_question
+def process_and_store_file(file):
+    if file is not None:
+        working_dir = os.getcwd()
+        save_path = os.path.join(working_dir, os.path.basename(file))
+        with open(save_path, "wb") as f:
+            f.write(file.read())
+        process_document_to_chromadb(save_path)
+        return "Document Processed Successfully"
+    return "No file uploaded."
+def get_answers(question):
+    if not question.strip():
+        return "Please enter a question.", "Please enter a question."
+    answer = answer_question(question)
+    return answer["answer_deepseek"], answer["answer_llama3"]
+# Build the comparison UI: upload and process a PDF, then ask a question and
+# show the answer from each model.
+with gr.Blocks() as demo:
+    gr.Markdown("# 🐋 DeepSeek-R1 vs 🦙 Llama-3")
+    # Upload row: PDF picker next to the button that triggers processing.
+    with gr.Row():
+        file_input = gr.File(label="Upload a PDF file", file_types=[".pdf"], type="filepath")
+        process_button = gr.Button("Process Document")
+    # Read-only box that shows the message returned by process_and_store_file.
+    status_output = gr.Textbox(label="Status", interactive=False)
+    process_button.click(process_and_store_file, inputs=file_input, outputs=status_output)
+    question_input = gr.Textbox(label="Ask your question from the document")
+    answer_button = gr.Button("Answer")
+    # Answer row: the two panels receive get_answers' two return values, in
+    # the order listed in ``outputs`` below.
+    with gr.Row():
+        deepseek_output = gr.Markdown("### DeepSeek-r1 Response")
+        llama3_output = gr.Markdown("### Llama-3 Response")
+    answer_button.click(get_answers, inputs=question_input, outputs=[deepseek_output, llama3_output])
+# Launch the app as soon as this script is executed.
+demo.launch()

rag_utility.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import os
+import json
+from langchain_community.document_loaders import UnstructuredPDFLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_chroma import Chroma
+from langchain_groq import ChatGroq
+from langchain.chains import RetrievalQA
+from dotenv import load_dotenv
+working_dir = os.path.dirname(os.path.abspath(__file__))
+config_data = json.load(open(f"{working_dir}/config.json"))
+load_dotenv()
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+os.environ["GROQ_API_KEY"] = GROQ_API_KEY
+# loading the embedding model
+embedding = HuggingFaceEmbeddings()
+# loading the Deepseek-r1 70b model
+deepseek_llm = ChatGroq(
+    model="deepseek-r1-distill-llama-70b",
+    temperature=0
+)
+# loading the llama-3 70b model
+llama3_llm = ChatGroq(
+    model="llama-3.3-70b-versatile",
+    temperature=0
+)
+def process_document_to_chromadb(file_name):
+    # document directory loader
+    loader = UnstructuredPDFLoader(f"{working_dir}/{file_name}")
+    # loading the documents
+    documents = loader.load()
+    # splitting the text into
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
+    texts = text_splitter.split_documents(documents)
+    vectordb = Chroma.from_documents(documents=texts,
+                                     embedding=embedding,
+                                     persist_directory=f"{working_dir}/doc_vectorstore")
+    return 0
+def answer_question(user_question):
+    # load the persisted database from disk, and use it as normal.
+    vectordb = Chroma(persist_directory=f"{working_dir}/doc_vectorstore",
+                      embedding_function=embedding)
+    # retriever
+    retriever = vectordb.as_retriever()
+    # create the chain to answer questions - deepseek-r1
+    qa_chain_deepseek = RetrievalQA.from_chain_type(llm=deepseek_llm,
+                                           chain_type="stuff",
+                                           retriever=retriever,
+                                           return_source_documents=True)
+    response_deepseek = qa_chain_deepseek.invoke({"query": user_question})
+    answer_deepseek = response_deepseek["result"]
+    # create the chain to answer questions - llama3
+    qa_chain_llama3 = RetrievalQA.from_chain_type(llm=llama3_llm,
+                                                    chain_type="stuff",
+                                                    retriever=retriever,
+                                                    return_source_documents=True)
+    response_llama3 = qa_chain_llama3.invoke({"query": user_question})
+    answer_llama3 = response_llama3["result"]
+    return {"answer_deepseek": answer_deepseek, "answer_llama3": answer_llama3}

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+langchain-community==0.3.16
+langchain==0.3.16
+langchain-huggingface==0.1.2
+langchain-text-splitters==0.3.5
+unstructured==0.16.16
+unstructured[pdf]==0.16.16
+langchain-unstructured==0.1.6
+langchain-chroma==0.2.1
+langchain-groq==0.2.3
+gradio>=4.0.0