sfarrukh
committed on
Commit
·
ba0df82
1
Parent(s):
6eec715
bot
Browse files- .gitignore +1 -0
- app.py +76 -4
- requirements.txt +12 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
.env
|
app.py
CHANGED
@@ -1,7 +1,79 @@
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
2 |
|
3 |
-
def greet(name):
    """Return a playful greeting for *name*.

    Parameters:
        name (str): Name to greet.

    Returns:
        str: "Hello <name>!!"
    """
    # f-string instead of "+" concatenation: clearer and handles any str input.
    return f"Hello {name}!!"
|
5 |
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
import os
from dotenv import load_dotenv

# Pull secrets (the HF API token read below via os.getenv) from a local .env file.
load_dotenv()


# Use following JSON data to feed to Chroma.
# NOTE(review): file produced by an offline preprocessing step — assumed to hold
# keys "chunks", "chunk_ids", "chunk_metadatas" (see the commented Chroma call).
import json
with open("data/processed/final_data_for_vectorstore.json", 'r') as file:
    data4chroma = json.load(file)

# Initiate vector store — retrieval path currently disabled; the app falls back
# to the static `context` string defined below.
# from langchain_community.vectorstores import Chroma
# from langchain_huggingface import HuggingFaceEmbeddings
# embedding_function=HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')
# vectorstore=Chroma.from_texts(texts=data4chroma['chunks'],
#                               embedding=embedding_function,
#                               ids=data4chroma["chunk_ids"],
#                               metadatas=data4chroma["chunk_metadatas"],
#                               collection_name='qual_books',
#                               )


# Standard RAG prompt template fetched from the LangChain hub (network call).
from langchain import hub
prompt = hub.pull("rlm/rag-prompt")

# Remote HF inference endpoint; near-zero temperature for deterministic answers.
from langchain_huggingface import HuggingFaceEndpoint
llm = HuggingFaceEndpoint(repo_id="meta-llama/Meta-Llama-3.1-70B-Instruct",
                          max_new_tokens=3000,
                          top_k=20,
                          top_p=0.95,
                          typical_p=0.95,
                          temperature=0.001,
                          repetition_penalty=1.03,
                          huggingfacehub_api_token=os.getenv("huggingfacehub_api_token")
                          )
# Composed runnable: prompt formatting piped into the LLM.
chain = prompt | llm


# Static fallback context used while the vector-store retriever is disabled.
context = "The First Crusade (1096-1099) was launched by Pope Urban II in response to the Byzantine Emperor's request for assistance against the Seljuk Turks. It marked the beginning of a series of religious wars aimed at reclaiming Jerusalem and other holy sites in the Near East. Crusaders from various European kingdoms, motivated by religious zeal and the promise of eternal salvation, marched toward the Levant. After capturing several cities, they laid siege to Jerusalem in 1099, eventually capturing the city and establishing several Crusader states. The First Crusade was followed by many others, as the struggle for control of the Holy Land continued for centuries."
|
43 |
+
|
44 |
+
def respond(
    query: str,
    data_type: str = "Preprocessed doc",
    llm_chain=chain,
    context=context,
    # vectorstore=vectorstore
):
    """
    Generate a response to a user query using the RAG prompt/LLM chain.

    Parameters:
        query (str): The user's question.
        data_type (str): Retrieval source selector. Only "Preprocessed doc"
            is currently supported; the Chroma retriever path is disabled
            (commented out) and a static context string is used instead.
        llm_chain: Runnable chain (prompt | llm) used for completion.
        context (str): Context text passed to the RAG prompt.

    Returns:
        str: The model's completion for the query.

    Raises:
        ValueError: If `data_type` is not a supported retrieval source.
    """
    if data_type == "Preprocessed doc":
        # Vector-store retrieval is disabled for now; use the static context.
        # retriever = vectorstore.as_retriever(search_type="mmr",
        #                                      search_kwargs={"k": 10, "fetch_k": 100})
        # retrieved_docs = retriever.invoke(query)
        # chain_input = {"context": retrieved_docs, "question": query}
        chain_input = {"context": context, "question": query}
    else:
        # Bug fix: previously an unrecognized data_type left the chain input
        # unassigned and crashed with NameError; fail fast with a clear error.
        raise ValueError(f"Unsupported data_type: {data_type!r}")

    return llm_chain.invoke(chain_input)
|
76 |
+
|
77 |
+
|
78 |
+
# Minimal Gradio UI: one text box for the query, one for the model's answer.
# launch() blocks and serves the app (HF Spaces picks this up as the entry point).
demo = gr.Interface(fn=respond, inputs="text", outputs="text")
demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain_ollama
|
2 |
+
pymupdf
|
3 |
+
chromadb
|
4 |
+
langchain
|
5 |
+
langchain_community
|
6 |
+
langchain-huggingface
|
7 |
+
langchain_chroma
|
8 |
+
gradio
|
9 |
+
gradio_client
|
10 |
+
torch
|
11 |
+
pyprojroot
|
12 |
+
python-dotenv
|