Towhidul committed
Commit 57a9580 · verified · 1 Parent(s): b65588e

Upload folder using huggingface_hub
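
The commit message indicates the files were pushed with the huggingface_hub client. A minimal sketch of such an upload, assuming a Streamlit Space as the target; the repo_id below is hypothetical and not shown anywhere in this diff:

# Hypothetical upload sketch (not part of this commit); repo_id and repo_type are assumptions.
from huggingface_hub import HfApi

api = HfApi()  # token is taken from HF_TOKEN or the cached login
api.upload_folder(
    folder_path=".",                         # local folder with app.py, indexes, PDFs, etc.
    repo_id="Towhidul/medical-chatbot",      # hypothetical repository id
    repo_type="space",                       # assumption: this looks like a Streamlit Space
    commit_message="Upload folder using huggingface_hub",
)
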
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ faiss_index_medical_MedEmbed/index.faiss filter=lfs diff=lfs merge=lfs -text
+ faiss_index_medical_OpenAI/index.faiss filter=lfs diff=lfs merge=lfs -text
+ medical_documents/14.Medicine[[:space:]](1).pdf filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,117 @@
+ import os
+ import streamlit as st
+ from PIL import Image, ImageOps
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import FAISS
+ from langchain.chains import RetrievalQA
+ from langchain.prompts import PromptTemplate
+ from langchain.retrievers import ContextualCompressionRetriever
+ from langchain.retrievers.document_compressors import FlashrankRerank
+ from dotenv import load_dotenv
+ from langchain_community.embeddings.bedrock import BedrockEmbeddings  # imported but unused below
+ 
+ load_dotenv()
+ 
+ # Hyperparameters: the chunk settings are kept for reference (presumably how the
+ # FAISS indexes were built); only k is used at query time
+ PDF_CHUNK_SIZE = 1024
+ PDF_CHUNK_OVERLAP = 256
+ k = 3
+ 
+ # Load the favicon image and pad it to a square canvas
+ def load_and_pad_image(image_path, size=(64, 64)):
+     img = Image.open(image_path)
+     return ImageOps.pad(img, size)
+ 
+ favicon_path = "medical.png"
+ favicon_image = load_and_pad_image(favicon_path)
+ 
+ # Streamlit page config
+ st.set_page_config(
+     page_title="Chatbot",
+     page_icon=favicon_image,
+ )
+ 
+ # Logo and title
+ col1, col2 = st.columns([1, 8])
+ with col1:
+     st.image(favicon_image)
+ with col2:
+     st.markdown(
+         """
+         <h1 style='text-align: left; margin-top: -12px;'>Chatbot</h1>
+         """, unsafe_allow_html=True
+     )
+ 
+ # Model and embedding selection
+ model_options = ["gpt-4o", "gpt-4o-mini", "deepseek-chat"]
+ selected_model = st.selectbox("Choose a GPT model", model_options)
+ 
+ embedding_model_options = ["OpenAI", "Huggingface MedEmbed"]
+ selected_embedding_model = st.selectbox("Choose an Embedding model", embedding_model_options)
+ 
+ # Build the chat model for the selected option
+ def get_llm(selected_model):
+     api_key = os.getenv("DeepSeek_API_KEY") if selected_model == "deepseek-chat" else os.getenv("OPENAI_API_KEY")
+     # deepseek-chat is served from an OpenAI-compatible endpoint, so it needs its own base URL
+     base_url = "https://api.deepseek.com" if selected_model == "deepseek-chat" else None
+     return ChatOpenAI(
+         model=selected_model,
+         temperature=0,
+         max_tokens=None,
+         api_key=api_key,
+         base_url=base_url,
+     )
+ 
+ # Cache the vector store so each index is read from disk only once
+ @st.cache_resource
+ def load_vector_store(selected_embedding_model):
+     if selected_embedding_model == "OpenAI":
+         embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=os.getenv("OPENAI_API_KEY"))
+         return FAISS.load_local("faiss_index_medical_OpenAI", embeddings, allow_dangerous_deserialization=True)
+     else:
+         embeddings = HuggingFaceEmbeddings(model_name="abhinand/MedEmbed-large-v0.1")
+         return FAISS.load_local("faiss_index_medical_MedEmbed", embeddings, allow_dangerous_deserialization=True)
+ 
+ # Load the selected vector store and chat model
+ vector_store = load_vector_store(selected_embedding_model)
+ llm = get_llm(selected_model)
+ 
+ # Main app logic
+ def main():
+     st.session_state['knowledge_base'] = vector_store
+     st.header("Ask a Question")
+ 
+     question = st.text_input("Enter your question")
+     if st.button("Get Answer"):
+         knowledge_base = st.session_state['knowledge_base']
+         # Retrieve k candidates from FAISS, then rerank them with FlashRank
+         retriever = knowledge_base.as_retriever(search_kwargs={"k": k})
+         compressor = FlashrankRerank()
+         compression_retriever = ContextualCompressionRetriever(
+             base_compressor=compressor, base_retriever=retriever
+         )
+ 
+         system_prompt = """
+         You are a friendly and knowledgeable assistant who is an expert in medical education...
+         """
+ 
+         # Doubled braces keep {context} and {question} as template variables inside the f-string
+         template = f"""
+         {system_prompt}
+         -------------------------------
+         Context: {{context}}
+         Question: {{question}}
+         Answer:
+         """
+ 
+         prompt = PromptTemplate(
+             template=template,
+             input_variables=['context', 'question']
+         )
+ 
+         qa_chain = RetrievalQA.from_chain_type(
+             llm,
+             retriever=compression_retriever,
+             return_source_documents=True,
+             chain_type_kwargs={"prompt": prompt}
+         )
+ 
+         response = qa_chain.invoke({"query": question})
+         st.write(f"**Answer:** {response['result']}")
+ 
+ if __name__ == "__main__":
+     main()
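
app.py only loads prebuilt indexes; the chunking constants it declares (PDF_CHUNK_SIZE, PDF_CHUNK_OVERLAP) hint at how they were created. A minimal build sketch under that assumption; the loader and splitter choices are guesses, since the actual build script is not part of this commit:

# Hypothetical index-build sketch; only the paths, chunk sizes, and embedding models come from this repo.
import os
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

docs = PyMuPDFLoader("medical_documents/14.Medicine (1).pdf").load()
chunks = RecursiveCharacterTextSplitter(
    chunk_size=1024, chunk_overlap=256   # PDF_CHUNK_SIZE / PDF_CHUNK_OVERLAP from app.py
).split_documents(docs)

embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=os.getenv("OPENAI_API_KEY"))
FAISS.from_documents(chunks, embeddings).save_local("faiss_index_medical_OpenAI")
# The MedEmbed index would be built the same way with
# HuggingFaceEmbeddings(model_name="abhinand/MedEmbed-large-v0.1") and saved to "faiss_index_medical_MedEmbed".
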
faiss_index_medical_MedEmbed/index.faiss ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:55c4ea45d2aa23d75f67e252d8f5e02e7b2b3c55324cc247e622752677b0ae68
+ size 5873709
faiss_index_medical_MedEmbed/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be1ef50261c1eb4e4526269e9839f84000a4fb76640e063be2406979aef5d4b2
+ size 2787575
faiss_index_medical_OpenAI/index.faiss ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1a7de27af2193ddaa323c93cc83e4e5ec41d45776e1f9832954ab7bf92101532
+ size 17621037
faiss_index_medical_OpenAI/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3899b2bffd0764a966d8f25aabccd32a99b3cacecfcfd2f69e3b4cf3487e6dd3
+ size 2787575
medical.png ADDED
medical_documents/14.Medicine (1).pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:18cebaeb63c2b575edae04918575433efc9a9e3ed6d62c60a9164218a0d46d6e
+ size 8776919
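
The blocks above are Git LFS pointers; the binaries themselves live in the Hub's LFS storage and are fetched on clone or download. A hedged sketch of pulling one of them with huggingface_hub; repo_id and repo_type are assumptions:

# Hypothetical download sketch; repo_id and repo_type are assumptions.
from huggingface_hub import hf_hub_download

index_path = hf_hub_download(
    repo_id="Towhidul/medical-chatbot",                 # hypothetical repository id
    filename="faiss_index_medical_OpenAI/index.faiss",  # path as stored in this commit
    repo_type="space",
)
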
requirements.txt ADDED
@@ -0,0 +1,35 @@
+ boto3
+ awscli
+ chromadb==0.4.14
+ rank-bm25
+ python-docx
+ langchain
+ langchain-community
+ sentence-transformers
+ pypdf
+ rapidocr-onnxruntime
+ pymupdf
+ llama-index-core
+ streamlit
+ llama_index
+ llama-index-llms-bedrock
+ faiss-cpu
+ langchain_openai
+ python-dotenv
+ transformers
+ sentence-transformers
+ unstructured
+ unstructured[pdf]
+ pymupdf4llm
+ requests
+ beautifulsoup4
+ selenium
+ PyPDF2
+ playwright
+ #!playwright install
+ nest_asyncio
+ firecrawl
+ langchain-cohere
+ cohere-aws
+ flashrank
+ langchain-openai