Genzo1010 committed on
Commit 0532c05 · 1 Parent(s): 9de0a6d

Upload 4 files

Files changed (4)
  1. README.md +2 -13
  2. app.py +113 -0
  3. off_load.py +163 -0
  4. requirements.txt +13 -0
README.md CHANGED
@@ -1,13 +1,2 @@
- ---
- title: CosmicNexus
- emoji: 😻
- colorFrom: yellow
- colorTo: gray
- sdk: streamlit
- sdk_version: 1.28.1
- app_file: app.py
- pinned: false
- license: apache-2.0
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # medbot
+ A chatbot that summarizes a patient's report and lets them chat about the data
app.py ADDED
@@ -0,0 +1,113 @@
+ import streamlit as st
+ from streamlit_chat import message
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.llms import LlamaCpp
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.vectorstores import FAISS
+ from langchain.memory import ConversationBufferMemory
+ from langchain.document_loaders import PyPDFLoader
+ import os
+ import tempfile
+
+
+ def initialize_session_state():
+     # Seed the chat state on first load
+     if 'history' not in st.session_state:
+         st.session_state['history'] = []
+
+     if 'generated' not in st.session_state:
+         st.session_state['generated'] = ["Hello! Ask me anything about 🤗"]
+
+     if 'past' not in st.session_state:
+         st.session_state['past'] = ["Hey! 👋"]
+
+ def conversation_chat(query, chain, history):
+     # Run one retrieval-augmented QA turn and record it in the history
+     result = chain({"question": query, "chat_history": history})
+     history.append((query, result["answer"]))
+     return result["answer"]
+
+ def display_chat_history(chain):
+     reply_container = st.container()
+     container = st.container()
+
+     with container:
+         with st.form(key='my_form', clear_on_submit=True):
+             user_input = st.text_input("Question:", placeholder="Ask about your PDF", key='input')
+             submit_button = st.form_submit_button(label='Send')
+
+         if submit_button and user_input:
+             with st.spinner('Generating response...'):
+                 output = conversation_chat(user_input, chain, st.session_state['history'])
+
+             st.session_state['past'].append(user_input)
+             st.session_state['generated'].append(output)
+
+     if st.session_state['generated']:
+         with reply_container:
+             for i in range(len(st.session_state['generated'])):
+                 message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="thumbs")
+                 message(st.session_state["generated"][i], key=str(i), avatar_style="fun-emoji")
+
+ def create_conversational_chain(vector_store):
+     # Create the local llama.cpp LLM
+     llm = LlamaCpp(
+         streaming=True,
+         model_path="mistral-7b-instruct-v0.1.Q2_K.gguf",
+         temperature=0.75,
+         top_p=1,
+         verbose=True,
+         n_ctx=4096
+     )
+
+     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+
+     chain = ConversationalRetrievalChain.from_llm(llm=llm, chain_type='stuff',
+                                                   retriever=vector_store.as_retriever(search_kwargs={"k": 2}),
+                                                   memory=memory)
+     return chain
+
+ def main():
+     # Initialize session state
+     initialize_session_state()
+     st.title("MedReport Summarizer:")
+     # Sidebar for document upload
+     st.sidebar.title("Document Processing")
+     uploaded_files = st.sidebar.file_uploader("Upload files", accept_multiple_files=True)
+
+     if uploaded_files:
+         text = []
+         for file in uploaded_files:
+             file_extension = os.path.splitext(file.name)[1]
+             with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+                 temp_file.write(file.read())
+                 temp_file_path = temp_file.name
+
+             loader = None
+             if file_extension == ".pdf":
+                 loader = PyPDFLoader(temp_file_path)
+
+             if loader:
+                 text.extend(loader.load())
+                 os.remove(temp_file_path)
+
+         text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=20)
+         text_chunks = text_splitter.split_documents(text)
+
+         # Create embeddings
+         embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
+                                            model_kwargs={'device': 'cpu'})
+
+         # Create vector store
+         vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)
+
+         # Create the chain object
+         chain = create_conversational_chain(vector_store)
+
+         display_chat_history(chain)
+
+ if __name__ == "__main__":
+     main()
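
A minimal sketch of the pipeline app.py wires together, runnable outside Streamlit. It assumes the same mistral-7b-instruct-v0.1.Q2_K.gguf file sits in the working directory; the sample sentences are illustrative stand-ins for uploaded PDF text, not part of this commit:

    from langchain.chains import ConversationalRetrievalChain
    from langchain.embeddings import HuggingFaceEmbeddings
    from langchain.llms import LlamaCpp
    from langchain.memory import ConversationBufferMemory
    from langchain.vectorstores import FAISS

    # Index a couple of plain strings instead of uploaded PDFs.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                       model_kwargs={'device': 'cpu'})
    vector_store = FAISS.from_texts(
        ["Blood pressure measured at 150/95 mmHg.",       # illustrative sample data
         "Patient prescribed lisinopril 10 mg daily."],
        embedding=embeddings)

    # Same model file and context size as app.py assumes.
    llm = LlamaCpp(model_path="mistral-7b-instruct-v0.1.Q2_K.gguf", n_ctx=4096)
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm, chain_type='stuff',
        retriever=vector_store.as_retriever(search_kwargs={"k": 2}),
        memory=memory)

    print(chain({"question": "What medication was prescribed?", "chat_history": []})["answer"])

The retriever returns the k=2 chunks closest to the question, and the 'stuff' chain pastes them verbatim into the prompt, which is why the n_ctx=4096 window matters for large reports.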
off_load.py ADDED
@@ -0,0 +1,163 @@
+ import streamlit as st
+ import logging
+ from streamlit_chat import message
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain.embeddings import HuggingFaceEmbeddings, CacheBackedEmbeddings, HuggingFaceInstructEmbeddings
+ from langchain.llms import LlamaCpp
+ from langchain.vectorstores import FAISS
+ from langchain.memory import ConversationBufferMemory
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.document_loaders import PyPDFLoader
+ from langchain.storage import LocalFileStore
+ from langchain.llms import HuggingFaceHub
+
+ from datetime import datetime
+ import os
+ import tempfile
+
+ now = datetime.now()
+ underlying_embeddings = HuggingFaceEmbeddings()
+
+
+ def initialize_session_state():
+     # Seed the chat state on first load
+     if 'history' not in st.session_state:
+         st.session_state['history'] = []
+
+     if 'generated' not in st.session_state:
+         st.session_state['generated'] = ["Hello! Ask me anything about 🤗"]
+
+     if 'past' not in st.session_state:
+         st.session_state['past'] = ["Hey! 👋"]
+
+ def conversation_chat(query, chain, history):
+     # Run one retrieval-augmented QA turn and record it in the history
+     result = chain({"question": query, "chat_history": history})
+     history.append((query, result["answer"]))
+     return result["answer"]
+
+ def cache_checker(question, question_cache, chain, history):
+     # Serve a repeated question from the cache; otherwise run the chain once and cache the answer
+     if question in question_cache:
+         logging.info("Response retrieved from cache.")
+         return question_cache[question]
+
+     answer = conversation_chat(question, chain, history)
+     question_cache[question] = answer
+     logging.info("Response computed and cached.")
+     return answer
+
+ def display_chat_history(chain):
+     reply_container = st.container()
+     container = st.container()
+
+     # Keep the Q&A cache in session state so it survives Streamlit reruns;
+     # a plain local dict would be recreated empty on every interaction
+     if 'question_cache' not in st.session_state:
+         st.session_state['question_cache'] = {}
+     question_cache = st.session_state['question_cache']
+
+     with container:
+         with st.form(key='my_form', clear_on_submit=True):
+             user_input = st.text_input("Question:", placeholder="Ask about your PDF", key='input')
+             submit_button = st.form_submit_button(label='Send')
+
+         if submit_button and user_input:
+             with st.spinner('Generating response...'):
+                 if user_input in question_cache:
+                     st.info("Response retrieved from cache.")
+                 else:
+                     st.info("Response computed.")
+                 output = cache_checker(user_input, question_cache, chain, st.session_state['history'])
+
+             st.session_state['past'].append(user_input)
+             st.session_state['generated'].append(output)
+
+     if st.session_state['generated']:
+         with reply_container:
+             for i in range(len(st.session_state['generated'])):
+                 message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="thumbs")
+                 message(st.session_state["generated"][i], key=str(i), avatar_style="fun-emoji")
+
+ def create_conversational_chain(vector_store):
+     # Create the local llama.cpp LLM
+     llm = LlamaCpp(
+         streaming=True,
+         model_path="mistral-7b-instruct-v0.1.Q2_K.gguf",
+         temperature=0.75,
+         top_p=1,
+         verbose=True,
+         n_ctx=4096
+     )
+     # Alternative: a hosted model via the Hugging Face Hub
+     # llm = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={
+     #     "temperature": 0.75,
+     #     "n_ctx": 4096,
+     #     "streaming": True,
+     #     "top_p": 0.99,
+     #     "verbose": True,
+     #     "max_length": 4096
+     # })
+
+     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+
+     chain = ConversationalRetrievalChain.from_llm(llm=llm, chain_type='stuff',
+                                                   retriever=vector_store.as_retriever(search_kwargs={"k": 2}),
+                                                   memory=memory)
+     return chain
+
+ def main():
+     # Initialize session state
+     initialize_session_state()
+     st.title("Medbot :books:")
+     # Sidebar for document upload
+     st.sidebar.title("Document Processing")
+     uploaded_files = st.sidebar.file_uploader("Upload files", accept_multiple_files=True)
+
+     if uploaded_files:
+         text = []
+         for file in uploaded_files:
+             file_extension = os.path.splitext(file.name)[1]
+             with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+                 temp_file.write(file.read())
+                 temp_file_path = temp_file.name
+
+             loader = None
+             if file_extension == ".pdf":
+                 loader = PyPDFLoader(temp_file_path)
+
+             if loader:
+                 text.extend(loader.load())
+                 os.remove(temp_file_path)
+
+         text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+         text_chunks = text_splitter.split_documents(text)
+
+         # Initialize the on-disk store that backs the embedding cache
+         cache_store = LocalFileStore("./cache/")
+
+         # Create embeddings
+         embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
+                                            model_kwargs={'device': 'cpu'})
+
+         # Create cache-backed embeddings so repeated chunks are not re-embedded
+         cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_store, namespace="embeddings")
+
+         # Create vector store
+         vector_store = FAISS.from_documents(text_chunks, embedding=cached_embeddings)
+
+         # Create the chain object
+         chain = create_conversational_chain(vector_store)
+
+         display_chat_history(chain)
+
+ if __name__ == "__main__":
+     main()
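
What off_load.py adds over app.py is the cache-backed embedding path: vectors are persisted under ./cache/ keyed by the chunk text, so re-indexing unchanged text skips the embedding model. A minimal sketch of that mechanism in isolation, using the same LangChain APIs the file imports (the sample string is illustrative):

    from langchain.embeddings import HuggingFaceEmbeddings, CacheBackedEmbeddings
    from langchain.storage import LocalFileStore

    store = LocalFileStore("./cache/")
    underlying = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    cached = CacheBackedEmbeddings.from_bytes_store(underlying, store, namespace="embeddings")

    # First call computes the vector and writes it under ./cache/;
    # the second call for identical text is served from disk.
    cached.embed_documents(["chest x-ray shows no acute findings"])
    cached.embed_documents(["chest x-ray shows no acute findings"])
    print(list(store.yield_keys())[:3])  # cache keys now present on disk

Note that CacheBackedEmbeddings built this way caches embed_documents calls; query-time embeddings still go to the underlying model on every question.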
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ langchain
+ torch
+ accelerate
+ sentence_transformers
+ streamlit_chat
+ streamlit
+ faiss-cpu
+ tiktoken
+ huggingface-hub
+ pypdf
+ llama-cpp-python
+
+