shallou committed on
Commit
e6aa251
·
verified ·
1 Parent(s): 427863b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -52
app.py CHANGED
@@ -1,4 +1,3 @@
1
- !pip install langchain-community # Install the missing module
2
  import streamlit as st
3
  import logging
4
  import os
@@ -6,6 +5,8 @@ import tempfile
6
  import shutil
7
  import pdfplumber
8
  import ollama
 
 
9
 
10
  from langchain_community.document_loaders import UnstructuredPDFLoader
11
  from langchain_community.embeddings import OllamaEmbeddings
@@ -35,18 +36,29 @@ logging.basicConfig(
35
 
36
  logger = logging.getLogger(__name__)
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  @st.cache_resource(show_spinner=True)
40
- def extract_model_names(
41
- models_info: Dict[str, List[Dict[str, Any]]],
42
- ) -> Tuple[str, ...]:
43
  """Extract model names from the provided models information."""
44
  logger.info("Extracting model names from models_info")
45
  model_names = tuple(model["name"] for model in models_info["models"])
46
  logger.info(f"Extracted model names: {model_names}")
47
  return model_names
48
 
49
-
50
  def create_vector_db(file_upload) -> Chroma:
51
  """Create a vector database from an uploaded PDF file."""
52
  logger.info(f"Creating vector DB from file upload: {file_upload.name}")
@@ -73,7 +85,6 @@ def create_vector_db(file_upload) -> Chroma:
73
  logger.info(f"Temporary directory {temp_dir} removed")
74
  return vector_db
75
 
76
-
77
  def process_question(question: str, vector_db: Chroma, selected_model: str) -> str:
78
  """Process a user question using the vector database and selected language model."""
79
  logger.info(f"Processing question: {question} using model: {selected_model}")
@@ -113,7 +124,6 @@ def process_question(question: str, vector_db: Chroma, selected_model: str) -> s
113
  logger.info("Question processed and response generated")
114
  return response
115
 
116
-
117
  @st.cache_data
118
  def extract_all_pages_as_images(file_upload) -> List[Any]:
119
  """Extract all pages from a PDF file as images."""
@@ -124,7 +134,6 @@ def extract_all_pages_as_images(file_upload) -> List[Any]:
124
  logger.info("PDF pages extracted as images")
125
  return pdf_pages
126
 
127
-
128
  def delete_vector_db(vector_db: Optional[Chroma]) -> None:
129
  """Delete the vector database and clear related session state."""
130
  logger.info("Deleting vector DB")
@@ -140,13 +149,16 @@ def delete_vector_db(vector_db: Optional[Chroma]) -> None:
140
  st.error("No vector database found to delete.")
141
  logger.warning("Attempted to delete vector DB, but none was found")
142
 
143
-
144
  def main() -> None:
145
  """Main function to run the Streamlit application."""
146
  st.subheader("🧠 Ollama PDF RAG playground", divider="gray", anchor=False)
147
 
148
- models_info = ollama.list()
149
- available_models = extract_model_names(models_info)
 
 
 
 
150
 
151
  col1, col2 = st.columns([1.5, 2])
152
 
@@ -154,44 +166,4 @@ def main() -> None:
154
  st.session_state["messages"] = []
155
 
156
  if "vector_db" not in st.session_state:
157
- st.session_state["vector_db"] = None
158
-
159
- if available_models:
160
- selected_model = col2.selectbox(
161
- "Pick a model available locally on your system ↓", available_models
162
- )
163
-
164
- file_upload = col1.file_uploader(
165
- "Upload a PDF file ↓", type="pdf", accept_multiple_files=False
166
- )
167
-
168
- if file_upload:
169
- st.session_state["file_upload"] = file_upload
170
- if st.session_state["vector_db"] is None:
171
- st.session_state["vector_db"] = create_vector_db(file_upload)
172
- pdf_pages = extract_all_pages_as_images(file_upload)
173
- st.session_state["pdf_pages"] = pdf_pages
174
-
175
- zoom_level = col1.slider(
176
- "Zoom Level", min_value=100, max_value=1000, value=700, step=50
177
- )
178
-
179
- with col1:
180
- with st.container(height=410, border=True):
181
- for page_image in pdf_pages:
182
- st.image(page_image, width=zoom_level)
183
-
184
- delete_collection = col1.button("⚠️ Delete collection", type="secondary")
185
-
186
- if delete_collection:
187
- delete_vector_db(st.session_state["vector_db"])
188
-
189
- with col2:
190
- message_container = st.container(height=500, border=True)
191
-
192
- for message in st.session_state["messages"]:
193
- avatar = "🤖" if message["role"] == "assistant" else "😎"
194
- with message_container.chat_message(message["role"], avatar=avatar):
195
- st.markdown(message["content"])
196
-
197
-
 
 
1
  import streamlit as st
2
  import logging
3
  import os
 
5
  import shutil
6
  import pdfplumber
7
  import ollama
8
+ import time
9
+ import httpx
10
 
11
  from langchain_community.document_loaders import UnstructuredPDFLoader
12
  from langchain_community.embeddings import OllamaEmbeddings
 
36
 
37
  logger = logging.getLogger(__name__)
38
 
39
def ollama_list_with_retry(retries=3, delay=5):
    """Fetch the installed-model listing from the local Ollama service.

    Retries on connection failures so a briefly unavailable daemon does not
    immediately crash the app.

    Args:
        retries: Maximum number of attempts before giving up.
        delay: Seconds to sleep between consecutive attempts.

    Returns:
        The raw response from ``ollama.list()``.

    Raises:
        httpx.ConnectError: If the service is unreachable on every attempt.
    """
    final_attempt = retries - 1
    for attempt in range(retries):
        try:
            listing = ollama.list()
        except httpx.ConnectError as e:
            # Log each failure; only the last one is fatal.
            logger.error(f"Connection error: {e}. Attempt {attempt + 1} of {retries}")
            if attempt == final_attempt:
                logger.error("All retry attempts failed. Cannot connect to Ollama service.")
                raise
            time.sleep(delay)
        else:
            logger.info("Successfully retrieved model list from Ollama")
            return listing
53
 
54
@st.cache_resource(show_spinner=True)
def extract_model_names(models_info: Dict[str, List[Dict[str, Any]]]) -> Tuple[str, ...]:
    """Extract model names from the provided models information."""
    logger.info("Extracting model names from models_info")
    # Collect the "name" field of every entry under the "models" key.
    names = []
    for entry in models_info["models"]:
        names.append(entry["name"])
    model_names = tuple(names)
    logger.info(f"Extracted model names: {model_names}")
    return model_names
61
 
 
62
  def create_vector_db(file_upload) -> Chroma:
63
  """Create a vector database from an uploaded PDF file."""
64
  logger.info(f"Creating vector DB from file upload: {file_upload.name}")
 
85
  logger.info(f"Temporary directory {temp_dir} removed")
86
  return vector_db
87
 
 
88
  def process_question(question: str, vector_db: Chroma, selected_model: str) -> str:
89
  """Process a user question using the vector database and selected language model."""
90
  logger.info(f"Processing question: {question} using model: {selected_model}")
 
124
  logger.info("Question processed and response generated")
125
  return response
126
 
 
127
  @st.cache_data
128
  def extract_all_pages_as_images(file_upload) -> List[Any]:
129
  """Extract all pages from a PDF file as images."""
 
134
  logger.info("PDF pages extracted as images")
135
  return pdf_pages
136
 
 
137
  def delete_vector_db(vector_db: Optional[Chroma]) -> None:
138
  """Delete the vector database and clear related session state."""
139
  logger.info("Deleting vector DB")
 
149
  st.error("No vector database found to delete.")
150
  logger.warning("Attempted to delete vector DB, but none was found")
151
 
 
152
  def main() -> None:
153
  """Main function to run the Streamlit application."""
154
  st.subheader("🧠 Ollama PDF RAG playground", divider="gray", anchor=False)
155
 
156
+ try:
157
+ models_info = ollama_list_with_retry()
158
+ available_models = extract_model_names(models_info)
159
+ except httpx.ConnectError:
160
+ st.error("Could not connect to the Ollama service. Please check your setup and try again.")
161
+ return
162
 
163
  col1, col2 = st.columns([1.5, 2])
164
 
 
166
  st.session_state["messages"] = []
167
 
168
  if "vector_db" not in st.session_state:
169
+ st.session_state["vector_db"]