Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
!pip install langchain-community # Install the missing module
|
2 |
import streamlit as st
|
3 |
import logging
|
4 |
import os
|
@@ -6,6 +5,8 @@ import tempfile
|
|
6 |
import shutil
|
7 |
import pdfplumber
|
8 |
import ollama
|
|
|
|
|
9 |
|
10 |
from langchain_community.document_loaders import UnstructuredPDFLoader
|
11 |
from langchain_community.embeddings import OllamaEmbeddings
|
@@ -35,18 +36,29 @@ logging.basicConfig(
|
|
35 |
|
36 |
logger = logging.getLogger(__name__)
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
@st.cache_resource(show_spinner=True)
def extract_model_names(
    models_info: Dict[str, List[Dict[str, Any]]],
) -> Tuple[str, ...]:
    """Extract model names from the provided models information.

    Args:
        models_info: Mapping as returned by ``ollama.list()``; its
            ``"models"`` entry is a list of per-model dicts.

    Returns:
        Tuple of model name strings, in listing order.
    """
    logger.info("Extracting model names from models_info")
    # Older ollama clients expose the name under "name"; newer ones use
    # "model" — accept either so both client versions work.
    model_names = tuple(
        model.get("name") or model.get("model", "")
        for model in models_info["models"]
    )
    # Lazy %-args: message is only built if this log level is enabled.
    logger.info("Extracted model names: %s", model_names)
    return model_names
|
48 |
|
49 |
-
|
50 |
def create_vector_db(file_upload) -> Chroma:
|
51 |
"""Create a vector database from an uploaded PDF file."""
|
52 |
logger.info(f"Creating vector DB from file upload: {file_upload.name}")
|
@@ -73,7 +85,6 @@ def create_vector_db(file_upload) -> Chroma:
|
|
73 |
logger.info(f"Temporary directory {temp_dir} removed")
|
74 |
return vector_db
|
75 |
|
76 |
-
|
77 |
def process_question(question: str, vector_db: Chroma, selected_model: str) -> str:
|
78 |
"""Process a user question using the vector database and selected language model."""
|
79 |
logger.info(f"Processing question: {question} using model: {selected_model}")
|
@@ -113,7 +124,6 @@ def process_question(question: str, vector_db: Chroma, selected_model: str) -> s
|
|
113 |
logger.info("Question processed and response generated")
|
114 |
return response
|
115 |
|
116 |
-
|
117 |
@st.cache_data
|
118 |
def extract_all_pages_as_images(file_upload) -> List[Any]:
|
119 |
"""Extract all pages from a PDF file as images."""
|
@@ -124,7 +134,6 @@ def extract_all_pages_as_images(file_upload) -> List[Any]:
|
|
124 |
logger.info("PDF pages extracted as images")
|
125 |
return pdf_pages
|
126 |
|
127 |
-
|
128 |
def delete_vector_db(vector_db: Optional[Chroma]) -> None:
|
129 |
"""Delete the vector database and clear related session state."""
|
130 |
logger.info("Deleting vector DB")
|
@@ -140,13 +149,16 @@ def delete_vector_db(vector_db: Optional[Chroma]) -> None:
|
|
140 |
st.error("No vector database found to delete.")
|
141 |
logger.warning("Attempted to delete vector DB, but none was found")
|
142 |
|
143 |
-
|
144 |
def main() -> None:
|
145 |
"""Main function to run the Streamlit application."""
|
146 |
st.subheader("π§ Ollama PDF RAG playground", divider="gray", anchor=False)
|
147 |
|
148 |
-
|
149 |
-
|
|
|
|
|
|
|
|
|
150 |
|
151 |
col1, col2 = st.columns([1.5, 2])
|
152 |
|
@@ -154,44 +166,4 @@ def main() -> None:
|
|
154 |
st.session_state["messages"] = []
|
155 |
|
156 |
if "vector_db" not in st.session_state:
|
157 |
-
st.session_state["vector_db"]
|
158 |
-
|
159 |
-
if available_models:
|
160 |
-
selected_model = col2.selectbox(
|
161 |
-
"Pick a model available locally on your system β", available_models
|
162 |
-
)
|
163 |
-
|
164 |
-
file_upload = col1.file_uploader(
|
165 |
-
"Upload a PDF file β", type="pdf", accept_multiple_files=False
|
166 |
-
)
|
167 |
-
|
168 |
-
if file_upload:
|
169 |
-
st.session_state["file_upload"] = file_upload
|
170 |
-
if st.session_state["vector_db"] is None:
|
171 |
-
st.session_state["vector_db"] = create_vector_db(file_upload)
|
172 |
-
pdf_pages = extract_all_pages_as_images(file_upload)
|
173 |
-
st.session_state["pdf_pages"] = pdf_pages
|
174 |
-
|
175 |
-
zoom_level = col1.slider(
|
176 |
-
"Zoom Level", min_value=100, max_value=1000, value=700, step=50
|
177 |
-
)
|
178 |
-
|
179 |
-
with col1:
|
180 |
-
with st.container(height=410, border=True):
|
181 |
-
for page_image in pdf_pages:
|
182 |
-
st.image(page_image, width=zoom_level)
|
183 |
-
|
184 |
-
delete_collection = col1.button("β οΈ Delete collection", type="secondary")
|
185 |
-
|
186 |
-
if delete_collection:
|
187 |
-
delete_vector_db(st.session_state["vector_db"])
|
188 |
-
|
189 |
-
with col2:
|
190 |
-
message_container = st.container(height=500, border=True)
|
191 |
-
|
192 |
-
for message in st.session_state["messages"]:
|
193 |
-
avatar = "π€" if message["role"] == "assistant" else "π"
|
194 |
-
with message_container.chat_message(message["role"], avatar=avatar):
|
195 |
-
st.markdown(message["content"])
|
196 |
-
|
197 |
-
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import logging
|
3 |
import os
|
|
|
5 |
import shutil
|
6 |
import pdfplumber
|
7 |
import ollama
|
8 |
+
import time
|
9 |
+
import httpx
|
10 |
|
11 |
from langchain_community.document_loaders import UnstructuredPDFLoader
|
12 |
from langchain_community.embeddings import OllamaEmbeddings
|
|
|
36 |
|
37 |
logger = logging.getLogger(__name__)
|
38 |
|
39 |
+
def ollama_list_with_retry(retries=3, delay=5):
    """Attempt to list models from Ollama with retry logic.

    Args:
        retries: Maximum number of connection attempts before giving up.
        delay: Seconds to sleep between failed attempts.

    Returns:
        The response from ``ollama.list()``.

    Raises:
        httpx.ConnectError: If every attempt fails to reach the service.
    """
    for attempt in range(retries):
        # Keep the try body minimal: only the call that can raise.
        try:
            response = ollama.list()
        except httpx.ConnectError as e:
            # Lazy %-args avoid building the message unless it is logged.
            logger.error("Connection error: %s. Attempt %s of %s", e, attempt + 1, retries)
            if attempt < retries - 1:
                time.sleep(delay)
            else:
                logger.error("All retry attempts failed. Cannot connect to Ollama service.")
                raise
        else:
            logger.info("Successfully retrieved model list from Ollama")
            return response
|
53 |
|
54 |
@st.cache_resource(show_spinner=True)
def extract_model_names(models_info: Dict[str, List[Dict[str, Any]]]) -> Tuple[str, ...]:
    """Return the names of every model reported by Ollama as a tuple."""
    logger.info("Extracting model names from models_info")
    # Accumulate explicitly, then freeze into an immutable tuple.
    names: List[str] = []
    for entry in models_info["models"]:
        names.append(entry["name"])
    model_names = tuple(names)
    logger.info(f"Extracted model names: {model_names}")
    return model_names
|
61 |
|
|
|
62 |
def create_vector_db(file_upload) -> Chroma:
|
63 |
"""Create a vector database from an uploaded PDF file."""
|
64 |
logger.info(f"Creating vector DB from file upload: {file_upload.name}")
|
|
|
85 |
logger.info(f"Temporary directory {temp_dir} removed")
|
86 |
return vector_db
|
87 |
|
|
|
88 |
def process_question(question: str, vector_db: Chroma, selected_model: str) -> str:
|
89 |
"""Process a user question using the vector database and selected language model."""
|
90 |
logger.info(f"Processing question: {question} using model: {selected_model}")
|
|
|
124 |
logger.info("Question processed and response generated")
|
125 |
return response
|
126 |
|
|
|
127 |
@st.cache_data
|
128 |
def extract_all_pages_as_images(file_upload) -> List[Any]:
|
129 |
"""Extract all pages from a PDF file as images."""
|
|
|
134 |
logger.info("PDF pages extracted as images")
|
135 |
return pdf_pages
|
136 |
|
|
|
137 |
def delete_vector_db(vector_db: Optional[Chroma]) -> None:
|
138 |
"""Delete the vector database and clear related session state."""
|
139 |
logger.info("Deleting vector DB")
|
|
|
149 |
st.error("No vector database found to delete.")
|
150 |
logger.warning("Attempted to delete vector DB, but none was found")
|
151 |
|
|
|
152 |
def main() -> None:
|
153 |
"""Main function to run the Streamlit application."""
|
154 |
st.subheader("π§ Ollama PDF RAG playground", divider="gray", anchor=False)
|
155 |
|
156 |
+
try:
|
157 |
+
models_info = ollama_list_with_retry()
|
158 |
+
available_models = extract_model_names(models_info)
|
159 |
+
except httpx.ConnectError:
|
160 |
+
st.error("Could not connect to the Ollama service. Please check your setup and try again.")
|
161 |
+
return
|
162 |
|
163 |
col1, col2 = st.columns([1.5, 2])
|
164 |
|
|
|
166 |
st.session_state["messages"] = []
|
167 |
|
168 |
if "vector_db" not in st.session_state:
|
169 |
+
st.session_state["vector_db"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|