Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -35,7 +35,7 @@ uploaded_file = st.sidebar.file_uploader("Upload a medical text book (image)", t
|
|
35 |
# Initialize the parser
|
36 |
parser = LlamaParse(
|
37 |
result_type="markdown",
|
38 |
-
parsing_instruction="You are given medical
|
39 |
use_vendor_multimodal_model=True,
|
40 |
vendor_multimodal_model_name="gpt-4o-mini-2024-07-18",
|
41 |
show_progress=True,
|
@@ -46,14 +46,8 @@ parser = LlamaParse(
|
|
46 |
language="en"
|
47 |
)
|
48 |
|
49 |
-
#
|
50 |
-
|
51 |
-
mime_type, _ = guess_type(image_path)
|
52 |
-
if mime_type is None:
|
53 |
-
mime_type = 'image/png'
|
54 |
-
with open(image_path, "rb") as image_file:
|
55 |
-
base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')
|
56 |
-
return f"data:{mime_type};base64,{base64_encoded_data}"
|
57 |
|
58 |
# Upload and process file
|
59 |
if uploaded_file:
|
@@ -70,6 +64,15 @@ if uploaded_file:
|
|
70 |
st.write("File successfully processed!")
|
71 |
st.write(f"Processed file: {uploaded_file.name}")
|
72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
# Function to get sorted image files
|
74 |
def get_page_number(file_name):
|
75 |
match = re.search(r"-page-(\d+)\.jpg$", str(file_name))
|
@@ -97,8 +100,11 @@ def get_text_nodes(md_json_objs, image_dir) -> t.List[TextNode]:
|
|
97 |
nodes.append(node)
|
98 |
return nodes
|
99 |
|
100 |
-
# Load text nodes
|
101 |
-
|
|
|
|
|
|
|
102 |
|
103 |
# Setup index and LLM
|
104 |
embed_model = OpenAIEmbedding(model="text-embedding-3-large")
|
@@ -168,4 +174,4 @@ query_engine = MultimodalQueryEngine(QA_PROMPT, retriever, gpt_4o_mm)
|
|
168 |
if query_text:
|
169 |
st.write("Querying...")
|
170 |
response = query_engine.custom_query(query_text)
|
171 |
-
st.markdown(response.response)
|
|
|
35 |
# Initialize the parser
|
36 |
parser = LlamaParse(
|
37 |
result_type="markdown",
|
38 |
+
parsing_instruction="You are given a medical textbook on medicine",
|
39 |
use_vendor_multimodal_model=True,
|
40 |
vendor_multimodal_model_name="gpt-4o-mini-2024-07-18",
|
41 |
show_progress=True,
|
|
|
46 |
language="en"
|
47 |
)
|
48 |
|
49 |
+
# Initialize md_json_objs as an empty list
|
50 |
+
md_json_objs = []
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
# Upload and process file
|
53 |
if uploaded_file:
|
|
|
64 |
st.write("File successfully processed!")
|
65 |
st.write(f"Processed file: {uploaded_file.name}")
|
66 |
|
67 |
+
# Function to encode image to data URL
|
68 |
+
def local_image_to_data_url(image_path):
|
69 |
+
mime_type, _ = guess_type(image_path)
|
70 |
+
if mime_type is None:
|
71 |
+
mime_type = 'image/png'
|
72 |
+
with open(image_path, "rb") as image_file:
|
73 |
+
base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')
|
74 |
+
return f"data:{mime_type};base64,{base64_encoded_data}"
|
75 |
+
|
76 |
# Function to get sorted image files
|
77 |
def get_page_number(file_name):
|
78 |
match = re.search(r"-page-(\d+)\.jpg$", str(file_name))
|
|
|
100 |
nodes.append(node)
|
101 |
return nodes
|
102 |
|
103 |
+
# Load text nodes if md_json_objs is not empty
|
104 |
+
if md_json_objs:
|
105 |
+
text_nodes = get_text_nodes(md_json_objs, "data_images")
|
106 |
+
else:
|
107 |
+
text_nodes = []
|
108 |
|
109 |
# Setup index and LLM
|
110 |
embed_model = OpenAIEmbedding(model="text-embedding-3-large")
|
|
|
174 |
if query_text:
|
175 |
st.write("Querying...")
|
176 |
response = query_engine.custom_query(query_text)
|
177 |
+
st.markdown(response.response)
|