Spaces:
Running
Running
phyloforfun
committed on
Commit
·
dbaeac5
1
Parent(s):
67f7ed6
Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing
Browse files- app.py +48 -5
- vouchervision/OCR_google_cloud_vision.py +3 -0
- vouchervision/utils_hf.py +32 -1
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import streamlit as st
|
2 |
-
import yaml, os, json, random, time, re, torch, random, warnings, shutil, sys
|
3 |
import seaborn as sns
|
4 |
import plotly.graph_objs as go
|
5 |
from PIL import Image
|
@@ -14,7 +14,7 @@ from vouchervision.vouchervision_main import voucher_vision
|
|
14 |
from vouchervision.general_utils import test_GPU, get_cfg_from_full_path, summarize_expense_report, validate_dir
|
15 |
from vouchervision.model_maps import ModelMaps
|
16 |
from vouchervision.API_validation import APIvalidation
|
17 |
-
from vouchervision.utils_hf import setup_streamlit_config, save_uploaded_file, save_uploaded_local
|
18 |
from vouchervision.data_project import convert_pdf_to_jpg
|
19 |
from vouchervision.utils_LLM import check_system_gpus
|
20 |
|
@@ -42,7 +42,7 @@ if 'config' not in st.session_state:
|
|
42 |
st.session_state.config, st.session_state.dir_home = build_VV_config(loaded_cfg=None)
|
43 |
setup_streamlit_config(st.session_state.dir_home)
|
44 |
|
45 |
-
|
46 |
|
47 |
########################################################################################################
|
48 |
### Global constants ####
|
@@ -273,7 +273,7 @@ def content_input_images(col_left, col_right):
|
|
273 |
if st.session_state.is_hf:
|
274 |
if uploaded_files:
|
275 |
# Clear input image gallery and input list
|
276 |
-
|
277 |
|
278 |
for uploaded_file in uploaded_files:
|
279 |
# Determine the file type
|
@@ -336,6 +336,45 @@ def content_input_images(col_left, col_right):
|
|
336 |
pass
|
337 |
# elif st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] != st.session_state.config['leafmachine']['project']['dir_images_local']):
|
338 |
elif (st.session_state['dir_images_local_TEMP'] != st.session_state.config['leafmachine']['project']['dir_images_local']):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
339 |
dir_images_local = st.session_state.config['leafmachine']['project']['dir_images_local']
|
340 |
count_n_imgs = list_jpg_files(dir_images_local)
|
341 |
st.session_state['processing_add_on'] = count_n_imgs
|
@@ -412,6 +451,10 @@ def delete_directory(dir_path):
|
|
412 |
|
413 |
|
414 |
def clear_image_gallery():
|
|
|
|
|
|
|
|
|
415 |
delete_directory(st.session_state['dir_uploaded_images'])
|
416 |
delete_directory(st.session_state['dir_uploaded_images_small'])
|
417 |
validate_dir(st.session_state['dir_uploaded_images'])
|
@@ -423,7 +466,7 @@ def use_test_image():
|
|
423 |
st.session_state.config['leafmachine']['project']['dir_images_local'] = os.path.join(st.session_state.dir_home,'demo','demo_images')
|
424 |
n_images = len([f for f in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']) if os.path.isfile(os.path.join(st.session_state.config['leafmachine']['project']['dir_images_local'], f))])
|
425 |
st.session_state['processing_add_on'] = n_images
|
426 |
-
|
427 |
st.session_state['uploader_idk'] += 1
|
428 |
for file in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']):
|
429 |
file_path = save_uploaded_file(os.path.join(st.session_state.dir_home,'demo','demo_images'), file)
|
|
|
1 |
import streamlit as st
|
2 |
+
import yaml, os, json, random, time, re, torch, random, warnings, shutil, sys, glob
|
3 |
import seaborn as sns
|
4 |
import plotly.graph_objs as go
|
5 |
from PIL import Image
|
|
|
14 |
from vouchervision.general_utils import test_GPU, get_cfg_from_full_path, summarize_expense_report, validate_dir
|
15 |
from vouchervision.model_maps import ModelMaps
|
16 |
from vouchervision.API_validation import APIvalidation
|
17 |
+
from vouchervision.utils_hf import setup_streamlit_config, save_uploaded_file, save_uploaded_local, save_uploaded_file_local
|
18 |
from vouchervision.data_project import convert_pdf_to_jpg
|
19 |
from vouchervision.utils_LLM import check_system_gpus
|
20 |
|
|
|
42 |
st.session_state.config, st.session_state.dir_home = build_VV_config(loaded_cfg=None)
|
43 |
setup_streamlit_config(st.session_state.dir_home)
|
44 |
|
45 |
+
st.session_state['is_hf'] = True
|
46 |
|
47 |
########################################################################################################
|
48 |
### Global constants ####
|
|
|
273 |
if st.session_state.is_hf:
|
274 |
if uploaded_files:
|
275 |
# Clear input image gallery and input list
|
276 |
+
clear_image_uploads()
|
277 |
|
278 |
for uploaded_file in uploaded_files:
|
279 |
# Determine the file type
|
|
|
336 |
pass
|
337 |
# elif st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] != st.session_state.config['leafmachine']['project']['dir_images_local']):
|
338 |
elif (st.session_state['dir_images_local_TEMP'] != st.session_state.config['leafmachine']['project']['dir_images_local']):
|
339 |
+
has_pdf = False
|
340 |
+
clear_image_uploads()
|
341 |
+
|
342 |
+
for input_file in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']):
|
343 |
+
if input_file.split('.')[1].lower() in ['jpg','jpeg']:
|
344 |
+
pass
|
345 |
+
elif input_file.split('.')[1].lower() in ['pdf',]:
|
346 |
+
has_pdf = True
|
347 |
+
# Handle PDF files
|
348 |
+
file_path = save_uploaded_file_local(st.session_state.config['leafmachine']['project']['dir_images_local'], st.session_state['dir_uploaded_images'], input_file)
|
349 |
+
# Convert each page of the PDF to an image
|
350 |
+
n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)#st.session_state.config['leafmachine']['project']['dir_images_local'])
|
351 |
+
|
352 |
+
|
353 |
+
# pdf_files_pattern = os.path.join(st.session_state['dir_uploaded_images'], '*.pdf')
|
354 |
+
# for pdf_file in glob.glob(pdf_files_pattern):
|
355 |
+
# os.remove(pdf_file)
|
356 |
+
|
357 |
+
# # Update the input list for each page image
|
358 |
+
# converted_files = os.listdir(st.session_state['dir_uploaded_images'])
|
359 |
+
# for file_name in converted_files:
|
360 |
+
# if file_name.lower().endswith('.jpg'):
|
361 |
+
# jpg_file_path = os.path.join(st.session_state['dir_uploaded_images'], file_name)
|
362 |
+
# st.session_state['input_list'].append(jpg_file_path)
|
363 |
+
|
364 |
+
# # Optionally, create a thumbnail for the gallery
|
365 |
+
# img = Image.open(jpg_file_path)
|
366 |
+
# img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
|
367 |
+
# file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images'], st.session_state['dir_uploaded_images_small'], file_name, img)
|
368 |
+
# st.session_state['input_list_small'].append(file_path_small)
|
369 |
+
|
370 |
+
# st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images']
|
371 |
+
|
372 |
+
else:
|
373 |
+
pass
|
374 |
+
# st.warning("Inputs must be '.PDF' or '.jpg' or '.jpeg'")
|
375 |
+
if has_pdf:
|
376 |
+
st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images']
|
377 |
+
|
378 |
dir_images_local = st.session_state.config['leafmachine']['project']['dir_images_local']
|
379 |
count_n_imgs = list_jpg_files(dir_images_local)
|
380 |
st.session_state['processing_add_on'] = count_n_imgs
|
|
|
451 |
|
452 |
|
453 |
def clear_image_gallery():
    """Reset only the thumbnail gallery directory.

    Deletes the directory stored under ``dir_uploaded_images_small`` in the
    Streamlit session state and then hands the same path to ``validate_dir``
    (presumably to recreate it empty -- confirm against ``validate_dir``).
    The full-size upload directory is not touched here.
    """
    gallery_key = 'dir_uploaded_images_small'
    delete_directory(st.session_state[gallery_key])
    validate_dir(st.session_state[gallery_key])
|
456 |
+
|
457 |
+
def clear_image_uploads():
|
458 |
delete_directory(st.session_state['dir_uploaded_images'])
|
459 |
delete_directory(st.session_state['dir_uploaded_images_small'])
|
460 |
validate_dir(st.session_state['dir_uploaded_images'])
|
|
|
466 |
st.session_state.config['leafmachine']['project']['dir_images_local'] = os.path.join(st.session_state.dir_home,'demo','demo_images')
|
467 |
n_images = len([f for f in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']) if os.path.isfile(os.path.join(st.session_state.config['leafmachine']['project']['dir_images_local'], f))])
|
468 |
st.session_state['processing_add_on'] = n_images
|
469 |
+
clear_image_uploads()
|
470 |
st.session_state['uploader_idk'] += 1
|
471 |
for file in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']):
|
472 |
file_path = save_uploaded_file(os.path.join(st.session_state.dir_home,'demo','demo_images'), file)
|
vouchervision/OCR_google_cloud_vision.py
CHANGED
@@ -144,6 +144,9 @@ class OCREngine:
|
|
144 |
|
145 |
def init_gemini_vision(self):
|
146 |
pass
|
|
|
|
|
|
|
147 |
|
148 |
|
149 |
def detect_text_craft(self):
|
|
|
144 |
|
145 |
def init_gemini_vision(self):
    """Placeholder initializer; intentionally performs no work and returns None."""
    return None
|
147 |
+
|
148 |
+
def init_gpt4_vision(self):
    """Placeholder initializer; intentionally performs no work and returns None."""
    return None
|
150 |
|
151 |
|
152 |
def detect_text_craft(self):
|
vouchervision/utils_hf.py
CHANGED
@@ -6,7 +6,7 @@ import base64
|
|
6 |
from PIL import Image
|
7 |
from PIL import Image
|
8 |
from io import BytesIO
|
9 |
-
from shutil import copyfileobj
|
10 |
|
11 |
# from vouchervision.general_utils import get_cfg_from_full_path
|
12 |
|
@@ -37,6 +37,37 @@ def setup_streamlit_config(dir_home):
|
|
37 |
f.write(config_content.strip())
|
38 |
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
def save_uploaded_file(directory, img_file, image=None):
|
42 |
if not os.path.exists(directory):
|
|
|
6 |
from PIL import Image
|
7 |
from PIL import Image
|
8 |
from io import BytesIO
|
9 |
+
from shutil import copyfileobj, copyfile
|
10 |
|
11 |
# from vouchervision.general_utils import get_cfg_from_full_path
|
12 |
|
|
|
37 |
f.write(config_content.strip())
|
38 |
|
39 |
|
40 |
+
# Pillow's JPEG writer only accepts these modes; anything else (RGBA, P, LA,
# I;16, ...) must be converted first or ``save`` raises OSError.
_JPEG_WRITABLE_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")


def _save_as_jpeg(image, full_path_out):
    """Write *image* to *full_path_out* as JPEG, converting to RGB if needed."""
    if image.mode not in _JPEG_WRITABLE_MODES:
        image = image.convert("RGB")
    image.save(full_path_out, "JPEG")


def save_uploaded_file_local(directory_in, directory_out, img_file_name, image=None):
    """Copy or re-encode one local input file into ``directory_out``.

    A ``.pdf`` file (extension compared case-insensitively) is copied
    unchanged. Any other file is written to
    ``directory_out/img_file_name`` as a JPEG, taken from ``image`` when one
    is supplied, otherwise from the file opened at
    ``directory_in/img_file_name``.

    Args:
        directory_in: Directory that contains the source file.
        directory_out: Destination directory; created when missing.
        img_file_name: File name including its extension.
        image: Optional already-loaded PIL image to save instead of reading
            the source file from disk.

    Returns:
        The full output path on success, or ``None`` when the image could
        not be read or written (the failure is logged, not raised).

    Raises:
        OSError: If ``directory_out`` cannot be created or the PDF copy
            fails (for example, the source file does not exist).
    """
    import logging

    # exist_ok avoids the check-then-create race of the exists()/makedirs() pair.
    os.makedirs(directory_out, exist_ok=True)

    full_path_in = os.path.join(directory_in, img_file_name)
    full_path_out = os.path.join(directory_out, img_file_name)
    _, img_file_ext = os.path.splitext(img_file_name)

    if img_file_ext.lower() == '.pdf':
        # PDFs are passed through untouched; conversion to images happens later.
        copyfile(full_path_in, full_path_out)
        return full_path_out

    try:
        if image is None:
            # Use a distinct name so the ``image`` parameter is not rebound.
            with Image.open(full_path_in) as opened_image:
                _save_as_jpeg(opened_image, full_path_out)
        else:
            _save_as_jpeg(image, full_path_out)
    except (OSError, ValueError) as err:
        # Best-effort by design: an unreadable or unwritable image is
        # reported and skipped rather than aborting the whole batch.
        logging.getLogger(__name__).warning(
            "Could not save %s as JPEG to %s: %s", full_path_in, full_path_out, err
        )
        return None

    return full_path_out
|
70 |
+
|
71 |
|
72 |
def save_uploaded_file(directory, img_file, image=None):
|
73 |
if not os.path.exists(directory):
|