Spaces:
Running
Running
phyloforfun
committed on
Commit
·
aedd7d9
1
Parent(s):
5ada5b1
Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing
Browse files
app.py
CHANGED
@@ -94,15 +94,16 @@ def content_input_images_hf():
|
|
94 |
|
95 |
|
96 |
|
97 |
-
def create_download_button(zip_filepath):
|
98 |
-
with
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
|
|
106 |
|
107 |
|
108 |
def delete_directory(dir_path):
|
@@ -373,7 +374,7 @@ class JSONReport:
|
|
373 |
# Display the map if map_data is not empty
|
374 |
if not map_data.empty:
|
375 |
with self.json_map:
|
376 |
-
st.map(map_data, zoom=4, size='size', color='color')
|
377 |
|
378 |
def set_text(self, text_main=None, text_middle=None, text_right=None):
|
379 |
if text_main:
|
@@ -1287,7 +1288,7 @@ def build_LLM_prompt_config():
|
|
1287 |
if new_filename:
|
1288 |
if check_unique_mapping_assignments():
|
1289 |
if check_prompt_yaml_filename(new_filename):
|
1290 |
-
save_prompt_yaml(new_filename)
|
1291 |
else:
|
1292 |
st.error("File name can only contain letters, numbers, underscores, and dashes. Cannot contain spaces.")
|
1293 |
else:
|
@@ -1587,7 +1588,7 @@ def content_header():
|
|
1587 |
|
1588 |
path_custom_prompts = os.path.join(st.session_state.dir_home,'custom_prompts',st.session_state.config['leafmachine']['project']['prompt_version'])
|
1589 |
# Call the machine function.
|
1590 |
-
st.session_state['formatted_json'], st.session_state['formatted_json_WFO'], st.session_state['formatted_json_GEO'], total_cost, n_failed_OCR, n_failed_LLM_calls = voucher_vision(None,
|
1591 |
st.session_state.dir_home,
|
1592 |
path_custom_prompts,
|
1593 |
None,
|
@@ -1609,7 +1610,8 @@ def content_header():
|
|
1609 |
else:
|
1610 |
st.info(f":money_with_wings: This run cost :heavy_dollar_sign:{total_cost:.4f}")
|
1611 |
st.balloons()
|
1612 |
-
|
|
|
1613 |
else:
|
1614 |
st.button("Start Processing", type='primary', disabled=True)
|
1615 |
st.error(":heavy_exclamation_mark: Required API keys not set. Please visit the 'API Keys' tab and set the Google Vision OCR API key and at least one LLM key.")
|
@@ -1617,7 +1619,8 @@ def content_header():
|
|
1617 |
if st.session_state['formatted_json']:
|
1618 |
if st.session_state['hold_output']:
|
1619 |
st.session_state['json_report'].set_JSON(st.session_state['formatted_json'], st.session_state['formatted_json_WFO'], st.session_state['formatted_json_GEO'])
|
1620 |
-
|
|
|
1621 |
|
1622 |
|
1623 |
|
@@ -2361,7 +2364,8 @@ if 'settings_filename' not in st.session_state:
|
|
2361 |
st.session_state['settings_filename'] = None
|
2362 |
if 'loaded_settings_filename' not in st.session_state:
|
2363 |
st.session_state['loaded_settings_filename'] = None
|
2364 |
-
|
|
|
2365 |
|
2366 |
# Initialize session_state variables if they don't exist
|
2367 |
if 'prompt_info' not in st.session_state:
|
|
|
94 |
|
95 |
|
96 |
|
97 |
+
def create_download_button(zip_filepath, col):
    """Draw a primary download button for a zip archive inside ``col``.

    Args:
        zip_filepath: Path of the zip file to offer; it is read fully into
            memory and its base name is used as the download file name.
        col: Object used as a context manager around the button (a Streamlit
            column/container at the visible call sites).
    """
    with col, open(zip_filepath, 'rb') as zip_file:
        # Buffer the whole archive so the button owns an in-memory copy.
        zip_buffer = BytesIO(zip_file.read())
        st.download_button(
            label=f"Download Results for{st.session_state['processing_add_on']}",
            type='primary',
            data=zip_buffer,
            file_name=os.path.basename(zip_filepath),
            mime='application/zip',
        )
|
107 |
|
108 |
|
109 |
def delete_directory(dir_path):
|
|
|
374 |
# Display the map if map_data is not empty
|
375 |
if not map_data.empty:
|
376 |
with self.json_map:
|
377 |
+
st.map(map_data, zoom=4, size='size', color='color', use_container_width=True)
|
378 |
|
379 |
def set_text(self, text_main=None, text_middle=None, text_right=None):
|
380 |
if text_main:
|
|
|
1288 |
if new_filename:
|
1289 |
if check_unique_mapping_assignments():
|
1290 |
if check_prompt_yaml_filename(new_filename):
|
1291 |
+
save_prompt_yaml(new_filename, col_left_save)
|
1292 |
else:
|
1293 |
st.error("File name can only contain letters, numbers, underscores, and dashes. Cannot contain spaces.")
|
1294 |
else:
|
|
|
1588 |
|
1589 |
path_custom_prompts = os.path.join(st.session_state.dir_home,'custom_prompts',st.session_state.config['leafmachine']['project']['prompt_version'])
|
1590 |
# Call the machine function.
|
1591 |
+
st.session_state['formatted_json'], st.session_state['formatted_json_WFO'], st.session_state['formatted_json_GEO'], total_cost, n_failed_OCR, n_failed_LLM_calls, st.session_state['zip_filepath'] = voucher_vision(None,
|
1592 |
st.session_state.dir_home,
|
1593 |
path_custom_prompts,
|
1594 |
None,
|
|
|
1610 |
else:
|
1611 |
st.info(f":money_with_wings: This run cost :heavy_dollar_sign:{total_cost:.4f}")
|
1612 |
st.balloons()
|
1613 |
+
if st.session_state['zip_filepath']:
|
1614 |
+
create_download_button(st.session_state['zip_filepath'], col_run_1)
|
1615 |
else:
|
1616 |
st.button("Start Processing", type='primary', disabled=True)
|
1617 |
st.error(":heavy_exclamation_mark: Required API keys not set. Please visit the 'API Keys' tab and set the Google Vision OCR API key and at least one LLM key.")
|
|
|
1619 |
if st.session_state['formatted_json']:
|
1620 |
if st.session_state['hold_output']:
|
1621 |
st.session_state['json_report'].set_JSON(st.session_state['formatted_json'], st.session_state['formatted_json_WFO'], st.session_state['formatted_json_GEO'])
|
1622 |
+
if st.session_state['zip_filepath']:
|
1623 |
+
create_download_button(st.session_state['zip_filepath'], col_run_1)
|
1624 |
|
1625 |
|
1626 |
|
|
|
2364 |
st.session_state['settings_filename'] = None
|
2365 |
if 'loaded_settings_filename' not in st.session_state:
|
2366 |
st.session_state['loaded_settings_filename'] = None
|
2367 |
+
if 'zip_filepath' not in st.session_state:
|
2368 |
+
st.session_state['zip_filepath'] = None
|
2369 |
|
2370 |
# Initialize session_state variables if they don't exist
|
2371 |
if 'prompt_info' not in st.session_state:
|
vouchervision/API_validation.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import os, io, openai, vertexai, json
|
2 |
import google.generativeai as genai
|
3 |
from mistralai.client import MistralClient
|
4 |
from mistralai.models.chat_completion import ChatMessage
|
@@ -171,12 +171,24 @@ class APIvalidation:
|
|
171 |
except Exception as e: # Replace with a more specific exception if possible
|
172 |
return False
|
173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
def check_google_vertex_genai_api_key(self):
|
175 |
results = {"palm2": False, "gemini": False}
|
176 |
if self.cfg_private:
|
177 |
try:
|
178 |
# Assuming genai and vertexai are clients for Google services
|
179 |
-
os.environ["GOOGLE_API_KEY"] = self.cfg_private['google_palm']['google_palm_api']
|
|
|
180 |
# genai.configure(api_key=self.cfg_private['google_palm']['google_palm_api'])
|
181 |
vertexai.init(project= self.cfg_private['google_palm']['project_id'], location=self.cfg_private['google_palm']['location'])
|
182 |
|
@@ -209,7 +221,7 @@ class APIvalidation:
|
|
209 |
try:
|
210 |
# Assuming genai and vertexai are clients for Google services
|
211 |
# os.environ["GOOGLE_API_KEY"] = os.getenv('PALM_API_KEY')
|
212 |
-
genai.configure(api_key=os.getenv('
|
213 |
vertexai.init(project= os.getenv('GOOGLE_PROJECT_ID'), location=os.getenv('GOOGLE_LOCATION'))
|
214 |
|
215 |
try:
|
|
|
1 |
+
import os, io, openai, vertexai, json, tempfile
|
2 |
import google.generativeai as genai
|
3 |
from mistralai.client import MistralClient
|
4 |
from mistralai.models.chat_completion import ChatMessage
|
|
|
171 |
except Exception as e: # Replace with a more specific exception if possible
|
172 |
return False
|
173 |
|
174 |
+
def get_google_credentials(self):
    """Write the ``google_service_account_json`` env value to a temp ``.json`` file.

    The value is written verbatim; it is not parsed or validated here
    (presumably it is a Google service-account key in JSON form -- confirm
    against the deployment's secrets).

    Returns:
        str: Path of the temporary file. It is created with ``delete=False``
        so it remains on disk after this call; nothing in this method
        removes it.

    Raises:
        TypeError: If ``google_service_account_json`` is not set.
    """
    service_account_json_str = os.getenv('google_service_account_json')
    if service_account_json_str is None:
        # Fail before the file is created: writing ``None`` would raise the
        # same exception type, but only after leaving an empty orphan temp
        # file behind and with a message that hides the real cause.
        raise TypeError(
            "Environment variable 'google_service_account_json' is not set; "
            "cannot create the Google credentials file."
        )

    with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".json") as temp:
        temp.write(service_account_json_str)
        temp_filename = temp.name

    return temp_filename
|
183 |
+
|
184 |
+
|
185 |
def check_google_vertex_genai_api_key(self):
|
186 |
results = {"palm2": False, "gemini": False}
|
187 |
if self.cfg_private:
|
188 |
try:
|
189 |
# Assuming genai and vertexai are clients for Google services
|
190 |
+
# os.environ["GOOGLE_API_KEY"] = self.cfg_private['google_palm']['google_palm_api']
|
191 |
+
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.get_google_credentials()
|
192 |
# genai.configure(api_key=self.cfg_private['google_palm']['google_palm_api'])
|
193 |
vertexai.init(project= self.cfg_private['google_palm']['project_id'], location=self.cfg_private['google_palm']['location'])
|
194 |
|
|
|
221 |
try:
|
222 |
# Assuming genai and vertexai are clients for Google services
|
223 |
# os.environ["GOOGLE_API_KEY"] = os.getenv('PALM_API_KEY')
|
224 |
+
genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))
|
225 |
vertexai.init(project= os.getenv('GOOGLE_PROJECT_ID'), location=os.getenv('GOOGLE_LOCATION'))
|
226 |
|
227 |
try:
|
vouchervision/LLM_local_MistralAI.py
CHANGED
@@ -12,6 +12,7 @@ from vouchervision.utils_taxonomy_WFO import validate_taxonomy_WFO
|
|
12 |
from vouchervision.utils_geolocate_HERE import validate_coordinates_here
|
13 |
|
14 |
'''
|
|
|
15 |
https://python.langchain.com/docs/integrations/llms/huggingface_pipelines
|
16 |
'''
|
17 |
|
|
|
12 |
from vouchervision.utils_geolocate_HERE import validate_coordinates_here
|
13 |
|
14 |
'''
|
15 |
+
Local Pipelines:
|
16 |
https://python.langchain.com/docs/integrations/llms/huggingface_pipelines
|
17 |
'''
|
18 |
|
vouchervision/VoucherVision_Config_Builder.py
CHANGED
@@ -41,7 +41,7 @@ def build_VV_config(loaded_cfg=None):
|
|
41 |
|
42 |
LLM_version_user = 'Azure GPT 3.5 Instruct' #'Azure GPT 4 Turbo 1106-preview'
|
43 |
prompt_version = 'version_5.yaml' # from ["Version 1", "Version 1 No Domain Knowledge", "Version 2"]
|
44 |
-
use_LeafMachine2_collage_images =
|
45 |
do_create_OCR_helper_image = True
|
46 |
|
47 |
batch_size = 500
|
|
|
41 |
|
42 |
LLM_version_user = 'Azure GPT 3.5 Instruct' #'Azure GPT 4 Turbo 1106-preview'
|
43 |
prompt_version = 'version_5.yaml' # from ["Version 1", "Version 1 No Domain Knowledge", "Version 2"]
|
44 |
+
use_LeafMachine2_collage_images = False # Use LeafMachine2 collage images
|
45 |
do_create_OCR_helper_image = True
|
46 |
|
47 |
batch_size = 500
|
vouchervision/utils_VoucherVision.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import openai
|
2 |
-
import os, json, glob, shutil, yaml, torch, logging
|
3 |
import openpyxl
|
4 |
from openpyxl import Workbook, load_workbook
|
5 |
import vertexai
|
@@ -408,12 +408,17 @@ class VoucherVision():
|
|
408 |
else:
|
409 |
return False
|
410 |
|
411 |
-
|
|
|
|
|
412 |
|
|
|
|
|
|
|
413 |
|
|
|
414 |
|
415 |
|
416 |
-
|
417 |
def set_API_keys(self):
|
418 |
if self.is_hf:
|
419 |
openai_api_key = os.getenv('OPENAI_API_KEY')
|
@@ -425,6 +430,7 @@ class VoucherVision():
|
|
425 |
open_cage_api_key = os.getenv('open_cage_geocode')
|
426 |
google_project_id = os.getenv('GOOGLE_PROJECT_ID')
|
427 |
google_project_location = os.getenv('GOOGLE_LOCATION')
|
|
|
428 |
|
429 |
|
430 |
|
|
|
1 |
import openai
|
2 |
+
import os, json, glob, shutil, yaml, torch, logging, tempfile
|
3 |
import openpyxl
|
4 |
from openpyxl import Workbook, load_workbook
|
5 |
import vertexai
|
|
|
408 |
else:
|
409 |
return False
|
410 |
|
411 |
+
def get_google_credentials(self):
    """Write the ``google_service_account_json`` env value to a temp ``.json`` file.

    The value is written verbatim; it is not parsed or validated here
    (presumably it is a Google service-account key in JSON form -- confirm
    against the deployment's secrets).

    Returns:
        str: Path of the temporary file. It is created with ``delete=False``
        so it remains on disk after this call; nothing in this method
        removes it.

    Raises:
        TypeError: If ``google_service_account_json`` is not set.
    """
    service_account_json_str = os.getenv('google_service_account_json')
    if service_account_json_str is None:
        # Fail before the file is created: writing ``None`` would raise the
        # same exception type, but only after leaving an empty orphan temp
        # file behind and with a message that hides the real cause.
        raise TypeError(
            "Environment variable 'google_service_account_json' is not set; "
            "cannot create the Google credentials file."
        )

    with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".json") as temp:
        temp.write(service_account_json_str)
        temp_filename = temp.name

    return temp_filename
|
420 |
|
421 |
|
|
|
422 |
def set_API_keys(self):
|
423 |
if self.is_hf:
|
424 |
openai_api_key = os.getenv('OPENAI_API_KEY')
|
|
|
430 |
open_cage_api_key = os.getenv('open_cage_geocode')
|
431 |
google_project_id = os.getenv('GOOGLE_PROJECT_ID')
|
432 |
google_project_location = os.getenv('GOOGLE_LOCATION')
|
433 |
+
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.get_google_credentials()
|
434 |
|
435 |
|
436 |
|
vouchervision/vouchervision_main.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
'''
|
2 |
VoucherVision - based on LeafMachine2 Processes
|
3 |
'''
|
4 |
-
import os, inspect, sys, logging, subprocess
|
5 |
from time import perf_counter
|
6 |
currentdir = os.path.dirname(os.path.dirname(inspect.getfile(inspect.currentframe())))
|
7 |
parentdir = os.path.dirname(currentdir)
|
@@ -14,7 +14,7 @@ from data_project import Project_Info
|
|
14 |
from LM2_logger import start_logging
|
15 |
from fetch_data import fetch_data
|
16 |
from utils_VoucherVision import VoucherVision, space_saver
|
17 |
-
|
18 |
|
19 |
def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progress_report, json_report, path_api_cost=None, test_ind = None, is_hf = True, is_real_run=False):
|
20 |
t_overall = perf_counter()
|
@@ -78,7 +78,21 @@ def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progr
|
|
78 |
|
79 |
Voucher_Vision.close_logger_handlers()
|
80 |
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
def voucher_vision_OCR_test(cfg_file_path, dir_home, cfg_test, path_to_crop):
|
84 |
# get_n_overall = progress_report.get_n_overall()
|
|
|
1 |
'''
|
2 |
VoucherVision - based on LeafMachine2 Processes
|
3 |
'''
|
4 |
+
import os, inspect, sys, logging, subprocess, shutil
|
5 |
from time import perf_counter
|
6 |
currentdir = os.path.dirname(os.path.dirname(inspect.getfile(inspect.currentframe())))
|
7 |
parentdir = os.path.dirname(currentdir)
|
|
|
14 |
from LM2_logger import start_logging
|
15 |
from fetch_data import fetch_data
|
16 |
from utils_VoucherVision import VoucherVision, space_saver
|
17 |
+
from vouchervision.utils_hf import upload_to_drive
|
18 |
|
19 |
def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progress_report, json_report, path_api_cost=None, test_ind = None, is_hf = True, is_real_run=False):
|
20 |
t_overall = perf_counter()
|
|
|
78 |
|
79 |
Voucher_Vision.close_logger_handlers()
|
80 |
|
81 |
+
zip_filepath = None
|
82 |
+
if is_hf:
|
83 |
+
# Create Hugging Face zip file
|
84 |
+
dir_to_zip = os.path.join(Dirs.dir_home, Dirs.run_name)
|
85 |
+
zip_filename = Dirs.run_name
|
86 |
+
|
87 |
+
# Creating a zip file
|
88 |
+
zip_filepath = make_zipfile(dir_to_zip, zip_filename)
|
89 |
+
upload_to_drive(zip_filepath, zip_filename)
|
90 |
+
|
91 |
+
return last_JSON_response, final_WFO_record, final_GEO_record, total_cost, Voucher_Vision.n_failed_OCR, Voucher_Vision.n_failed_LLM_calls, zip_filepath
|
92 |
+
|
93 |
+
def make_zipfile(source_dir, output_filename):
    """Zip the contents of ``source_dir`` and return the archive's file name.

    Args:
        source_dir: Directory whose contents become the root of the archive.
        output_filename: Archive path without the ``.zip`` extension.

    Returns:
        ``output_filename`` with ``.zip`` appended. The name is built from the
        argument rather than taken from ``shutil.make_archive``'s return value.
    """
    archive_format = 'zip'
    shutil.make_archive(
        base_name=output_filename,
        format=archive_format,
        root_dir=source_dir,
    )
    return output_filename + f".{archive_format}"
|
96 |
|
97 |
def voucher_vision_OCR_test(cfg_file_path, dir_home, cfg_test, path_to_crop):
|
98 |
# get_n_overall = progress_report.get_n_overall()
|