phyloforfun committed on
Commit
aedd7d9
·
1 Parent(s): 5ada5b1

Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing

Browse files
app.py CHANGED
@@ -94,15 +94,16 @@ def content_input_images_hf():
94
 
95
 
96
 
97
- def create_download_button(zip_filepath):
98
- with open(zip_filepath, 'rb') as f:
99
- bytes_io = BytesIO(f.read())
100
- st.download_button(
101
- label=f"Download Results for{st.session_state['processing_add_on']}",type='primary',
102
- data=bytes_io,
103
- file_name=os.path.basename(zip_filepath),
104
- mime='application/zip'
105
- )
 
106
 
107
 
108
  def delete_directory(dir_path):
@@ -373,7 +374,7 @@ class JSONReport:
373
  # Display the map if map_data is not empty
374
  if not map_data.empty:
375
  with self.json_map:
376
- st.map(map_data, zoom=4, size='size', color='color')
377
 
378
  def set_text(self, text_main=None, text_middle=None, text_right=None):
379
  if text_main:
@@ -1287,7 +1288,7 @@ def build_LLM_prompt_config():
1287
  if new_filename:
1288
  if check_unique_mapping_assignments():
1289
  if check_prompt_yaml_filename(new_filename):
1290
- save_prompt_yaml(new_filename)
1291
  else:
1292
  st.error("File name can only contain letters, numbers, underscores, and dashes. Cannot contain spaces.")
1293
  else:
@@ -1587,7 +1588,7 @@ def content_header():
1587
 
1588
  path_custom_prompts = os.path.join(st.session_state.dir_home,'custom_prompts',st.session_state.config['leafmachine']['project']['prompt_version'])
1589
  # Call the machine function.
1590
- st.session_state['formatted_json'], st.session_state['formatted_json_WFO'], st.session_state['formatted_json_GEO'], total_cost, n_failed_OCR, n_failed_LLM_calls = voucher_vision(None,
1591
  st.session_state.dir_home,
1592
  path_custom_prompts,
1593
  None,
@@ -1609,7 +1610,8 @@ def content_header():
1609
  else:
1610
  st.info(f":money_with_wings: This run cost :heavy_dollar_sign:{total_cost:.4f}")
1611
  st.balloons()
1612
-
 
1613
  else:
1614
  st.button("Start Processing", type='primary', disabled=True)
1615
  st.error(":heavy_exclamation_mark: Required API keys not set. Please visit the 'API Keys' tab and set the Google Vision OCR API key and at least one LLM key.")
@@ -1617,7 +1619,8 @@ def content_header():
1617
  if st.session_state['formatted_json']:
1618
  if st.session_state['hold_output']:
1619
  st.session_state['json_report'].set_JSON(st.session_state['formatted_json'], st.session_state['formatted_json_WFO'], st.session_state['formatted_json_GEO'])
1620
-
 
1621
 
1622
 
1623
 
@@ -2361,7 +2364,8 @@ if 'settings_filename' not in st.session_state:
2361
  st.session_state['settings_filename'] = None
2362
  if 'loaded_settings_filename' not in st.session_state:
2363
  st.session_state['loaded_settings_filename'] = None
2364
-
 
2365
 
2366
  # Initialize session_state variables if they don't exist
2367
  if 'prompt_info' not in st.session_state:
 
94
 
95
 
96
 
97
+ def create_download_button(zip_filepath, col):
98
+ with col:
99
+ with open(zip_filepath, 'rb') as f:
100
+ bytes_io = BytesIO(f.read())
101
+ st.download_button(
102
+ label=f"Download Results for{st.session_state['processing_add_on']}",type='primary',
103
+ data=bytes_io,
104
+ file_name=os.path.basename(zip_filepath),
105
+ mime='application/zip'
106
+ )
107
 
108
 
109
  def delete_directory(dir_path):
 
374
  # Display the map if map_data is not empty
375
  if not map_data.empty:
376
  with self.json_map:
377
+ st.map(map_data, zoom=4, size='size', color='color', use_container_width=True)
378
 
379
  def set_text(self, text_main=None, text_middle=None, text_right=None):
380
  if text_main:
 
1288
  if new_filename:
1289
  if check_unique_mapping_assignments():
1290
  if check_prompt_yaml_filename(new_filename):
1291
+ save_prompt_yaml(new_filename, col_left_save)
1292
  else:
1293
  st.error("File name can only contain letters, numbers, underscores, and dashes. Cannot contain spaces.")
1294
  else:
 
1588
 
1589
  path_custom_prompts = os.path.join(st.session_state.dir_home,'custom_prompts',st.session_state.config['leafmachine']['project']['prompt_version'])
1590
  # Call the machine function.
1591
+ st.session_state['formatted_json'], st.session_state['formatted_json_WFO'], st.session_state['formatted_json_GEO'], total_cost, n_failed_OCR, n_failed_LLM_calls, st.session_state['zip_filepath'] = voucher_vision(None,
1592
  st.session_state.dir_home,
1593
  path_custom_prompts,
1594
  None,
 
1610
  else:
1611
  st.info(f":money_with_wings: This run cost :heavy_dollar_sign:{total_cost:.4f}")
1612
  st.balloons()
1613
+ if st.session_state['zip_filepath']:
1614
+ create_download_button(st.session_state['zip_filepath'], col_run_1)
1615
  else:
1616
  st.button("Start Processing", type='primary', disabled=True)
1617
  st.error(":heavy_exclamation_mark: Required API keys not set. Please visit the 'API Keys' tab and set the Google Vision OCR API key and at least one LLM key.")
 
1619
  if st.session_state['formatted_json']:
1620
  if st.session_state['hold_output']:
1621
  st.session_state['json_report'].set_JSON(st.session_state['formatted_json'], st.session_state['formatted_json_WFO'], st.session_state['formatted_json_GEO'])
1622
+ if st.session_state['zip_filepath']:
1623
+ create_download_button(st.session_state['zip_filepath'], col_run_1)
1624
 
1625
 
1626
 
 
2364
  st.session_state['settings_filename'] = None
2365
  if 'loaded_settings_filename' not in st.session_state:
2366
  st.session_state['loaded_settings_filename'] = None
2367
+ if 'zip_filepath' not in st.session_state:
2368
+ st.session_state['zip_filepath'] = None
2369
 
2370
  # Initialize session_state variables if they don't exist
2371
  if 'prompt_info' not in st.session_state:
vouchervision/API_validation.py CHANGED
@@ -1,4 +1,4 @@
1
- import os, io, openai, vertexai, json
2
  import google.generativeai as genai
3
  from mistralai.client import MistralClient
4
  from mistralai.models.chat_completion import ChatMessage
@@ -171,12 +171,24 @@ class APIvalidation:
171
  except Exception as e: # Replace with a more specific exception if possible
172
  return False
173
 
 
 
 
 
 
 
 
 
 
 
 
174
  def check_google_vertex_genai_api_key(self):
175
  results = {"palm2": False, "gemini": False}
176
  if self.cfg_private:
177
  try:
178
  # Assuming genai and vertexai are clients for Google services
179
- os.environ["GOOGLE_API_KEY"] = self.cfg_private['google_palm']['google_palm_api']
 
180
  # genai.configure(api_key=self.cfg_private['google_palm']['google_palm_api'])
181
  vertexai.init(project= self.cfg_private['google_palm']['project_id'], location=self.cfg_private['google_palm']['location'])
182
 
@@ -209,7 +221,7 @@ class APIvalidation:
209
  try:
210
  # Assuming genai and vertexai are clients for Google services
211
  # os.environ["GOOGLE_API_KEY"] = os.getenv('PALM_API_KEY')
212
- genai.configure(api_key=os.getenv('PALM_API_KEY'))
213
  vertexai.init(project= os.getenv('GOOGLE_PROJECT_ID'), location=os.getenv('GOOGLE_LOCATION'))
214
 
215
  try:
 
1
+ import os, io, openai, vertexai, json, tempfile
2
  import google.generativeai as genai
3
  from mistralai.client import MistralClient
4
  from mistralai.models.chat_completion import ChatMessage
 
171
  except Exception as e: # Replace with a more specific exception if possible
172
  return False
173
 
174
+ def get_google_credentials(self):
175
+ # Convert JSON key from string to a dictionary
176
+ service_account_json_str = os.getenv('google_service_account_json')
177
+
178
+ with tempfile.NamedTemporaryFile(mode="w+", delete=False,suffix=".json") as temp:
179
+ temp.write(service_account_json_str)
180
+ temp_filename = temp.name
181
+
182
+ return temp_filename
183
+
184
+
185
  def check_google_vertex_genai_api_key(self):
186
  results = {"palm2": False, "gemini": False}
187
  if self.cfg_private:
188
  try:
189
  # Assuming genai and vertexai are clients for Google services
190
+ # os.environ["GOOGLE_API_KEY"] = self.cfg_private['google_palm']['google_palm_api']
191
+ os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.get_google_credentials()
192
  # genai.configure(api_key=self.cfg_private['google_palm']['google_palm_api'])
193
  vertexai.init(project= self.cfg_private['google_palm']['project_id'], location=self.cfg_private['google_palm']['location'])
194
 
 
221
  try:
222
  # Assuming genai and vertexai are clients for Google services
223
  # os.environ["GOOGLE_API_KEY"] = os.getenv('PALM_API_KEY')
224
+ genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))
225
  vertexai.init(project= os.getenv('GOOGLE_PROJECT_ID'), location=os.getenv('GOOGLE_LOCATION'))
226
 
227
  try:
vouchervision/LLM_local_MistralAI.py CHANGED
@@ -12,6 +12,7 @@ from vouchervision.utils_taxonomy_WFO import validate_taxonomy_WFO
12
  from vouchervision.utils_geolocate_HERE import validate_coordinates_here
13
 
14
  '''
 
15
  https://python.langchain.com/docs/integrations/llms/huggingface_pipelines
16
  '''
17
 
 
12
  from vouchervision.utils_geolocate_HERE import validate_coordinates_here
13
 
14
  '''
15
+ Local Pipelines:
16
  https://python.langchain.com/docs/integrations/llms/huggingface_pipelines
17
  '''
18
 
vouchervision/VoucherVision_Config_Builder.py CHANGED
@@ -41,7 +41,7 @@ def build_VV_config(loaded_cfg=None):
41
 
42
  LLM_version_user = 'Azure GPT 3.5 Instruct' #'Azure GPT 4 Turbo 1106-preview'
43
  prompt_version = 'version_5.yaml' # from ["Version 1", "Version 1 No Domain Knowledge", "Version 2"]
44
- use_LeafMachine2_collage_images = True # Use LeafMachine2 collage images
45
  do_create_OCR_helper_image = True
46
 
47
  batch_size = 500
 
41
 
42
  LLM_version_user = 'Azure GPT 3.5 Instruct' #'Azure GPT 4 Turbo 1106-preview'
43
  prompt_version = 'version_5.yaml' # from ["Version 1", "Version 1 No Domain Knowledge", "Version 2"]
44
+ use_LeafMachine2_collage_images = False # Use LeafMachine2 collage images
45
  do_create_OCR_helper_image = True
46
 
47
  batch_size = 500
vouchervision/utils_VoucherVision.py CHANGED
@@ -1,5 +1,5 @@
1
  import openai
2
- import os, json, glob, shutil, yaml, torch, logging
3
  import openpyxl
4
  from openpyxl import Workbook, load_workbook
5
  import vertexai
@@ -408,12 +408,17 @@ class VoucherVision():
408
  else:
409
  return False
410
 
411
-
 
 
412
 
 
 
 
413
 
 
414
 
415
 
416
-
417
  def set_API_keys(self):
418
  if self.is_hf:
419
  openai_api_key = os.getenv('OPENAI_API_KEY')
@@ -425,6 +430,7 @@ class VoucherVision():
425
  open_cage_api_key = os.getenv('open_cage_geocode')
426
  google_project_id = os.getenv('GOOGLE_PROJECT_ID')
427
  google_project_location = os.getenv('GOOGLE_LOCATION')
 
428
 
429
 
430
 
 
1
  import openai
2
+ import os, json, glob, shutil, yaml, torch, logging, tempfile
3
  import openpyxl
4
  from openpyxl import Workbook, load_workbook
5
  import vertexai
 
408
  else:
409
  return False
410
 
411
+ def get_google_credentials(self):
412
+ # Convert JSON key from string to a dictionary
413
+ service_account_json_str = os.getenv('google_service_account_json')
414
 
415
+ with tempfile.NamedTemporaryFile(mode="w+", delete=False,suffix=".json") as temp:
416
+ temp.write(service_account_json_str)
417
+ temp_filename = temp.name
418
 
419
+ return temp_filename
420
 
421
 
 
422
  def set_API_keys(self):
423
  if self.is_hf:
424
  openai_api_key = os.getenv('OPENAI_API_KEY')
 
430
  open_cage_api_key = os.getenv('open_cage_geocode')
431
  google_project_id = os.getenv('GOOGLE_PROJECT_ID')
432
  google_project_location = os.getenv('GOOGLE_LOCATION')
433
+ os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.get_google_credentials()
434
 
435
 
436
 
vouchervision/vouchervision_main.py CHANGED
@@ -1,7 +1,7 @@
1
  '''
2
  VoucherVision - based on LeafMachine2 Processes
3
  '''
4
- import os, inspect, sys, logging, subprocess
5
  from time import perf_counter
6
  currentdir = os.path.dirname(os.path.dirname(inspect.getfile(inspect.currentframe())))
7
  parentdir = os.path.dirname(currentdir)
@@ -14,7 +14,7 @@ from data_project import Project_Info
14
  from LM2_logger import start_logging
15
  from fetch_data import fetch_data
16
  from utils_VoucherVision import VoucherVision, space_saver
17
-
18
 
19
  def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progress_report, json_report, path_api_cost=None, test_ind = None, is_hf = True, is_real_run=False):
20
  t_overall = perf_counter()
@@ -78,7 +78,21 @@ def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progr
78
 
79
  Voucher_Vision.close_logger_handlers()
80
 
81
- return last_JSON_response, final_WFO_record, final_GEO_record, total_cost, Voucher_Vision.n_failed_OCR, Voucher_Vision.n_failed_LLM_calls
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  def voucher_vision_OCR_test(cfg_file_path, dir_home, cfg_test, path_to_crop):
84
  # get_n_overall = progress_report.get_n_overall()
 
1
  '''
2
  VoucherVision - based on LeafMachine2 Processes
3
  '''
4
+ import os, inspect, sys, logging, subprocess, shutil
5
  from time import perf_counter
6
  currentdir = os.path.dirname(os.path.dirname(inspect.getfile(inspect.currentframe())))
7
  parentdir = os.path.dirname(currentdir)
 
14
  from LM2_logger import start_logging
15
  from fetch_data import fetch_data
16
  from utils_VoucherVision import VoucherVision, space_saver
17
+ from vouchervision.utils_hf import upload_to_drive
18
 
19
  def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progress_report, json_report, path_api_cost=None, test_ind = None, is_hf = True, is_real_run=False):
20
  t_overall = perf_counter()
 
78
 
79
  Voucher_Vision.close_logger_handlers()
80
 
81
+ zip_filepath = None
82
+ if is_hf:
83
+ # Create Hugging Face zip file
84
+ dir_to_zip = os.path.join(Dirs.dir_home, Dirs.run_name)
85
+ zip_filename = Dirs.run_name
86
+
87
+ # Creating a zip file
88
+ zip_filepath = make_zipfile(dir_to_zip, zip_filename)
89
+ upload_to_drive(zip_filepath, zip_filename)
90
+
91
+ return last_JSON_response, final_WFO_record, final_GEO_record, total_cost, Voucher_Vision.n_failed_OCR, Voucher_Vision.n_failed_LLM_calls, zip_filepath
92
+
93
+ def make_zipfile(source_dir, output_filename):
94
+ shutil.make_archive(output_filename, 'zip', source_dir)
95
+ return output_filename + '.zip'
96
 
97
  def voucher_vision_OCR_test(cfg_file_path, dir_home, cfg_test, path_to_crop):
98
  # get_n_overall = progress_report.get_n_overall()