phyloforfun committed on
Commit
7a93196
·
1 Parent(s): 4d5e173

Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing

Browse files
app.py CHANGED
@@ -407,11 +407,14 @@ class JSONReport:
407
  return json.dumps(json_obj, indent=4, sort_keys=False)
408
 
409
 
 
410
  def does_private_file_exist():
411
  dir_home = os.path.dirname(os.path.dirname(__file__))
412
  path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
413
  return os.path.exists(path_cfg_private)
414
 
 
 
415
  def setup_streamlit_config(dir_home):
416
  # Define the directory path and filename
417
  dir_path = os.path.join(dir_home, ".streamlit")
@@ -436,6 +439,8 @@ def setup_streamlit_config(dir_home):
436
  with open(file_path, "w") as f:
437
  f.write(config_content.strip())
438
 
 
 
439
  def display_scrollable_results(JSON_results, test_results, OPT2, OPT3):
440
  """
441
  Display the results from JSON_results in a scrollable container.
@@ -479,6 +484,7 @@ def display_scrollable_results(JSON_results, test_results, OPT2, OPT3):
479
  st.markdown(results_html, unsafe_allow_html=True)
480
 
481
 
 
482
  def display_test_results(test_results, JSON_results, llm_version):
483
  if llm_version == 'gpt':
484
  OPT1, OPT2, OPT3 = TestOptionsGPT.get_options()
@@ -543,9 +549,13 @@ def display_test_results(test_results, JSON_results, llm_version):
543
  # proportional_rain("🥇", success_count, "💔", failure_count, font_size=72, falling_speed=5, animation_length="infinite")
544
  rain_emojis(test_results)
545
 
 
 
546
  def add_emoji_delay():
547
  time.sleep(0.3)
548
 
 
 
549
  def rain_emojis(test_results):
550
  # test_results = {
551
  # 'test1': True, # Test passed
@@ -585,17 +595,23 @@ def rain_emojis(test_results):
585
  )
586
  add_emoji_delay()
587
 
 
 
588
  def format_json(json_obj):
589
  try:
590
  return json.dumps(json.loads(json_obj), indent=4, sort_keys=False)
591
  except:
592
  return json.dumps(json_obj, indent=4, sort_keys=False)
593
 
 
 
594
  def get_prompt_versions(LLM_version):
595
  yaml_files = [f for f in os.listdir(os.path.join(st.session_state.dir_home, 'custom_prompts')) if f.endswith('.yaml')]
596
 
597
  return yaml_files
598
 
 
 
599
  def get_private_file():
600
  dir_home = os.path.dirname(os.path.dirname(__file__))
601
  path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
@@ -603,17 +619,7 @@ def get_private_file():
603
 
604
 
605
 
606
- # def create_private_file():
607
- # st.session_state.proceed_to_main = False
608
-
609
- # if st.session_state.private_file:
610
- # cfg_private = get_private_file()
611
- # create_private_file_0(cfg_private)
612
- # else:
613
- # st.title("VoucherVision")
614
- # create_private_file_0()
615
-
616
- def create_private_file():
617
  st.session_state.proceed_to_main = False
618
  st.title("VoucherVision")
619
  col_private,_= st.columns([12,2])
@@ -823,7 +829,7 @@ def test_API(api, message_loc, cfg_private,openai_api_key,azure_openai_api_versi
823
 
824
 
825
  def save_changes_to_API_keys(cfg_private,openai_api_key,azure_openai_api_version,azure_openai_api_key,
826
- azure_openai_api_base,azure_openai_organization,azure_openai_api_type,google_vision,google_palm):
827
  # Update the configuration dictionary with the new values
828
  cfg_private['openai']['OPENAI_API_KEY'] = openai_api_key
829
 
@@ -833,9 +839,9 @@ def save_changes_to_API_keys(cfg_private,openai_api_key,azure_openai_api_version
833
  cfg_private['openai_azure']['openai_organization'] = azure_openai_organization
834
  cfg_private['openai_azure']['openai_api_type'] = azure_openai_api_type
835
 
836
- cfg_private['google_cloud']['path_json_file'] = google_vision
837
 
838
- cfg_private['google_palm']['google_palm_api'] = google_palm
839
  # Call the function to write the updated configuration to the YAML file
840
  write_config_file(cfg_private, st.session_state.dir_home, filename="PRIVATE_DATA.yaml")
841
  st.session_state.private_file = does_private_file_exist()
@@ -1885,12 +1891,7 @@ def content_api_check():
1885
 
1886
  def content_collage_overlay():
1887
  st.write("---")
1888
- st.header('LeafMachine2 Label Collage')
1889
- col_cropped_1, col_cropped_2 = st.columns([4,4])
1890
-
1891
- st.write("---")
1892
- st.header('OCR Overlay Image')
1893
- col_ocr_1, col_ocr_2 = st.columns([4,4])
1894
 
1895
  demo_text_h = f"Google_OCR_Handwriting:\nHERBARIUM OF MARCUS W. LYON , JR . Tracaulon sagittatum Indiana : Porter Co. incal Springs edge wet subdunal woods 1927 TX 11 Ilowers pink UNIVERSITE HERBARIUM MICH University of Michigan Herbarium 1439649 copyright reserved PERSICARIA FEB 2 6 1965 cm "
1896
  demo_text_tr = f"trOCR:\nherbarium of marcus w. lyon jr. : : : tracaulon sagittatum indiana porter co. incal springs TX 11 Ilowers pink 1439649 copyright reserved D H U Q "
@@ -1900,7 +1901,8 @@ def content_collage_overlay():
1900
  demo_text_trh = demo_text_h + '\n' + demo_text_tr
1901
  demo_text_trp = demo_text_p + '\n' + demo_text_tr
1902
 
1903
- with col_cropped_1:
 
1904
  default_crops = st.session_state.config['leafmachine']['cropped_components']['save_cropped_annotations']
1905
  st.write("Prior to transcription, use LeafMachine2 to crop all labels from input images to create label collages for each specimen image. (Requires GPU)")
1906
  st.session_state.config['leafmachine']['use_RGB_label_images'] = st.checkbox("Use LeafMachine2 label collage for transcriptions", st.session_state.config['leafmachine'].get('use_RGB_label_images', False))
@@ -1910,7 +1912,6 @@ def content_collage_overlay():
1910
  options=['ruler', 'barcode','label', 'colorcard','map','envelope','photo','attached_item','weights',
1911
  'leaf_whole', 'leaf_partial', 'leaflet', 'seed_fruit_one', 'seed_fruit_many', 'flower_one', 'flower_many', 'bud','specimen','roots','wood'],default=default_crops)
1912
  st.session_state.config['leafmachine']['cropped_components']['save_cropped_annotations'] = option_selected_crops
1913
- with col_cropped_2:
1914
  # Load the image only if it's not already in the session state
1915
  if "demo_collage" not in st.session_state:
1916
  # ba = os.path.join(st.session_state.dir_home, 'demo', 'ba', 'ba2.png')
@@ -1918,12 +1919,14 @@ def content_collage_overlay():
1918
  st.session_state["demo_collage"] = Image.open(ba)
1919
 
1920
  # Display the image
1921
- st.image(st.session_state["demo_collage"], caption='LeafMachine2 Collage', output_format="PNG", width=500)
1922
- # st.image(st.session_state["demo_collage"], caption='LeafMachine2 Collage', output_format="JPEG")
 
1923
 
1924
 
1925
 
1926
- with col_ocr_1:
 
1927
  options = [":rainbow[Printed + Handwritten]", "Printed", "Use both models"]
1928
  captions = [
1929
  "Works well for both printed and handwritten text",
@@ -1933,7 +1936,7 @@ def content_collage_overlay():
1933
 
1934
  st.write('This will plot bounding boxes around all text that Google Vision was able to detect. If there are no boxes around text, then the OCR failed, so that missing text will not be seen by the LLM when it is creating the JSON object. The created image will be viewable in the VoucherVisionEditor.')
1935
 
1936
- do_create_OCR_helper_image = st.checkbox("Create image showing an overlay of the OCR detections",value=st.session_state.config['leafmachine']['do_create_OCR_helper_image'])
1937
  st.session_state.config['leafmachine']['do_create_OCR_helper_image'] = do_create_OCR_helper_image
1938
 
1939
 
@@ -1985,14 +1988,15 @@ def content_collage_overlay():
1985
  elif (OCR_option == 'hand') and do_use_trOCR:
1986
  st.text_area(label='Handwritten/Printed + trOCR',placeholder=demo_text_trh,disabled=True, label_visibility='visible', height=150)
1987
 
1988
- with col_ocr_2:
1989
  if "demo_overlay" not in st.session_state:
1990
  # ocr = os.path.join(st.session_state.dir_home,'demo', 'ba','ocr.png')
1991
- ocr = os.path.join(st.session_state.dir_home,'demo', 'ba','ocr2_low.png')
1992
  st.session_state["demo_overlay"] = Image.open(ocr)
1993
 
1994
- st.image(st.session_state["demo_overlay"], caption='OCR Overlay Images', output_format = "PNG", width=500)
1995
- # st.image(st.session_state["demo_overlay"], caption='OCR Overlay Images', output_format = "JPEG")
 
1996
 
1997
 
1998
 
@@ -2299,7 +2303,7 @@ st.set_page_config(layout="wide", page_icon='img/icon.ico', page_title='VoucherV
2299
 
2300
  # Parse the 'is_hf' argument and set it in session state
2301
  if 'is_hf' not in st.session_state:
2302
- st.session_state['is_hf'] = True
2303
 
2304
 
2305
  #################################################################################################################################################
 
407
  return json.dumps(json_obj, indent=4, sort_keys=False)
408
 
409
 
410
+
411
  def does_private_file_exist():
412
  dir_home = os.path.dirname(os.path.dirname(__file__))
413
  path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
414
  return os.path.exists(path_cfg_private)
415
 
416
+
417
+
418
  def setup_streamlit_config(dir_home):
419
  # Define the directory path and filename
420
  dir_path = os.path.join(dir_home, ".streamlit")
 
439
  with open(file_path, "w") as f:
440
  f.write(config_content.strip())
441
 
442
+
443
+
444
  def display_scrollable_results(JSON_results, test_results, OPT2, OPT3):
445
  """
446
  Display the results from JSON_results in a scrollable container.
 
484
  st.markdown(results_html, unsafe_allow_html=True)
485
 
486
 
487
+
488
  def display_test_results(test_results, JSON_results, llm_version):
489
  if llm_version == 'gpt':
490
  OPT1, OPT2, OPT3 = TestOptionsGPT.get_options()
 
549
  # proportional_rain("🥇", success_count, "💔", failure_count, font_size=72, falling_speed=5, animation_length="infinite")
550
  rain_emojis(test_results)
551
 
552
+
553
+
554
  def add_emoji_delay():
555
  time.sleep(0.3)
556
 
557
+
558
+
559
  def rain_emojis(test_results):
560
  # test_results = {
561
  # 'test1': True, # Test passed
 
595
  )
596
  add_emoji_delay()
597
 
598
+
599
+
600
  def format_json(json_obj):
601
  try:
602
  return json.dumps(json.loads(json_obj), indent=4, sort_keys=False)
603
  except:
604
  return json.dumps(json_obj, indent=4, sort_keys=False)
605
 
606
+
607
+
608
  def get_prompt_versions(LLM_version):
609
  yaml_files = [f for f in os.listdir(os.path.join(st.session_state.dir_home, 'custom_prompts')) if f.endswith('.yaml')]
610
 
611
  return yaml_files
612
 
613
+
614
+
615
  def get_private_file():
616
  dir_home = os.path.dirname(os.path.dirname(__file__))
617
  path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
 
619
 
620
 
621
 
622
+ def create_private_file(): #################################################################################### UPDATE
 
 
 
 
 
 
 
 
 
 
623
  st.session_state.proceed_to_main = False
624
  st.title("VoucherVision")
625
  col_private,_= st.columns([12,2])
 
829
 
830
 
831
  def save_changes_to_API_keys(cfg_private,openai_api_key,azure_openai_api_version,azure_openai_api_key,
832
+ azure_openai_api_base,azure_openai_organization,azure_openai_api_type,google_vision,google_palm): ######################################## UPDATE
833
  # Update the configuration dictionary with the new values
834
  cfg_private['openai']['OPENAI_API_KEY'] = openai_api_key
835
 
 
839
  cfg_private['openai_azure']['openai_organization'] = azure_openai_organization
840
  cfg_private['openai_azure']['openai_api_type'] = azure_openai_api_type
841
 
842
+ cfg_private['google']['GOOGLE_APPLICATION_CREDENTIALS'] = google_vision
843
 
844
+ cfg_private['google']['GOOGLE_PALM_API'] = google_palm
845
  # Call the function to write the updated configuration to the YAML file
846
  write_config_file(cfg_private, st.session_state.dir_home, filename="PRIVATE_DATA.yaml")
847
  st.session_state.private_file = does_private_file_exist()
 
1891
 
1892
  def content_collage_overlay():
1893
  st.write("---")
1894
+ col_collage, col_overlay = st.columns([4,4])
 
 
 
 
 
1895
 
1896
  demo_text_h = f"Google_OCR_Handwriting:\nHERBARIUM OF MARCUS W. LYON , JR . Tracaulon sagittatum Indiana : Porter Co. incal Springs edge wet subdunal woods 1927 TX 11 Ilowers pink UNIVERSITE HERBARIUM MICH University of Michigan Herbarium 1439649 copyright reserved PERSICARIA FEB 2 6 1965 cm "
1897
  demo_text_tr = f"trOCR:\nherbarium of marcus w. lyon jr. : : : tracaulon sagittatum indiana porter co. incal springs TX 11 Ilowers pink 1439649 copyright reserved D H U Q "
 
1901
  demo_text_trh = demo_text_h + '\n' + demo_text_tr
1902
  demo_text_trp = demo_text_p + '\n' + demo_text_tr
1903
 
1904
+ with col_collage:
1905
+ st.header('LeafMachine2 Label Collage')
1906
  default_crops = st.session_state.config['leafmachine']['cropped_components']['save_cropped_annotations']
1907
  st.write("Prior to transcription, use LeafMachine2 to crop all labels from input images to create label collages for each specimen image. (Requires GPU)")
1908
  st.session_state.config['leafmachine']['use_RGB_label_images'] = st.checkbox("Use LeafMachine2 label collage for transcriptions", st.session_state.config['leafmachine'].get('use_RGB_label_images', False))
 
1912
  options=['ruler', 'barcode','label', 'colorcard','map','envelope','photo','attached_item','weights',
1913
  'leaf_whole', 'leaf_partial', 'leaflet', 'seed_fruit_one', 'seed_fruit_many', 'flower_one', 'flower_many', 'bud','specimen','roots','wood'],default=default_crops)
1914
  st.session_state.config['leafmachine']['cropped_components']['save_cropped_annotations'] = option_selected_crops
 
1915
  # Load the image only if it's not already in the session state
1916
  if "demo_collage" not in st.session_state:
1917
  # ba = os.path.join(st.session_state.dir_home, 'demo', 'ba', 'ba2.png')
 
1919
  st.session_state["demo_collage"] = Image.open(ba)
1920
 
1921
  # Display the image
1922
+ with st.expander(":frame_with_picture: View an example of the LeafMachine2 collage image"):
1923
+ st.image(st.session_state["demo_collage"], caption='LeafMachine2 Collage', output_format="PNG")
1924
+ # st.image(st.session_state["demo_collage"], caption='LeafMachine2 Collage', output_format="JPEG")
1925
 
1926
 
1927
 
1928
+ with col_overlay:
1929
+ st.header('OCR Overlay Image')
1930
  options = [":rainbow[Printed + Handwritten]", "Printed", "Use both models"]
1931
  captions = [
1932
  "Works well for both printed and handwritten text",
 
1936
 
1937
  st.write('This will plot bounding boxes around all text that Google Vision was able to detect. If there are no boxes around text, then the OCR failed, so that missing text will not be seen by the LLM when it is creating the JSON object. The created image will be viewable in the VoucherVisionEditor.')
1938
 
1939
+ do_create_OCR_helper_image = st.checkbox("Create image showing an overlay of the OCR detections",value=st.session_state.config['leafmachine']['do_create_OCR_helper_image'],disabled=True)
1940
  st.session_state.config['leafmachine']['do_create_OCR_helper_image'] = do_create_OCR_helper_image
1941
 
1942
 
 
1988
  elif (OCR_option == 'hand') and do_use_trOCR:
1989
  st.text_area(label='Handwritten/Printed + trOCR',placeholder=demo_text_trh,disabled=True, label_visibility='visible', height=150)
1990
 
1991
+
1992
  if "demo_overlay" not in st.session_state:
1993
  # ocr = os.path.join(st.session_state.dir_home,'demo', 'ba','ocr.png')
1994
+ ocr = os.path.join(st.session_state.dir_home,'demo', 'ba','ocr2.png')
1995
  st.session_state["demo_overlay"] = Image.open(ocr)
1996
 
1997
+ with st.expander(":frame_with_picture: View an example of the OCR overlay image"):
1998
+ st.image(st.session_state["demo_overlay"], caption='OCR Overlay Images', output_format = "PNG")
1999
+ # st.image(st.session_state["demo_overlay"], caption='OCR Overlay Images', output_format = "JPEG")
2000
 
2001
 
2002
 
 
2303
 
2304
  # Parse the 'is_hf' argument and set it in session state
2305
  if 'is_hf' not in st.session_state:
2306
+ st.session_state['is_hf'] = False
2307
 
2308
 
2309
  #################################################################################################################################################
vouchervision/API_validation.py CHANGED
@@ -56,10 +56,10 @@ class APIvalidation:
56
  # Initialize the Azure OpenAI client
57
  model = AzureChatOpenAI(
58
  deployment_name = 'gpt-35-turbo',#'gpt-35-turbo',
59
- openai_api_version = self.cfg_private['openai_azure']['api_version'],
60
- openai_api_key = self.cfg_private['openai_azure']['openai_api_key'],
61
- azure_endpoint = self.cfg_private['openai_azure']['openai_api_base'],
62
- openai_organization = self.cfg_private['openai_azure']['openai_organization'],
63
  )
64
  msg = HumanMessage(content="hello")
65
  # self.llm_object.temperature = self.config.get('temperature')
@@ -103,7 +103,7 @@ class APIvalidation:
103
  def check_mistral_api_key(self):
104
  try:
105
  if not self.is_hf:
106
- client = MistralClient(api_key=self.cfg_private['mistral']['mistral_key'])
107
  else:
108
  client = MistralClient(api_key=os.getenv('MISTRAL_API_KEY'))
109
 
@@ -184,7 +184,8 @@ class APIvalidation:
184
  print(f"palm2 fail [{test_response_palm}]")
185
 
186
  except Exception as e:
187
- print(f"palm2 fail2 [{test_response_palm}]")
 
188
 
189
 
190
  try:
@@ -198,7 +199,8 @@ class APIvalidation:
198
  print(f"gemini fail [{test_response_gemini}]")
199
 
200
  except Exception as e:
201
- print(f"palm2 fail2 [{test_response_gemini}]")
 
202
 
203
  return results
204
 
@@ -210,12 +212,12 @@ class APIvalidation:
210
  credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
211
  return credentials
212
  else:
213
- with open(self.cfg_private['google_cloud']['GOOGLE_APPLICATION_CREDENTIALS'], 'r') as file:
214
  data = json.load(file)
215
- creds_json_str = json.dumps(data)
216
- credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
217
- os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = creds_json_str
218
- return credentials
219
 
220
 
221
 
@@ -223,28 +225,30 @@ class APIvalidation:
223
  missing_keys = []
224
  present_keys = []
225
 
226
- if not self.is_hf:
227
- k_OPENAI_API_KEY = self.cfg_private['openai']['OPENAI_API_KEY']
228
- k_openai_azure = self.cfg_private['openai_azure']['api_version']
229
- # k_google_palm_api = self.cfg_private['google_palm']['google_palm_api']
230
-
231
- k_project_id = self.cfg_private['google_palm']['project_id']
232
- k_location = self.cfg_private['google_palm']['location']
233
- k_google_application_credentials = self.cfg_private['google_cloud']['GOOGLE_APPLICATION_CREDENTIALS']
234
-
235
- k_mistral = self.cfg_private['mistral']['mistral_key']
236
- k_here = self.cfg_private['here']['api_key']
237
- k_opencage = self.cfg_private['open_cage_geocode']['api_key']
238
- else:
239
  k_OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
240
  k_openai_azure = os.getenv('AZURE_API_VERSION')
241
- # k_google_palm_api = os.getenv('PALM_API_KEY')
242
 
 
243
  k_project_id = os.getenv('GOOGLE_PROJECT_ID')
244
  k_location = os.getenv('GOOGLE_LOCATION')
 
245
  k_mistral = os.getenv('MISTRAL_API_KEY')
246
  k_here = os.getenv('HERE_API_KEY')
247
- k_opencage = os.getenv('open_cage_geocode')
 
 
 
 
 
 
 
 
 
 
 
 
 
248
 
249
 
250
  # Check each key and add to the respective list
@@ -285,7 +289,7 @@ class APIvalidation:
285
 
286
  # Google PALM2/Gemini key check
287
  if self.has_API_key(k_google_application_credentials) and self.has_API_key(k_project_id) and self.has_API_key(k_location): ##################
288
- vertexai.init(project=os.getenv('GOOGLE_PROJECT_ID'), location=os.getenv('GOOGLE_LOCATION'), credentials=self.get_google_credentials())
289
  google_results = self.check_google_vertex_genai_api_key()
290
  if google_results['palm2']:
291
  present_keys.append('Palm2 (Valid)')
 
56
  # Initialize the Azure OpenAI client
57
  model = AzureChatOpenAI(
58
  deployment_name = 'gpt-35-turbo',#'gpt-35-turbo',
59
+ openai_api_version = self.cfg_private['openai_azure']['OPENAI_API_VERSION'],
60
+ openai_api_key = self.cfg_private['openai_azure']['OPENAI_API_KEY'],
61
+ azure_endpoint = self.cfg_private['openai_azure']['OPENAI_API_BASE'],
62
+ openai_organization = self.cfg_private['openai_azure']['OPENAI_ORGANIZATION'],
63
  )
64
  msg = HumanMessage(content="hello")
65
  # self.llm_object.temperature = self.config.get('temperature')
 
103
  def check_mistral_api_key(self):
104
  try:
105
  if not self.is_hf:
106
+ client = MistralClient(api_key=self.cfg_private['mistral']['MISTRAL_API_KEY'])
107
  else:
108
  client = MistralClient(api_key=os.getenv('MISTRAL_API_KEY'))
109
 
 
184
  print(f"palm2 fail [{test_response_palm}]")
185
 
186
  except Exception as e:
187
+ # print(f"palm2 fail2 [{e}]")
188
+ print(f"palm2 fail2")
189
 
190
 
191
  try:
 
199
  print(f"gemini fail [{test_response_gemini}]")
200
 
201
  except Exception as e:
202
+ # print(f"palm2 fail2 [{e}]")
203
+ print(f"palm2 fail2")
204
 
205
  return results
206
 
 
212
  credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
213
  return credentials
214
  else:
215
+ with open(self.cfg_private['google']['GOOGLE_APPLICATION_CREDENTIALS'], 'r') as file:
216
  data = json.load(file)
217
+ creds_json_str = json.dumps(data)
218
+ credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
219
+ os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = creds_json_str
220
+ return credentials
221
 
222
 
223
 
 
225
  missing_keys = []
226
  present_keys = []
227
 
228
+ if self.is_hf:
 
 
 
 
 
 
 
 
 
 
 
 
229
  k_OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
230
  k_openai_azure = os.getenv('AZURE_API_VERSION')
 
231
 
232
+ k_google_application_credentials = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
233
  k_project_id = os.getenv('GOOGLE_PROJECT_ID')
234
  k_location = os.getenv('GOOGLE_LOCATION')
235
+
236
  k_mistral = os.getenv('MISTRAL_API_KEY')
237
  k_here = os.getenv('HERE_API_KEY')
238
+ k_opencage = os.getenv('OPENCAGE_API_KEY')
239
+ else:
240
+ k_OPENAI_API_KEY = self.cfg_private['openai']['OPENAI_API_KEY']
241
+ k_openai_azure = self.cfg_private['openai_azure']['OPENAI_API_VERSION']
242
+
243
+ k_project_id = self.cfg_private['google']['GOOGLE_PROJECT_ID']
244
+ k_location = self.cfg_private['google']['GOOGLE_LOCATION']
245
+ k_google_application_credentials = self.cfg_private['google']['GOOGLE_APPLICATION_CREDENTIALS']
246
+
247
+ k_mistral = self.cfg_private['mistral']['MISTRAL_API_KEY']
248
+ k_here = self.cfg_private['here']['API_KEY']
249
+ k_opencage = self.cfg_private['open_cage_geocode']['API_KEY']
250
+
251
+
252
 
253
 
254
  # Check each key and add to the respective list
 
289
 
290
  # Google PALM2/Gemini key check
291
  if self.has_API_key(k_google_application_credentials) and self.has_API_key(k_project_id) and self.has_API_key(k_location): ##################
292
+ vertexai.init(project=k_project_id, location=k_location, credentials=self.get_google_credentials())
293
  google_results = self.check_google_vertex_genai_api_key()
294
  if google_results['palm2']:
295
  present_keys.append('Palm2 (Valid)')
vouchervision/VoucherVision_Config_Builder.py CHANGED
@@ -594,13 +594,14 @@ def check_if_usable(is_hf): ####################################################
594
 
595
  has_key_openai = has_API_key(cfg_private['openai']['OPENAI_API_KEY'])
596
 
597
- has_key_azure_openai = has_API_key(cfg_private['openai_azure']['api_version'])
598
-
599
- has_key_palm2 = has_API_key(cfg_private['google_palm']['google_palm_api'])
600
 
601
- has_key_google_OCR = has_API_key(cfg_private['google_cloud']['path_json_file'])
 
 
602
 
603
- if has_key_google_OCR and (has_key_azure_openai or has_key_openai or has_key_palm2):
 
604
  return True
605
  else:
606
  return False
@@ -614,13 +615,11 @@ def check_API_key(dir_home, api_version):
614
 
615
  has_key_azure_openai = has_API_key(cfg_private['openai_azure']['api_version'])
616
 
617
- has_key_palm2 = has_API_key(cfg_private['google_palm']['google_palm_api'])
618
 
619
- has_key_google_OCR = has_API_key(cfg_private['google_cloud']['path_json_file'])
620
 
621
- if api_version == 'palm' and has_key_palm2:
622
- return True
623
- elif api_version in ['gpt','openai'] and has_key_openai:
624
  return True
625
  elif api_version in ['gpt-azure', 'azure_openai'] and has_key_azure_openai:
626
  return True
 
594
 
595
  has_key_openai = has_API_key(cfg_private['openai']['OPENAI_API_KEY'])
596
 
597
+ has_key_azure_openai = has_API_key(cfg_private['openai_azure']['OPENAI_API_VERSION'])
 
 
598
 
599
+ has_key_google_OCR = has_API_key(cfg_private['google']['GOOGLE_APPLICATION_CREDENTIALS'])
600
+
601
+ has_key_MISTRAL = has_API_key(cfg_private['mistral']['MISTRAL_API_KEY'])
602
 
603
+
604
+ if has_key_google_OCR and (has_key_azure_openai or has_key_openai or has_key_MISTRAL):
605
  return True
606
  else:
607
  return False
 
615
 
616
  has_key_azure_openai = has_API_key(cfg_private['openai_azure']['api_version'])
617
 
618
+ # has_key_palm2 = has_API_key(cfg_private['google_palm']['google_palm_api'])
619
 
620
+ has_key_google_OCR = has_API_key(cfg_private['google']['GOOGLE_APPLICATION_CREDENTIALS'])
621
 
622
+ if api_version in ['gpt','openai'] and has_key_openai:
 
 
623
  return True
624
  elif api_version in ['gpt-azure', 'azure_openai'] and has_key_azure_openai:
625
  return True
vouchervision/utils_VoucherVision.py CHANGED
@@ -417,7 +417,7 @@ class VoucherVision():
417
  credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
418
  return credentials
419
  else:
420
- with open(self.cfg_private['google_cloud']['GOOGLE_APPLICATION_CREDENTIALS'], 'r') as file:
421
  data = json.load(file)
422
  creds_json_str = json.dumps(data)
423
  credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
@@ -432,15 +432,15 @@ class VoucherVision():
432
  self.cfg_private = get_cfg_from_full_path(self.path_cfg_private)
433
 
434
  k_openai = self.cfg_private['openai']['OPENAI_API_KEY']
435
- k_openai_azure = self.cfg_private['openai_azure']['api_version']
436
 
437
- k_google_project_id = self.cfg_private['google_palm']['project_id']
438
- k_google_location = self.cfg_private['google_palm']['location']
439
- k_google_application_credentials = self.cfg_private['google_cloud']['GOOGLE_APPLICATION_CREDENTIALS']
440
 
441
- k_mistral = self.cfg_private['mistral']['mistral_key']
442
- k_here = self.cfg_private['here']['api_key']
443
- k_opencage = self.cfg_private['open_cage_geocode']['api_key']
444
  else:
445
  self.dir_home = os.path.dirname(os.path.dirname(__file__))
446
  self.path_cfg_private = None
@@ -471,10 +471,11 @@ class VoucherVision():
471
 
472
 
473
  ### Google - OCR, Palm2, Gemini
474
- if self.is_hf:
475
- if self.has_key_google_application_credentials and self.has_key_google_project_id and self.has_key_google_location:
476
  vertexai.init(project=os.getenv('GOOGLE_PROJECT_ID'), location=os.getenv('GOOGLE_LOCATION'), credentials=self.get_google_credentials())
477
-
 
478
 
479
  ### OpenAI
480
  if self.has_key_openai:
@@ -502,10 +503,10 @@ class VoucherVision():
502
  # Initialize the Azure OpenAI client
503
  self.llm = AzureChatOpenAI(
504
  deployment_name = 'gpt-35-turbo',#'gpt-35-turbo',
505
- openai_api_version = self.cfg_private['openai_azure']['api_version'],
506
- openai_api_key = self.cfg_private['openai_azure']['openai_api_key'],
507
- azure_endpoint = self.cfg_private['openai_azure']['openai_api_base'],
508
- openai_organization = self.cfg_private['openai_azure']['openai_organization'],
509
  )
510
  self.has_key_azure_openai = True
511
 
@@ -515,7 +516,7 @@ class VoucherVision():
515
  if self.is_hf:
516
  pass # Already set
517
  else:
518
- os.environ['MISTRAL_API_KEY'] = self.cfg_private['mistral']['mistral_key']
519
 
520
 
521
  ### HERE
@@ -523,8 +524,8 @@ class VoucherVision():
523
  if self.is_hf:
524
  pass # Already set
525
  else:
526
- os.environ['HERE_APP_ID'] = self.cfg_private['here']['app_id']
527
- os.environ['HERE_API_KEY'] = self.cfg_private['here']['api_key']
528
 
529
 
530
  ### OpenCage
@@ -532,7 +533,7 @@ class VoucherVision():
532
  if self.is_hf:
533
  pass # Already set
534
  else:
535
- os.environ['OPENCAGE_API_KEY'] = self.cfg_private['open_cage_geocode']['api_key']
536
 
537
 
538
 
 
417
  credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
418
  return credentials
419
  else:
420
+ with open(self.cfg_private['google']['GOOGLE_APPLICATION_CREDENTIALS'], 'r') as file:
421
  data = json.load(file)
422
  creds_json_str = json.dumps(data)
423
  credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
 
432
  self.cfg_private = get_cfg_from_full_path(self.path_cfg_private)
433
 
434
  k_openai = self.cfg_private['openai']['OPENAI_API_KEY']
435
+ k_openai_azure = self.cfg_private['openai_azure']['OPENAI_API_VERSION']
436
 
437
+ k_google_project_id = self.cfg_private['google']['GOOGLE_PROJECT_ID']
438
+ k_google_location = self.cfg_private['google']['GOOGLE_LOCATION']
439
+ k_google_application_credentials = self.cfg_private['google']['GOOGLE_APPLICATION_CREDENTIALS']
440
 
441
+ k_mistral = self.cfg_private['mistral']['MISTRAL_API_KEY']
442
+ k_here = self.cfg_private['here']['API_KEY']
443
+ k_opencage = self.cfg_private['open_cage_geocode']['API_KEY']
444
  else:
445
  self.dir_home = os.path.dirname(os.path.dirname(__file__))
446
  self.path_cfg_private = None
 
471
 
472
 
473
  ### Google - OCR, Palm2, Gemini
474
+ if self.has_key_google_application_credentials and self.has_key_google_project_id and self.has_key_google_location:
475
+ if self.is_hf:
476
  vertexai.init(project=os.getenv('GOOGLE_PROJECT_ID'), location=os.getenv('GOOGLE_LOCATION'), credentials=self.get_google_credentials())
477
+ else:
478
+ vertexai.init(project=k_google_project_id, location=k_google_location, credentials=self.get_google_credentials())
479
 
480
  ### OpenAI
481
  if self.has_key_openai:
 
503
  # Initialize the Azure OpenAI client
504
  self.llm = AzureChatOpenAI(
505
  deployment_name = 'gpt-35-turbo',#'gpt-35-turbo',
506
+ openai_api_version = self.cfg_private['openai_azure']['OPENAI_API_VERSION'],
507
+ openai_api_key = self.cfg_private['openai_azure']['OPENAI_API_KEY'],
508
+ azure_endpoint = self.cfg_private['openai_azure']['OPENAI_API_BASE'],
509
+ openai_organization = self.cfg_private['openai_azure']['OPENAI_ORGANIZATION'],
510
  )
511
  self.has_key_azure_openai = True
512
 
 
516
  if self.is_hf:
517
  pass # Already set
518
  else:
519
+ os.environ['MISTRAL_API_KEY'] = self.cfg_private['mistral']['MISTRAL_API_KEY']
520
 
521
 
522
  ### HERE
 
524
  if self.is_hf:
525
  pass # Already set
526
  else:
527
+ os.environ['HERE_APP_ID'] = self.cfg_private['here']['APP_ID']
528
+ os.environ['HERE_API_KEY'] = self.cfg_private['here']['API_KEY']
529
 
530
 
531
  ### OpenCage
 
533
  if self.is_hf:
534
  pass # Already set
535
  else:
536
+ os.environ['OPENCAGE_API_KEY'] = self.cfg_private['open_cage_geocode']['API_KEY']
537
 
538
 
539