phyloforfun committed on
Commit
bd72568
·
1 Parent(s): 28ebe52

add mammal prompt, fix bug

Browse files
api_cost/api_cost.yaml CHANGED
@@ -102,6 +102,10 @@ MISTRAL_SMALL:
102
  ################
103
  # Local Models
104
  ################
 
 
 
 
105
  LOCAL_MIXTRAL_8X7B_INSTRUCT_V01:
106
  in: 0.0
107
  out: 0.0
@@ -113,4 +117,4 @@ LOCAL_CPU_MISTRAL_7B_INSTRUCT_V02_GGUF:
113
  out: 0.0
114
  phyloforfun/mistral-7b-instruct-v2-bnb-4bit__HLT_MICH_Angiospermae_SLTPvC_v1-0_medium_OCR-C25-L25-E50-R05:
115
  in: 0.0
116
- out: 0.0
 
102
  ################
103
  # Local Models
104
  ################
105
+ # mistralai/Mistral-Nemo-Instruct-2407
106
+ LOCAL_MISTRAL_NEMO_INSTRUCT_2407:
107
+ in: 0.0
108
+ out: 0.0
109
  LOCAL_MIXTRAL_8X7B_INSTRUCT_V01:
110
  in: 0.0
111
  out: 0.0
 
117
  out: 0.0
118
  phyloforfun/mistral-7b-instruct-v2-bnb-4bit__HLT_MICH_Angiospermae_SLTPvC_v1-0_medium_OCR-C25-L25-E50-R05:
119
  in: 0.0
120
+ out: 0.0
app.py CHANGED
@@ -2226,13 +2226,13 @@ def content_collage_overlay():
2226
  # Set the options for the radio button with corresponding indices
2227
  # Set the options for the transcription method radio button
2228
  options = {
2229
- 0: "Use LeafMachine2 label collage for transcriptions",
2230
- 1: "Use original images for transcriptions",
2231
  2: "Use specimen collage for transcriptions"
2232
  }
2233
 
2234
  # Determine the default index based on the current configuration
2235
- default_index = st.session_state.config['leafmachine'].get('use_RGB_label_images', 0)
2236
 
2237
  # Create the radio button for transcription method selection
2238
  selected_option = st.radio(
 
2226
  # Set the options for the radio button with corresponding indices
2227
  # Set the options for the transcription method radio button
2228
  options = {
2229
+ 0: "Use original images for transcriptions",
2230
+ 1: "Use LeafMachine2 label collage for transcriptions",
2231
  2: "Use specimen collage for transcriptions"
2232
  }
2233
 
2234
  # Determine the default index based on the current configuration
2235
+ default_index = st.session_state.config['leafmachine'].get('use_RGB_label_images', 1)
2236
 
2237
  # Create the radio button for transcription method selection
2238
  selected_option = st.radio(
pages/prompt_builder.py CHANGED
@@ -19,6 +19,20 @@ def create_download_button_yaml(file_path, selected_yaml_file, key_val):
19
  )
20
 
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  def upload_local_prompt_to_server(dir_prompt):
23
  uploaded_file = st.file_uploader("Upload a custom prompt file", type=['yaml'])
24
  if uploaded_file is not None:
@@ -31,10 +45,14 @@ def upload_local_prompt_to_server(dir_prompt):
31
  with open(file_path, 'wb') as f:
32
  f.write(uploaded_file.getbuffer())
33
  st.success(f"Saved file {file_name} in {dir_prompt}")
 
 
 
34
  else:
35
  st.error("Please upload a .yaml file that you previously created using this Prompt Builder tool.")
36
 
37
 
 
38
  def save_prompt_yaml(filename, col):
39
  yaml_content = {
40
  'prompt_author': st.session_state['prompt_author'],
@@ -207,6 +225,9 @@ def build_LLM_prompt_config():
207
  st.write('##')
208
  create_download_button_yaml(download_file_path, st.session_state['selected_yaml_file'],key_val=345798)
209
 
 
 
 
210
  # Prompt Author Information
211
  st.write("---")
212
  st.header("Prompt Author Information")
 
19
  )
20
 
21
 
22
+ # def upload_local_prompt_to_server(dir_prompt):
23
+ # uploaded_file = st.file_uploader("Upload a custom prompt file", type=['yaml'])
24
+ # if uploaded_file is not None:
25
+ # # Check the file extension
26
+ # file_name = uploaded_file.name
27
+ # if file_name.endswith('.yaml'):
28
+ # file_path = os.path.join(dir_prompt, file_name)
29
+
30
+ # # Save the file
31
+ # with open(file_path, 'wb') as f:
32
+ # f.write(uploaded_file.getbuffer())
33
+ # st.success(f"Saved file {file_name} in {dir_prompt}")
34
+ # else:
35
+ # st.error("Please upload a .yaml file that you previously created using this Prompt Builder tool.")
36
  def upload_local_prompt_to_server(dir_prompt):
37
  uploaded_file = st.file_uploader("Upload a custom prompt file", type=['yaml'])
38
  if uploaded_file is not None:
 
45
  with open(file_path, 'wb') as f:
46
  f.write(uploaded_file.getbuffer())
47
  st.success(f"Saved file {file_name} in {dir_prompt}")
48
+
49
+ # Update the prompt list
50
+ st.session_state['yaml_files'] = [f for f in os.listdir(dir_prompt) if f.endswith('.yaml')]
51
  else:
52
  st.error("Please upload a .yaml file that you previously created using this Prompt Builder tool.")
53
 
54
 
55
+
56
  def save_prompt_yaml(filename, col):
57
  yaml_content = {
58
  'prompt_author': st.session_state['prompt_author'],
 
225
  st.write('##')
226
  create_download_button_yaml(download_file_path, st.session_state['selected_yaml_file'],key_val=345798)
227
 
228
+
229
+ upload_local_prompt_to_server(dir_prompt)
230
+
231
  # Prompt Author Information
232
  st.write("---")
233
  st.header("Prompt Author Information")
vouchervision/general_utils.py CHANGED
@@ -1311,12 +1311,12 @@ def create_specimen_collage(cfg, logger, dir_home, Project, Dirs):
1311
 
1312
  # After processing, delete the original images, leaving only the _collage images
1313
  # This is used just in case the HF version puts them there
1314
- for filename in filenames:
1315
- if not filename.endswith('_collage.jpg'):
1316
- file_path = os.path.join(Dirs.save_original, filename)
1317
- if os.path.exists(file_path):
1318
- os.remove(file_path)
1319
- logger.info(f"Deleted original image: {file_path}")
1320
 
1321
  def crop_component_from_yolo_coords(anno_type, Dirs, analysis, all_detections, full_image, filename, save_per_image, save_per_class, save_list):
1322
  height = analysis['height']
 
1311
 
1312
  # After processing, delete the original images, leaving only the _collage images
1313
  # This is used just in case the HF version puts them there
1314
+ # for filename in filenames:
1315
+ # if not filename.endswith('_collage.jpg'):
1316
+ # file_path = os.path.join(Dirs.save_original, filename)
1317
+ # if os.path.exists(file_path):
1318
+ # os.remove(file_path)
1319
+ # logger.info(f"Deleted original image: {file_path}")
1320
 
1321
  def crop_component_from_yolo_coords(anno_type, Dirs, analysis, all_detections, full_image, filename, save_per_image, save_per_class, save_list):
1322
  height = analysis['height']
vouchervision/model_maps.py CHANGED
@@ -34,6 +34,8 @@ class ModelMaps:
34
 
35
  'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01': '#000000', # Black
36
  'LOCAL_MISTRAL_7B_INSTRUCT_V02': '#4a4a4a', # Gray
 
 
37
 
38
  'LOCAL_CPU_MISTRAL_7B_INSTRUCT_V02_GGUF': '#bababa', # Gray
39
 
@@ -78,7 +80,8 @@ class ModelMaps:
78
  'Open Mistral 7B',
79
  ]
80
 
81
- MODELS_LOCAL = ['LOCAL Mixtral 8x7B Instruct v0.1',
 
82
  'LOCAL Mistral 7B Instruct v0.2',
83
  'LOCAL CPU Mistral 7B Instruct v0.2 GGUF',
84
  'phyloforfun/mistral-7b-instruct-v2-bnb-4bit__HLT_MICH_Angiospermae_SLTPvC_v1-0_medium_OCR-C25-L25-E50-R05']
@@ -124,6 +127,7 @@ class ModelMaps:
124
  'Open Mixtral 8x7B': 'OPEN_MIXTRAL_8X7B',
125
  'Open Mistral 7B': 'OPEN_MISTRAL_7B',
126
 
 
127
  'LOCAL Mixtral 8x7B Instruct v0.1': 'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01',
128
  'LOCAL Mistral 7B Instruct v0.2': 'LOCAL_MISTRAL_7B_INSTRUCT_V02',
129
 
@@ -166,6 +170,7 @@ class ModelMaps:
166
  'Open Mixtral 8x7B': has_key_mistral,
167
  'Open Mistral 7B': has_key_mistral,
168
 
 
169
  'LOCAL Mixtral 8x7B Instruct v0.1': True,
170
  'LOCAL Mistral 7B Instruct v0.2': True,
171
 
@@ -208,6 +213,7 @@ class ModelMaps:
208
  'Open Mixtral 8x7B': False,
209
  'Open Mistral 7B': False,
210
 
 
211
  'LOCAL Mixtral 8x7B Instruct v0.1': False,
212
  'LOCAL Mistral 7B Instruct v0.2': False,
213
 
@@ -304,11 +310,15 @@ class ModelMaps:
304
 
305
 
306
  ### Mistral LOCAL
 
 
 
 
307
  elif key == 'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01':
308
  return 'Mixtral-8x7B-Instruct-v0.1'
309
 
310
  elif key == 'LOCAL_MISTRAL_7B_INSTRUCT_V02':
311
- return 'Mistral-7B-Instruct-v0.2'
312
 
313
  ### Mistral LOCAL CPU
314
  elif key == 'LOCAL_CPU_MISTRAL_7B_INSTRUCT_V02_GGUF':
 
34
 
35
  'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01': '#000000', # Black
36
  'LOCAL_MISTRAL_7B_INSTRUCT_V02': '#4a4a4a', # Gray
37
+ # mistralai/Mistral-Nemo-Instruct-2407
38
+ 'LOCAL_MISTRAL_NEMO_INSTRUCT_2407': '#000000', # Black
39
 
40
  'LOCAL_CPU_MISTRAL_7B_INSTRUCT_V02_GGUF': '#bababa', # Gray
41
 
 
80
  'Open Mistral 7B',
81
  ]
82
 
83
+ MODELS_LOCAL = ['LOCAL Mistral Nemo Instruct 2407',
84
+ 'LOCAL Mixtral 8x7B Instruct v0.1',
85
  'LOCAL Mistral 7B Instruct v0.2',
86
  'LOCAL CPU Mistral 7B Instruct v0.2 GGUF',
87
  'phyloforfun/mistral-7b-instruct-v2-bnb-4bit__HLT_MICH_Angiospermae_SLTPvC_v1-0_medium_OCR-C25-L25-E50-R05']
 
127
  'Open Mixtral 8x7B': 'OPEN_MIXTRAL_8X7B',
128
  'Open Mistral 7B': 'OPEN_MISTRAL_7B',
129
 
130
+ 'LOCAL Mistral Nemo Instruct 2407': 'LOCAL_MISTRAL_NEMO_INSTRUCT_2407',
131
  'LOCAL Mixtral 8x7B Instruct v0.1': 'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01',
132
  'LOCAL Mistral 7B Instruct v0.2': 'LOCAL_MISTRAL_7B_INSTRUCT_V02',
133
 
 
170
  'Open Mixtral 8x7B': has_key_mistral,
171
  'Open Mistral 7B': has_key_mistral,
172
 
173
+ 'LOCAL Mistral Nemo Instruct 2407': True,
174
  'LOCAL Mixtral 8x7B Instruct v0.1': True,
175
  'LOCAL Mistral 7B Instruct v0.2': True,
176
 
 
213
  'Open Mixtral 8x7B': False,
214
  'Open Mistral 7B': False,
215
 
216
+ 'LOCAL Mistral Nemo Instruct 2407': False,
217
  'LOCAL Mixtral 8x7B Instruct v0.1': False,
218
  'LOCAL Mistral 7B Instruct v0.2': False,
219
 
 
310
 
311
 
312
  ### Mistral LOCAL
313
+ # Key 'LOCAL_MISTRAL_NEMO_INSTRUCT_2407' <-> display name 'LOCAL Mistral Nemo Instruct 2407' (mistralai/Mistral-Nemo-Instruct-2407)
314
+ elif key == 'LOCAL_MISTRAL_NEMO_INSTRUCT_2407':
315
+ return 'Mistral-Nemo-Instruct-2407'
316
+
317
  elif key == 'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01':
318
  return 'Mixtral-8x7B-Instruct-v0.1'
319
 
320
  elif key == 'LOCAL_MISTRAL_7B_INSTRUCT_V02':
321
+ return 'Mistral-7B-Instruct-v0.3'
322
 
323
  ### Mistral LOCAL CPU
324
  elif key == 'LOCAL_CPU_MISTRAL_7B_INSTRUCT_V02_GGUF':
vouchervision/utils_LLM_JSON_validation.py CHANGED
@@ -12,8 +12,8 @@ def validate_and_align_JSON_keys_with_template(data, JSON_dict_structure):
12
  data[key] = ''
13
  elif isinstance(value, str):
14
  if value.lower() in ['unknown','not provided', 'missing', 'na', 'none', 'n/a', 'null', 'unspecified',
15
- 'TBD',
16
- 'not provided in the text', 'not found in the text',
17
  'not in the text', 'not provided', 'not found',
18
  'not provided in the ocr', 'not found in the ocr',
19
  'not in the ocr',
@@ -29,7 +29,7 @@ def validate_and_align_JSON_keys_with_template(data, JSON_dict_structure):
29
  'not in the ocr text',
30
  'Not provided in ocr text',
31
  'not provided in ocr text',
32
- 'n/a n/a','n/a, n/a',
33
  'n/a, n/a, n/a','n/a n/a, n/a','n/a, n/a n/a','n/a n/a n/a',
34
  'n/a, n/a, n/a, n/a','n/a n/a n/a n/a','n/a n/a, n/a, n/a','n/a, n/a n/a, n/a','n/a, n/a, n/a n/a',
35
  'n/a n/a n/a, n/a','n/a, n/a n/a n/a',
 
12
  data[key] = ''
13
  elif isinstance(value, str):
14
  if value.lower() in ['unknown','not provided', 'missing', 'na', 'none', 'n/a', 'null', 'unspecified',
15
+ 'TBD', 'tbd',
16
+ 'not provided in the text', 'not found in the text', 'Not found in OCR text', 'not found in ocr text',
17
  'not in the text', 'not provided', 'not found',
18
  'not provided in the ocr', 'not found in the ocr',
19
  'not in the ocr',
 
29
  'not in the ocr text',
30
  'Not provided in ocr text',
31
  'not provided in ocr text',
32
+ 'n/a n/a','n/a, n/a','Not applicable','not applicable',
33
  'n/a, n/a, n/a','n/a n/a, n/a','n/a, n/a n/a','n/a n/a n/a',
34
  'n/a, n/a, n/a, n/a','n/a n/a n/a n/a','n/a n/a, n/a, n/a','n/a, n/a n/a, n/a','n/a, n/a, n/a n/a',
35
  'n/a n/a n/a, n/a','n/a, n/a n/a n/a',
vouchervision/utils_VoucherVision.py CHANGED
@@ -164,7 +164,7 @@ class VoucherVision():
164
 
165
 
166
  def map_dir_labels(self):
167
- if self.cfg['leafmachine']['use_RGB_label_images']:
168
  self.dir_labels = os.path.join(self.Dirs.save_per_annotation_class,'label')
169
  else:
170
  self.dir_labels = self.Dirs.save_original
@@ -353,7 +353,7 @@ class VoucherVision():
353
  elif header.value == "path_to_crop":
354
  sheet.cell(row=next_row, column=i, value=path_to_crop)
355
  elif header.value == "path_to_original":
356
- if self.cfg['leafmachine']['use_RGB_label_images']:
357
  fname = os.path.basename(path_to_crop)
358
  base = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(path_to_crop))))
359
  path_to_original = os.path.join(base, 'Original_Images', fname)
 
164
 
165
 
166
  def map_dir_labels(self):
167
+ if self.cfg['leafmachine']['use_RGB_label_images'] in [1,2]:
168
  self.dir_labels = os.path.join(self.Dirs.save_per_annotation_class,'label')
169
  else:
170
  self.dir_labels = self.Dirs.save_original
 
353
  elif header.value == "path_to_crop":
354
  sheet.cell(row=next_row, column=i, value=path_to_crop)
355
  elif header.value == "path_to_original":
356
+ if self.cfg['leafmachine']['use_RGB_label_images'] in [1,2]:
357
  fname = os.path.basename(path_to_crop)
358
  base = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(path_to_crop))))
359
  path_to_original = os.path.join(base, 'Original_Images', fname)
vouchervision/utils_VoucherVision_parallel.py CHANGED
@@ -704,8 +704,8 @@ class VoucherVision():
704
  json_report.set_text(text_main='Sending batch to OCR and LLM')
705
 
706
  num_files = len(self.img_paths)
707
- # num_threads = min(num_files, 128)
708
- num_threads = 128
709
  counter = AtomicCounter()
710
 
711
  # Setup for parallel execution
 
704
  json_report.set_text(text_main='Sending batch to OCR and LLM')
705
 
706
  num_files = len(self.img_paths)
707
+ num_threads = min(num_files, 128)
708
+ # num_threads = 128
709
  counter = AtomicCounter()
710
 
711
  # Setup for parallel execution