phyloforfun committed on
Commit
67f7ed6
·
1 Parent(s): 0f3589f

Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing

Browse files
app.py CHANGED
@@ -42,6 +42,7 @@ if 'config' not in st.session_state:
42
  st.session_state.config, st.session_state.dir_home = build_VV_config(loaded_cfg=None)
43
  setup_streamlit_config(st.session_state.dir_home)
44
 
 
45
 
46
  ########################################################################################################
47
  ### Global constants ####
@@ -280,7 +281,7 @@ def content_input_images(col_left, col_right):
280
  # Handle PDF files
281
  file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
282
  # Convert each page of the PDF to an image
283
- n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=st.session_state.config['leafmachine']['project']['dir_images_local'])
284
  # Update the input list for each page image
285
  converted_files = os.listdir(st.session_state['dir_uploaded_images'])
286
  for file_name in converted_files:
 
42
  st.session_state.config, st.session_state.dir_home = build_VV_config(loaded_cfg=None)
43
  setup_streamlit_config(st.session_state.dir_home)
44
 
45
+ # st.session_state['is_hf'] = True
46
 
47
  ########################################################################################################
48
  ### Global constants ####
 
281
  # Handle PDF files
282
  file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
283
  # Convert each page of the PDF to an image
284
+ n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)#st.session_state.config['leafmachine']['project']['dir_images_local'])
285
  # Update the input list for each page image
286
  converted_files = os.listdir(st.session_state['dir_uploaded_images'])
287
  for file_name in converted_files:
vouchervision/OCR_GPT4.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.prompts import PromptTemplate
2
+ from langchain_core.output_parsers import JsonOutputParser
3
+ from langchain_core.pydantic_v1 import BaseModel, Field
4
+
5
# Schema handed to JsonOutputParser below: the two string fields an OCR reply
# is expected to contain. Documented with comments rather than a docstring so
# the generated pydantic schema stays exactly as before.
class Transcription(BaseModel):
    # Printed (typeset) text found in the image.
    Transcription_Printed_Text: str = Field(
        description="The transcription of all printed text in the image.",
    )
    # Handwritten text found in the image.
    Transcription_Handwritten_Text: str = Field(
        description="The transcription of all handwritten text in the image.",
    )
8
+
9
class OCRGPT4VisionPreview:
    """Send an image to an HTTP OCR endpoint and parse the JSON transcription.

    The logger passed in must provide ``start_monitoring_usage()``,
    ``stop_monitoring_report_usage()`` and ``log_error()``; those are the
    only logger methods this class calls.
    """

    def __init__(self, logger, api_key, endpoint_url="https://gpt-4-vision-preview-api.com/ocr"):
        """Store the connection settings and build the JSON output parser.

        :param logger: usage-monitoring logger (see class docstring).
        :param api_key: bearer token sent in the ``Authorization`` header.
        :param endpoint_url: URL the image is POSTed to.
        """
        self.logger = logger
        self.api_key = api_key
        self.endpoint_url = endpoint_url
        self.parser = JsonOutputParser(pydantic_object=Transcription)

    def transcribe_image(self, image_file, timeout=120):
        """Upload ``image_file`` and return ``(transcription, usage_report)``.

        :param image_file: path of the image to upload.
        :param timeout: seconds to wait for the HTTP request before
            ``requests`` raises; previously the call could block forever.
        :return: tuple of the parsed transcription (or the error placeholder
            dict when the endpoint does not answer with HTTP 200) and whatever
            ``logger.stop_monitoring_report_usage()`` returns.
        """
        # Imported here because this module's import block does not bring in
        # ``requests``; without it the call below raised NameError.
        import requests

        self.logger.start_monitoring_usage()

        headers = {"Authorization": f"Bearer {self.api_key}"}
        # The context manager closes the image handle even if the request
        # fails; the original left it open.
        with open(image_file, 'rb') as image_handle:
            response = requests.post(
                self.endpoint_url,
                headers=headers,
                files={'image': image_handle},
                timeout=timeout,
            )

        if response.status_code == 200:
            # JsonOutputParser.parse works on text, so hand it the raw body
            # instead of the already-decoded dict from response.json().
            transcription = self.parser.parse(response.text)
        else:
            self.logger.log_error("Failed to transcribe image")
            # NOTE(review): this placeholder does not use the field names of
            # the Transcription schema; kept as-is so existing callers see the
            # same error value -- confirm which shape callers expect.
            transcription = {"Transcription": "Error"}

        usage_report = self.logger.stop_monitoring_report_usage()

        return transcription, usage_report
vouchervision/OCR_Gemini.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.messages import HumanMessage
2
+ from langchain_google_genai import ChatGoogleGenerativeAI
3
+ from langchain_core.output_parsers import JsonOutputParser
4
+ from langchain_core.pydantic_v1 import BaseModel, Field
5
+ import requests, logging, os, vertexai, json
6
+ from PIL import Image as PILImage
7
+ from io import BytesIO
8
+ import http.client
9
+ import typing
10
+ import urllib.request
11
+ from google.oauth2 import service_account
12
+ from vertexai.preview.generative_models import GenerativeModel, Image
13
+
14
# Schema handed to JsonOutputParser in OCRGeminiProVision: the two string
# fields the OCR prompt asks the model to return. Comments are used instead of
# a docstring so the generated pydantic schema stays exactly as before.
class Transcription(BaseModel):
    # Printed (typeset) text found in the image.
    Transcription_Printed_Text: str = Field(
        description="The transcription of all printed text in the image.",
    )
    # Handwritten text found in the image.
    Transcription_Handwritten_Text: str = Field(
        description="The transcription of all handwritten text in the image.",
    )
17
+
18
class OCRGeminiProVision:
    """Transcribe the text in an image with a Vertex AI generative vision model.

    The model is called through ``GenerativeModel.generate_content`` and its
    reply is parsed into the ``Transcription`` JSON structure when possible.
    """

    def __init__(self, logger, model_name="gemini-pro-vision"):
        """Create the model client and the JSON output parser.

        :param logger: logger object kept on the instance for callers.
        :param model_name: name passed to ``GenerativeModel``.
        """
        self.logger = logger
        self.llm = GenerativeModel(model_name)
        self.parser = JsonOutputParser(pydantic_object=Transcription)

    def image_to_vertex_image(self, image_path: str) -> "Image":
        """Convert a local image path or an image URL to a Vertex AI Image.

        :param image_path: file path, or a URL starting with ``http``.
        :return: ``Image`` built from the raw bytes of the file or response.
        """
        if image_path.startswith("http"):
            # Remote image: download the bytes.
            with urllib.request.urlopen(image_path) as response:
                image_bytes = response.read()
        else:
            # Local image: read the bytes from disk.
            with open(image_path, 'rb') as img_file:
                image_bytes = img_file.read()

        return Image.from_bytes(image_bytes)

    def combine_json_values(self, data, separator=" "):
        """Flatten a JSON-like structure into one string.

        Dictionaries contribute their values (keys are ignored) and lists
        contribute their items, both recursively; every other non-string
        value is converted with ``str``.

        :param data: str, dict, list, or any other value.
        :param separator: string placed between the combined values.
        :return: a single string containing all values from the input.
        """
        if isinstance(data, str):
            return data

        if isinstance(data, dict):
            items = data.values()
        elif isinstance(data, list):
            items = data
        else:
            # Numbers, None, booleans, ...
            return str(data)

        return separator.join(self.combine_json_values(item, separator) for item in items)

    def transcribe_image(self, image_file, prompt):
        """Run the vision model on one image and return its transcription.

        :param image_file: local path or URL of the image.
        :param prompt: text instruction sent together with the image.
        :return: ``(image, json_output, direct_output, str_output, None)``.
            ``json_output`` falls back to the raw text when the reply cannot
            be parsed as JSON; the last element is the usage-report slot,
            which this class always leaves as ``None``.
        """
        image = self.image_to_vertex_image(image_file)

        response = self.llm.generate_content([prompt, image])
        # Only strip leading whitespace. The former ``response.text[1:]``
        # dropped the first character unconditionally, which removed the
        # opening brace whenever the reply did not begin with a space.
        direct_output = response.text.lstrip()
        print(direct_output)

        # Parse the output to JSON using the Transcription schema; keep the
        # raw text if the model did not return valid JSON.
        try:
            json_output = self.parser.parse(direct_output)
        except Exception:
            json_output = direct_output

        try:
            str_output = self.combine_json_values(json_output)
        except Exception:
            str_output = direct_output

        return image, json_output, direct_output, str_output, None
101
+
102
+
103
+ PROMPT_OCR = """I need you to transcribe all of the text in this image.
104
+ Place the transcribed text into a JSON dictionary with this form {"Transcription_Printed_Text": "text","Transcription_Handwritten_Text": "text"}"""
105
+ PROMPT_ALL = """1. Refactor the unstructured OCR text into a dictionary based on the JSON structure outlined below.
106
+ 2. Map the unstructured OCR text to the appropriate JSON key and populate the field given the user-defined rules.
107
+ 3. JSON key values are permitted to remain empty strings if the corresponding information is not found in the unstructured OCR text.
108
+ 4. Duplicate dictionary fields are not allowed.
109
+ 5. Ensure all JSON keys are in camel case.
110
+ 6. Ensure new JSON field values follow sentence case capitalization.
111
+ 7. Ensure all key-value pairs in the JSON dictionary strictly adhere to the format and data types specified in the template.
112
+ 8. Ensure output JSON string is valid JSON format. It should not have trailing commas or unquoted keys.
113
+ 9. Only return a JSON dictionary represented as a string. You should not explain your answer.
114
+ This section provides rules for formatting each JSON value organized by the JSON key.
115
+ {catalogNumber Barcode identifier, typically a number with at least 6 digits, but fewer than 30 digits., order The full scientific name of the order in which the taxon is classified. Order must be capitalized., family The full scientific name of the family in which the taxon is classified. Family must be capitalized., scientificName The scientific name of the taxon including genus, specific epithet, and any lower classifications., scientificNameAuthorship The authorship information for the scientificName formatted according to the conventions of the applicable Darwin Core nomenclaturalCode., genus Taxonomic determination to genus. Genus must be capitalized. If genus is not present use the taxonomic family name followed by the word 'indet'., subgenus The full scientific name of the subgenus in which the taxon is classified. Values should include the genus to avoid homonym confusion., specificEpithet The name of the first or species epithet of the scientificName. Only include the species epithet., infraspecificEpithet The name of the lowest or terminal infraspecific epithet of the scientificName, excluding any rank designation., identifiedBy A comma separated list of names of people, groups, or organizations who assigned the taxon to the subject organism. This is not the specimen collector., recordedBy A comma separated list of names of people, groups, or organizations responsible for observing, recording, collecting, or presenting the original specimen. The primary collector or observer should be listed first., recordNumber An identifier given to the occurrence at the time it was recorded. Often serves as a link between field notes and an occurrence record, such as a specimen collector's number., verbatimEventDate The verbatim original representation of the date and time information for when the specimen was collected. Date of collection exactly as it appears on the label. 
Do not change the format or correct typos., eventDate Date the specimen was collected formatted as year-month-day, YYYY-MM_DD. If specific components of the date are unknown, they should be replaced with zeros. Examples \0000-00-00\ if the entire date is unknown, \YYYY-00-00\ if only the year is known, and \YYYY-MM-00\ if year and month are known but day is not., habitat A category or description of the habitat in which the specimen collection event occurred., occurrenceRemarks Text describing the specimen's geographic location. Text describing the appearance of the specimen. A statement about the presence or absence of a taxon at a the collection location. Text describing the significance of the specimen, such as a specific expedition or notable collection. Description of plant features such as leaf shape, size, color, stem texture, height, flower structure, scent, fruit or seed characteristics, root system type, overall growth habit and form, any notable aroma or secretions, presence of hairs or bristles, and any other distinguishing morphological or physiological characteristics., country The name of the country or major administrative unit in which the specimen was originally collected., stateProvince The name of the next smaller administrative region than country (state, province, canton, department, region, etc.) in which the specimen was originally collected., county The full, unabbreviated name of the next smaller administrative region than stateProvince (county, shire, department, parish etc.) in which the specimen was originally collected., municipality The full, unabbreviated name of the next smaller administrative region than county (city, municipality, etc.) 
in which the specimen was originally collected., locality Description of geographic location, landscape, landmarks, regional features, nearby places, or any contextual information aiding in pinpointing the exact origin or location of the specimen., degreeOfEstablishment Cultivated plants are intentionally grown by humans. In text descriptions, look for planting dates, garden locations, ornamental, cultivar names, garden, or farm to indicate cultivated plant. Use either - unknown or cultivated., decimalLatitude Latitude decimal coordinate. Correct and convert the verbatim location coordinates to conform with the decimal degrees GPS coordinate format., decimalLongitude Longitude decimal coordinate. Correct and convert the verbatim location coordinates to conform with the decimal degrees GPS coordinate format., verbatimCoordinates Verbatim location coordinates as they appear on the label. Do not convert formats. Possible coordinate types include [Lat, Long, UTM, TRS]., minimumElevationInMeters Minimum elevation or altitude in meters. Only if units are explicit then convert from feet (\ft\ or \ft.\\ or \feet\) to meters (\m\ or \m.\ or \meters\). Round to integer., maximumElevationInMeters Maximum elevation or altitude in meters. If only one elevation is present, then max_elevation should be set to the null_value. Only if units are explicit then convert from feet (\ft\ or \ft.\ or \feet\) to meters (\m\ or \m.\ or \meters\). Round to integer.}
116
+ Please populate the following JSON dictionary based on the rules and the unformatted OCR text
117
+ {
118
+ catalogNumber ,
119
+ order ,
120
+ family ,
121
+ scientificName ,
122
+ scientificNameAuthorship ,
123
+ genus ,
124
+ subgenus ,
125
+ specificEpithet ,
126
+ infraspecificEpithet ,
127
+ identifiedBy ,
128
+ recordedBy ,
129
+ recordNumber ,
130
+ verbatimEventDate ,
131
+ eventDate ,
132
+ habitat ,
133
+ occurrenceRemarks ,
134
+ country ,
135
+ stateProvince ,
136
+ county ,
137
+ municipality ,
138
+ locality ,
139
+ degreeOfEstablishment ,
140
+ decimalLatitude ,
141
+ decimalLongitude ,
142
+ verbatimCoordinates ,
143
+ minimumElevationInMeters ,
144
+ maximumElevationInMeters
145
+ }
146
+ """
147
def _get_google_credentials(credentials_path='D:/Dropbox/Servers/google_API/vouchervision-hf-a2c361d5d29d.json'):
    """Load Google service-account credentials from a JSON key file.

    :param credentials_path: path of the service-account JSON key file. The
        default is the developer-machine path this helper always used.
    :return: ``service_account.Credentials`` built from the key file.

    Side effect: ``GOOGLE_APPLICATION_CREDENTIALS`` is set to the JSON
    *contents* of the key file, exactly as before.
    NOTE(review): Google's own tooling expects that variable to hold a file
    path -- confirm which form the rest of the project reads.
    """
    with open(credentials_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # The parsed dict is used directly; the old dumps()/loads() round trip
    # produced the same dict.
    credentials = service_account.Credentials.from_service_account_info(data)
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = json.dumps(data)

    # SECURITY: a literal Google API key used to be assigned to
    # os.environ['GOOGLE_API_KEY'] here. Secrets must not live in source
    # control: revoke that key and provide GOOGLE_API_KEY through the
    # environment instead. Nothing in this module reads it.
    return credentials
155
+
156
if __name__ == '__main__':
    # Manual smoke test: initialise Vertex AI, transcribe one demo image with
    # the OCR prompt and print every element of the result.
    credentials = _get_google_credentials()
    vertexai.init(project='vouchervision-hf', location='us-central1', credentials=credentials)

    logger = logging.getLogger('LLaVA')
    logger.setLevel(logging.DEBUG)

    # Other sample images used during development:
    #   "C:/Users/Will/Downloads/gallery_short_gpt4t_trOCRhand/Cropped_Images/By_Class/label/MICH_7574789_Cyperaceae_Carex_scoparia.jpg"
    #   "D:/D_Desktop/usda_out/usda/Original_Images/4.jpg"
    sample_image = "D:/Dropbox/VoucherVision/demo/demo_images/MICH_16205594_Poaceae_Jouvea_pilosa.jpg"

    OCR_Gemini = OCRGeminiProVision(logger)
    image, json_output, direct_output, str_output, usage_report = OCR_Gemini.transcribe_image(
        sample_image,
        PROMPT_OCR,
    )

    # Print each result under its own heading, in the original order.
    for label, value in (
        ('json_output', json_output),
        ('direct_output', direct_output),
        ('str_output', str_output),
        ('usage_report', usage_report),
    ):
        print(label)
        print(value)
vouchervision/OCR_google_cloud_vision.py CHANGED
@@ -89,6 +89,10 @@ class OCREngine:
89
  self.trOCR_characters = None
90
  self.set_client()
91
  self.init_craft()
 
 
 
 
92
  if 'LLaVA' in self.OCR_option:
93
  self.init_llava()
94
 
@@ -123,9 +127,6 @@ class OCREngine:
123
 
124
  def init_llava(self):
125
 
126
- self.llava_prompt = """I need you to transcribe all of the text in this image.
127
- Place the transcribed text into a JSON dictionary with this form {"Transcription_Printed_Text": "text","Transcription_Handwritten_Text": "text"}"""
128
-
129
  self.model_path = "liuhaotian/" + self.cfg['leafmachine']['project']['OCR_option_llava']
130
  self.model_quant = self.cfg['leafmachine']['project']['OCR_option_llava_bit']
131
 
@@ -140,6 +141,9 @@ class OCREngine:
140
  use_4bit = True
141
 
142
  self.Llava = OCRllava(self.logger, model_path=self.model_path, load_in_4bit=use_4bit, load_in_8bit=False)
 
 
 
143
 
144
 
145
  def detect_text_craft(self):
@@ -684,7 +688,7 @@ class OCREngine:
684
  if 'LLaVA' in self.OCR_option: # This option does not produce an OCR helper image
685
  self.json_report.set_text(text_main=f'Working on LLaVA {self.Llava.model_path} transcription :construction:')
686
 
687
- image, json_output, direct_output, str_output, usage_report = self.Llava.transcribe_image(self.path, self.llava_prompt)
688
  self.logger.info(f"LLaVA Usage Report for Model {self.Llava.model_path}:\n{usage_report}")
689
 
690
  try:
 
89
  self.trOCR_characters = None
90
  self.set_client()
91
  self.init_craft()
92
+
93
+ self.multimodal_prompt = """I need you to transcribe all of the text in this image.
94
+ Place the transcribed text into a JSON dictionary with this form {"Transcription_Printed_Text": "text","Transcription_Handwritten_Text": "text"}"""
95
+
96
  if 'LLaVA' in self.OCR_option:
97
  self.init_llava()
98
 
 
127
 
128
  def init_llava(self):
129
 
 
 
 
130
  self.model_path = "liuhaotian/" + self.cfg['leafmachine']['project']['OCR_option_llava']
131
  self.model_quant = self.cfg['leafmachine']['project']['OCR_option_llava_bit']
132
 
 
141
  use_4bit = True
142
 
143
  self.Llava = OCRllava(self.logger, model_path=self.model_path, load_in_4bit=use_4bit, load_in_8bit=False)
144
+
145
+ def init_gemini_vision(self):
146
+ pass
147
 
148
 
149
  def detect_text_craft(self):
 
688
  if 'LLaVA' in self.OCR_option: # This option does not produce an OCR helper image
689
  self.json_report.set_text(text_main=f'Working on LLaVA {self.Llava.model_path} transcription :construction:')
690
 
691
+ image, json_output, direct_output, str_output, usage_report = self.Llava.transcribe_image(self.path, self.multimodal_prompt)
692
  self.logger.info(f"LLaVA Usage Report for Model {self.Llava.model_path}:\n{usage_report}")
693
 
694
  try:
vouchervision/OCR_llava.py CHANGED
@@ -94,7 +94,8 @@ LLaVA Models:
94
 
95
  # Define the desired data structure for the transcription.
96
  class Transcription(BaseModel):
97
- Transcription: str = Field(description="The transcription of all text in the image.")
 
98
 
99
  class OCRllava:
100
  def __init__(self, logger, model_path="liuhaotian/llava-v1.6-34b",load_in_4bit=False, load_in_8bit=False):
@@ -139,7 +140,7 @@ class OCRllava:
139
  # self.vision_tower.load_model()
140
  # self.vision_tower.to(device='cuda')
141
  # self.image_processor = self.vision_tower.image_processor
142
- self.parser = JsonOutputParser(pydantic_object=Transcription)
143
 
144
  def image_parser(self):
145
  sep = ","
 
94
 
95
  # Define the desired data structure for the transcription.
96
  class Transcription(BaseModel):
97
+ Transcription_Printed_Text: str = Field(description="The transcription of all printed text in the image.")
98
+ Transcription_Handwritten_Text: str = Field(description="The transcription of all handwritten text in the image.")
99
 
100
  class OCRllava:
101
  def __init__(self, logger, model_path="liuhaotian/llava-v1.6-34b",load_in_4bit=False, load_in_8bit=False):
 
140
  # self.vision_tower.load_model()
141
  # self.vision_tower.to(device='cuda')
142
  # self.image_processor = self.vision_tower.image_processor
143
+ self.parser = JsonOutputParser(pydantic_object=Transcription)
144
 
145
  def image_parser(self):
146
  sep = ","
vouchervision/model_maps.py CHANGED
@@ -4,7 +4,8 @@ class ModelMaps:
4
  'GPT_4': '#32CD32', # Lime Green
5
  'GPT_3_5': '#008000', # Green
6
  'GPT_3_5_INSTRUCT': '#3CB371', # Medium Sea Green
7
- 'GPT_4_TURBO': '#228B22', # Forest Green
 
8
  'GPT_4_32K': '#006400', # Dark Green
9
 
10
  'PALM2_TB_1': '#87CEEB', # Sky Blue
@@ -13,7 +14,8 @@ class ModelMaps:
13
  'GEMINI_PRO': '#1E00FF', #
14
 
15
  'AZURE_GPT_4': '#800080', # Purple
16
- 'AZURE_GPT_4_TURBO': '#9370DB', # Medium Purple
 
17
  'AZURE_GPT_4_32K': '#8A2BE2', # Blue Violet
18
  'AZURE_GPT_3_5_INSTRUCT': '#9400D3', # Dark Violet
19
  'AZURE_GPT_3_5': '#9932CC', # Dark Orchid
@@ -30,12 +32,14 @@ class ModelMaps:
30
 
31
  MODELS_OPENAI = ["GPT 4",
32
  "GPT 4 32k",
 
33
  "GPT 4 Turbo 1106-preview",
34
  "GPT 3.5",
35
  "GPT 3.5 Instruct",
36
 
37
  "Azure GPT 4",
38
  "Azure GPT 4 32k",
 
39
  "Azure GPT 4 Turbo 1106-preview",
40
  "Azure GPT 3.5",
41
  "Azure GPT 3.5 Instruct",]
@@ -58,13 +62,15 @@ class ModelMaps:
58
  version_mapping_cost = {
59
  'GPT 4 32k': 'GPT_4_32K',
60
  'GPT 4': 'GPT_4',
61
- 'GPT 4 Turbo 1106-preview': 'GPT_4_TURBO',
 
62
  'GPT 3.5 Instruct': 'GPT_3_5_INSTRUCT',
63
  'GPT 3.5': 'GPT_3_5',
64
 
65
  'Azure GPT 4 32k': 'AZURE_GPT_4_32K',
66
  'Azure GPT 4': 'AZURE_GPT_4',
67
- 'Azure GPT 4 Turbo 1106-preview': 'AZURE_GPT_4_TURBO',
 
68
  'Azure GPT 3.5 Instruct': 'AZURE_GPT_3_5_INSTRUCT',
69
  'Azure GPT 3.5': 'AZURE_GPT_3_5',
70
 
@@ -88,6 +94,7 @@ class ModelMaps:
88
  # Define the mapping for 'has_key' values
89
  version_has_key = {
90
  'GPT 4 Turbo 1106-preview': has_key_openai,
 
91
  'GPT 4': has_key_openai,
92
  'GPT 4 32k': has_key_openai,
93
  'GPT 3.5': has_key_openai,
@@ -97,6 +104,7 @@ class ModelMaps:
97
  'Azure GPT 3.5 Instruct': has_key_azure_openai,
98
  'Azure GPT 4': has_key_azure_openai,
99
  'Azure GPT 4 Turbo 1106-preview': has_key_azure_openai,
 
100
  'Azure GPT 4 32k': has_key_azure_openai,
101
 
102
  'PaLM 2 text-bison@001': has_key_google_application_credentials,
@@ -162,9 +170,12 @@ class ModelMaps:
162
  elif key == 'GPT_4_32K':
163
  return 'gpt-4-32k'
164
 
165
- elif key == 'GPT_4_TURBO':
166
  return 'gpt-4-1106-preview'
167
 
 
 
 
168
  ### Azure
169
  elif key == 'AZURE_GPT_3_5':
170
  return 'gpt-35-turbo-1106'
@@ -175,9 +186,12 @@ class ModelMaps:
175
  elif key == 'AZURE_GPT_4':
176
  return "gpt-4"
177
 
178
- elif key == 'AZURE_GPT_4_TURBO':
179
  return "gpt-4-1106-preview"
180
 
 
 
 
181
  elif key == 'AZURE_GPT_4_32K':
182
  return "gpt-4-32k"
183
 
 
4
  'GPT_4': '#32CD32', # Lime Green
5
  'GPT_3_5': '#008000', # Green
6
  'GPT_3_5_INSTRUCT': '#3CB371', # Medium Sea Green
7
+ 'GPT_4_TURBO_1106': '#228B22', # Forest Green
8
+ 'GPT_4_TURBO_0125': '#228B22', # Forest Green
9
  'GPT_4_32K': '#006400', # Dark Green
10
 
11
  'PALM2_TB_1': '#87CEEB', # Sky Blue
 
14
  'GEMINI_PRO': '#1E00FF', #
15
 
16
  'AZURE_GPT_4': '#800080', # Purple
17
+ 'AZURE_GPT_4_TURBO_1106': '#9370DB', # Medium Purple
18
+ 'AZURE_GPT_4_TURBO_0125': '#9370DB', # Medium Purple
19
  'AZURE_GPT_4_32K': '#8A2BE2', # Blue Violet
20
  'AZURE_GPT_3_5_INSTRUCT': '#9400D3', # Dark Violet
21
  'AZURE_GPT_3_5': '#9932CC', # Dark Orchid
 
32
 
33
  MODELS_OPENAI = ["GPT 4",
34
  "GPT 4 32k",
35
+ "GPT 4 Turbo 0125-preview",
36
  "GPT 4 Turbo 1106-preview",
37
  "GPT 3.5",
38
  "GPT 3.5 Instruct",
39
 
40
  "Azure GPT 4",
41
  "Azure GPT 4 32k",
42
+ "Azure GPT 4 Turbo 0125-preview",
43
  "Azure GPT 4 Turbo 1106-preview",
44
  "Azure GPT 3.5",
45
  "Azure GPT 3.5 Instruct",]
 
62
  version_mapping_cost = {
63
  'GPT 4 32k': 'GPT_4_32K',
64
  'GPT 4': 'GPT_4',
65
+ 'GPT 4 Turbo 0125-preview': 'GPT_4_TURBO_0125',
66
+ 'GPT 4 Turbo 1106-preview': 'GPT_4_TURBO_1106',
67
  'GPT 3.5 Instruct': 'GPT_3_5_INSTRUCT',
68
  'GPT 3.5': 'GPT_3_5',
69
 
70
  'Azure GPT 4 32k': 'AZURE_GPT_4_32K',
71
  'Azure GPT 4': 'AZURE_GPT_4',
72
+ 'Azure GPT 4 Turbo 0125-preview': 'AZURE_GPT_4_TURBO_0125',
73
+ 'Azure GPT 4 Turbo 1106-preview': 'AZURE_GPT_4_TURBO_1106',
74
  'Azure GPT 3.5 Instruct': 'AZURE_GPT_3_5_INSTRUCT',
75
  'Azure GPT 3.5': 'AZURE_GPT_3_5',
76
 
 
94
  # Define the mapping for 'has_key' values
95
  version_has_key = {
96
  'GPT 4 Turbo 1106-preview': has_key_openai,
97
+ 'GPT 4 Turbo 0125-preview': has_key_openai,
98
  'GPT 4': has_key_openai,
99
  'GPT 4 32k': has_key_openai,
100
  'GPT 3.5': has_key_openai,
 
104
  'Azure GPT 3.5 Instruct': has_key_azure_openai,
105
  'Azure GPT 4': has_key_azure_openai,
106
  'Azure GPT 4 Turbo 1106-preview': has_key_azure_openai,
107
+ 'Azure GPT 4 Turbo 0125-preview': has_key_azure_openai,
108
  'Azure GPT 4 32k': has_key_azure_openai,
109
 
110
  'PaLM 2 text-bison@001': has_key_google_application_credentials,
 
170
  elif key == 'GPT_4_32K':
171
  return 'gpt-4-32k'
172
 
173
+ elif key == 'GPT_4_TURBO_1106':
174
  return 'gpt-4-1106-preview'
175
 
176
+ elif key == 'GPT_4_TURBO_0125':
177
+ return 'gpt-4-0125-preview'
178
+
179
  ### Azure
180
  elif key == 'AZURE_GPT_3_5':
181
  return 'gpt-35-turbo-1106'
 
186
  elif key == 'AZURE_GPT_4':
187
  return "gpt-4"
188
 
189
+ elif key == 'AZURE_GPT_4_TURBO_1106':
190
  return "gpt-4-1106-preview"
191
 
192
+ elif key == 'AZURE_GPT_4_TURBO_0125':
193
+ return 'gpt-4-0125-preview'
194
+
195
  elif key == 'AZURE_GPT_4_32K':
196
  return "gpt-4-32k"
197
 
vouchervision/utils_hf.py CHANGED
@@ -6,6 +6,7 @@ import base64
6
  from PIL import Image
7
  from PIL import Image
8
  from io import BytesIO
 
9
 
10
  # from vouchervision.general_utils import get_cfg_from_full_path
11
 
@@ -40,29 +41,46 @@ def setup_streamlit_config(dir_home):
40
  def save_uploaded_file(directory, img_file, image=None):
41
  if not os.path.exists(directory):
42
  os.makedirs(directory)
 
 
 
43
  # Assuming the uploaded file is an image
44
- if image is None:
45
- try:
46
- with Image.open(img_file) as image:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  full_path = os.path.join(directory, img_file.name)
48
  image.save(full_path, "JPEG")
49
- # Return the full path of the saved image
50
- return full_path
51
- except:
52
- with Image.open(os.path.join(directory,img_file)) as image:
53
  full_path = os.path.join(directory, img_file)
54
  image.save(full_path, "JPEG")
55
- # Return the full path of the saved image
56
- return full_path
57
- else:
58
- try:
59
- full_path = os.path.join(directory, img_file.name)
60
- image.save(full_path, "JPEG")
61
- return full_path
62
- except:
63
- full_path = os.path.join(directory, img_file)
64
- image.save(full_path, "JPEG")
65
- return full_path
66
 
67
  def save_uploaded_local(directory, img_file, image=None):
68
  name = img_file.split(os.path.sep)[-1]
 
6
  from PIL import Image
7
  from PIL import Image
8
  from io import BytesIO
9
+ from shutil import copyfileobj
10
 
11
  # from vouchervision.general_utils import get_cfg_from_full_path
12
 
 
41
  def save_uploaded_file(directory, img_file, image=None):
42
  if not os.path.exists(directory):
43
  os.makedirs(directory)
44
+
45
+ full_path = os.path.join(directory, img_file.name)
46
+
47
  # Assuming the uploaded file is an image
48
+ if img_file.name.lower().endswith('.pdf'):
49
+ with open(full_path, 'wb') as out_file:
50
+ # If img_file is a file-like object (e.g., Django's UploadedFile),
51
+ # you can use copyfileobj or read chunks.
52
+ # If it's a path, you'd need to open and then save it.
53
+ if hasattr(img_file, 'read'):
54
+ # This is a file-like object
55
+ copyfileobj(img_file, out_file)
56
+ else:
57
+ # If img_file is a path string
58
+ with open(img_file, 'rb') as fd:
59
+ copyfileobj(fd, out_file)
60
+ return full_path
61
+ else:
62
+ if image is None:
63
+ try:
64
+ with Image.open(img_file) as image:
65
+ full_path = os.path.join(directory, img_file.name)
66
+ image.save(full_path, "JPEG")
67
+ # Return the full path of the saved image
68
+ return full_path
69
+ except:
70
+ with Image.open(os.path.join(directory,img_file)) as image:
71
+ full_path = os.path.join(directory, img_file)
72
+ image.save(full_path, "JPEG")
73
+ # Return the full path of the saved image
74
+ return full_path
75
+ else:
76
+ try:
77
  full_path = os.path.join(directory, img_file.name)
78
  image.save(full_path, "JPEG")
79
+ return full_path
80
+ except:
 
 
81
  full_path = os.path.join(directory, img_file)
82
  image.save(full_path, "JPEG")
83
+ return full_path
 
 
 
 
 
 
 
 
 
 
84
 
85
  def save_uploaded_local(directory, img_file, image=None):
86
  name = img_file.split(os.path.sep)[-1]