Spaces:
Running
Running
phyloforfun
committed on
Commit
·
089a1e9
1
Parent(s):
a1e2ec1
Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing
Browse files
app.py
CHANGED
@@ -1555,8 +1555,8 @@ def content_header():
|
|
1555 |
batch_progress_bar = st.progress(0)
|
1556 |
text_batch = st.empty() # Placeholder for current step name
|
1557 |
progress_report = ProgressReport(overall_progress_bar, batch_progress_bar, text_overall, text_batch)
|
1558 |
-
json_report = JSONReport(col_updates_1, col_json, col_json_WFO, col_json_GEO, col_json_map)
|
1559 |
-
|
1560 |
|
1561 |
with col_logo:
|
1562 |
show_header_welcome()
|
@@ -1572,9 +1572,12 @@ def content_header():
|
|
1572 |
|
1573 |
if check_if_usable(is_hf=st.session_state['is_hf']):
|
1574 |
if st.button(f"Start Processing{st.session_state['processing_add_on']}", type='primary',use_container_width=True):
|
1575 |
-
st.session_state['formatted_json'] =
|
1576 |
-
st.session_state['formatted_json_WFO'] =
|
1577 |
-
st.session_state['formatted_json_GEO'] =
|
|
|
|
|
|
|
1578 |
# Define number of overall steps
|
1579 |
progress_report.set_n_overall(N_STEPS)
|
1580 |
progress_report.update_overall(f"Starting VoucherVision...")
|
@@ -1589,7 +1592,7 @@ def content_header():
|
|
1589 |
path_custom_prompts,
|
1590 |
None,
|
1591 |
progress_report,
|
1592 |
-
json_report,
|
1593 |
path_api_cost=os.path.join(st.session_state.dir_home,'api_cost','api_cost.yaml'),
|
1594 |
is_hf = st.session_state['is_hf'],
|
1595 |
is_real_run=True)
|
@@ -1612,7 +1615,8 @@ def content_header():
|
|
1612 |
st.error(":heavy_exclamation_mark: Required API keys not set. Please visit the 'API Keys' tab and set the Google Vision OCR API key and at least one LLM key.")
|
1613 |
|
1614 |
if st.session_state['formatted_json']:
|
1615 |
-
|
|
|
1616 |
|
1617 |
|
1618 |
|
@@ -1937,17 +1941,17 @@ def content_collage_overlay():
|
|
1937 |
st.session_state.config['leafmachine']['project']['OCR_option'] = OCR_option
|
1938 |
st.markdown("Below is an example of what the LLM would see given the choice of OCR ensemble. One, two, or three version of OCR can be fed into the LLM prompt. Typically, 'printed + handwritten' works well. If you have a GPU then you can enable trOCR.")
|
1939 |
if (OCR_option == 'hand') and not do_use_trOCR:
|
1940 |
-
st.text_area(label='
|
1941 |
elif (OCR_option == 'normal') and not do_use_trOCR:
|
1942 |
-
st.text_area(label='Printed',placeholder=demo_text_p,disabled=True, label_visibility='visible')
|
1943 |
elif (OCR_option == 'both') and not do_use_trOCR:
|
1944 |
-
st.text_area(label='
|
1945 |
elif (OCR_option == 'both') and do_use_trOCR:
|
1946 |
-
st.text_area(label='
|
1947 |
elif (OCR_option == 'normal') and do_use_trOCR:
|
1948 |
-
st.text_area(label='Printed + trOCR',placeholder=demo_text_trp,disabled=True, label_visibility='visible')
|
1949 |
elif (OCR_option == 'hand') and do_use_trOCR:
|
1950 |
-
st.text_area(label='
|
1951 |
|
1952 |
with col_ocr_2:
|
1953 |
if "demo_overlay" not in st.session_state:
|
@@ -2263,7 +2267,7 @@ st.set_page_config(layout="wide", page_icon='img/icon.ico', page_title='VoucherV
|
|
2263 |
|
2264 |
# Parse the 'is_hf' argument and set it in session state
|
2265 |
if 'is_hf' not in st.session_state:
|
2266 |
-
st.session_state['is_hf'] =
|
2267 |
|
2268 |
|
2269 |
#################################################################################################################################################
|
@@ -2336,6 +2340,11 @@ if 'API_checked' not in st.session_state:
|
|
2336 |
if 'API_rechecked' not in st.session_state:
|
2337 |
st.session_state['API_rechecked'] = False
|
2338 |
|
|
|
|
|
|
|
|
|
|
|
2339 |
|
2340 |
if 'cost_openai' not in st.session_state:
|
2341 |
st.session_state['cost_openai'] = None
|
|
|
1555 |
batch_progress_bar = st.progress(0)
|
1556 |
text_batch = st.empty() # Placeholder for current step name
|
1557 |
progress_report = ProgressReport(overall_progress_bar, batch_progress_bar, text_overall, text_batch)
|
1558 |
+
# st.session_state['json_report'] = JSONReport(col_updates_1, col_json, col_json_WFO, col_json_GEO, col_json_map)
|
1559 |
+
st.session_state['hold_output'] = st.toggle('View Final Transcription')
|
1560 |
|
1561 |
with col_logo:
|
1562 |
show_header_welcome()
|
|
|
1572 |
|
1573 |
if check_if_usable(is_hf=st.session_state['is_hf']):
|
1574 |
if st.button(f"Start Processing{st.session_state['processing_add_on']}", type='primary',use_container_width=True):
|
1575 |
+
st.session_state['formatted_json'] = {}
|
1576 |
+
st.session_state['formatted_json_WFO'] = {}
|
1577 |
+
st.session_state['formatted_json_GEO'] = {}
|
1578 |
+
st.session_state['json_report'] = JSONReport(col_updates_1, col_json, col_json_WFO, col_json_GEO, col_json_map)
|
1579 |
+
st.session_state['json_report'].set_JSON(st.session_state['formatted_json'], st.session_state['formatted_json_WFO'], st.session_state['formatted_json_GEO'])
|
1580 |
+
|
1581 |
# Define number of overall steps
|
1582 |
progress_report.set_n_overall(N_STEPS)
|
1583 |
progress_report.update_overall(f"Starting VoucherVision...")
|
|
|
1592 |
path_custom_prompts,
|
1593 |
None,
|
1594 |
progress_report,
|
1595 |
+
st.session_state['json_report'],
|
1596 |
path_api_cost=os.path.join(st.session_state.dir_home,'api_cost','api_cost.yaml'),
|
1597 |
is_hf = st.session_state['is_hf'],
|
1598 |
is_real_run=True)
|
|
|
1615 |
st.error(":heavy_exclamation_mark: Required API keys not set. Please visit the 'API Keys' tab and set the Google Vision OCR API key and at least one LLM key.")
|
1616 |
|
1617 |
if st.session_state['formatted_json']:
|
1618 |
+
if st.session_state['hold_output']:
|
1619 |
+
st.session_state['json_report'].set_JSON(st.session_state['formatted_json'], st.session_state['formatted_json_WFO'], st.session_state['formatted_json_GEO'])
|
1620 |
|
1621 |
|
1622 |
|
|
|
1941 |
st.session_state.config['leafmachine']['project']['OCR_option'] = OCR_option
|
1942 |
st.markdown("Below is an example of what the LLM would see given the choice of OCR ensemble. One, two, or three version of OCR can be fed into the LLM prompt. Typically, 'printed + handwritten' works well. If you have a GPU then you can enable trOCR.")
|
1943 |
if (OCR_option == 'hand') and not do_use_trOCR:
|
1944 |
+
st.text_area(label='Handwritten/Printed',placeholder=demo_text_h,disabled=True, label_visibility='visible', height=150)
|
1945 |
elif (OCR_option == 'normal') and not do_use_trOCR:
|
1946 |
+
st.text_area(label='Printed',placeholder=demo_text_p,disabled=True, label_visibility='visible', height=150)
|
1947 |
elif (OCR_option == 'both') and not do_use_trOCR:
|
1948 |
+
st.text_area(label='Handwritten/Printed + Printed',placeholder=demo_text_b,disabled=True, label_visibility='visible', height=150)
|
1949 |
elif (OCR_option == 'both') and do_use_trOCR:
|
1950 |
+
st.text_area(label='Handwritten/Printed + Printed + trOCR',placeholder=demo_text_trb,disabled=True, label_visibility='visible', height=150)
|
1951 |
elif (OCR_option == 'normal') and do_use_trOCR:
|
1952 |
+
st.text_area(label='Printed + trOCR',placeholder=demo_text_trp,disabled=True, label_visibility='visible', height=150)
|
1953 |
elif (OCR_option == 'hand') and do_use_trOCR:
|
1954 |
+
st.text_area(label='Handwritten/Printed + trOCR',placeholder=demo_text_trh,disabled=True, label_visibility='visible', height=150)
|
1955 |
|
1956 |
with col_ocr_2:
|
1957 |
if "demo_overlay" not in st.session_state:
|
|
|
2267 |
|
2268 |
# Parse the 'is_hf' argument and set it in session state
|
2269 |
if 'is_hf' not in st.session_state:
|
2270 |
+
st.session_state['is_hf'] = True
|
2271 |
|
2272 |
|
2273 |
#################################################################################################################################################
|
|
|
2340 |
if 'API_rechecked' not in st.session_state:
|
2341 |
st.session_state['API_rechecked'] = False
|
2342 |
|
2343 |
+
if 'json_report' not in st.session_state:
|
2344 |
+
st.session_state['json_report'] = False
|
2345 |
+
if 'hold_output' not in st.session_state:
|
2346 |
+
st.session_state['hold_output'] = False
|
2347 |
+
|
2348 |
|
2349 |
if 'cost_openai' not in st.session_state:
|
2350 |
st.session_state['cost_openai'] = None
|