phyloforfun committed on
Commit
1d9ab62
·
1 Parent(s): ca048bb

Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing

Browse files
Files changed (3) hide show
  1. api_cost/api_cost.yaml +11 -2
  2. app.py +0 -2
  3. vouchervision/tool_wikipedia.py +22 -10
api_cost/api_cost.yaml CHANGED
@@ -7,7 +7,13 @@ GPT_4_32K:
7
  GPT_4:
8
  in: 0.03
9
  out: 0.06
10
- GPT_4_TURBO:
 
 
 
 
 
 
11
  in: 0.01
12
  out: 0.03
13
  GPT_3_5_INSTRUCT:
@@ -24,7 +30,10 @@ AZURE_GPT_4_32K:
24
  AZURE_GPT_4:
25
  in: 0.03
26
  out: 0.06
27
- AZURE_GPT_4_TURBO:
 
 
 
28
  in: 0.01
29
  out: 0.03
30
  AZURE_GPT_3_5_INSTRUCT:
 
7
  GPT_4:
8
  in: 0.03
9
  out: 0.06
10
+ # GPT_4_TURBO: ###############
11
+ # in: 0.01
12
+ # out: 0.03
13
+ GPT_4_TURBO_0125:
14
+ in: 0.01
15
+ out: 0.03
16
+ GPT_4_TURBO_1106:
17
  in: 0.01
18
  out: 0.03
19
  GPT_3_5_INSTRUCT:
 
30
  AZURE_GPT_4:
31
  in: 0.03
32
  out: 0.06
33
+ AZURE_GPT_4_TURBO_1106:
34
+ in: 0.01
35
+ out: 0.03
36
+ AZURE_GPT_4_TURBO_0125:
37
  in: 0.01
38
  out: 0.03
39
  AZURE_GPT_3_5_INSTRUCT:
app.py CHANGED
@@ -1561,7 +1561,6 @@ def content_project_settings(col):
1561
  st.session_state.config['leafmachine']['project']['dir_output'] = st.text_input("Output directory", st.session_state.config['leafmachine']['project'].get('dir_output', ''))
1562
 
1563
 
1564
- # @st.cache_data
1565
  def content_llm_cost():
1566
  st.write("---")
1567
  st.header('LLM Cost Calculator')
@@ -1596,7 +1595,6 @@ def content_llm_cost():
1596
  n_img = st.number_input("Number of Images", min_value=0, value=1000, step=100)
1597
 
1598
  # Function to find the model's Input and Output values
1599
- @st.cache_data
1600
  def find_model_values(model, all_dfs):
1601
  for df in all_dfs:
1602
  if model in df.keys():
 
1561
  st.session_state.config['leafmachine']['project']['dir_output'] = st.text_input("Output directory", st.session_state.config['leafmachine']['project'].get('dir_output', ''))
1562
 
1563
 
 
1564
  def content_llm_cost():
1565
  st.write("---")
1566
  st.header('LLM Cost Calculator')
 
1595
  n_img = st.number_input("Number of Images", min_value=0, value=1000, step=100)
1596
 
1597
  # Function to find the model's Input and Output values
 
1598
  def find_model_values(model, all_dfs):
1599
  for df in all_dfs:
1600
  if model in df.keys():
vouchervision/tool_wikipedia.py CHANGED
@@ -2,7 +2,8 @@ import itertools, wikipediaapi, requests, re, json
2
  from langchain_community.tools import WikipediaQueryRun
3
  from langchain_community.utilities import WikipediaAPIWrapper
4
  # from langchain_community.tools.wikidata.tool import WikidataAPIWrapper, WikidataQueryRun
5
-
 
6
 
7
  class WikipediaLinks():
8
 
@@ -370,13 +371,13 @@ class WikipediaLinks():
370
 
371
  self.info_packet['WIKI_TAXA']['DATA'].update(self.get_taxonbar_data(page.title))
372
 
373
- for back in page.backlinks:
374
- back = self.sanitize(back)
375
- if ':' not in back:
376
- link = self.sanitize(self.get_wikipedia_url(back))
377
- if link not in links:
378
- links.append(link)
379
- self.info_packet['WIKI_TAXA']['LINKS'][back] = link
380
 
381
 
382
  def extract_info_geo(self, page, opt=None):
@@ -577,5 +578,16 @@ if __name__ == '__main__':
577
  "minimumElevationInMeters": "",
578
  "maximumElevationInMeters": ""
579
  }
580
- Wiki = WikipediaLinks()
581
- info_packet= Wiki.gather_wikipedia_results(test_output)
 
 
 
 
 
 
 
 
 
 
 
 
2
  from langchain_community.tools import WikipediaQueryRun
3
  from langchain_community.utilities import WikipediaAPIWrapper
4
  # from langchain_community.tools.wikidata.tool import WikidataAPIWrapper, WikidataQueryRun
5
+ import cProfile
6
+ import pstats
7
 
8
  class WikipediaLinks():
9
 
 
371
 
372
  self.info_packet['WIKI_TAXA']['DATA'].update(self.get_taxonbar_data(page.title))
373
 
374
+ # for back in page.backlinks:
375
+ # back = self.sanitize(back)
376
+ # if ':' not in back:
377
+ # link = self.sanitize(self.get_wikipedia_url(back))
378
+ # if link not in links:
379
+ # links.append(link)
380
+ # self.info_packet['WIKI_TAXA']['LINKS'][back] = link
381
 
382
 
383
  def extract_info_geo(self, page, opt=None):
 
578
  "minimumElevationInMeters": "",
579
  "maximumElevationInMeters": ""
580
  }
581
+ do_print_profiler = True
582
+ if do_print_profiler:
583
+ profiler = cProfile.Profile()
584
+ profiler.enable()
585
+
586
+ Wiki = WikipediaLinks('D:/D_Desktop/usda_pdf/test.json')
587
+ info_packet= Wiki.gather_wikipedia_results(test_output)
588
+
589
+ if do_print_profiler:
590
+ profiler.disable()
591
+ stats = pstats.Stats(profiler).sort_stats('cumulative')
592
+ stats.print_stats(50)
593
+