Asaad Almutareb committed on
Commit
192f29d
·
1 Parent(s): 509813e

added helper function to create wikipedia urls from page title

Browse files
innovation_pathfinder_ai/structured_tools/structured_tools.py CHANGED
@@ -11,6 +11,7 @@ import arxiv
11
  from innovation_pathfinder_ai.source_container.container import (
12
  all_sources
13
  )
 
14
 
15
  @tool
16
  def arxiv_search(query: str) -> str:
@@ -74,5 +75,5 @@ def wikipedia_search(query: str) -> str:
74
  api_wrapper = WikipediaAPIWrapper()
75
  wikipedia_search = WikipediaQueryRun(api_wrapper=api_wrapper)
76
  wikipedia_results = wikipedia_search.run(query)
77
- all_sources += wikipedia_results
78
  return wikipedia_results
 
11
  from innovation_pathfinder_ai.source_container.container import (
12
  all_sources
13
  )
14
+ from innovation_pathfinder_ai.utils import create_wikipedia_urls_from_text
15
 
16
  @tool
17
  def arxiv_search(query: str) -> str:
 
75
  api_wrapper = WikipediaAPIWrapper()
76
  wikipedia_search = WikipediaQueryRun(api_wrapper=api_wrapper)
77
  wikipedia_results = wikipedia_search.run(query)
78
+ all_sources += create_wikipedia_urls_from_text(wikipedia_results)
79
  return wikipedia_results
innovation_pathfinder_ai/utils.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def create_wikipedia_urls_from_text(text):
    """
    Extracts page titles from a given text and constructs Wikipedia URLs for each title.

    A title is read from every line that begins with "Page:". Text that is not
    on such a line (summaries, preambles, or a plain message that contains no
    "Page:" line at all) never produces a URL.

    Args:
    - text (str): A string containing multiple sections, each starting with a
      "Page:" line followed by the title. May be empty or None.

    Returns:
    - list: A list of Wikipedia URLs constructed from the extracted titles, in
      the order the titles appear. Empty when no title is found.
    """
    # Local import keeps this helper self-contained within the module.
    from urllib.parse import quote

    if not text:
        return []

    prefix = "Page:"
    # Characters allowed unescaped in a URL path segment (RFC 3986 pchar) plus
    # "/" for sub-pages. "?", "#" and "%" are deliberately NOT listed so they
    # get percent-encoded instead of being parsed as query/fragment/escape.
    safe_chars = "/:@!$&'()*+,;="

    urls = []
    for line in text.splitlines():
        # Only a line that *starts* with the prefix names a page; a "Page: "
        # that merely occurs inside a summary sentence must be ignored.
        if not line.startswith(prefix):
            continue
        title = line[len(prefix):].strip()
        if not title:
            continue
        # Wikipedia uses underscores in place of spaces in article paths.
        url_title = quote(title.replace(" ", "_"), safe=safe_chars)
        urls.append(f"https://en.wikipedia.org/wiki/{url_title}")

    return urls