Spaces:

sabazo
/

innoSageAgentOne

Sleeping

Asaad Almutareb committed on Mar 9, 2024

Commit

192f29d

1 Parent(s): 509813e

added helper function to create wikipedia urls from page title

Files changed (2) hide show

innovation_pathfinder_ai/structured_tools/structured_tools.py CHANGED Viewed

@@ -11,6 +11,7 @@ import arxiv
 from innovation_pathfinder_ai.source_container.container import (
     all_sources
 )
 @tool
 def arxiv_search(query: str) -> str:
@@ -74,5 +75,5 @@ def wikipedia_search(query: str) -> str:
     api_wrapper = WikipediaAPIWrapper()
     wikipedia_search = WikipediaQueryRun(api_wrapper=api_wrapper)
     wikipedia_results = wikipedia_search.run(query)
-    all_sources += wikipedia_results
     return wikipedia_results

 from innovation_pathfinder_ai.source_container.container import (
     all_sources
 )
+from innovation_pathfinder_ai.utils import create_wikipedia_urls_from_text
 @tool
 def arxiv_search(query: str) -> str:
     api_wrapper = WikipediaAPIWrapper()
     wikipedia_search = WikipediaQueryRun(api_wrapper=api_wrapper)
     wikipedia_results = wikipedia_search.run(query)
+    all_sources += create_wikipedia_urls_from_text(wikipedia_results)
     return wikipedia_results

innovation_pathfinder_ai/utils.py ADDED Viewed

def create_wikipedia_urls_from_text(text: str) -> list:
    """
    Extracts page titles from a given text and constructs Wikipedia URLs for each title.

    A title is taken from every line that begins with the "Page: " marker.
    Any other line (summaries, or a message containing no marker at all)
    is ignored, so text without page entries yields an empty list instead
    of a bogus URL.

    Args:
    - text (str): A string containing multiple sections, each starting with "Page:" followed by the title.
      Empty or None input is treated as "no pages".
    Returns:
    - list: A list of Wikipedia URLs (str) constructed from the extracted titles,
      in the order the titles appear in the text.
    """
    # Local import keeps the helper self-contained; the module has no
    # import section of its own.
    from urllib.parse import quote

    if not text:
        return []

    marker = "Page: "
    urls = []
    for line in text.splitlines():
        # Only lines that *start* with the marker carry a title; matching the
        # marker mid-line would pick up stray text from a summary.
        if not line.startswith(marker):
            continue
        title = line[len(marker):].strip()
        if not title:
            continue
        # Wikipedia uses underscores in place of spaces. Percent-encode
        # characters that would otherwise change the URL's meaning ("?", "#",
        # "%", non-ASCII), while leaving common title punctuation readable.
        url_title = quote(title.replace(" ", "_"), safe="/:@!$&'()*+,;=")
        urls.append(f"https://en.wikipedia.org/wiki/{url_title}")
    return urls