fact-check / google_custom_search.py
Dustin Haring
Unfortunately there is a bunch of restructuring and fixes here, and I hate single large commits. Essentially: I integrated the Gemini prompt, fixed an issue in custom_search, added API key variables, and restructured the code to hopefully make dataset testing easier via the test_on_dataset() function
c68f588
# This file/function uses the Google Custom Search API to search for a given prompt and return one formatted description line (taken from each page's og:description meta tag) per top result (5 results by default)
from googleapiclient.discovery import build
import pprint
# Defaults to using Dustin's GCP API keys
def custom_google_search(search_term, api_key="AIzaSyA4oDDFtPxAfmPC8EcfQrkByb9xKm2QfMc", cse_id="31e85635d41bd4040", num_results=5):
    """Perform a Google Custom Search and return one formatted line per result.

    Despite the "titles" naming used by callers, each entry is built from the
    result page's ``og:description`` meta tag rather than from its title.

    Args:
        search_term: The search query.
        api_key: Google API key, passed to the client as ``developerKey``.
        cse_id: Google Custom Search Engine ID, passed as ``cx``.
        num_results: The number of desired results (default: 5). Passed
            through unchanged as the API's ``num`` parameter; the API
            reference documents a valid range of 1-10.

    Returns:
        A list of strings, one per item in the response. An item that carries
        an ``og:description`` meta tag yields that description wrapped in
        single quotes, prefixed with "* " and terminated by a newline; any
        other item yields the placeholder line "- No Results" plus a newline.
        The list is empty when the response contains no ``items`` key.

    Raises:
        Errors raised while building the client or executing the request are
        not caught here and propagate to the caller.
    """
    # NOTE(review): the default api_key / cse_id are credentials committed to
    # source control. They are kept so existing callers keep working, but they
    # should be rotated and supplied via configuration instead.
    service = build("customsearch", "v1", developerKey=api_key)
    results = service.cse().list(
        q=search_term,
        cx=cse_id,
        num=num_results,
    ).execute()

    search_result_titles = []
    # The API omits the 'items' key entirely when a query has no hits, so
    # fall back to an empty list instead of raising KeyError.
    for item in results.get('items', []):
        try:
            extended_title = "* '" + item['pagemap']['metatags'][0]['og:description'] + "'\n"
        except (KeyError, IndexError, TypeError):
            # Not every result exposes pagemap/metatags/og:description;
            # emit a placeholder so the output still has one line per item.
            extended_title = "- No Results\n"
        search_result_titles.append(extended_title)
    return search_result_titles
if __name__ == "__main__":
    # Manual smoke test: run one search and print the formatted result lines.
    # Example claims, each tagged with its known truthfulness:
    # search_phrase = "Californias governor rejects a bill to give unemployment checks to striking workers" # True
    # search_phrase = "1,000 US troops deploying to build offshore port for Gaza aid" # True
    # search_phrase = "More than 2500 migrants crossing the Mediterranean died or went missing this year" # True
    # search_phrase = "Everything JK Rowling Would Be Willing To Do To Protect Her AntiTrans Views" # False
    search_phrase = "Pat McAfee thought USC coach Lincoln Riley died because of President Biden's State of the Union address" # Unknown

    # The names my_api_key / my_cse_id that used to be passed here are not
    # defined anywhere in this module (NameError at run time), so rely on the
    # default credentials declared by custom_google_search instead.
    top_titles = custom_google_search(search_phrase)

    print("Top Search Results:")
    print("===================")
    for title in top_titles:
        print(title)