BotNews / search_engine.py
leandrocarneiro's picture
Upload 7 files
a2ee974 verified
raw
history blame
1.91 kB
# Created by Leandro Carneiro at 19/01/2024
# Description: Helpers that search Google for news pages, download their text and store it in a local folder.
# ------------------------------------------------
import os.path
import time
from googlesearch import search
import requests
from bs4 import BeautifulSoup
import constants
def search_google(subject, sites):
    """Search Google for *subject* once per domain listed in *sites*.

    For every domain a query of the form ``"<subject> site:<domain>"`` is
    passed to ``search`` asking for ``constants.num_sites`` results, and
    everything the search yields is gathered into a single list. Progress
    messages are printed along the way.

    Returns:
        The list of gathered results, in the order the domains were
        searched. If anything raises, the exception message is printed and
        returned as a ``str`` instead of a list.
    """
    try:
        found = []
        for domain in sites:
            print(' Buscando notícias no domínio: ' + domain)
            query = f"{subject} site:{domain}"
            found.extend(search(query, num_results=constants.num_sites))
            # NOTE: a 3-second pause between queries (time.sleep) is disabled here.
        print(f' Total de sites encontrados: {len(found)}')
        return found
    except Exception as error:
        message = str(error)
        print(message)
        return message
def retrieve_text_from_site(sites, timeout=30):
    """Download every URL in *sites* and return the text of each page.

    Each page is fetched with ``requests.get``, parsed with BeautifulSoup's
    ``html.parser`` and reduced to its text via ``get_text()``.

    Args:
        sites: URLs (strings) to download.
        timeout: Value forwarded to ``requests.get(..., timeout=...)`` so an
            unresponsive server cannot block the run forever. Defaults to
            30 seconds.

    Returns:
        A list with one text per URL, in the same order as *sites*. If any
        download or parse step raises (including a timeout or a non-2xx
        status), the exception message is returned as a ``str`` instead and
        the texts collected so far are discarded.
    """
    try:
        page_texts = []
        for site in sites:
            print(' Baixando texto do site: ' + site)
            # Without a timeout, requests waits indefinitely on a stalled server.
            response = requests.get(site, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            page_texts.append(soup.get_text())
        return page_texts
    except Exception as e:
        return str(e)
def delete_base(local_base):
    """Remove every entry found directly inside the *local_base* folder.

    Each name returned by ``os.listdir(local_base)`` is joined to the folder
    path and deleted with ``os.remove``.

    Returns:
        ``0`` when every entry was removed; otherwise the message of the
        first exception raised, as a ``str`` (entries removed before the
        failure stay removed).
    """
    try:
        entry_names = os.listdir(local_base)
        for entry_name in entry_names:
            os.remove(os.path.join(local_base, entry_name))
    except Exception as error:
        return str(error)
    return 0
def save_on_base(sites, texts, local_base):
    """Write each downloaded text to *local_base* and log which URL it came from.

    For position ``i`` of *sites*, ``texts[i]`` is written to ``news<i>.txt``
    (UTF-8) and a line ``news<i>.txt;<url>`` is appended to
    ``filename_url.csv`` in the same folder.

    Returns:
        ``0`` when every pair was saved; otherwise the message of the first
        exception raised, as a ``str``. Files written before the failure
        are left in place.
    """
    try:
        for position, url in enumerate(sites):
            filename = f'news{position}.txt'
            # The text file is opened before texts[position] is read, so a
            # missing text still leaves an (empty) news file behind.
            with open(os.path.join(local_base, filename), 'w', encoding='utf-8') as text_file:
                text_file.write(texts[position])
            with open(os.path.join(local_base, 'filename_url.csv'), 'a', encoding='utf-8') as index_file:
                index_file.write(filename + ';' + url + '\n')
    except Exception as error:
        return str(error)
    return 0