from urllib.parse import parse_qs, quote_plus, urlparse

from bs4 import BeautifulSoup
import requests


def scrapeGoogleSearch(query):
    """Scrape Google's HTML search results page for *query*.

    Returns a formatted string containing an optional "Verified Answer"
    block (Google's answer box, when present) followed by up to 10
    organic results as Title / Description / Link lines. Returns an
    empty string when the page cannot be retrieved or its expected
    structure is missing.

    Side effects: prints a failure message on HTTP/network errors and
    writes the fetched page to ``soup_dump.html`` for debugging.

    NOTE(review): this relies on Google's obfuscated CSS class names
    (e.g. ``BNeawe``), which change without notice — expect breakage.
    """
    finalResponse = []
    # URL-encode the query so spaces and special characters ('&', '#', ...)
    # don't corrupt the request URL.
    searchUrl = f"https://www.google.com/search?q={quote_plus(query)}"
    try:
        # A timeout prevents the call from hanging indefinitely on a
        # stalled connection.
        response = requests.get(searchUrl, timeout=10)
    except requests.RequestException:
        print("Failed to retrieve search results.")
        return ""
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        # Dump the raw page so selector breakage can be debugged offline.
        with open('soup_dump.html', 'w', encoding='utf-8') as file:
            file.write(soup.prettify())
        mainDiv = soup.find('div', attrs={'id': 'main'})
        if mainDiv is None:
            # Layout changed, or we were served a consent/captcha page.
            return ""
        answerDiv = (
            mainDiv.select_one('div.PqksIc')
            or mainDiv.select_one('div.BNeawe.iBp4i')
        )
        if answerDiv:
            citationDateDiv = answerDiv.select_one('sub.gMUaMb.r0bn4c.rQMQod')
            citationDate = citationDateDiv.text if citationDateDiv else ""
            # The citation date is embedded inside the answer text; strip
            # it out so it is only reported once, on its own line.
            answerText = answerDiv.text.replace(citationDate, '').strip()
            citationText = f"Citation Date: {citationDate}" if citationDate else ""
            finalResponse.append(
                f"Verified Answer:\n====\n{answerText}\n{citationText}\n====\n\n"
            )
        results = mainDiv.select('div.egMi0.kCrYT')
        resultsDesc = mainDiv.select(
            'div.BNeawe.s3v9rd.AP7Wnd .BNeawe.s3v9rd.AP7Wnd:last-child'
        )
        if results:
            finalResponse.append("Search Results:\n====\n")
            for i, result in enumerate(results[:10]):
                titleTag = result.find('h3')
                anchor = result.find('a')
                if titleTag is None or anchor is None:
                    # Skip malformed result blocks instead of crashing
                    # the whole scrape with an AttributeError.
                    continue
                title = titleTag.text
                # Google wraps outbound links as /url?q=<real-url>&...;
                # recover the real target from the 'q' query parameter.
                parsedUrl = urlparse(anchor['href'])
                urlParams = parse_qs(parsedUrl.query)
                link = urlParams.get('q', [None])[0]
                # Descriptions are matched by a separate selector; guard
                # against a length mismatch with the results list.
                desc = resultsDesc[i].text if i < len(resultsDesc) else ""
                finalResponse.append(f"Title: {title}")
                finalResponse.append(f"Description: {desc}")
                finalResponse.append(f"Link: {link}\n")
    else:
        print("Failed to retrieve search results.")
    return "\n".join(finalResponse)