"""Command-line assistant: search Google, then summarize the top results."""

from typing import List, Optional

import requests
from bs4 import BeautifulSoup
from googlesearch import search
from transformers import BartTokenizer, pipeline

# Hugging Face model identifier shared by the tokenizer and the pipeline.
SUMMARY_MODEL_NAME = 'letgoofthepizza/Llama-3-8B-Instruct-ko-news-summary'

# Upper bound (seconds) for each page download so one stalled host cannot
# block the whole run; requests waits indefinitely when no timeout is given.
REQUEST_TIMEOUT_SECONDS = 10

# NOTE(review): `tokenizer` is not passed to the pipeline below and is not
# used anywhere else in this file. The model id names a Llama model while the
# tokenizer class is BART -- confirm this pairing is intended.
tokenizer = BartTokenizer.from_pretrained(SUMMARY_MODEL_NAME)
summarizer = pipeline("summarization", model=SUMMARY_MODEL_NAME)


def google_search(query: str, num_results: int = 10) -> List[str]:
    """Perform a Google search and retrieve the URLs of the search results.

    Args:
        query: Search string forwarded to ``googlesearch.search``.
        num_results: Value forwarded as ``num_results`` to the search call.

    Returns:
        The URLs yielded by the search, in the order they were produced.
    """
    return list(search(query, num_results=num_results))


def fetch_and_summarize_url(url: str) -> Optional[str]:
    """Download a page and summarize its first three ``<p>`` elements.

    Args:
        url: Address of the page to fetch.

    Returns:
        The ``summary_text`` of the first pipeline result, or ``None`` when
        the request fails (including timeouts and non-2xx responses) or the
        selected paragraphs contain only whitespace.
    """
    # Only the network call is guarded: it is the only step that raises
    # requests.RequestException.
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.RequestException:
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    paragraphs = [p.text for p in soup.find_all('p')]
    # Only the first three paragraphs are fed to the summarizer.
    combined_text = " ".join(paragraphs[:3])
    if not combined_text.strip():
        return None

    summary = summarizer(
        combined_text, max_length=200, min_length=50, do_sample=False
    )
    return summary[0]['summary_text']


def google_search_and_answer(question: str, keywords: str) -> str:
    """Search for ``question`` plus ``keywords`` and join matching summaries.

    A summary is kept when the whole ``question`` string or the whole
    ``keywords`` string occurs in it, compared case-insensitively. Because
    the empty string is a substring of every string, passing an empty
    ``question`` or ``keywords`` keeps every summary.

    Args:
        question: The user's query text.
        keywords: Extra search terms appended to the query.

    Returns:
        The kept summaries separated by blank lines, or the message
        ``"No relevant information found."`` when none were kept.
    """
    search_query = f"{question} {keywords}"
    # Lower-case the needles once instead of once per URL.
    question_lower = question.lower()
    keywords_lower = keywords.lower()

    summaries = []
    for url in google_search(search_query):
        fetched_summary = fetch_and_summarize_url(url)
        if not fetched_summary:
            continue
        summary_lower = fetched_summary.lower()
        if question_lower in summary_lower or keywords_lower in summary_lower:
            summaries.append(fetched_summary)

    if summaries:
        return "\n\n".join(summaries)
    return "No relevant information found."


def main():
    """Prompt for a query and keywords, then print the aggregated answer."""
    print("Intelligent Assistant")
    question = input("Enter your query: ")
    keywords = input(
        "Enter specific keywords (e.g., 'Q1 2024 financial results Tesla'): "
    )
    answer = google_search_and_answer(question, keywords)
    print("Answer:", answer)


if __name__ == "__main__":
    main()