Shreyas94 committed on
Commit
871b845
·
verified ·
1 Parent(s): e26bc82

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -75
app.py CHANGED
@@ -1,81 +1,69 @@
1
- import streamlit as st
2
- import wna_googlenews as wna
3
- import pandas as pd
4
- from transformers import pipeline
5
 
6
- st.set_page_config(layout="wide")
 
 
7
 
8
- st.title("WNA Google News App")
9
- st.subheader("Search for News and classify the headlines with sentiment analysis")
 
 
 
 
 
 
 
 
 
 
10
 
11
- query = st.text_input("Enter Query")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- models = [
14
- "j-hartmann/emotion-english-distilroberta-base",
15
- "SamLowe/roberta-base-go_emotions"
16
- # "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
17
- ]
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- settings = {
20
- "langregion": "en/US",
21
- "period": "1d",
22
- "model": models[0],
23
- "number_of_pages": 5
24
- }
 
25
 
26
-
27
- with st.sidebar:
28
- st.title("Settings")
29
- # add language and country parameters
30
- st.header("Language and Country")
31
-
32
- settings["langregion"] = st.selectbox("Select Language", ["en/US", "fr/FR"])
33
- # input field for number of pages
34
- st.header("Number of Pages")
35
- settings["number_of_pages"] = st.number_input("Enter Number of Pages", min_value=1, max_value=10)
36
-
37
- settings["region"] = settings["langregion"].split("/")[0]
38
- settings["lang"] = settings["langregion"].split("/")[1]
39
-
40
- # add period parameter
41
- st.header("Period")
42
- settings["period"] = st.selectbox("Select Period", ["1d", "7d", "30d"])
43
- # Add models parameters
44
- st.header("Models")
45
- settings["model"] = st.selectbox("Select Model", models)
46
-
47
-
48
- if st.button("Search"):
49
- classifier = pipeline(task="text-classification", model=settings["model"], top_k=None)
50
- # display a loading progress
51
- with st.spinner("Loading last news ..."):
52
- allnews = wna.get_news(settings, query)
53
- st.dataframe(allnews)
54
- with st.spinner("Processing received news ..."):
55
- df = pd.DataFrame(columns=["sentence", "date","best","second"])
56
- # loop on each sentence and call classifier
57
- for curnews in allnews:
58
- #st.write(curnews)
59
- cur_sentence = curnews["title"]
60
- cur_date = curnews["date"]
61
- model_outputs = classifier(cur_sentence)
62
- cur_result = model_outputs[0]
63
- #st.write(cur_result)
64
- # get label 1
65
- label = cur_result[0]['label']
66
- score = cur_result[0]['score']
67
- percentage = round(score * 100, 2)
68
- str1 = label + " (" + str(percentage) + ")%"
69
- # get label 2
70
- label = cur_result[1]['label']
71
- score = cur_result[1]['score']
72
- percentage = round(score * 100, 2)
73
- str2 = label + " (" + str(percentage) + ")%"
74
- # insert cur_sentence and cur_result into dataframe
75
- df.loc[len(df.index)] = [cur_sentence, cur_date, str1, str2]
76
-
77
- # write info on the output
78
- st.write("Number of sentences:", len(df))
79
- st.write("Language:", settings["lang"], "Country:", settings["region"])
80
-
81
- st.dataframe(df)
 
1
+ from transformers import pipeline, BartTokenizer
2
+ from googlesearch import search
3
+ from bs4 import BeautifulSoup
4
+ import requests
5
 
6
# Initialize BART tokenizer and summarization pipeline
# NOTE(review): `tokenizer` is created but never referenced below — the
# pipeline builds its own tokenizer internally; confirm whether this line
# can be dropped.
# NOTE(review): the checkpoint name suggests a Llama-3 model, yet it is
# loaded with BartTokenizer — presumably a mismatch; verify against the
# model card before relying on `tokenizer`.
tokenizer = BartTokenizer.from_pretrained('letgoofthepizza/Llama-3-8B-Instruct-ko-news-summary')
summarizer = pipeline("summarization", model="letgoofthepizza/Llama-3-8B-Instruct-ko-news-summary")
9
 
10
# Domains considered trustworthy for news/finance queries. Kept in ONE place
# (the original duplicated this list in both search branches, so the two
# copies could silently drift apart).
ALLOWED_DOMAINS = ("tesla.com", "cnbc.com", "reuters.com", "bloomberg.com", "investopedia.com")


# Function to perform a Google search and retrieve URLs, filtering by domain
def google_search(query: str, num_results: int = 10, domains=ALLOWED_DOMAINS):
    """Perform a Google search and retrieve the URLs of the search results.

    Args:
        query: Search query string.
        num_results: Maximum number of results to request.
        domains: Iterable of domain substrings a URL must contain to be kept
            (defaults to the module-level allow-list; new optional parameter,
            backward-compatible with existing callers).

    Returns:
        List of result URLs restricted to the given domains.
    """
    search_results = []
    domains = tuple(domains)
    try:
        # Some versions of the googlesearch package accept a `domains`
        # keyword and filter for us.
        for url in search(query, num_results=num_results, domains=list(domains)):
            search_results.append(url)
    except TypeError:
        # Version without the `domains` keyword: run an unrestricted search
        # and filter client-side instead.
        for url in search(query, num_results=num_results):
            if any(domain in url for domain in domains):
                search_results.append(url)
    return search_results
22
 
23
# Function to fetch content from a URL and summarize it
def fetch_and_summarize_url(url: str):
    """Fetch *url*, extract its leading paragraphs, and summarize them.

    Returns:
        The model-generated summary text, or None when the page cannot be
        fetched or contains no paragraph text.
    """
    try:
        # Timeout added: without it a single unresponsive host blocks the
        # whole search run indefinitely.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        # Unreachable/erroring pages are expected among search results;
        # treat them as "no content" rather than aborting the caller.
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract relevant content: the first few paragraphs of a news page
    # usually carry the gist of the article.
    paragraphs = [p.text for p in soup.find_all('p')]
    combined_text = " ".join(paragraphs[:3])  # combine first few paragraphs for summary

    # Ensure there is text to summarize before invoking the model.
    if not combined_text.strip():
        return None

    summary = summarizer(combined_text, max_length=200, min_length=50, do_sample=False)
    return summary[0]['summary_text']
42
 
43
# Function to perform a Google search and aggregate summaries
def google_search_and_answer(question: str, keywords: str):
    """Search for *question* plus *keywords* and join the relevant summaries.

    Returns:
        The relevant page summaries joined by blank lines, or the fallback
        message "No relevant information found." when nothing matches.
    """
    combined_query = f"{question} {keywords}"
    relevant_summaries = []
    for result_url in google_search(combined_query):
        page_summary = fetch_and_summarize_url(result_url)
        if not page_summary:
            continue
        # Relevance filter: keep a summary only when it mentions either the
        # question or the keywords (case-insensitive).
        lowered = page_summary.lower()
        if question.lower() in lowered or keywords.lower() in lowered:
            relevant_summaries.append(page_summary)
    return "\n\n".join(relevant_summaries) if relevant_summaries else "No relevant information found."
59
 
60
# Main function to run the script
def main():
    """Prompt for a query and keywords, then print the aggregated answer."""
    print("Intelligent Assistant")
    user_question = input("Enter your query: ")
    user_keywords = input("Enter specific keywords (e.g., 'Q1 2024 financial results Tesla'): ")
    print("Answer:", google_search_and_answer(user_question, user_keywords))


if __name__ == "__main__":
    main()