File size: 6,085 Bytes
aec0f90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
from flask import Flask, render_template
from flask_socketio import SocketIO
from transformers import pipeline
import yfinance as yf
import requests
from newspaper import Article
from newspaper import Config
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import plotly.graph_objs as go
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import BartForConditionalGeneration, BartTokenizer
import threading

app = Flask(__name__)
socketio = SocketIO(app)

# Shared, module-level list of article dicts (title/link/sentiment/summary/
# Ticker) streamed to the browser via 'news_data' socket events; it is reset
# by the route/event handlers below and mutated in-place by worker threads.
news_data = []

# Set up sentiment analysis and summarization pipelines
# FinBERT fine-tuned for financial tone (3 labels); loaded once at import time
# so every request reuses the same in-memory models.
finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone', num_labels=3)
sentiment_tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
# BART (base, not fine-tuned) for abstractive summarization.
summarization_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large", forced_bos_token_id=0)
summarization_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
# truncation/max_length guard against articles longer than the models' context.
sentiment_analysis_pipeline = pipeline("sentiment-analysis", model=finbert, tokenizer=sentiment_tokenizer, truncation=True, max_length=512)
summarization_pipeline = pipeline("summarization", model=summarization_model, tokenizer=summarization_tokenizer, max_length=512, truncation=True)

# News API setup
def get_news_articles_info(ticker_name):
    """Scrape MarketWatch search results for *ticker_name*.

    Returns a list of up to five dicts with keys 'title', 'text' and 'url'
    (articles whose body could not be extracted are skipped). Progress is
    streamed to the browser via 'log' socket events.
    """
    # URL of the search results page
    url = "https://www.marketwatch.com/search?q=" + ticker_name + "&ts=0&tab=All%20News"

    # Send an HTTP GET request to the URL (timeout so a stalled server
    # cannot hang the socket handler forever).
    response = requests.get(url, timeout=15)

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.content, "html.parser")

    article_links = []
    for content in soup.find_all("h3", class_="article__headline"):
        for link in content.find_all("a"):
            if link['href'] != "#":
                article_links.append(link['href'])

    # NOTE(review): the earlier hits on this results page are presumably
    # promoted/boilerplate entries; only this slice is scraped — confirm
    # against the current MarketWatch markup.
    article_links = article_links[18:36]

    # The newspaper Config is loop-invariant: build it once, not per article.
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'
    config = Config()
    config.browser_user_agent = user_agent

    ticker_news_extracted = []
    count = 0
    for link in article_links:
        # Stop once five articles have been downloaded successfully; the
        # original checked after the download, wasting one extra fetch.
        if count >= 5:
            break
        article_info = {}
        article_info['text'] = ''
        article_info['url'] = link
        try:
            # Resolve redirects first so newspaper downloads the final URL.
            url = requests.head(article_info['url'], allow_redirects=True, timeout=15).url

            article = Article(url, config=config)
            article.download()
            article.parse()
            article_info['title'] = article.title
            article_info['text'] = article.text
            article_info['url'] = url
            count = count + 1
            print(count, url)
            socketio.emit('log', str(count) + " " + url)
        except Exception as error:
            # Best-effort scraping: log and move on to the next link.
            print("Error", url, error)
            continue
        if article_info['text'] == '':
            # Downloaded fine but no extractable body text; skip it
            # (it still counted against the download limit, as before).
            print('No text', url)
            continue
        ticker_news_extracted.append(article_info)
    return ticker_news_extracted

@app.route("/")
def index():
    """Serve the landing page, clearing any previously streamed results."""
    global news_data
    # Reset the shared list and tell connected clients to blank their table.
    news_data = []
    socketio.emit('news_data', news_data)
    return render_template("index_socket.html")

@socketio.on('get_graph')
def get_graph(ticker):
    """Build a 90-day candlestick chart for *ticker* and push it to the client."""
    # Trailing 90-calendar-day window ending today.
    window_end = datetime.today()
    window_start = window_end - timedelta(days=90)

    # Fetch daily OHLC history from Yahoo Finance.
    history = yf.download(ticker, start=window_start, end=window_end)

    # Assemble the Plotly candlestick trace from the downloaded frame.
    candles = go.Candlestick(
        x=history.index.strftime('%Y-%m-%d').tolist(),
        open=history['Open'].tolist(),
        high=history['High'].tolist(),
        low=history['Low'].tolist(),
        close=history['Close'].tolist(),
    )
    fig = go.Figure(data=[candles])

    # Customize the layout (range slider off keeps the chart compact).
    fig.update_layout(
        title=f'Candlestick Chart for {ticker} in the Last 90 Days',
        xaxis_title='Date',
        yaxis_title='Price',
        xaxis_rangeslider_visible=False,
    )

    # Ship the figure as JSON over the 'graph' socket event.
    socketio.emit('graph', fig.to_plotly_json())
    print("Graph is finished")

def get_sentiment(_index, _content):
    """Run FinBERT on *_content* and publish the label/score for row *_index*."""
    result = sentiment_analysis_pipeline(_content)[0]
    row = news_data[_index]
    row['sentiment'] = result["label"]
    row['sentiment_score'] = round(result["score"], 3)
    # Re-broadcast the whole table so the client updates this row in place.
    socketio.emit('news_data', news_data)

def get_summary(_index, _content):
    """Summarize *_content* with BART and publish the text for row *_index*."""
    outputs = summarization_pipeline(_content, max_length=100, min_length=30, do_sample=False)
    news_data[_index]['summary'] = outputs[0]["summary_text"]
    # Re-broadcast the whole table so the client updates this row in place.
    socketio.emit('news_data', news_data)


@socketio.on('get_news_data')
def get_news_data(ticker):
    """Fetch up to five articles for *ticker*, then score and summarize them
    concurrently, streaming incremental results over the 'news_data' event."""
    global news_data
    news_data = []
    # Blank the client's table immediately while scraping runs.
    socketio.emit('news_data', news_data)

    # Get top 5 news articles for the given ticker
    articles = get_news_articles_info(ticker_name=ticker)[:5]

    # Seed placeholder rows so the UI renders before analysis finishes.
    for article in articles:
        news_data.append({
            "title": article['title'],
            "link": article['url'],
            "sentiment": 'Loading...',
            "sentiment_score": 'Loading...',
            "summary": 'Loading...',
            "Ticker": ticker
        })

    socketio.emit('news_data', news_data)

    # One sentiment thread and one summary thread per article. Each worker
    # writes only to its own pre-allocated index in news_data, so no locking
    # is needed despite the shared list.
    threads_list = []
    for i, article in enumerate(articles):
        content = article['text']
        for worker in (get_sentiment, get_summary):
            t = threading.Thread(target=worker, args=(i, content))
            t.start()
            threads_list.append(t)
        socketio.emit('log', article['url'] + " Analysis started")

    # Block until every worker has pushed its result to the client.
    for thread in threads_list:
        thread.join()

    socketio.emit('log', 'Analysis is finished')

if __name__ == "__main__":
    # Use Socket.IO's runner (not app.run) so websocket events work; bind to
    # all interfaces on port 7860.
    socketio.run(app, host='0.0.0.0', port=7860)