Spaces:
Build error
Build error
File size: 6,182 Bytes
bfb6182 a2f2d6c bfb6182 978b2e2 bfb6182 a2f2d6c bfb6182 978b2e2 bfb6182 978b2e2 bcb8c1d bfb6182 978b2e2 1e6a91d 978b2e2 5898e04 bfb6182 63f75f6 87cf337 bfb6182 a2f2d6c bcb8c1d 09342b7 bcb8c1d a2f2d6c 63f75f6 a2f2d6c f17d3de 63f75f6 bfb6182 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
import streamlit as st
from variables import *
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import pipeline, AutoTokenizer
from optimum.pipelines import pipeline
import tweepy
import pandas as pd
import numpy as np
import plotly_express as px
import plotly.graph_objects as go
from datetime import datetime as dt
from st_aggrid import GridOptionsBuilder, AgGrid, GridUpdateMode, DataReturnMode
from datasets import Dataset
from huggingface_hub import Repository
# Heading shown in the sidebar.
st.sidebar.header("Sentiment Analysis Score")

# Timestamp of this script run, formatted day_month-name_year_hour_minute.
extract_time = dt.today().strftime("%d_%B_%y_%H_%M")

# Dataset repository used to persist the collected tweets.
DATASET_REPO_URL = "https://huggingface.co/datasets/nickmuchi/fin_tweets"
DATA_FILENAME = "tweets_data.csv"
DATA_FILE = os.path.join("data", DATA_FILENAME)

# Relative path of the CSV that classified tweets are appended to.
tweet_file = os.path.join("tweets", DATA_FILE)

# Clone the dataset repository into the local "output" directory.
# NOTE(review): tweet_file lives under "tweets/", not under "output/" --
# confirm the CSV is meant to sit outside the cloned working tree.
repo = Repository(clone_from=DATASET_REPO_URL, local_dir="output")
# Page title.
st.title('Live FinTwitter Sentiment & Topic Analysis with Tweepy and Transformers')

# Introductory blurb rendered as markdown below the title.
_APP_INTRO_MD = """
This app uses Tweepy to extract tweets from twitter based on a list of popular accounts that tweet about markets/finance:
- The stream of tweets is processed via HuggingFace models for finance tweet sentiment and topic analysis:
- [Topic Classification](https://huggingface.co/nickmuchi/finbert-tone-finetuned-finance-topic-classification)
- [Sentiment Analysis](https://huggingface.co/nickmuchi/finbert-tone-finetuned-fintwitter-classification)
- The resulting sentiments and corresponding tweets are displayed, with graphs tracking the live sentiment and topics of financial market tweets in the Visualisation tab.
"""
st.markdown(_APP_INTRO_MD)

# True only on the script run triggered by clicking the button.
refresh_stream = st.button('Refresh Stream')
# Seed every session-state key this script reads or writes with False so that
# later lookups never hit a missing key.
# The previous guards for "topic", "time" and "user" all assigned `tlist`
# instead of their own key, so those three keys were never initialised.
_SESSION_DEFAULT_KEYS = ("update_but", "topic", "time", "user", "tlist")
for _session_key in _SESSION_DEFAULT_KEYS:
    if _session_key not in st.session_state:
        st.session_state[_session_key] = False
def _lookup_usernames(client, author_ids, batch_size=100):
    """Return a DataFrame mapping author ids to usernames.

    Args:
        client: tweepy client exposing ``get_users``.
        author_ids: list of unique author ids to resolve.
        batch_size: number of ids sent per ``get_users`` call; the user
            lookup endpoint accepts at most 100 ids per request.

    Returns:
        DataFrame with columns ``author`` (``user.id``) and ``username``,
        one row per distinct user returned by the API.
    """
    id_name_pairs = set()
    for start in range(0, len(author_ids), batch_size):
        response = client.get_users(ids=author_ids[start:start + batch_size])
        # `response.data` is None when the lookup returns no users.
        id_name_pairs.update(
            (user.id, user.username) for user in response.data or []
        )
    return pd.DataFrame(data=list(id_name_pairs), columns=['author', 'username'])


def _sentiment_score(sentiments):
    """Return the sentiment score for a Series of sentiment labels.

    The score is ``neutral% + bullish% - bearish%``, where each percentage is
    taken over all rows of ``sentiments``.

    Rows are counted directly from the boolean mask. The previous
    ``df[mask].count()[0]`` relied on positional Series indexing (deprecated
    in pandas) and counted non-null values of the first column instead of rows.

    Args:
        sentiments: pandas Series holding 'Bullish' / 'Bearish' / 'Neutral'.

    Returns:
        The score as a float; 0.0 when ``sentiments`` is empty, to avoid a
        division by zero.
    """
    total = len(sentiments)
    if total == 0:
        return 0.0
    pos_perc = (sentiments == 'Bullish').sum() * 100 / total
    neg_perc = (sentiments == 'Bearish').sum() * 100 / total
    neu_perc = (sentiments == 'Neutral').sum() * 100 / total
    return float(neu_perc + pos_perc - neg_perc)


if refresh_stream or st.session_state.update_but:
    # Remember that a refresh was requested so that script runs not triggered
    # by the button still take this branch.
    st.session_state.update_but = True

    client = tweepy.Client(CONFIG['bearer_token'], wait_on_rate_limit=True)

    # Fetch every page of tweets from the curated Twitter list.
    all_tweets = list(
        tweepy.Paginator(
            client.get_list_tweets,
            id="1083517925049266176",
            user_fields=['username'],
            tweet_fields=['created_at', 'text'],
            expansions=['author_id'],
            max_results=100,
        )
    )

    # Flatten the pages into (author_id, text, created_at) rows. A page with
    # no results carries `data=None`, hence the `or []`.
    data = [
        (tweet.data['author_id'], tweet.data['text'], tweet.data['created_at'])
        for response in all_tweets
        for tweet in response.data or []
    ]

    if not data:
        # Nothing to classify or display; stop this run cleanly instead of
        # failing further down on empty inputs.
        st.warning('No tweets were returned for the list; try refreshing again later.')
        st.stop()

    with st.spinner('Generating sentiment and topic classification of tweets...'):

        df = pd.DataFrame(data, columns=['author', 'tweet', 'creation_time'])

        # Strip http(s) links and bare www links from the tweet text.
        df['tweet'] = df['tweet'].replace(r'https?://\S+', '', regex=True).replace(r'www\S+', '', regex=True)

        df_users = _lookup_usernames(client, df['author'].unique().tolist())

        # process_tweets is defined outside this file section; its result is
        # used below with 'tweet', 'topic', 'username', 'creation_time' and
        # 'sentiment' columns.
        df_tweets = process_tweets(df, df_users)

        # Append the new rows to the CSV (no header, no index).
        df_tweets.to_csv(tweet_file, mode='a', header=False, index=False)

        # NOTE(review): tweet_file is a relative path under "tweets/" while
        # `repo` is cloned into "output" -- confirm the CSV ends up inside the
        # clone, otherwise there may be nothing new to push.
        current_commit = repo.push_to_hub()
        print(current_commit)

        # Expose the results to the rest of the app through session state.
        st.session_state['tlist'] = df_tweets['tweet'].tolist()
        st.session_state['topic'] = df_tweets['topic'].tolist()
        st.session_state['user'] = df_tweets['username'].tolist()
        st.session_state['time'] = df_tweets['creation_time'].tolist()
        st.session_state['tdf'] = df_tweets

    with st.container():

        st.write("Table of Influential FinTweets")

        gb = GridOptionsBuilder.from_dataframe(df_tweets)
        gb.configure_pagination(paginationPageSize=30, paginationAutoPageSize=False)  # Add pagination
        gb.configure_side_bar()  # Add a sidebar
        # Enable multi-row selection
        gb.configure_selection('multiple', use_checkbox=True, groupSelectsChildren="Group checkbox select children")
        # Wrap long tweets and let the row height grow to fit them
        gb.configure_column('tweet', wrapText=True, autoHeight=True)
        gridOptions = gb.build()

        AgGrid(
            df_tweets,
            gridOptions=gridOptions,
            data_return_mode='AS_INPUT',
            update_mode='MODEL_CHANGED',
            fit_columns_on_grid_load=False,
            enable_enterprise_modules=True,
            theme='streamlit',  # Add theme color to the table
            height=550,
            width='100%'
        )

    ## Display sentiment score
    sentiment_score = _sentiment_score(df_tweets['sentiment'])

    fig_1 = go.Figure()

    fig_1.add_trace(go.Indicator(
        mode="delta",
        value=sentiment_score,
        domain={'row': 1, 'column': 1}))

    fig_1.update_layout(
        template={'data': {'indicator': [{
            'title': {'text': "Sentiment Score"},
            'mode': "number+delta+gauge",
            'delta': {'reference': 50}}]
        }},
        autosize=False,
        width=250,
        height=250,
        margin=dict(
            l=5,
            r=5,
            b=5,
            pad=2
        )
    )

    with st.sidebar:

        st.plotly_chart(fig_1)
# Render the visitor badge image via markdown.
# (Removed a stray trailing "|" that left a dangling operator after the call.)
st.markdown("![visitor badge](https://visitor-badge.glitch.me/badge?page_id=nickmuchi-fintweet-sentiment-analysis)")