podsnigame committed on
Commit
dfa6c18
·
1 Parent(s): 164d241

Add application file

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import snscrape.modules.twitter as sntwitter
3
+ import pandas as pd
4
+ import plotly.express as px
5
+
6
# Page chrome: the browser-tab title and the on-page heading use the same text.
APP_TITLE = 'Scraping Twitter'
st.set_page_config(page_title=APP_TITLE)
st.title(APP_TITLE)

# Search controls: the free-text query and the upper bound on how many
# tweets a single scrape may collect.
query = st.text_input(label='Enter a search query:', value='data science')
num_tweets = st.number_input(
    label='Number of tweets to scrape:',
    min_value=1,
    max_value=1_000_000,
    step=1,
)
14
+
15
+ # Scrape tweets and store data in a dataframe
16
+
17
+
18
def scrape_tweets(query, num_tweets, lang='id'):
    """Scrape up to ``num_tweets`` tweets matching ``query`` into a DataFrame.

    Args:
        query: Search text handed to ``sntwitter.TwitterSearchScraper``.
        num_tweets: Maximum number of tweets to collect. Values below 1
            produce an empty frame.
        lang: Language code appended to the search as a ``lang:`` operator.
            Defaults to ``'id'``, the filter this function previously
            hard-coded. Pass ``None`` or ``''`` to search without a
            language filter.

    Returns:
        A pandas DataFrame with the columns 'Tweet Id', 'Datetime', 'Text',
        'Username', 'Followers', 'URL' and 'User Id', one row per tweet.
        When nothing is scraped the frame has those columns and no rows.
    """
    from itertools import islice

    search = f'{query} lang:{lang}' if lang else query
    # islice rejects negative counts, so clamp; it also stops after exactly
    # `limit` items instead of pulling one extra tweet just to notice the cap.
    limit = max(int(num_tweets), 0)
    scraper = sntwitter.TwitterSearchScraper(search)

    # NOTE(review): newer snscrape releases appear to rename `tweet.content`
    # to `rawContent` -- confirm against the pinned snscrape version.
    rows = [
        [
            tweet.id,
            tweet.date,
            tweet.content,
            tweet.user.username,
            tweet.user.followersCount,
            tweet.url,
            tweet.user.id,
        ]
        for tweet in islice(scraper.get_items(), limit)
    ]
    return pd.DataFrame(
        rows,
        columns=['Tweet Id', 'Datetime', 'Text', 'Username',
                 'Followers', 'URL', 'User Id'],
    )
28
+
29
+
30
# Streamlit re-executes this script on every widget interaction, and
# st.button() returns True only during the rerun caused by its own click.
# The scraped frame is therefore kept in st.session_state so the charts, the
# sidebar selector and the download buttons survive later reruns instead of
# vanishing as soon as the user touches another widget.
if st.button('Scrape Tweets'):
    st.session_state['tweets_df'] = scrape_tweets(query, num_tweets)
    st.success('Scraping done!')

tweets_df = st.session_state.get('tweets_df')

if tweets_df is not None and tweets_df.empty:
    # With no rows the 'Datetime' column has no datetime dtype, so the
    # `.dt` accessor used below would raise; report and skip the charts.
    st.warning('No tweets found for this query.')
elif tweets_df is not None:
    # Work on a derived frame so the copy stored in session state is not
    # mutated again on every rerun.
    tweets_df = tweets_df.assign(Date=tweets_df['Datetime'].dt.date)

    # Display data
    st.write(tweets_df)

    # Line plot of tweet count over time
    tweets_by_date = (
        tweets_df.groupby(['Date'])['Tweet Id'].count().reset_index()
    )
    st.plotly_chart(px.line(tweets_by_date, x='Date', y='Tweet Id'))

    # Scatter plot of follower count against tweet id
    st.plotly_chart(px.scatter(tweets_df, x='Followers', y='Tweet Id'))

    # Username selection and interaction count
    st.sidebar.title('Username and Interaction Count')
    selected_username = st.sidebar.selectbox(
        'Select a username:', options=tweets_df['Username'].unique())
    user_tweets = tweets_df[tweets_df['Username'] == selected_username]
    st.sidebar.write(
        f'Interactions with @{selected_username}: {len(user_tweets)}')

    # Interaction count by username, most active first
    interactions_by_user = (
        tweets_df.groupby(['Username'])['Tweet Id']
        .count()
        .reset_index()
        .sort_values(by=['Tweet Id'], ascending=False)
    )
    st.plotly_chart(px.bar(interactions_by_user, x='Username', y='Tweet Id'))

    # Interaction count with selected username over time
    user_tweets_by_date = (
        user_tweets.groupby(['Date'])['Tweet Id'].count().reset_index()
    )
    st.plotly_chart(px.line(user_tweets_by_date, x='Date', y='Tweet Id'))

    # Tweets involving selected username
    st.write(f'Tweets involving @{selected_username}:')
    st.write(user_tweets)

    # Download as text file: tab-separated, header row first. Built with
    # str.join rather than repeated `+=` so the cost stays linear in the
    # number of rows.
    tsv_lines = ['\t'.join(tweets_df.columns)]
    tsv_lines.extend(
        '\t'.join(str(value) for value in row)
        for row in tweets_df.itertuples(index=False, name=None)
    )
    # The download buttons are rendered directly. Nesting them under another
    # st.button meant they were never reachable, because the outer button
    # reset on the rerun triggered by clicking the inner one. The data is
    # served from memory, so nothing is written to the server's disk.
    st.download_button(
        label="Download Tweets as Text",
        data='\n'.join(tsv_lines) + '\n',
        file_name='tweets.txt',
        mime='text/plain'
    )

    # Download as csv file
    st.download_button(
        label="Download Tweets as CSV",
        data=tweets_df.to_csv(index=False),
        file_name='tweets.csv',
        mime='text/csv'
    )