Spaces:
Runtime error
Runtime error
Commit
·
dfa6c18
1
Parent(s):
164d241
Add application file
Browse files
app.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import snscrape.modules.twitter as sntwitter
|
3 |
+
import pandas as pd
|
4 |
+
import plotly.express as px
|
5 |
+
|
6 |
+
# Page chrome: browser-tab title first, then the on-page heading.
st.set_page_config(page_title="Scraping Twitter")
st.title('Scraping Twitter')

# User inputs: the search text and how many tweets to collect for it.
query = st.text_input(label='Enter a search query:', value='data science')
num_tweets = st.number_input(
    label='Number of tweets to scrape:',
    min_value=1,
    max_value=1000000,
    step=1,
)
|
14 |
+
|
15 |
+
# Scrape tweets and store data in a dataframe
|
16 |
+
|
17 |
+
|
18 |
+
def scrape_tweets(query, num_tweets):
    """Collect up to ``num_tweets`` search results into a DataFrame.

    The search string sent to the scraper is ``query`` with ``' lang:id'``
    appended (a language filter operator in the search syntax).

    Args:
        query: Free-text search query.
        num_tweets: Maximum number of tweets to collect. Values below 1
            yield an empty frame.

    Returns:
        A ``pandas.DataFrame`` with one row per tweet and the columns
        ``Tweet Id``, ``Datetime``, ``Text``, ``Username``, ``Followers``,
        ``URL`` and ``User Id``. The frame has these columns even when no
        tweet was returned.
    """
    # Local imports keep this block self-contained without touching the
    # file-level import section.
    import math
    from itertools import islice

    scraper = sntwitter.TwitterSearchScraper(query + ' lang:id')

    # Round up so a non-integer limit collects as many rows as the previous
    # ``i >= num_tweets`` check did; clamp because islice rejects negatives.
    limit = max(math.ceil(num_tweets), 0)

    # islice stops pulling from the generator once ``limit`` items were
    # produced. An enumerate/break loop would request one extra tweet (and
    # possibly one extra page from the network) only to throw it away.
    tweets_list = [
        [tweet.id, tweet.date, tweet.content, tweet.user.username,
         tweet.user.followersCount, tweet.url, tweet.user.id]
        for tweet in islice(scraper.get_items(), limit)
    ]

    return pd.DataFrame(tweets_list, columns=[
        'Tweet Id', 'Datetime', 'Text', 'Username', 'Followers', 'URL', 'User Id'])
|
28 |
+
|
29 |
+
|
30 |
+
# Scrape on demand, then render tables, charts and download controls.
#
# Streamlit re-runs this script from the top on every widget interaction, and
# st.button() is True only during the run triggered by its own click. The
# scraped frame is therefore kept in st.session_state: without it, touching
# the username selector or a download button would start a run in which
# 'Scrape Tweets' is False again and all results would vanish.
if st.button('Scrape Tweets'):
    st.session_state['tweets_df'] = scrape_tweets(query, num_tweets)
    st.success('Scraping done!')

scraped_df = None
if 'tweets_df' in st.session_state:
    scraped_df = st.session_state['tweets_df']

if scraped_df is not None and scraped_df.empty:
    # With zero rows the 'Datetime' column is not datetime-typed, so the
    # .dt accessor below would raise; there is nothing to plot anyway.
    st.warning('No tweets found for this query.')
elif scraped_df is not None:
    # Display data
    st.write(scraped_df)

    # assign() returns a new frame, so the copy stored in session state is
    # not given a 'Date' column that would show up in the table on reruns.
    tweets_df = scraped_df.assign(Date=scraped_df['Datetime'].dt.date)

    # Line plot of tweet count over time
    tweets_by_date = tweets_df.groupby(
        ['Date'])['Tweet Id'].count().reset_index()
    fig = px.line(tweets_by_date, x='Date', y='Tweet Id')
    st.plotly_chart(fig)

    # Scatter plot of followers vs tweet id
    fig = px.scatter(tweets_df, x='Followers', y='Tweet Id')
    st.plotly_chart(fig)

    # Username selection and interaction count
    st.sidebar.title('Username and Interaction Count')
    selected_username = st.sidebar.selectbox(
        'Select a username:', options=tweets_df['Username'].unique())
    selected_tweets = tweets_df[tweets_df['Username'] == selected_username]
    st.sidebar.write(
        f'Interactions with @{selected_username}: {selected_tweets.shape[0]}')

    # Interaction count by username, most active first
    interactions_by_user = tweets_df.groupby(['Username'])['Tweet Id'].count(
    ).reset_index().sort_values(by=['Tweet Id'], ascending=False)
    fig = px.bar(interactions_by_user, x='Username', y='Tweet Id')
    st.plotly_chart(fig)

    # Interaction count with selected username over time
    tweets_by_date = selected_tweets.groupby(
        ['Date'])['Tweet Id'].count().reset_index()
    fig = px.line(tweets_by_date, x='Date', y='Tweet Id')
    st.plotly_chart(fig)

    # Tweets involving selected username
    st.write(f'Tweets involving @{selected_username}:')
    st.write(selected_tweets)

    # Download as text file (tab-separated, header row first)
    if st.button('Download as Text File'):
        # Collect the lines and join once instead of growing a string
        # row by row.
        lines = ['\t'.join(tweets_df.columns)]
        lines.extend(
            '\t'.join(str(elem) for elem in row.values)
            for _, row in tweets_df.iterrows()
        )
        t = '\n'.join(lines) + '\n'
        # Tweet text routinely contains emoji and other non-ASCII
        # characters; an explicit encoding avoids UnicodeEncodeError on
        # hosts whose default locale encoding is not UTF-8.
        with open('tweets.txt', 'w', encoding='utf-8') as f:
            f.write(t)
        st.download_button(
            label="Download Tweets as Text",
            data=t,
            file_name='tweets.txt',
            mime='text/plain'
        )

    # Download as csv file
    if st.button('Download as CSV File'):
        csv = tweets_df.to_csv(index=False)
        # newline='' stops text mode from translating the line endings
        # that to_csv already placed in the string.
        with open('tweets.csv', 'w', encoding='utf-8', newline='') as f:
            f.write(csv)
        st.download_button(
            label="Download Tweets as CSV",
            data=csv,
            file_name='tweets.csv',
            mime='text/csv'
        )
|