"""Streamlit app that scrapes tweets with snscrape and charts the results."""

from itertools import islice

import pandas as pd
import plotly.express as px
import snscrape.modules.twitter as sntwitter
import streamlit as st

# Column layout of the scraped DataFrame; order matches the row built per tweet.
TWEET_COLUMNS = [
    'Tweet Id', 'Datetime', 'Text', 'Username', 'Followers', 'URL', 'User Id',
]

# Key under which the last scrape result is kept between Streamlit reruns.
_RESULT_KEY = 'tweets_df'


def scrape_tweets(query, num_tweets):
    """Scrape up to *num_tweets* tweets matching *query* into a DataFrame.

    The search string is *query* with ``' lang:id'`` appended, exactly as the
    scraper receives it.

    Args:
        query: Free-text search query.
        num_tweets: Maximum number of tweets to collect.

    Returns:
        A ``pandas.DataFrame`` with the columns listed in ``TWEET_COLUMNS``
        (zero rows if the scraper yields nothing).
    """
    scraper = sntwitter.TwitterSearchScraper(query + ' lang:id')
    # islice stops pulling from the scraper as soon as the limit is reached,
    # so no item beyond the limit is requested.
    rows = [
        [tweet.id, tweet.date, tweet.content, tweet.user.username,
         tweet.user.followersCount, tweet.url, tweet.user.id]
        for tweet in islice(scraper.get_items(), int(num_tweets))
    ]
    return pd.DataFrame(rows, columns=TWEET_COLUMNS)


def _to_tab_separated(df):
    """Return *df* as text: a header line plus one tab-joined line per row."""
    header = '\t'.join(df.columns)
    body = (
        '\t'.join(str(value) for value in row)
        for row in df.itertuples(index=False, name=None)
    )
    return '\n'.join([header, *body]) + '\n'


def _render_results(tweets_df):
    """Render the table, charts, sidebar filter and downloads for *tweets_df*."""
    if tweets_df.empty:
        # An empty frame has no datetime dtype, so the `.dt` accessor below
        # would raise; report the situation instead of crashing.
        st.warning('No tweets found for this query.')
        return

    # Display data
    st.write(tweets_df)

    # Work on a copy so the frame kept in session state is not mutated.
    tweets_df = tweets_df.assign(Date=tweets_df['Datetime'].dt.date)

    # Line plot of tweet count over time
    tweets_by_date = tweets_df.groupby(['Date'])['Tweet Id'].count().reset_index()
    st.plotly_chart(px.line(tweets_by_date, x='Date', y='Tweet Id'))

    # Scatter plot of followers vs tweet count (one point per user). The raw
    # 'Tweet Id' is an identifier, not a count, so aggregate per user first.
    followers_vs_tweets = (
        tweets_df.groupby('Username')
        .agg(Followers=('Followers', 'max'), Tweets=('Tweet Id', 'count'))
        .reset_index()
    )
    st.plotly_chart(
        px.scatter(followers_vs_tweets, x='Followers', y='Tweets',
                   hover_name='Username')
    )

    # Username selection and interaction count
    st.sidebar.title('Username and Interaction Count')
    selected_username = st.sidebar.selectbox(
        'Select a username:', options=tweets_df['Username'].unique())
    user_tweets = tweets_df[tweets_df['Username'] == selected_username]
    st.sidebar.write(
        f'Interactions with @{selected_username}: {user_tweets.shape[0]}')

    # Interaction count by username
    interactions_by_user = (
        tweets_df.groupby(['Username'])['Tweet Id'].count()
        .reset_index()
        .sort_values(by=['Tweet Id'], ascending=False)
    )
    st.plotly_chart(px.bar(interactions_by_user, x='Username', y='Tweet Id'))

    # Interaction count with selected username over time
    user_tweets_by_date = (
        user_tweets.groupby(['Date'])['Tweet Id'].count().reset_index()
    )
    st.plotly_chart(px.line(user_tweets_by_date, x='Date', y='Tweet Id'))

    # Tweets involving selected username
    st.write(f'Tweets involving @{selected_username}:')
    st.write(user_tweets)

    # Downloads. st.download_button is itself the button and sends the data
    # straight to the browser, so no wrapping st.button or server-side file
    # is needed.
    st.download_button(
        label="Download Tweets as Text",
        data=_to_tab_separated(tweets_df),
        file_name='tweets.txt',
        mime='text/plain',
    )
    st.download_button(
        label="Download Tweets as CSV",
        data=tweets_df.to_csv(index=False),
        file_name='tweets.csv',
        mime='text/csv',
    )


st.set_page_config(page_title="Scraping Twitter")
st.title('Scraping Twitter')

# Input query and number of tweets to scrape
query = st.text_input('Enter a search query:', 'data science')
num_tweets = st.number_input(
    'Number of tweets to scrape:', min_value=1, max_value=1000000, step=1)

# Streamlit reruns the whole script on every widget interaction and a button
# is only True on the run right after its click. Keeping the result in
# session state lets the charts, the sidebar selection and the download
# buttons survive those reruns.
if st.button('Scrape Tweets'):
    st.session_state[_RESULT_KEY] = scrape_tweets(query, num_tweets)
    st.success('Scraping done!')

stored_tweets = st.session_state.get(_RESULT_KEY)
if stored_tweets is not None:
    _render_results(stored_tweets)