Knight-coderr committed on
Commit
88e180d
·
verified ·
1 Parent(s): 22b1ebb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -135
app.py CHANGED
@@ -1,135 +1,136 @@
1
- import streamlit as st
2
- import pandas as pd
3
- from textblob import TextBlob
4
- import joblib
5
- import matplotlib.pyplot as plt
6
-
7
- # Load the data
8
- @st.cache_data
9
- def load_data():
10
- stock_data = pd.read_csv('data/stock_yfinance_data.csv')
11
- tweets_data = pd.read_csv('data/stock_tweets.csv')
12
-
13
- # Convert the Date columns to datetime
14
- stock_data['Date'] = pd.to_datetime(stock_data['Date'])
15
- tweets_data['Date'] = pd.to_datetime(tweets_data['Date']).dt.date
16
-
17
- # Perform sentiment analysis on tweets
18
- def get_sentiment(tweet):
19
- analysis = TextBlob(tweet)
20
- return analysis.sentiment.polarity
21
-
22
- tweets_data['Sentiment'] = tweets_data['Tweet'].apply(get_sentiment)
23
-
24
- # Aggregate sentiment by date and stock
25
- daily_sentiment = tweets_data.groupby(['Date', 'Stock Name']).mean(numeric_only=True).reset_index()
26
-
27
- # Convert the Date column in daily_sentiment to datetime64[ns]
28
- daily_sentiment['Date'] = pd.to_datetime(daily_sentiment['Date'])
29
-
30
- # Merge stock data with sentiment data
31
- merged_data = pd.merge(stock_data, daily_sentiment, how='left', left_on=['Date', 'Stock Name'], right_on=['Date', 'Stock Name'])
32
-
33
- # Fill missing sentiment values with 0 (neutral sentiment)
34
- merged_data['Sentiment'].fillna(0, inplace=True)
35
-
36
- # Sort the data by date
37
- merged_data.sort_values(by='Date', inplace=True)
38
-
39
- # Create lagged features
40
- merged_data['Prev_Close'] = merged_data.groupby('Stock Name')['Close'].shift(1)
41
- merged_data['Prev_Sentiment'] = merged_data.groupby('Stock Name')['Sentiment'].shift(1)
42
-
43
- # Create moving averages
44
- merged_data['MA7'] = merged_data.groupby('Stock Name')['Close'].transform(lambda x: x.rolling(window=7).mean())
45
- merged_data['MA14'] = merged_data.groupby('Stock Name')['Close'].transform(lambda x: x.rolling(window=14).mean())
46
-
47
- # Create daily price changes
48
- merged_data['Daily_Change'] = merged_data['Close'] - merged_data['Prev_Close']
49
-
50
- # Create volatility
51
- merged_data['Volatility'] = merged_data.groupby('Stock Name')['Close'].transform(lambda x: x.rolling(window=7).std())
52
-
53
- # Drop rows with missing values
54
- merged_data.dropna(inplace=True)
55
-
56
- return merged_data
57
-
58
- data = load_data()
59
- stock_names = data['Stock Name'].unique()
60
-
61
- # Load the best model
62
- model_filename = 'model/best_model.pkl'
63
- model = joblib.load(model_filename)
64
-
65
- st.title("Stock Price Prediction Using Sentiment Analysis")
66
-
67
- # User input for stock data
68
- st.header("Input Stock Data")
69
- selected_stock = st.selectbox("Select Stock Name", stock_names)
70
- days_to_predict = st.number_input("Number of Days to Predict",
71
- min_value=1, max_value=30, value=10)
72
-
73
- # Get the latest data for the selected stock
74
- latest_data = data[data['Stock Name'] == selected_stock].iloc[-1]
75
- prev_close = latest_data['Close']
76
- prev_sentiment = latest_data['Sentiment']
77
- ma7 = latest_data['MA7']
78
- ma14 = latest_data['MA14']
79
- daily_change = latest_data['Daily_Change']
80
- volatility = latest_data['Volatility']
81
-
82
- # Display the latest stock data in a table
83
- latest_data_df = pd.DataFrame({
84
- 'Metric': ['Previous Close Price', 'Previous Sentiment', '7-day Moving Average', '14-day Moving Average', 'Daily Change', 'Volatility'],
85
- 'Value': [prev_close, prev_sentiment, ma7, ma14, daily_change, volatility]
86
- })
87
-
88
- st.write("Latest Stock Data:")
89
- st.write(latest_data_df)
90
-
91
- st.write("Use the inputs above to predict the next days close prices of the stock.")
92
- if st.button("Predict"):
93
- predictions = []
94
- latest_date = latest_data['Date']
95
-
96
- for i in range(days_to_predict):
97
- X_future = pd.DataFrame({
98
- 'Prev_Close': [prev_close],
99
- 'Prev_Sentiment': [prev_sentiment],
100
- 'MA7': [ma7],
101
- 'MA14': [ma14],
102
- 'Daily_Change': [daily_change],
103
- 'Volatility': [volatility]
104
- })
105
-
106
- next_day_prediction = model.predict(X_future)[0]
107
- predictions.append(next_day_prediction)
108
-
109
- # Update features for next prediction
110
- prev_close = next_day_prediction
111
- ma7 = (ma7 * 6 + next_day_prediction) / 7 # Simplified rolling calculation
112
- ma14 = (ma14 * 13 + next_day_prediction) / 14 # Simplified rolling calculation
113
- daily_change = next_day_prediction - prev_close
114
-
115
- # st.write(f"Predicted next {days_to_predict} days close prices: {predictions}")
116
- # Prepare prediction data for display
117
- # Prepare prediction data for display
118
- prediction_dates = pd.date_range(start=latest_date + pd.Timedelta(days=1), periods=days_to_predict)
119
- prediction_df = pd.DataFrame({
120
- 'Date': prediction_dates,
121
- 'Predicted Close Price': predictions
122
- })
123
-
124
- st.subheader("Predicted Prices")
125
- st.write(prediction_df)
126
-
127
- # Plotting the results
128
- st.subheader("Prediction Chart")
129
- plt.figure(figsize=(10, 6))
130
- plt.plot(prediction_df['Date'], prediction_df['Predicted Close Price'], marker='o', linestyle='--', label="Predicted Close Price")
131
- plt.xlabel("Date")
132
- plt.ylabel("Close Price")
133
- plt.title(f"{selected_stock} Predicted Close Prices")
134
- plt.legend()
135
- st.pyplot(plt)
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from textblob import TextBlob
4
+ import joblib
5
+ import matplotlib.pyplot as plt
6
+ import datetime
7
+
8
+ # Load the data
9
@st.cache_data
def load_data():
    """Build the per-stock feature table used by the app.

    Reads the price history and the tweets from the two CSV files under
    ``data/``, scores every tweet with TextBlob polarity, averages the
    scores per (Date, Stock Name) and left-joins them onto the price rows.
    Lagged, rolling and change features are then derived per stock.

    Returns:
        pandas.DataFrame: price rows sorted by ``Date`` with the added
        columns ``Sentiment``, ``Prev_Close``, ``Prev_Sentiment``, ``MA7``,
        ``MA14``, ``Daily_Change`` and ``Volatility``. Rows containing any
        missing value (e.g. the warm-up rows of the rolling windows) are
        dropped.
    """
    stock_data = pd.read_csv('data/stock_yfinance_data.csv')
    tweets_data = pd.read_csv('data/stock_tweets.csv')

    # Prices keep full timestamps; tweets are reduced to calendar dates so
    # that all tweets of one day fall into the same group.
    stock_data['Date'] = pd.to_datetime(stock_data['Date'])
    tweets_data['Date'] = pd.to_datetime(tweets_data['Date']).dt.date

    def get_sentiment(tweet):
        """Return the TextBlob polarity of a single tweet text."""
        return TextBlob(tweet).sentiment.polarity

    # TextBlob only accepts strings: a missing tweet (NaN) is scored as an
    # empty text instead of raising.
    tweet_texts = tweets_data['Tweet'].fillna('').astype(str)
    tweets_data['Sentiment'] = tweet_texts.apply(get_sentiment)

    # Average sentiment per day and stock, then bring the date key back to
    # datetime64[ns] so that it matches the dtype of stock_data['Date'].
    daily_sentiment = (
        tweets_data.groupby(['Date', 'Stock Name'])
        .mean(numeric_only=True)
        .reset_index()
    )
    daily_sentiment['Date'] = pd.to_datetime(daily_sentiment['Date'])

    merged_data = pd.merge(
        stock_data,
        daily_sentiment,
        how='left',
        on=['Date', 'Stock Name'],
    )

    # Days without tweets count as neutral (0). Assign the result back
    # instead of calling fillna(inplace=True) on the column selection: that
    # chained in-place form is deprecated and does not update merged_data
    # when pandas Copy-on-Write is active.
    merged_data['Sentiment'] = merged_data['Sentiment'].fillna(0)

    # Chronological order is required by the shift/rolling features below.
    merged_data.sort_values(by='Date', inplace=True)

    closes_by_stock = merged_data.groupby('Stock Name')['Close']

    # Lagged features: previous row's close and sentiment of the same stock.
    merged_data['Prev_Close'] = closes_by_stock.shift(1)
    merged_data['Prev_Sentiment'] = merged_data.groupby('Stock Name')['Sentiment'].shift(1)

    # Rolling means and rolling standard deviation of the close, per stock.
    merged_data['MA7'] = closes_by_stock.transform(lambda x: x.rolling(window=7).mean())
    merged_data['MA14'] = closes_by_stock.transform(lambda x: x.rolling(window=14).mean())
    merged_data['Daily_Change'] = merged_data['Close'] - merged_data['Prev_Close']
    merged_data['Volatility'] = closes_by_stock.transform(lambda x: x.rolling(window=7).std())

    # Drop every row that still has a missing value in any column.
    merged_data.dropna(inplace=True)

    return merged_data
58
+
59
# Feature table (load_data is wrapped in st.cache_data) and the stock names
# offered in the selector.
data = load_data()
stock_names = data['Stock Name'].unique()

# Load the best model
model_filename = 'model/best_model.pkl'
model = joblib.load(model_filename)

st.title("Stock Price Prediction Using Sentiment Analysis")

# User input for stock data
st.header("Input Stock Data")
selected_stock = st.selectbox("Select Stock Name", stock_names)
days_to_predict = st.number_input(
    "Number of Days to Predict",
    min_value=1,
    max_value=30,
    value=10,
)

# The last row of the selected stock seeds the forecast features.
selected_rows = data[data['Stock Name'] == selected_stock]
latest_data = selected_rows.iloc[-1]
prev_close, prev_sentiment, ma7, ma14, daily_change, volatility = (
    latest_data[column]
    for column in ('Close', 'Sentiment', 'MA7', 'MA14', 'Daily_Change', 'Volatility')
)

# Display the latest stock data in a table
metric_labels = [
    'Previous Close Price',
    'Previous Sentiment',
    '7-day Moving Average',
    '14-day Moving Average',
    'Daily Change',
    'Volatility',
]
metric_values = [prev_close, prev_sentiment, ma7, ma14, daily_change, volatility]
latest_data_df = pd.DataFrame({'Metric': metric_labels, 'Value': metric_values})

st.write("Latest Stock Data:")
st.write(latest_data_df)

st.write("Use the inputs above to predict the next days close prices of the stock.")
93
if st.button("Predict"):
    # Iteratively forecast `days_to_predict` closes: each prediction is fed
    # back as the next step's Prev_Close. Sentiment and volatility are held
    # at their latest observed values for the whole horizon.
    predictions = []

    # Forecast dates start the day after today. Normalising to midnight
    # keeps a time-of-day out of the table and off the chart axis.
    latest_date = pd.Timestamp.today().normalize()

    for _ in range(days_to_predict):
        X_future = pd.DataFrame({
            'Prev_Close': [prev_close],
            'Prev_Sentiment': [prev_sentiment],
            'MA7': [ma7],
            'MA14': [ma14],
            'Daily_Change': [daily_change],
            'Volatility': [volatility],
        })

        next_day_prediction = model.predict(X_future)[0]
        predictions.append(next_day_prediction)

        # Update features for the next step. The change must be measured
        # against the close *before* it is overwritten; computing it after
        # the assignment would always yield 0.
        daily_change = next_day_prediction - prev_close
        prev_close = next_day_prediction
        # Simplified rolling update: the exact window contents are not
        # tracked, so the old mean stands in for the 6 (13) retained values.
        ma7 = (ma7 * 6 + next_day_prediction) / 7
        ma14 = (ma14 * 13 + next_day_prediction) / 14

    # Prepare prediction data for display
    prediction_dates = pd.date_range(
        start=latest_date + pd.Timedelta(days=1),
        periods=days_to_predict,
    )
    prediction_df = pd.DataFrame({
        'Date': prediction_dates,
        'Predicted Close Price': predictions,
    })

    st.subheader("Predicted Prices")
    st.write(prediction_df)

    # Plot on an explicit Figure: st.pyplot expects a Figure object, and
    # relying on pyplot's global state is not safe across Streamlit reruns.
    st.subheader("Prediction Chart")
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(
        prediction_df['Date'],
        prediction_df['Predicted Close Price'],
        marker='o',
        linestyle='--',
        label="Predicted Close Price",
    )
    ax.set_xlabel("Date")
    ax.set_ylabel("Close Price")
    ax.set_title(f"{selected_stock} Predicted Close Prices")
    ax.legend()
    st.pyplot(fig)
    # Release the figure so repeated clicks do not accumulate open figures.
    plt.close(fig)