eagle0504's picture
apps updated
7e94170
raw
history blame
2.67 kB
# helper.py
import yfinance as yf
import pandas as pd
from typing import List, Tuple, Dict
def download_stock_data(
    tickers: List[str], start_date: str, end_date: str, w: str
) -> pd.DataFrame:
    """
    Download adjusted close prices for the given tickers via yfinance.

    Args:
        tickers (List[str]): List of stock ticker symbols.
        start_date (str): Start date for data retrieval in 'YYYY-MM-DD' format.
        end_date (str): End date for data retrieval in 'YYYY-MM-DD' format.
        w (str): Sampling interval forwarded unchanged to ``yf.download`` as
            its ``interval`` argument (yfinance expects strings such as
            '1d', '1wk' or '1mo').

    Returns:
        pd.DataFrame: The "Adj Close" part of the downloaded data.
    """
    # Request unadjusted OHLC data explicitly: the "Adj Close" column is only
    # present when auto-adjustment is off, and the library default for
    # ``auto_adjust`` differs between yfinance releases.
    data = yf.download(
        tickers,
        start=start_date,
        end=end_date,
        interval=w,
        auto_adjust=False,
    )
    return data["Adj Close"]
def create_portfolio_and_calculate_returns(
    stock_data: pd.DataFrame, top_n: int, window_size: int = 1
) -> pd.DataFrame:
    """
    Build a "hold the recent winners" portfolio and compute its returns.

    Period-over-period returns are computed from ``stock_data``. The returns
    are walked in consecutive windows of ``window_size`` rows; for each
    window the ``top_n`` columns with the highest mean return are selected,
    and the equally weighted mean return of those columns over the following
    window is recorded as the portfolio return.

    Args:
        stock_data (pd.DataFrame): Price data, one column per stock.
        top_n (int): Number of best-performing stocks to hold each window.
        window_size (int): Number of rows per selection/holding window.
            Defaults to 1.

    Returns:
        pd.DataFrame: The per-stock returns with the last ``window_size``
        rows removed, plus these columns:
            - "benchmark": mean return of the first five columns per row.
            - "portfolio_returns": return of the selected stocks.
            - "portfolio_history": list of the selected stock labels.
            - "rolling_benchmark": cumulative product of 1 + benchmark.
            - "rolling_portfolio_returns": cumulative product of
              1 + portfolio_returns.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    # Compute returns and drop every row that contains a missing value
    # (this always removes the first row produced by pct_change).
    returns_data = stock_data.pct_change().dropna()

    portfolio_history = []  # Stocks held for each recorded period
    portfolio_returns = []  # Portfolio return for each recorded period

    # Step through the returns one selection window at a time.
    for start in range(0, len(returns_data) - window_size, window_size):
        end = start + window_size
        current_window = returns_data[start:end]
        top_stocks = (
            current_window.mean()
            .sort_values(ascending=False)
            .head(top_n)
            .index.tolist()
        )
        # Equal-weighted return of the picks over the following window; the
        # final holding window may be shorter than window_size.
        next_window = returns_data[end : end + window_size][top_stocks].mean(axis=1)
        portfolio_returns.extend(next_window)
        portfolio_history.extend([top_stocks] * len(next_window))

    # The loop records len(returns_data) - window_size periods, so trim the
    # frame to that length. Copy so the added columns never touch returns_data.
    # NOTE(review): portfolio returns come from the window *after* the row
    # they are stored on, while "benchmark" uses the row's own returns, so the
    # two series are offset by window_size rows -- confirm this is intended.
    new_returns_data = returns_data.iloc[:-window_size].copy()

    # Benchmark: per-row mean of the first five columns, taken before any
    # derived column is appended.
    new_returns_data["benchmark"] = new_returns_data.iloc[:, :5].mean(axis=1)
    new_returns_data["portfolio_returns"] = portfolio_returns
    new_returns_data["portfolio_history"] = portfolio_history
    new_returns_data["rolling_benchmark"] = (
        new_returns_data["benchmark"] + 1
    ).cumprod()
    new_returns_data["rolling_portfolio_returns"] = (
        new_returns_data["portfolio_returns"] + 1
    ).cumprod()
    return new_returns_data