# helper.py
from typing import Dict, List, Tuple

import pandas as pd

try:
    import yfinance as yf
except ImportError:
    # yfinance is only needed by download_stock_data; the portfolio maths
    # below is pandas-only and stays importable without it.
    yf = None


def download_stock_data(
    tickers: List[str], start_date: str, end_date: str, w: str
) -> pd.DataFrame:
    """
    Download adjusted close prices for the given tickers.

    Args:
        tickers (List[str]): List of stock ticker symbols.
        start_date (str): Start date for data retrieval in 'YYYY-MM-DD' format.
        end_date (str): End date for data retrieval in 'YYYY-MM-DD' format.
        w (str): Sampling interval forwarded to ``yfinance.download`` as
            ``interval`` (yfinance expects strings such as '1d', '1wk', '1mo').

    Returns:
        pd.DataFrame: Adjusted close prices, one column per ticker.

    Raises:
        ImportError: If yfinance is not installed.
    """
    if yf is None:
        raise ImportError("yfinance is required to download stock data")

    # auto_adjust=False is passed explicitly: when yfinance auto-adjusts
    # prices (the default in recent releases) it does not return an
    # 'Adj Close' column, and the lookup below would raise KeyError.
    data = yf.download(
        tickers,
        start=start_date,
        end=end_date,
        interval=w,
        auto_adjust=False,
    )
    adj_close = data["Adj Close"]

    # A single ticker can come back as a Series; normalise to the documented
    # DataFrame so downstream column-wise code keeps working.
    if isinstance(adj_close, pd.Series):
        column_name = tickers if isinstance(tickers, str) else tickers[0]
        adj_close = adj_close.to_frame(name=column_name)
    return adj_close


def create_portfolio_and_calculate_returns(
    stock_data: pd.DataFrame,
    top_n: int,
    window_size: int = 1,
    benchmark_size: int = 5,
) -> pd.DataFrame:
    """
    Build a "recent winners" portfolio and compare it with a benchmark.

    Period returns are computed with ``pct_change`` and rows containing NaN
    are dropped. The returns are then walked in consecutive windows of
    ``window_size`` rows: the ``top_n`` columns with the highest mean return
    in a window are selected, and the equal-weighted mean of those columns
    over the *following* window is recorded as the portfolio return.

    Note on alignment: the output keeps the first ``len(returns) -
    window_size`` return rows. ``portfolio_returns`` in output row ``i`` is
    the return realised in returns row ``i + window_size``, i.e. it is
    stored against the earlier row's index label, while ``benchmark`` in row
    ``i`` is computed from row ``i`` itself.

    Args:
        stock_data (pd.DataFrame): Price data, one column per stock.
        top_n (int): Number of best-performing stocks to hold each window.
        window_size (int): Number of rows per formation/holding window.
            Defaults to 1.
        benchmark_size (int): The benchmark is the row-wise mean of the first
            ``benchmark_size`` columns. Defaults to 5.

    Returns:
        pd.DataFrame: The per-stock returns plus the columns ``benchmark``,
        ``portfolio_returns``, ``portfolio_history`` (list of held tickers),
        ``rolling_benchmark`` and ``rolling_portfolio_returns`` (cumulative
        products of 1 + return). Empty when there are not enough rows to
        form one window followed by at least one holding row.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    returns_data = stock_data.pct_change().dropna()
    n_periods = len(returns_data)

    portfolio_history = []  # tickers held, one entry per output row
    portfolio_returns = []  # realised portfolio return, one per output row

    for start in range(0, n_periods - window_size, window_size):
        end = start + window_size
        formation_window = returns_data.iloc[start:end]
        top_stocks = (
            formation_window.mean()
            .sort_values(ascending=False)
            .head(top_n)
            .index.tolist()
        )
        # The final holding window may be shorter than window_size; iloc
        # slicing simply truncates at the end of the data.
        holding_window = returns_data.iloc[end : end + window_size]
        realised = holding_window[top_stocks].mean(axis=1)
        portfolio_returns.extend(realised)
        portfolio_history.extend([top_stocks] * len(realised))

    # Slice first, then copy, so the column assignments below act on an
    # independent frame. max(..., 0) keeps the result empty (rather than
    # wrapping to a negative stop) when the data is shorter than one window.
    kept_rows = max(n_periods - window_size, 0)
    result = returns_data.iloc[:kept_rows].copy()

    # Computed before any derived column is added, so only stock columns
    # can enter the benchmark.
    result["benchmark"] = result.iloc[:, :benchmark_size].mean(axis=1)
    result["portfolio_returns"] = pd.Series(
        portfolio_returns, index=result.index, dtype=float
    )
    result["portfolio_history"] = portfolio_history
    result["rolling_benchmark"] = (result["benchmark"] + 1).cumprod()
    result["rolling_portfolio_returns"] = (
        result["portfolio_returns"] + 1
    ).cumprod()
    return result