File size: 2,667 Bytes
69d3198
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7e94170
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# helper.py
import yfinance as yf
import pandas as pd
from typing import List, Tuple, Dict


def download_stock_data(
    tickers: List[str], start_date: str, end_date: str, w: int
) -> pd.DataFrame:
    """
    Fetch adjusted close prices for a set of tickers through yfinance.

    Args:
    tickers (List[str]): Ticker symbols to request.
    start_date (str): First date of the request in 'YYYY-MM-DD' format.
    end_date (str): End date of the request in 'YYYY-MM-DD' format.
    w (int): Forwarded unchanged as the ``interval`` argument of
        ``yf.download``.
        NOTE(review): yfinance documents ``interval`` as a string such as
        '1d' or '1wk' -- confirm what callers actually pass here.

    Returns:
    pd.DataFrame: The "Adj Close" selection of the downloaded data.
    """
    request_options = {"start": start_date, "end": end_date, "interval": w}
    downloaded = yf.download(tickers, **request_options)
    return downloaded["Adj Close"]


def create_portfolio_and_calculate_returns(
    stock_data: pd.DataFrame, top_n: int, window_size: int = 1
) -> pd.DataFrame:
    """
    Build a "pick the recent winners" portfolio and compare it to a benchmark.

    Period-over-period returns are computed from ``stock_data``. The returns
    are then walked in consecutive windows of ``window_size`` rows: in each
    window the ``top_n`` columns with the highest mean return are selected,
    and the equal-weighted mean return of those columns over the *following*
    window is recorded as the portfolio return.

    Args:
    stock_data (pd.DataFrame): Price data, one column per stock.
    top_n (int): Number of best-performing stocks held in each window.
    window_size (int): Number of rows per ranking/holding window.
        Defaults to 1, the value that used to be hard-coded.

    Returns:
    pd.DataFrame: The per-stock returns for all but the last ``window_size``
    rows, plus these columns:
        benchmark: mean return of the first five stock columns (all of them
            when there are fewer than five).
        portfolio_returns: return of the selected stocks.
        portfolio_history: list of the selected column labels.
        rolling_benchmark: cumulative product of (1 + benchmark).
        rolling_portfolio_returns: cumulative product of
            (1 + portfolio_returns).

    Raises:
    ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    # pct_change() builds a new frame, so dropping the NaN rows here never
    # mutates the caller's stock_data.
    returns_data = stock_data.pct_change().dropna()

    portfolio_history = []  # Selected column labels, one entry per held row
    portfolio_returns = []  # Portfolio return, one entry per held row

    # Rank on [start, end), then hold the winners over [end, end + window_size).
    for start in range(0, len(returns_data) - window_size, window_size):
        end = start + window_size
        ranking = returns_data.iloc[start:end].mean().sort_values(ascending=False)
        top_stocks = ranking.head(top_n).index.tolist()
        held = returns_data.iloc[end : end + window_size][top_stocks].mean(axis=1)

        portfolio_returns.extend(held)
        # The final holding window may be shorter than window_size, so repeat
        # the selection once per row that was actually held.
        portfolio_history.extend([top_stocks] * len(held))

    # Slice first and copy afterwards: the new columns are then written to an
    # independent frame instead of a view of a temporary copy.
    result = returns_data.iloc[:-window_size].copy()

    # Computed before any derived column is added, so only stock columns
    # contribute to the benchmark.
    benchmark = result.iloc[:, :5].mean(axis=1)

    # NOTE(review): portfolio_returns[i] is realized in the window *after*
    # row i, whereas benchmark is the same-row return. The two series are
    # therefore offset by one window -- confirm this is intended.
    result["benchmark"] = benchmark
    result["portfolio_returns"] = portfolio_returns
    result["portfolio_history"] = portfolio_history
    result["rolling_benchmark"] = (result["benchmark"] + 1).cumprod()
    result["rolling_portfolio_returns"] = (
        result["portfolio_returns"] + 1
    ).cumprod()

    return result