import pandas as pd
import pickle
from web3_utils import DATA_DIR, TMP_DIR
from staking import check_list_addresses


def get_trader_type(address: str, service_map: dict) -> str:
    """Return "Olas" if the address is the safe address of any service in the map."""
    for service in service_map.values():
        if service["safe_address"].lower() == address.lower():
            # the address belongs to a registered Olas service
            return "Olas"

    return "non_Olas"


def compute_active_traders_dataset():
    """Prepare the active traders dataset and save it to TMP_DIR / "active_traders.parquet"."""
    with open(DATA_DIR / "service_map.pkl", "rb") as f:
        service_map = pickle.load(f)
    # read tools info
    tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
    # rename request_month_year_week to the shared month_year_week column name
    tools_df.rename(
        columns={"request_month_year_week": "month_year_week"}, inplace=True
    )
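    # classify each unique trader address; check_list_addresses returns an
    # address -> trader_type mapping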
    tool_traders = tools_df.trader_address.unique()
    mapping = check_list_addresses(tool_traders)
    # add trader type to tools_df
    tools_df["trader_type"] = tools_df.trader_address.apply(lambda x: mapping[x])
    tools_df = tools_df[
        ["month_year_week", "market_creator", "trader_type", "trader_address"]
    ]
    tools_df.drop_duplicates(inplace=True)
    # read trades info
    all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")

    # read unknown traders info
    unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
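    # normalize creation timestamps to UTC and sort chronologically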
    unknown_traders["creation_timestamp"] = pd.to_datetime(
        unknown_traders["creation_timestamp"]
    )
    unknown_traders["creation_timestamp"] = unknown_traders[
        "creation_timestamp"
    ].dt.tz_convert("UTC")
    unknown_traders = unknown_traders.sort_values(
        by="creation_timestamp", ascending=True
    )
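    # bucket each row into a weekly label: to_period("W") defaults to
    # Mon-Sun weeks, labeled by the week's start date, e.g. "Jan-06-2025"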
    unknown_traders["month_year_week"] = (
        unknown_traders["creation_timestamp"]
        .dt.to_period("W")
        .dt.start_time.dt.strftime("%b-%d-%Y")
    )
    unknown_traders["trader_type"] = "unknown"
    unknown_traders = unknown_traders[
        ["month_year_week", "trader_type", "market_creator", "trader_address"]
    ]
    unknown_traders.drop_duplicates(inplace=True)

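    # apply the same UTC normalization and weekly bucketing to the trades data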
    all_trades["creation_timestamp"] = pd.to_datetime(all_trades["creation_timestamp"])
    all_trades["creation_timestamp"] = all_trades["creation_timestamp"].dt.tz_convert(
        "UTC"
    )
    all_trades = all_trades.sort_values(by="creation_timestamp", ascending=True)
    all_trades["month_year_week"] = (
        all_trades["creation_timestamp"]
        .dt.to_period("W")
        .dt.start_time.dt.strftime("%b-%d-%Y")
    )
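    # collapse every staking category except "non_Olas" into a single "Olas" label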
    all_trades["trader_type"] = all_trades["staking"].apply(
        lambda x: "non_Olas" if x == "non_Olas" else "Olas"
    )
    all_trades = all_trades[
        ["month_year_week", "market_creator", "trader_type", "trader_address"]
    ]
    all_trades.drop_duplicates(inplace=True)
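    # active traders are the union of trade-level and tool-request-level activity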
    filtered_traders_data = pd.concat([all_trades, tools_df], axis=0)
    filtered_traders_data.drop_duplicates(inplace=True)
    if len(unknown_traders) > 0:
        # append unknown traders to the combined dataset
        filtered_traders_data = pd.concat(
            [filtered_traders_data, unknown_traders], axis=0
        )
    filtered_traders_data.to_parquet(TMP_DIR / "active_traders.parquet")


if __name__ == "__main__":
    compute_active_traders_dataset()