rosacastillo committed
Commit b77b6a6 · Parent: c7370ec

updating dashboards with new data

data/all_trades_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6256840b7a7704aa5618fd5a4fed41b9444bbf80ea1dcaae068715026c8d52b0
- size 8218375
+ oid sha256:6a9fa6fcb351f21334b3a79194c82d9b344be2549d7cd9398c3dea84646291dc
+ size 11576111
data/all_trades_profitability_bak.parquet ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ebdebf4884de2bb27a71c4d5144a6dad8f5f5c6f2675ac3b12993c4240b0e2de
+ size 7910144
data/daily_info.parquet CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f3d8ec77951dad3d522c90ea0009c15e5ab717c3f34624b4f0d205ad58cfa16e
- size 1054780
+ oid sha256:dafbb1cf7614f3040c27aeb6f130e19068e9ca56159ed21a584abd4a925db977
+ size 566350
data/error_by_markets.parquet CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9dff09a27b7b5ac4a527d679c446627c6ca4fb2653c6bc50e818d79e29e3c1be
- size 12928
+ oid sha256:026835121a261b46a391e397160f878413bd1f337903324bb1cd6ef88bc2d90c
+ size 12990
data/invalid_trades.parquet CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:099e999dc46d4a2d7086838f3645475aecf27fa88331a8b2d5fd4c9937f1ad81
- size 782151
+ oid sha256:8a47b04bfae501edb6051f0089117b51fc96cdb4eeb4ad5ef3ebfbcd7ee19590
+ size 755966
data/service_map.pkl CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:32d288a076f719a659159ffdb2bca3f132c3efe3f62ee0412c11e8094c36ffc8
- size 164076
+ oid sha256:da35d7c845c32bc90c5f298210458dfa01b8745bf95bd23b62a49c044ca06ac7
+ size 167913
data/tools_accuracy.csv CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bb5a70b32e6a7dbd75c7924a2fa887612bf7523a62f6710f2e2397cdc3664fa2
- size 1100
+ oid sha256:3a26951af78022999c8651278aff7a96ed271a6d325907096982b5333a7af7b7
+ size 1099
data/unknown_traders.parquet CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1633afc5d408263251ae5290e1f45972abaf0d3f0358ab880604de8a0baae559
- size 283140
+ oid sha256:cd2aa16659f93661ac3e9744f55ba17d921bf5f65c843a60f01232dd587254bf
+ size 365566
data/winning_df.parquet CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f394838074669231dc3f8dc46167bb05019ae12eb798933e99b2c2de9b9a2c1f
- size 12636
+ oid sha256:10f727eac679040aa5f1dc1856580010c315b3f72810cdbcaa6175f5a8343c0e
+ size 12528
scripts/active_traders.py ADDED
@@ -0,0 +1,91 @@
+ import pandas as pd
+ import pickle
+ from web3_utils import DATA_DIR, TMP_DIR
+ from staking import check_list_addresses
+
+
+ def get_trader_type(address: str, service_map: dict) -> str:
+     # check if it is part of any service id on the map
+     keys = service_map.keys()
+     last_key = max(keys)
+
+     for key, value in service_map.items():
+         if value["safe_address"].lower() == address.lower():
+             # found a service
+             return "Olas"
+
+     return "non_Olas"
+
+
+ def compute_active_traders_dataset():
+     """Function to prepare the active traders dataset"""
+     with open(DATA_DIR / "service_map.pkl", "rb") as f:
+         service_map = pickle.load(f)
+     # read tools info
+     tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
+     # rename the request_month_year_week
+     tools_df.rename(
+         columns={"request_month_year_week": "month_year_week"}, inplace=True
+     )
+     tool_traders = tools_df.trader_address.unique()
+     mapping = check_list_addresses(tool_traders)
+     # add trader type to tools_df
+     tools_df["trader_type"] = tools_df.trader_address.apply(lambda x: mapping[x])
+     tools_df = tools_df[
+         ["month_year_week", "market_creator", "trader_type", "trader_address"]
+     ]
+     tools_df.drop_duplicates(inplace=True)
+     # read trades info
+     all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
+
+     # read unknown info
+     unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
+     unknown_traders["creation_timestamp"] = pd.to_datetime(
+         unknown_traders["creation_timestamp"]
+     )
+     unknown_traders["creation_timestamp"] = unknown_traders[
+         "creation_timestamp"
+     ].dt.tz_convert("UTC")
+     unknown_traders = unknown_traders.sort_values(
+         by="creation_timestamp", ascending=True
+     )
+     unknown_traders["month_year_week"] = (
+         unknown_traders["creation_timestamp"]
+         .dt.to_period("W")
+         .dt.start_time.dt.strftime("%b-%d-%Y")
+     )
+     unknown_traders["trader_type"] = "unknown"
+     unknown_traders = unknown_traders[
+         ["month_year_week", "trader_type", "market_creator", "trader_address"]
+     ]
+     unknown_traders.drop_duplicates(inplace=True)
+
+     all_trades["creation_timestamp"] = pd.to_datetime(all_trades["creation_timestamp"])
+     all_trades["creation_timestamp"] = all_trades["creation_timestamp"].dt.tz_convert(
+         "UTC"
+     )
+     all_trades = all_trades.sort_values(by="creation_timestamp", ascending=True)
+     all_trades["month_year_week"] = (
+         all_trades["creation_timestamp"]
+         .dt.to_period("W")
+         .dt.start_time.dt.strftime("%b-%d-%Y")
+     )
+     all_trades["trader_type"] = all_trades["staking"].apply(
+         lambda x: "non_Olas" if x == "non_Olas" else "Olas"
+     )
+     all_trades = all_trades[
+         ["month_year_week", "market_creator", "trader_type", "trader_address"]
+     ]
+     all_trades.drop_duplicates(inplace=True)
+     filtered_traders_data = pd.concat([all_trades, tools_df], axis=0)
+     filtered_traders_data.drop_duplicates(inplace=True)
+     if len(unknown_traders) > 0:
+         # merge
+         filtered_traders_data = pd.concat(
+             [filtered_traders_data, unknown_traders], axis=0
+         )
+     filtered_traders_data.to_parquet(TMP_DIR / "active_traders.parquet")
+
+
+ if __name__ == "__main__":
+     compute_active_traders_dataset()
scripts/daily_data.py CHANGED
@@ -58,3 +58,4 @@ def prepare_live_metrics(

  if __name__ == "__main__":
      prepare_live_metrics()
+     # generate_retention_activity_file()
scripts/get_mech_info.py CHANGED
@@ -117,63 +117,6 @@ def update_json_files():
  merge_json_files("tools_info.json", "new_tools_info.json")


- def update_fpmmTrades_parquet(trades_filename: str) -> pd.DataFrame:
-     # Read old trades parquet file
-     try:
-         old_trades_df = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
-     except Exception as e:
-         print(f"Error reading old trades parquet file {e}")
-         return None
-
-     try:
-         new_trades_df = pd.read_parquet(DATA_DIR / trades_filename)
-     except Exception as e:
-         print(f"Error reading new trades parquet file {e}")
-         return None
-
-     # lowercase and strip creator_address
-     new_trades_df["trader_address"] = (
-         new_trades_df["trader_address"].str.lower().str.strip()
-     )
-     # ensure creationTimestamp compatibility
-     try:
-         new_trades_df["creationTimestamp"] = new_trades_df["creationTimestamp"].apply(
-             lambda x: transform_to_datetime(x)
-         )
-
-     except Exception as e:
-         print(f"Transformation not needed")
-     try:
-         old_trades_df["creationTimestamp"] = old_trades_df["creationTimestamp"].apply(
-             lambda x: transform_to_datetime(x)
-         )
-     except Exception as e:
-         print(f"Transformation not needed")
-
-     # merge two dataframes
-     merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)
-     # avoid numpy objects
-     merge_df["fpmm.arbitrationOccurred"] = merge_df["fpmm.arbitrationOccurred"].astype(
-         bool
-     )
-     merge_df["fpmm.isPendingArbitration"] = merge_df[
-         "fpmm.isPendingArbitration"
-     ].astype(bool)
-
-     # Check for duplicates
-     print(f"Initial length before removing duplicates in fpmmTrades= {len(merge_df)}")
-
-     # Remove duplicates
-     # fpmm.outcomes is a numpy array
-     merge_df.drop_duplicates("id", keep="last", inplace=True)
-     print(f"Final length after removing duplicates in fpmmTrades= {len(merge_df)}")
-
-     # save the parquet file
-     merge_df.to_parquet(TMP_DIR / "fpmmTrades.parquet", index=False)
-
-     return
-
-
  def update_all_trades_parquet(new_trades_df: pd.DataFrame) -> pd.DataFrame:
      # Read old all_trades parquet file
      try:
@@ -315,11 +258,11 @@ def get_mech_events_since_last_run(logger):
      # Read the latest date from stored data
      try:
          all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
-         latest_timestamp = max(all_trades.creation_timestamp)
-         # cutoff_date = "2024-12-22"
-         # latest_timestamp = pd.Timestamp(
-         #     datetime.strptime(cutoff_date, "%Y-%m-%d")
-         # ).tz_localize("UTC")
+         # latest_timestamp = max(all_trades.creation_timestamp)
+         cutoff_date = "2025-01-13"
+         latest_timestamp = pd.Timestamp(
+             datetime.strptime(cutoff_date, "%Y-%m-%d")
+         ).tz_localize("UTC")
          print(f"Updating data since {latest_timestamp}")
      except Exception:
          print("Error while reading the profitability parquet file")
scripts/markets.py CHANGED
@@ -357,7 +357,7 @@ def fpmmTrades_etl(
      # lowercase and strip creator_address
      fpmmTrades["trader_address"] = fpmmTrades["trader_address"].str.lower().str.strip()
      fpmmTrades.to_parquet(DATA_DIR / trades_filename, index=False)
-     return
+     return fpmmTrades


  def check_current_week_data(trades_df: pd.DataFrame) -> pd.DataFrame:
@@ -390,5 +390,75 @@ def check_current_week_data(trades_df: pd.DataFrame) -> pd.DataFrame:
      return trades_df


+ def update_fpmmTrades_parquet(trades_filename: str) -> pd.DataFrame:
+     # Read old trades parquet file
+     try:
+         old_trades_df = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
+     except Exception as e:
+         print(f"Error reading old trades parquet file {e}")
+         return None
+
+     try:
+         new_trades_df = pd.read_parquet(DATA_DIR / trades_filename)
+     except Exception as e:
+         print(f"Error reading new trades parquet file {e}")
+         return None
+
+     # lowercase and strip creator_address
+     new_trades_df["trader_address"] = (
+         new_trades_df["trader_address"].str.lower().str.strip()
+     )
+     # ensure creationTimestamp compatibility
+     try:
+         new_trades_df["creationTimestamp"] = new_trades_df["creationTimestamp"].apply(
+             lambda x: transform_to_datetime(x)
+         )
+
+     except Exception as e:
+         print(f"Transformation not needed")
+     try:
+         old_trades_df["creationTimestamp"] = old_trades_df["creationTimestamp"].apply(
+             lambda x: transform_to_datetime(x)
+         )
+     except Exception as e:
+         print(f"Transformation not needed")
+
+     # merge two dataframes
+     merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)
+     # avoid numpy objects
+     merge_df["fpmm.arbitrationOccurred"] = merge_df["fpmm.arbitrationOccurred"].astype(
+         bool
+     )
+     merge_df["fpmm.isPendingArbitration"] = merge_df[
+         "fpmm.isPendingArbitration"
+     ].astype(bool)
+
+     # Check for duplicates
+     print(f"Initial length before removing duplicates in fpmmTrades= {len(merge_df)}")
+
+     # Remove duplicates
+     # fpmm.outcomes is a numpy array
+     merge_df.drop_duplicates("id", keep="last", inplace=True)
+     print(f"Final length after removing duplicates in fpmmTrades= {len(merge_df)}")
+
+     # save the parquet file
+     merge_df.to_parquet(TMP_DIR / "fpmmTrades.parquet", index=False)
+
+     return
+
+
+ def update_fpmmTrades(from_date: str):
+
+     from_timestamp = pd.Timestamp(datetime.strptime(from_date, "%Y-%m-%d")).tz_localize(
+         "UTC"
+     )
+     fpmmTrades_etl(
+         trades_filename="new_fpmmTrades.parquet",
+         from_timestamp=int(from_timestamp.timestamp()),
+     )
+     update_fpmmTrades_parquet("new_fpmmTrades.parquet")
+
+
  if __name__ == "__main__":
-     etl("all_fpmms.parquet")
+     cutoff_date = "2025-01-13"
+     update_fpmmTrades(cutoff_date)
scripts/profitability.py CHANGED
@@ -37,6 +37,7 @@ from utils import (
      DATA_DIR,
      DEFAULT_MECH_FEE,
      TMP_DIR,
+     measure_execution_time,
  )
  from staking import label_trades_by_staking
  from nr_mech_calls import (
@@ -141,13 +142,16 @@ def _is_redeemed(user_json: dict[str, Any], fpmmTrade: dict[str, Any]) -> bool:
  def prepare_profitalibity_data(
      tools_filename: str,
      trades_filename: str,
+     tmp_dir: bool = False,
  ) -> pd.DataFrame:
      """Prepare data for profitalibity analysis."""

      # Check if tools.parquet is in the same directory
      try:
-         # tools parquet file
-         tools = pd.read_parquet(DATA_DIR / tools_filename)
+         if tmp_dir:
+             tools = pd.read_parquet(TMP_DIR / tools_filename)
+         else:
+             tools = pd.read_parquet(DATA_DIR / tools_filename)

          # make sure creator_address is in the columns
          assert "trader_address" in tools.columns, "trader_address column not found"
@@ -167,9 +171,12 @@ def prepare_profitalibity_data(
      # Check if fpmmTrades.parquet is in the same directory
      print("Reading the new trades file")
      try:
-         fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
+         if tmp_dir:
+             fpmmTrades = pd.read_parquet(TMP_DIR / trades_filename)
+         else:
+             fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
      except FileNotFoundError:
-         print(f"Error reading {trades_filename} file .")
+         print(f"Error reading {trades_filename} file.")

      # make sure trader_address is in the columns
      assert "trader_address" in fpmmTrades.columns, "trader_address column not found"
@@ -352,16 +359,18 @@ def analyse_all_traders(
      return all_creators_df


+ @measure_execution_time
  def run_profitability_analysis(
      tools_filename: str,
      trades_filename: str,
      merge: bool = False,
+     tmp_dir: bool = False,
  ):
      """Create all trades analysis."""
-
-     # load dfs from data folder for analysis
      print(f"Preparing data with {tools_filename} and {trades_filename}")
-     fpmmTrades = prepare_profitalibity_data(tools_filename, trades_filename)
+     fpmmTrades = prepare_profitalibity_data(
+         tools_filename, trades_filename, tmp_dir=tmp_dir
+     )

      if merge:
          update_tools_parquet(tools_filename)
@@ -391,6 +400,7 @@ def run_profitability_analysis(

      # debugging purposes
      all_trades_df.to_parquet(JSON_DATA_DIR / "all_trades_df.parquet", index=False)
+     # all_trades_df = pd.read_parquet(JSON_DATA_DIR / "all_trades_df.parquet")

      # filter invalid markets. Condition: "is_invalid" is True
      invalid_trades = all_trades_df.loc[all_trades_df["is_invalid"] == True]
@@ -512,12 +522,9 @@ def add_trades_profitability(trades_filename: str):


  if __name__ == "__main__":
-     # updating the whole fpmmTrades parquet file instead of just the new ones
-     # trade_mech_calls = pd.read_parquet(TMP_DIR / "result_df.parquet")
-     # fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
-     # fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
-     #     lambda x: transform_to_datetime(x)
-     # )
-     # all_trades_df = analyse_all_traders(fpmmTrades, trade_mech_calls)
-     # all_trades_df.to_parquet(TMP_DIR / "all_trades_df.parquet", index=False)
-     run_profitability_analysis("file1", "file2")
+     run_profitability_analysis(
+         tools_filename="tools.parquet",
+         trades_filename="fpmmTrades.parquet",
+         merge=False,
+         tmp_dir=True,
+     )
scripts/pull_data.py CHANGED
@@ -1,8 +1,13 @@
  import logging
  from datetime import datetime
  import pandas as pd
- from markets import etl as mkt_etl, DEFAULT_FILENAME as MARKETS_FILENAME, fpmmTrades_etl
- from tools import DEFAULT_FILENAME as TOOLS_FILENAME, generate_tools_file
+ from markets import (
+     etl as mkt_etl,
+     DEFAULT_FILENAME as MARKETS_FILENAME,
+     fpmmTrades_etl,
+     update_fpmmTrades_parquet,
+ )
+ from tools import generate_tools_file
  from profitability import run_profitability_analysis, add_trades_profitability
  from utils import (
      get_question,
@@ -15,7 +20,6 @@ from utils import (
  )
  from get_mech_info import (
      get_mech_events_since_last_run,
-     update_fpmmTrades_parquet,
      update_json_files,
  )
  from update_tools_accuracy import compute_tools_accuracy
@@ -131,7 +135,7 @@ def only_new_weekly_analysis():

      save_historical_data()
      try:
-         clean_old_data_from_parquet_files("2024-11-19")
+         clean_old_data_from_parquet_files("2024-11-26")
      except Exception as e:
          print("Error cleaning the oldest information from parquet files")
          print(f"reason = {e}")
scripts/staking.py CHANGED
@@ -1,7 +1,7 @@
  import json
  import sys
  from typing import Any, List
- from utils import RPC, DATA_DIR, TMP_DIR
+ from utils import RPC, DATA_DIR, TMP_DIR, JSON_DATA_DIR
  import requests
  from tqdm import tqdm
  from web3 import Web3
@@ -148,6 +148,7 @@ def update_service_map(start: int = 1, end: int = 2000):
      if partial_dict:
          service_map.update(partial_dict)

+     print(f"length of service map {len(service_map)}")
      with open(DATA_DIR / "service_map.pkl", "wb") as f:
          pickle.dump(service_map, f)
@@ -223,6 +224,7 @@ def generate_retention_activity_file():
      ]
      print(f"length of reduced tools before labeling = {len(reduced_tools_df)}")
      reduced_tools_df = label_trades_by_staking(trades_df=reduced_tools_df)
+     print(f"labeling of tools activity. {reduced_tools_df.staking.value_counts()}")
      print(f"length of reduced tools after labeling = {len(reduced_tools_df)}")
      reduced_tools_df = reduced_tools_df.sort_values(by="request_time", ascending=True)
      reduced_tools_df["month_year_week"] = (
@@ -246,17 +248,20 @@ def check_list_addresses(address_list: list):
      print(f"last service key = {last_key}")
      update_service_map(start=last_key)
      found_key = -1
+     trader_types = []
      for trader_address in address_list:
          for key, value in service_map.items():
              if value["safe_address"].lower() == trader_address.lower():
                  # found a service
                  found_key = key
                  mapping[trader_address] = "Olas"
+                 trader_types.append("Olas")
+                 break

          if found_key == -1:
              mapping[trader_address] = "non_Olas"
-     print("mapping")
-     print(mapping)
+             trader_types.append("non_Olas")
+     return mapping


  def check_service_map():
@@ -278,20 +283,20 @@ def check_service_map():

  if __name__ == "__main__":
      # create_service_map()
-     # trades_df = pd.read_parquet(TMP_DIR / "all_trades_df.parquet")
-     # trades_df = trades_df.loc[trades_df["is_invalid"] == False]
+     trades_df = pd.read_parquet(JSON_DATA_DIR / "all_trades_df.parquet")
+     trades_df = trades_df.loc[trades_df["is_invalid"] == False]

-     # trades_df = label_trades_by_staking(trades_df=trades_df, start=8)
-     # print(trades_df.staking.value_counts())
+     trades_df = label_trades_by_staking(trades_df=trades_df)
+     print(trades_df.staking.value_counts())
      # trades_df.to_parquet(TMP_DIR / "result_staking.parquet", index=False)
      # generate_retention_activity_file()
-     a_list = [
-         "0x027592700fafc4db3221bb662d7bdc7f546a2bb5",
-         "0x0845f4ad01a2f41da618848c7a9e56b64377965e",
-     ]
+     # a_list = [
+     #     "0x027592700fafc4db3221bb662d7bdc7f546a2bb5",
+     #     "0x0845f4ad01a2f41da618848c7a9e56b64377965e",
+     # ]
      # check_list_addresses(address_list=a_list)
      # update_service_map()
      # check_service_map()
-     unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
-     unknown_traders = label_trades_by_staking(trades_df=unknown_traders)
-     unknown_traders.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
+     # unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
+     # unknown_traders = label_trades_by_staking(trades_df=unknown_traders)
+     # unknown_traders.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)