rosacastillo committed on
Commit 82a220c · 1 Parent(s): 6d1c646

cleaning old folders not used anymore here

app.py CHANGED
@@ -1,6 +1,5 @@
 import gradio as gr
 import pandas as pd
-import duckdb
 import gzip
 import shutil
 import os
@@ -98,6 +97,7 @@ def load_all_data():
 
     # Now read the decompressed parquet file
    df2 = pd.read_parquet(parquet_file_path)
+    os.remove(parquet_file_path)
 
     # tools_accuracy
     tools_accuracy = pd.read_csv(
@@ -135,59 +135,6 @@ def load_all_data():
     return df1, df2, df3, df4, df5, df6
 
 
-def get_all_data():
-    """
-    Get all data from the parquet files
-    """
-    logger.info("Getting all data")
-
-    con = duckdb.connect(":memory:")
-    query6 = f"""
-    SELECT *
-    FROM read_parquet('./data/winning_df.parquet')
-    """
-    df6 = con.execute(query6).fetchdf()
-
-    query5 = f"""
-    SELECT *
-    FROM read_parquet('./data/unknown_traders.parquet')
-    """
-    df5 = con.execute(query5).fetchdf()
-
-    # Query to fetch invalid trades data
-    query4 = f"""
-    SELECT *
-    FROM read_parquet('./data/invalid_trades.parquet')
-    """
-    df4 = con.execute(query4).fetchdf()
-
-    # Query to fetch tools accuracy data
-    query3 = f"""
-    SELECT *
-    FROM read_csv('./data/tools_accuracy.csv')
-    """
-    df3 = con.execute(query3).fetchdf()
-
-    # Query to fetch data from all_trades_profitability.parquet
-    query2 = f"""
-    SELECT *
-    FROM read_parquet('./data/all_trades_profitability.parquet')
-    """
-    df2 = con.execute(query2).fetchdf()
-    logger.info("Got all data from all_trades_profitability.parquet")
-
-    query1 = f"""
-    SELECT *
-    FROM read_parquet('./data/error_by_markets.parquet')
-    """
-    df1 = con.execute(query1).fetchdf()
-    logger.info("Got all data from error_by_markets.parquet")
-
-    con.close()
-
-    return df1, df2, df3, df4, df5, df6
-
-
 def prepare_data():
     """
     Prepare the data for the dashboard
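
The net effect of the app.py change: duckdb is dropped as a dependency, the dashboard inputs are read directly with pandas, and the parquet file produced by decompressing the gzipped archive is removed once it has been loaded. Since load_all_data() already returns df1–df6 (visible in the unchanged return statement), the duckdb-based get_all_data() duplicate can be deleted. Below is a minimal sketch of the resulting load step, assuming load_all_data() decompresses a gzipped parquet with gzip/shutil as the imports suggest; the .gz path and function name here are illustrative, not taken from the repo.

import gzip
import os
import shutil

import pandas as pd


def load_profitability_df(
    compressed_path: str = "./data/all_trades_profitability.parquet.gz",  # assumed name
    parquet_file_path: str = "./data/all_trades_profitability.parquet",
) -> pd.DataFrame:
    """Decompress the gzipped parquet, read it, then drop the temporary copy."""
    # Decompress the archive next to the original file
    with gzip.open(compressed_path, "rb") as f_in, open(parquet_file_path, "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)

    # Now read the decompressed parquet file
    df2 = pd.read_parquet(parquet_file_path)

    # Added in this commit: remove the decompressed copy once it is in memory
    os.remove(parquet_file_path)
    return df2
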
contracts/new_mech_abi.json DELETED
@@ -1,718 +0,0 @@
1
- [
2
- {
3
- "inputs": [
4
- {
5
- "internalType": "address",
6
- "name": "_token",
7
- "type": "address"
8
- },
9
- {
10
- "internalType": "uint256",
11
- "name": "_tokenId",
12
- "type": "uint256"
13
- },
14
- {
15
- "internalType": "uint256",
16
- "name": "_price",
17
- "type": "uint256"
18
- }
19
- ],
20
- "stateMutability": "nonpayable",
21
- "type": "constructor"
22
- },
23
- {
24
- "inputs": [
25
- {
26
- "internalType": "uint256",
27
- "name": "agentId",
28
- "type": "uint256"
29
- }
30
- ],
31
- "name": "AgentNotFound",
32
- "type": "error"
33
- },
34
- {
35
- "inputs": [
36
- {
37
- "internalType": "uint256",
38
- "name": "provided",
39
- "type": "uint256"
40
- },
41
- {
42
- "internalType": "uint256",
43
- "name": "expected",
44
- "type": "uint256"
45
- }
46
- ],
47
- "name": "NotEnoughPaid",
48
- "type": "error"
49
- },
50
- {
51
- "inputs": [
52
- {
53
- "internalType": "uint256",
54
- "name": "provided",
55
- "type": "uint256"
56
- },
57
- {
58
- "internalType": "uint256",
59
- "name": "max",
60
- "type": "uint256"
61
- }
62
- ],
63
- "name": "Overflow",
64
- "type": "error"
65
- },
66
- {
67
- "inputs": [
68
- {
69
- "internalType": "uint256",
70
- "name": "requestId",
71
- "type": "uint256"
72
- }
73
- ],
74
- "name": "RequestIdNotFound",
75
- "type": "error"
76
- },
77
- {
78
- "inputs": [],
79
- "name": "ZeroAddress",
80
- "type": "error"
81
- },
82
- {
83
- "anonymous": false,
84
- "inputs": [
85
- {
86
- "indexed": true,
87
- "internalType": "address",
88
- "name": "sender",
89
- "type": "address"
90
- },
91
- {
92
- "indexed": false,
93
- "internalType": "uint256",
94
- "name": "requestId",
95
- "type": "uint256"
96
- },
97
- {
98
- "indexed": false,
99
- "internalType": "bytes",
100
- "name": "data",
101
- "type": "bytes"
102
- }
103
- ],
104
- "name": "Deliver",
105
- "type": "event"
106
- },
107
- {
108
- "anonymous": false,
109
- "inputs": [
110
- {
111
- "indexed": false,
112
- "internalType": "uint256",
113
- "name": "price",
114
- "type": "uint256"
115
- }
116
- ],
117
- "name": "PriceUpdated",
118
- "type": "event"
119
- },
120
- {
121
- "anonymous": false,
122
- "inputs": [
123
- {
124
- "indexed": true,
125
- "internalType": "address",
126
- "name": "sender",
127
- "type": "address"
128
- },
129
- {
130
- "indexed": false,
131
- "internalType": "uint256",
132
- "name": "requestId",
133
- "type": "uint256"
134
- },
135
- {
136
- "indexed": false,
137
- "internalType": "bytes",
138
- "name": "data",
139
- "type": "bytes"
140
- }
141
- ],
142
- "name": "Request",
143
- "type": "event"
144
- },
145
- {
146
- "inputs": [
147
- {
148
- "internalType": "uint256",
149
- "name": "requestId",
150
- "type": "uint256"
151
- },
152
- {
153
- "internalType": "bytes",
154
- "name": "data",
155
- "type": "bytes"
156
- }
157
- ],
158
- "name": "deliver",
159
- "outputs": [],
160
- "stateMutability": "nonpayable",
161
- "type": "function"
162
- },
163
- {
164
- "inputs": [],
165
- "name": "entryPoint",
166
- "outputs": [
167
- {
168
- "internalType": "contract IEntryPoint",
169
- "name": "",
170
- "type": "address"
171
- }
172
- ],
173
- "stateMutability": "view",
174
- "type": "function"
175
- },
176
- {
177
- "inputs": [
178
- {
179
- "internalType": "address",
180
- "name": "to",
181
- "type": "address"
182
- },
183
- {
184
- "internalType": "uint256",
185
- "name": "value",
186
- "type": "uint256"
187
- },
188
- {
189
- "internalType": "bytes",
190
- "name": "data",
191
- "type": "bytes"
192
- },
193
- {
194
- "internalType": "enum Enum.Operation",
195
- "name": "operation",
196
- "type": "uint8"
197
- },
198
- {
199
- "internalType": "uint256",
200
- "name": "txGas",
201
- "type": "uint256"
202
- }
203
- ],
204
- "name": "exec",
205
- "outputs": [
206
- {
207
- "internalType": "bytes",
208
- "name": "returnData",
209
- "type": "bytes"
210
- }
211
- ],
212
- "stateMutability": "nonpayable",
213
- "type": "function"
214
- },
215
- {
216
- "inputs": [
217
- {
218
- "internalType": "address",
219
- "name": "account",
220
- "type": "address"
221
- },
222
- {
223
- "internalType": "bytes",
224
- "name": "data",
225
- "type": "bytes"
226
- }
227
- ],
228
- "name": "getRequestId",
229
- "outputs": [
230
- {
231
- "internalType": "uint256",
232
- "name": "requestId",
233
- "type": "uint256"
234
- }
235
- ],
236
- "stateMutability": "pure",
237
- "type": "function"
238
- },
239
- {
240
- "inputs": [
241
- {
242
- "internalType": "address",
243
- "name": "account",
244
- "type": "address"
245
- }
246
- ],
247
- "name": "getRequestsCount",
248
- "outputs": [
249
- {
250
- "internalType": "uint256",
251
- "name": "requestsCount",
252
- "type": "uint256"
253
- }
254
- ],
255
- "stateMutability": "view",
256
- "type": "function"
257
- },
258
- {
259
- "inputs": [
260
- {
261
- "internalType": "uint256",
262
- "name": "size",
263
- "type": "uint256"
264
- },
265
- {
266
- "internalType": "uint256",
267
- "name": "offset",
268
- "type": "uint256"
269
- }
270
- ],
271
- "name": "getUndeliveredRequestIds",
272
- "outputs": [
273
- {
274
- "internalType": "uint256[]",
275
- "name": "requestIds",
276
- "type": "uint256[]"
277
- }
278
- ],
279
- "stateMutability": "view",
280
- "type": "function"
281
- },
282
- {
283
- "inputs": [
284
- {
285
- "internalType": "address",
286
- "name": "signer",
287
- "type": "address"
288
- }
289
- ],
290
- "name": "isOperator",
291
- "outputs": [
292
- {
293
- "internalType": "bool",
294
- "name": "",
295
- "type": "bool"
296
- }
297
- ],
298
- "stateMutability": "view",
299
- "type": "function"
300
- },
301
- {
302
- "inputs": [
303
- {
304
- "internalType": "bytes32",
305
- "name": "hash",
306
- "type": "bytes32"
307
- },
308
- {
309
- "internalType": "bytes",
310
- "name": "signature",
311
- "type": "bytes"
312
- }
313
- ],
314
- "name": "isValidSignature",
315
- "outputs": [
316
- {
317
- "internalType": "bytes4",
318
- "name": "magicValue",
319
- "type": "bytes4"
320
- }
321
- ],
322
- "stateMutability": "view",
323
- "type": "function"
324
- },
325
- {
326
- "inputs": [
327
- {
328
- "internalType": "uint256",
329
- "name": "",
330
- "type": "uint256"
331
- },
332
- {
333
- "internalType": "uint256",
334
- "name": "",
335
- "type": "uint256"
336
- }
337
- ],
338
- "name": "mapRequestIds",
339
- "outputs": [
340
- {
341
- "internalType": "uint256",
342
- "name": "",
343
- "type": "uint256"
344
- }
345
- ],
346
- "stateMutability": "view",
347
- "type": "function"
348
- },
349
- {
350
- "inputs": [
351
- {
352
- "internalType": "address",
353
- "name": "",
354
- "type": "address"
355
- }
356
- ],
357
- "name": "mapRequestsCounts",
358
- "outputs": [
359
- {
360
- "internalType": "uint256",
361
- "name": "",
362
- "type": "uint256"
363
- }
364
- ],
365
- "stateMutability": "view",
366
- "type": "function"
367
- },
368
- {
369
- "inputs": [],
370
- "name": "nonce",
371
- "outputs": [
372
- {
373
- "internalType": "uint256",
374
- "name": "",
375
- "type": "uint256"
376
- }
377
- ],
378
- "stateMutability": "view",
379
- "type": "function"
380
- },
381
- {
382
- "inputs": [],
383
- "name": "numUndeliveredRequests",
384
- "outputs": [
385
- {
386
- "internalType": "uint256",
387
- "name": "",
388
- "type": "uint256"
389
- }
390
- ],
391
- "stateMutability": "view",
392
- "type": "function"
393
- },
394
- {
395
- "inputs": [
396
- {
397
- "internalType": "address",
398
- "name": "",
399
- "type": "address"
400
- },
401
- {
402
- "internalType": "address",
403
- "name": "",
404
- "type": "address"
405
- },
406
- {
407
- "internalType": "uint256[]",
408
- "name": "",
409
- "type": "uint256[]"
410
- },
411
- {
412
- "internalType": "uint256[]",
413
- "name": "",
414
- "type": "uint256[]"
415
- },
416
- {
417
- "internalType": "bytes",
418
- "name": "",
419
- "type": "bytes"
420
- }
421
- ],
422
- "name": "onERC1155BatchReceived",
423
- "outputs": [
424
- {
425
- "internalType": "bytes4",
426
- "name": "",
427
- "type": "bytes4"
428
- }
429
- ],
430
- "stateMutability": "pure",
431
- "type": "function"
432
- },
433
- {
434
- "inputs": [
435
- {
436
- "internalType": "address",
437
- "name": "",
438
- "type": "address"
439
- },
440
- {
441
- "internalType": "address",
442
- "name": "",
443
- "type": "address"
444
- },
445
- {
446
- "internalType": "uint256",
447
- "name": "",
448
- "type": "uint256"
449
- },
450
- {
451
- "internalType": "uint256",
452
- "name": "",
453
- "type": "uint256"
454
- },
455
- {
456
- "internalType": "bytes",
457
- "name": "",
458
- "type": "bytes"
459
- }
460
- ],
461
- "name": "onERC1155Received",
462
- "outputs": [
463
- {
464
- "internalType": "bytes4",
465
- "name": "",
466
- "type": "bytes4"
467
- }
468
- ],
469
- "stateMutability": "pure",
470
- "type": "function"
471
- },
472
- {
473
- "inputs": [
474
- {
475
- "internalType": "address",
476
- "name": "",
477
- "type": "address"
478
- },
479
- {
480
- "internalType": "address",
481
- "name": "",
482
- "type": "address"
483
- },
484
- {
485
- "internalType": "uint256",
486
- "name": "",
487
- "type": "uint256"
488
- },
489
- {
490
- "internalType": "bytes",
491
- "name": "",
492
- "type": "bytes"
493
- }
494
- ],
495
- "name": "onERC721Received",
496
- "outputs": [
497
- {
498
- "internalType": "bytes4",
499
- "name": "",
500
- "type": "bytes4"
501
- }
502
- ],
503
- "stateMutability": "pure",
504
- "type": "function"
505
- },
506
- {
507
- "inputs": [],
508
- "name": "price",
509
- "outputs": [
510
- {
511
- "internalType": "uint256",
512
- "name": "",
513
- "type": "uint256"
514
- }
515
- ],
516
- "stateMutability": "view",
517
- "type": "function"
518
- },
519
- {
520
- "inputs": [
521
- {
522
- "internalType": "bytes",
523
- "name": "data",
524
- "type": "bytes"
525
- }
526
- ],
527
- "name": "request",
528
- "outputs": [
529
- {
530
- "internalType": "uint256",
531
- "name": "requestId",
532
- "type": "uint256"
533
- }
534
- ],
535
- "stateMutability": "payable",
536
- "type": "function"
537
- },
538
- {
539
- "inputs": [
540
- {
541
- "internalType": "uint256",
542
- "name": "newPrice",
543
- "type": "uint256"
544
- }
545
- ],
546
- "name": "setPrice",
547
- "outputs": [],
548
- "stateMutability": "nonpayable",
549
- "type": "function"
550
- },
551
- {
552
- "inputs": [
553
- {
554
- "internalType": "bytes",
555
- "name": "initParams",
556
- "type": "bytes"
557
- }
558
- ],
559
- "name": "setUp",
560
- "outputs": [],
561
- "stateMutability": "nonpayable",
562
- "type": "function"
563
- },
564
- {
565
- "inputs": [],
566
- "name": "token",
567
- "outputs": [
568
- {
569
- "internalType": "contract IERC721",
570
- "name": "",
571
- "type": "address"
572
- }
573
- ],
574
- "stateMutability": "view",
575
- "type": "function"
576
- },
577
- {
578
- "inputs": [],
579
- "name": "tokenId",
580
- "outputs": [
581
- {
582
- "internalType": "uint256",
583
- "name": "",
584
- "type": "uint256"
585
- }
586
- ],
587
- "stateMutability": "view",
588
- "type": "function"
589
- },
590
- {
591
- "inputs": [
592
- {
593
- "internalType": "address",
594
- "name": "",
595
- "type": "address"
596
- },
597
- {
598
- "internalType": "address",
599
- "name": "",
600
- "type": "address"
601
- },
602
- {
603
- "internalType": "address",
604
- "name": "",
605
- "type": "address"
606
- },
607
- {
608
- "internalType": "uint256",
609
- "name": "",
610
- "type": "uint256"
611
- },
612
- {
613
- "internalType": "bytes",
614
- "name": "",
615
- "type": "bytes"
616
- },
617
- {
618
- "internalType": "bytes",
619
- "name": "",
620
- "type": "bytes"
621
- }
622
- ],
623
- "name": "tokensReceived",
624
- "outputs": [],
625
- "stateMutability": "pure",
626
- "type": "function"
627
- },
628
- {
629
- "inputs": [
630
- {
631
- "components": [
632
- {
633
- "internalType": "address",
634
- "name": "sender",
635
- "type": "address"
636
- },
637
- {
638
- "internalType": "uint256",
639
- "name": "nonce",
640
- "type": "uint256"
641
- },
642
- {
643
- "internalType": "bytes",
644
- "name": "initCode",
645
- "type": "bytes"
646
- },
647
- {
648
- "internalType": "bytes",
649
- "name": "callData",
650
- "type": "bytes"
651
- },
652
- {
653
- "internalType": "uint256",
654
- "name": "callGasLimit",
655
- "type": "uint256"
656
- },
657
- {
658
- "internalType": "uint256",
659
- "name": "verificationGasLimit",
660
- "type": "uint256"
661
- },
662
- {
663
- "internalType": "uint256",
664
- "name": "preVerificationGas",
665
- "type": "uint256"
666
- },
667
- {
668
- "internalType": "uint256",
669
- "name": "maxFeePerGas",
670
- "type": "uint256"
671
- },
672
- {
673
- "internalType": "uint256",
674
- "name": "maxPriorityFeePerGas",
675
- "type": "uint256"
676
- },
677
- {
678
- "internalType": "bytes",
679
- "name": "paymasterAndData",
680
- "type": "bytes"
681
- },
682
- {
683
- "internalType": "bytes",
684
- "name": "signature",
685
- "type": "bytes"
686
- }
687
- ],
688
- "internalType": "struct UserOperation",
689
- "name": "userOp",
690
- "type": "tuple"
691
- },
692
- {
693
- "internalType": "bytes32",
694
- "name": "userOpHash",
695
- "type": "bytes32"
696
- },
697
- {
698
- "internalType": "uint256",
699
- "name": "missingAccountFunds",
700
- "type": "uint256"
701
- }
702
- ],
703
- "name": "validateUserOp",
704
- "outputs": [
705
- {
706
- "internalType": "uint256",
707
- "name": "validationData",
708
- "type": "uint256"
709
- }
710
- ],
711
- "stateMutability": "nonpayable",
712
- "type": "function"
713
- },
714
- {
715
- "stateMutability": "payable",
716
- "type": "receive"
717
- }
718
- ]
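
For reference, an ABI file like the deleted contracts/new_mech_abi.json is normally consumed by instantiating a contract object (for example with web3.py) and calling the view functions the ABI declares, such as price or numUndeliveredRequests. The sketch below is illustrative only: the RPC endpoint and mech address are placeholders, web3.py is an assumed client, and this commit removes the file precisely because nothing in the repo reads it anymore.

import json

from web3 import Web3

# Placeholder values -- not taken from the repository
GNOSIS_RPC = "https://rpc.gnosischain.com"
MECH_ADDRESS = "0x0000000000000000000000000000000000000000"

w3 = Web3(Web3.HTTPProvider(GNOSIS_RPC))

# Load the ABI that used to live in contracts/new_mech_abi.json
with open("contracts/new_mech_abi.json") as f:
    mech_abi = json.load(f)

mech = w3.eth.contract(address=MECH_ADDRESS, abi=mech_abi)

# Two of the view functions declared in the ABI above
print(mech.functions.price().call())
print(mech.functions.numUndeliveredRequests().call())
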
contracts/old_mech_abi.json DELETED
@@ -1,605 +0,0 @@
1
- [
2
- {
3
- "inputs": [
4
- {
5
- "internalType": "address",
6
- "name": "_token",
7
- "type": "address"
8
- },
9
- {
10
- "internalType": "uint256",
11
- "name": "_tokenId",
12
- "type": "uint256"
13
- },
14
- {
15
- "internalType": "uint256",
16
- "name": "_price",
17
- "type": "uint256"
18
- }
19
- ],
20
- "stateMutability": "nonpayable",
21
- "type": "constructor"
22
- },
23
- {
24
- "inputs": [
25
- {
26
- "internalType": "uint256",
27
- "name": "agentId",
28
- "type": "uint256"
29
- }
30
- ],
31
- "name": "AgentNotFound",
32
- "type": "error"
33
- },
34
- {
35
- "inputs": [
36
- {
37
- "internalType": "uint256",
38
- "name": "provided",
39
- "type": "uint256"
40
- },
41
- {
42
- "internalType": "uint256",
43
- "name": "expected",
44
- "type": "uint256"
45
- }
46
- ],
47
- "name": "NotEnoughPaid",
48
- "type": "error"
49
- },
50
- {
51
- "inputs": [],
52
- "name": "ZeroAddress",
53
- "type": "error"
54
- },
55
- {
56
- "anonymous": false,
57
- "inputs": [
58
- {
59
- "indexed": false,
60
- "internalType": "uint256",
61
- "name": "requestId",
62
- "type": "uint256"
63
- },
64
- {
65
- "indexed": false,
66
- "internalType": "bytes",
67
- "name": "data",
68
- "type": "bytes"
69
- }
70
- ],
71
- "name": "Deliver",
72
- "type": "event"
73
- },
74
- {
75
- "anonymous": false,
76
- "inputs": [
77
- {
78
- "indexed": true,
79
- "internalType": "address",
80
- "name": "sender",
81
- "type": "address"
82
- },
83
- {
84
- "indexed": false,
85
- "internalType": "bytes32",
86
- "name": "taskHash",
87
- "type": "bytes32"
88
- }
89
- ],
90
- "name": "Perform",
91
- "type": "event"
92
- },
93
- {
94
- "anonymous": false,
95
- "inputs": [
96
- {
97
- "indexed": false,
98
- "internalType": "uint256",
99
- "name": "price",
100
- "type": "uint256"
101
- }
102
- ],
103
- "name": "PriceUpdated",
104
- "type": "event"
105
- },
106
- {
107
- "anonymous": false,
108
- "inputs": [
109
- {
110
- "indexed": true,
111
- "internalType": "address",
112
- "name": "sender",
113
- "type": "address"
114
- },
115
- {
116
- "indexed": false,
117
- "internalType": "uint256",
118
- "name": "requestId",
119
- "type": "uint256"
120
- },
121
- {
122
- "indexed": false,
123
- "internalType": "bytes",
124
- "name": "data",
125
- "type": "bytes"
126
- }
127
- ],
128
- "name": "Request",
129
- "type": "event"
130
- },
131
- {
132
- "inputs": [
133
- {
134
- "internalType": "uint256",
135
- "name": "requestId",
136
- "type": "uint256"
137
- },
138
- {
139
- "internalType": "bytes",
140
- "name": "data",
141
- "type": "bytes"
142
- }
143
- ],
144
- "name": "deliver",
145
- "outputs": [],
146
- "stateMutability": "nonpayable",
147
- "type": "function"
148
- },
149
- {
150
- "inputs": [],
151
- "name": "entryPoint",
152
- "outputs": [
153
- {
154
- "internalType": "contract IEntryPoint",
155
- "name": "",
156
- "type": "address"
157
- }
158
- ],
159
- "stateMutability": "view",
160
- "type": "function"
161
- },
162
- {
163
- "inputs": [
164
- {
165
- "internalType": "address",
166
- "name": "to",
167
- "type": "address"
168
- },
169
- {
170
- "internalType": "uint256",
171
- "name": "value",
172
- "type": "uint256"
173
- },
174
- {
175
- "internalType": "bytes",
176
- "name": "data",
177
- "type": "bytes"
178
- },
179
- {
180
- "internalType": "enum Enum.Operation",
181
- "name": "operation",
182
- "type": "uint8"
183
- },
184
- {
185
- "internalType": "uint256",
186
- "name": "txGas",
187
- "type": "uint256"
188
- }
189
- ],
190
- "name": "exec",
191
- "outputs": [
192
- {
193
- "internalType": "bytes",
194
- "name": "returnData",
195
- "type": "bytes"
196
- }
197
- ],
198
- "stateMutability": "nonpayable",
199
- "type": "function"
200
- },
201
- {
202
- "inputs": [
203
- {
204
- "internalType": "address",
205
- "name": "account",
206
- "type": "address"
207
- },
208
- {
209
- "internalType": "bytes",
210
- "name": "data",
211
- "type": "bytes"
212
- }
213
- ],
214
- "name": "getRequestId",
215
- "outputs": [
216
- {
217
- "internalType": "uint256",
218
- "name": "requestId",
219
- "type": "uint256"
220
- }
221
- ],
222
- "stateMutability": "pure",
223
- "type": "function"
224
- },
225
- {
226
- "inputs": [
227
- {
228
- "internalType": "address",
229
- "name": "signer",
230
- "type": "address"
231
- }
232
- ],
233
- "name": "isOperator",
234
- "outputs": [
235
- {
236
- "internalType": "bool",
237
- "name": "",
238
- "type": "bool"
239
- }
240
- ],
241
- "stateMutability": "view",
242
- "type": "function"
243
- },
244
- {
245
- "inputs": [
246
- {
247
- "internalType": "bytes32",
248
- "name": "hash",
249
- "type": "bytes32"
250
- },
251
- {
252
- "internalType": "bytes",
253
- "name": "signature",
254
- "type": "bytes"
255
- }
256
- ],
257
- "name": "isValidSignature",
258
- "outputs": [
259
- {
260
- "internalType": "bytes4",
261
- "name": "magicValue",
262
- "type": "bytes4"
263
- }
264
- ],
265
- "stateMutability": "view",
266
- "type": "function"
267
- },
268
- {
269
- "inputs": [],
270
- "name": "nonce",
271
- "outputs": [
272
- {
273
- "internalType": "uint256",
274
- "name": "",
275
- "type": "uint256"
276
- }
277
- ],
278
- "stateMutability": "view",
279
- "type": "function"
280
- },
281
- {
282
- "inputs": [
283
- {
284
- "internalType": "address",
285
- "name": "",
286
- "type": "address"
287
- },
288
- {
289
- "internalType": "address",
290
- "name": "",
291
- "type": "address"
292
- },
293
- {
294
- "internalType": "uint256[]",
295
- "name": "",
296
- "type": "uint256[]"
297
- },
298
- {
299
- "internalType": "uint256[]",
300
- "name": "",
301
- "type": "uint256[]"
302
- },
303
- {
304
- "internalType": "bytes",
305
- "name": "",
306
- "type": "bytes"
307
- }
308
- ],
309
- "name": "onERC1155BatchReceived",
310
- "outputs": [
311
- {
312
- "internalType": "bytes4",
313
- "name": "",
314
- "type": "bytes4"
315
- }
316
- ],
317
- "stateMutability": "pure",
318
- "type": "function"
319
- },
320
- {
321
- "inputs": [
322
- {
323
- "internalType": "address",
324
- "name": "",
325
- "type": "address"
326
- },
327
- {
328
- "internalType": "address",
329
- "name": "",
330
- "type": "address"
331
- },
332
- {
333
- "internalType": "uint256",
334
- "name": "",
335
- "type": "uint256"
336
- },
337
- {
338
- "internalType": "uint256",
339
- "name": "",
340
- "type": "uint256"
341
- },
342
- {
343
- "internalType": "bytes",
344
- "name": "",
345
- "type": "bytes"
346
- }
347
- ],
348
- "name": "onERC1155Received",
349
- "outputs": [
350
- {
351
- "internalType": "bytes4",
352
- "name": "",
353
- "type": "bytes4"
354
- }
355
- ],
356
- "stateMutability": "pure",
357
- "type": "function"
358
- },
359
- {
360
- "inputs": [
361
- {
362
- "internalType": "address",
363
- "name": "",
364
- "type": "address"
365
- },
366
- {
367
- "internalType": "address",
368
- "name": "",
369
- "type": "address"
370
- },
371
- {
372
- "internalType": "uint256",
373
- "name": "",
374
- "type": "uint256"
375
- },
376
- {
377
- "internalType": "bytes",
378
- "name": "",
379
- "type": "bytes"
380
- }
381
- ],
382
- "name": "onERC721Received",
383
- "outputs": [
384
- {
385
- "internalType": "bytes4",
386
- "name": "",
387
- "type": "bytes4"
388
- }
389
- ],
390
- "stateMutability": "pure",
391
- "type": "function"
392
- },
393
- {
394
- "inputs": [],
395
- "name": "price",
396
- "outputs": [
397
- {
398
- "internalType": "uint256",
399
- "name": "",
400
- "type": "uint256"
401
- }
402
- ],
403
- "stateMutability": "view",
404
- "type": "function"
405
- },
406
- {
407
- "inputs": [
408
- {
409
- "internalType": "bytes",
410
- "name": "data",
411
- "type": "bytes"
412
- }
413
- ],
414
- "name": "request",
415
- "outputs": [
416
- {
417
- "internalType": "uint256",
418
- "name": "requestId",
419
- "type": "uint256"
420
- }
421
- ],
422
- "stateMutability": "payable",
423
- "type": "function"
424
- },
425
- {
426
- "inputs": [
427
- {
428
- "internalType": "uint256",
429
- "name": "newPrice",
430
- "type": "uint256"
431
- }
432
- ],
433
- "name": "setPrice",
434
- "outputs": [],
435
- "stateMutability": "nonpayable",
436
- "type": "function"
437
- },
438
- {
439
- "inputs": [
440
- {
441
- "internalType": "bytes",
442
- "name": "initParams",
443
- "type": "bytes"
444
- }
445
- ],
446
- "name": "setUp",
447
- "outputs": [],
448
- "stateMutability": "nonpayable",
449
- "type": "function"
450
- },
451
- {
452
- "inputs": [],
453
- "name": "token",
454
- "outputs": [
455
- {
456
- "internalType": "contract IERC721",
457
- "name": "",
458
- "type": "address"
459
- }
460
- ],
461
- "stateMutability": "view",
462
- "type": "function"
463
- },
464
- {
465
- "inputs": [],
466
- "name": "tokenId",
467
- "outputs": [
468
- {
469
- "internalType": "uint256",
470
- "name": "",
471
- "type": "uint256"
472
- }
473
- ],
474
- "stateMutability": "view",
475
- "type": "function"
476
- },
477
- {
478
- "inputs": [
479
- {
480
- "internalType": "address",
481
- "name": "",
482
- "type": "address"
483
- },
484
- {
485
- "internalType": "address",
486
- "name": "",
487
- "type": "address"
488
- },
489
- {
490
- "internalType": "address",
491
- "name": "",
492
- "type": "address"
493
- },
494
- {
495
- "internalType": "uint256",
496
- "name": "",
497
- "type": "uint256"
498
- },
499
- {
500
- "internalType": "bytes",
501
- "name": "",
502
- "type": "bytes"
503
- },
504
- {
505
- "internalType": "bytes",
506
- "name": "",
507
- "type": "bytes"
508
- }
509
- ],
510
- "name": "tokensReceived",
511
- "outputs": [],
512
- "stateMutability": "pure",
513
- "type": "function"
514
- },
515
- {
516
- "inputs": [
517
- {
518
- "components": [
519
- {
520
- "internalType": "address",
521
- "name": "sender",
522
- "type": "address"
523
- },
524
- {
525
- "internalType": "uint256",
526
- "name": "nonce",
527
- "type": "uint256"
528
- },
529
- {
530
- "internalType": "bytes",
531
- "name": "initCode",
532
- "type": "bytes"
533
- },
534
- {
535
- "internalType": "bytes",
536
- "name": "callData",
537
- "type": "bytes"
538
- },
539
- {
540
- "internalType": "uint256",
541
- "name": "callGasLimit",
542
- "type": "uint256"
543
- },
544
- {
545
- "internalType": "uint256",
546
- "name": "verificationGasLimit",
547
- "type": "uint256"
548
- },
549
- {
550
- "internalType": "uint256",
551
- "name": "preVerificationGas",
552
- "type": "uint256"
553
- },
554
- {
555
- "internalType": "uint256",
556
- "name": "maxFeePerGas",
557
- "type": "uint256"
558
- },
559
- {
560
- "internalType": "uint256",
561
- "name": "maxPriorityFeePerGas",
562
- "type": "uint256"
563
- },
564
- {
565
- "internalType": "bytes",
566
- "name": "paymasterAndData",
567
- "type": "bytes"
568
- },
569
- {
570
- "internalType": "bytes",
571
- "name": "signature",
572
- "type": "bytes"
573
- }
574
- ],
575
- "internalType": "struct UserOperation",
576
- "name": "userOp",
577
- "type": "tuple"
578
- },
579
- {
580
- "internalType": "bytes32",
581
- "name": "userOpHash",
582
- "type": "bytes32"
583
- },
584
- {
585
- "internalType": "uint256",
586
- "name": "missingAccountFunds",
587
- "type": "uint256"
588
- }
589
- ],
590
- "name": "validateUserOp",
591
- "outputs": [
592
- {
593
- "internalType": "uint256",
594
- "name": "validationData",
595
- "type": "uint256"
596
- }
597
- ],
598
- "stateMutability": "nonpayable",
599
- "type": "function"
600
- },
601
- {
602
- "stateMutability": "payable",
603
- "type": "receive"
604
- }
605
- ]
notebooks/staking.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/tools_accuracy.ipynb DELETED
@@ -1,1216 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "import pandas as pd\n",
10
- "import matplotlib.pyplot as plt\n",
11
- "import seaborn as sns\n",
12
- "import json\n",
13
- "sns.set_style(\"darkgrid\")"
14
- ]
15
- },
16
- {
17
- "cell_type": "code",
18
- "execution_count": 2,
19
- "metadata": {},
20
- "outputs": [],
21
- "source": [
22
- "tools = pd.read_parquet('../data/tools.parquet')"
23
- ]
24
- },
25
- {
26
- "cell_type": "code",
27
- "execution_count": 3,
28
- "metadata": {},
29
- "outputs": [
30
- {
31
- "data": {
32
- "text/plain": [
33
- "Timestamp('2024-12-10 07:50:55+0000', tz='UTC')"
34
- ]
35
- },
36
- "execution_count": 3,
37
- "metadata": {},
38
- "output_type": "execute_result"
39
- }
40
- ],
41
- "source": [
42
- "max(tools.request_time)"
43
- ]
44
- },
45
- {
46
- "cell_type": "code",
47
- "execution_count": 4,
48
- "metadata": {},
49
- "outputs": [
50
- {
51
- "data": {
52
- "text/plain": [
53
- "Timestamp('2024-10-13 00:00:30+0000', tz='UTC')"
54
- ]
55
- },
56
- "execution_count": 4,
57
- "metadata": {},
58
- "output_type": "execute_result"
59
- }
60
- ],
61
- "source": [
62
- "min(tools.request_time)"
63
- ]
64
- },
65
- {
66
- "cell_type": "code",
67
- "execution_count": 4,
68
- "metadata": {},
69
- "outputs": [
70
- {
71
- "name": "stdout",
72
- "output_type": "stream",
73
- "text": [
74
- "<class 'pandas.core.frame.DataFrame'>\n",
75
- "RangeIndex: 358454 entries, 0 to 358453\n",
76
- "Data columns (total 23 columns):\n",
77
- " # Column Non-Null Count Dtype \n",
78
- "--- ------ -------------- ----- \n",
79
- " 0 request_id 358454 non-null object \n",
80
- " 1 request_block 358454 non-null object \n",
81
- " 2 prompt_request 358454 non-null object \n",
82
- " 3 tool 358454 non-null object \n",
83
- " 4 nonce 358454 non-null object \n",
84
- " 5 trader_address 358454 non-null object \n",
85
- " 6 deliver_block 358454 non-null object \n",
86
- " 7 error 358454 non-null int64 \n",
87
- " 8 error_message 3772 non-null object \n",
88
- " 9 prompt_response 357509 non-null object \n",
89
- " 10 mech_address 357601 non-null object \n",
90
- " 11 p_yes 354682 non-null float64\n",
91
- " 12 p_no 354682 non-null float64\n",
92
- " 13 confidence 354682 non-null float64\n",
93
- " 14 info_utility 354682 non-null float64\n",
94
- " 15 vote 261707 non-null object \n",
95
- " 16 win_probability 354682 non-null float64\n",
96
- " 17 market_creator 358454 non-null object \n",
97
- " 18 title 358454 non-null object \n",
98
- " 19 currentAnswer 287126 non-null object \n",
99
- " 20 request_time 358454 non-null object \n",
100
- " 21 request_month_year 358454 non-null object \n",
101
- " 22 request_month_year_week 358454 non-null object \n",
102
- "dtypes: float64(5), int64(1), object(17)\n",
103
- "memory usage: 62.9+ MB\n"
104
- ]
105
- }
106
- ],
107
- "source": [
108
- "tools.info()"
109
- ]
110
- },
111
- {
112
- "cell_type": "code",
113
- "execution_count": 5,
114
- "metadata": {},
115
- "outputs": [
116
- {
117
- "data": {
118
- "text/plain": [
119
- "Index(['request_id', 'request_block', 'prompt_request', 'tool', 'nonce',\n",
120
- " 'trader_address', 'deliver_block', 'error', 'error_message',\n",
121
- " 'prompt_response', 'mech_address', 'p_yes', 'p_no', 'confidence',\n",
122
- " 'info_utility', 'vote', 'win_probability', 'market_creator', 'title',\n",
123
- " 'currentAnswer', 'request_time', 'request_month_year',\n",
124
- " 'request_month_year_week'],\n",
125
- " dtype='object')"
126
- ]
127
- },
128
- "execution_count": 5,
129
- "metadata": {},
130
- "output_type": "execute_result"
131
- }
132
- ],
133
- "source": [
134
- "tools.columns"
135
- ]
136
- },
137
- {
138
- "cell_type": "code",
139
- "execution_count": 8,
140
- "metadata": {},
141
- "outputs": [
142
- {
143
- "data": {
144
- "text/plain": [
145
- "str"
146
- ]
147
- },
148
- "execution_count": 8,
149
- "metadata": {},
150
- "output_type": "execute_result"
151
- }
152
- ],
153
- "source": [
154
- "type(tools.iloc[0].request_time)"
155
- ]
156
- },
157
- {
158
- "cell_type": "code",
159
- "execution_count": 23,
160
- "metadata": {},
161
- "outputs": [
162
- {
163
- "data": {
164
- "text/plain": [
165
- "dict"
166
- ]
167
- },
168
- "execution_count": 23,
169
- "metadata": {},
170
- "output_type": "execute_result"
171
- }
172
- ],
173
- "source": [
174
- "import pickle\n",
175
- "t_map = pickle.load(open(\"../data/t_map.pkl\", \"rb\"))\n",
176
- "type(t_map)"
177
- ]
178
- },
179
- {
180
- "cell_type": "code",
181
- "execution_count": 24,
182
- "metadata": {},
183
- "outputs": [
184
- {
185
- "data": {
186
- "text/plain": [
187
- "475329"
188
- ]
189
- },
190
- "execution_count": 24,
191
- "metadata": {},
192
- "output_type": "execute_result"
193
- }
194
- ],
195
- "source": [
196
- "len(t_map)"
197
- ]
198
- },
199
- {
200
- "cell_type": "code",
201
- "execution_count": 25,
202
- "metadata": {},
203
- "outputs": [
204
- {
205
- "name": "stdout",
206
- "output_type": "stream",
207
- "text": [
208
- "29624577\n"
209
- ]
210
- }
211
- ],
212
- "source": [
213
- "for item in t_map:\n",
214
- " print(item)\n",
215
- " break"
216
- ]
217
- },
218
- {
219
- "cell_type": "code",
220
- "execution_count": 27,
221
- "metadata": {},
222
- "outputs": [
223
- {
224
- "data": {
225
- "text/plain": [
226
- "'2023-07-12 11:58:40'"
227
- ]
228
- },
229
- "execution_count": 27,
230
- "metadata": {},
231
- "output_type": "execute_result"
232
- }
233
- ],
234
- "source": [
235
- "min(t_map.values())"
236
- ]
237
- },
238
- {
239
- "cell_type": "code",
240
- "execution_count": 26,
241
- "metadata": {},
242
- "outputs": [
243
- {
244
- "data": {
245
- "text/plain": [
246
- "'2023-08-24 16:04:50'"
247
- ]
248
- },
249
- "execution_count": 26,
250
- "metadata": {},
251
- "output_type": "execute_result"
252
- }
253
- ],
254
- "source": [
255
- "t_map[29624577]"
256
- ]
257
- },
258
- {
259
- "cell_type": "code",
260
- "execution_count": 28,
261
- "metadata": {},
262
- "outputs": [
263
- {
264
- "data": {
265
- "text/plain": [
266
- "'2024-09-04 07:32:15'"
267
- ]
268
- },
269
- "execution_count": 28,
270
- "metadata": {},
271
- "output_type": "execute_result"
272
- }
273
- ],
274
- "source": [
275
- "max(t_map.values())"
276
- ]
277
- },
278
- {
279
- "cell_type": "code",
280
- "execution_count": 21,
281
- "metadata": {},
282
- "outputs": [
283
- {
284
- "data": {
285
- "text/plain": [
286
- "request_id 1155886186807766696223563218518399229072982679...\n",
287
- "request_block 35356121\n",
288
- "prompt_request Please take over the role of a Data Scientist ...\n",
289
- "tool prediction-request-rag-claude\n",
290
- "nonce 2c4c8c5c-afcf-4e28-a09a-aa2bae3f5444\n",
291
- "trader_address 0x22335c348afa4eae4cc6d2158c1ac259aaaecdfe\n",
292
- "deliver_block 35356134\n",
293
- "error 0\n",
294
- "error_message None\n",
295
- "prompt_response \\nYou will be evaluating the likelihood of an ...\n",
296
- "mech_address 0x5e1d1eb61e1164d5a50b28c575da73a29595dff7\n",
297
- "p_yes 0.6\n",
298
- "p_no 0.4\n",
299
- "confidence 0.7\n",
300
- "info_utility 0.7\n",
301
- "vote Yes\n",
302
- "win_probability 0.6\n",
303
- "title Will there be an increase in the wasp populati...\n",
304
- "currentAnswer Yes\n",
305
- "Name: 0, dtype: object"
306
- ]
307
- },
308
- "execution_count": 21,
309
- "metadata": {},
310
- "output_type": "execute_result"
311
- }
312
- ],
313
- "source": [
314
- "tools.iloc[0]"
315
- ]
316
- },
317
- {
318
- "cell_type": "code",
319
- "execution_count": 18,
320
- "metadata": {},
321
- "outputs": [
322
- {
323
- "data": {
324
- "text/plain": [
325
- "vote\n",
326
- "Yes 55881\n",
327
- "No 51741\n",
328
- "Name: count, dtype: int64"
329
- ]
330
- },
331
- "execution_count": 18,
332
- "metadata": {},
333
- "output_type": "execute_result"
334
- }
335
- ],
336
- "source": [
337
- "tools.vote.value_counts()"
338
- ]
339
- },
340
- {
341
- "cell_type": "code",
342
- "execution_count": 25,
343
- "metadata": {},
344
- "outputs": [
345
- {
346
- "name": "stdout",
347
- "output_type": "stream",
348
- "text": [
349
- "<class 'pandas.core.frame.DataFrame'>\n",
350
- "RangeIndex: 132150 entries, 0 to 132149\n",
351
- "Data columns (total 22 columns):\n",
352
- " # Column Non-Null Count Dtype \n",
353
- "--- ------ -------------- ----- \n",
354
- " 0 request_id 132150 non-null object \n",
355
- " 1 request_block 132150 non-null int64 \n",
356
- " 2 prompt_request 132150 non-null object \n",
357
- " 3 tool 132150 non-null object \n",
358
- " 4 nonce 132150 non-null object \n",
359
- " 5 trader_address 132150 non-null object \n",
360
- " 6 deliver_block 132150 non-null int64 \n",
361
- " 7 error 132149 non-null float64\n",
362
- " 8 error_message 9702 non-null object \n",
363
- " 9 prompt_response 132060 non-null object \n",
364
- " 10 mech_address 132150 non-null object \n",
365
- " 11 p_yes 122447 non-null float64\n",
366
- " 12 p_no 122447 non-null float64\n",
367
- " 13 confidence 122447 non-null float64\n",
368
- " 14 info_utility 122447 non-null float64\n",
369
- " 15 vote 102396 non-null object \n",
370
- " 16 win_probability 122447 non-null float64\n",
371
- " 17 title 124256 non-null object \n",
372
- " 18 currentAnswer 85763 non-null object \n",
373
- " 19 request_time 132150 non-null object \n",
374
- " 20 request_month_year 132150 non-null object \n",
375
- " 21 request_month_year_week 132150 non-null object \n",
376
- "dtypes: float64(6), int64(2), object(14)\n",
377
- "memory usage: 22.2+ MB\n"
378
- ]
379
- }
380
- ],
381
- "source": [
382
- "tools.info()"
383
- ]
384
- },
385
- {
386
- "cell_type": "code",
387
- "execution_count": 31,
388
- "metadata": {},
389
- "outputs": [],
390
- "source": [
391
- "fpmms = pd.read_parquet('../data/fpmms.parquet')"
392
- ]
393
- },
394
- {
395
- "cell_type": "code",
396
- "execution_count": 6,
397
- "metadata": {},
398
- "outputs": [
399
- {
400
- "data": {
401
- "text/html": [
402
- "<div>\n",
403
- "<style scoped>\n",
404
- " .dataframe tbody tr th:only-of-type {\n",
405
- " vertical-align: middle;\n",
406
- " }\n",
407
- "\n",
408
- " .dataframe tbody tr th {\n",
409
- " vertical-align: top;\n",
410
- " }\n",
411
- "\n",
412
- " .dataframe thead th {\n",
413
- " text-align: right;\n",
414
- " }\n",
415
- "</style>\n",
416
- "<table border=\"1\" class=\"dataframe\">\n",
417
- " <thead>\n",
418
- " <tr style=\"text-align: right;\">\n",
419
- " <th></th>\n",
420
- " <th>currentAnswer</th>\n",
421
- " <th>id</th>\n",
422
- " <th>title</th>\n",
423
- " </tr>\n",
424
- " </thead>\n",
425
- " <tbody>\n",
426
- " <tr>\n",
427
- " <th>0</th>\n",
428
- " <td>No</td>\n",
429
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
430
- " <td>Will the first floating offshore wind research...</td>\n",
431
- " </tr>\n",
432
- " <tr>\n",
433
- " <th>1</th>\n",
434
- " <td>No</td>\n",
435
- " <td>0x0020d13c89140b47e10db54cbd53852b90bc1391</td>\n",
436
- " <td>Will the Francis Scott Key Bridge in Baltimore...</td>\n",
437
- " </tr>\n",
438
- " <tr>\n",
439
- " <th>2</th>\n",
440
- " <td>No</td>\n",
441
- " <td>0x003ae5e007cc38b3f86b0ed7c82f938a1285ac07</td>\n",
442
- " <td>Will FC Saarbrucken reach the final of the Ger...</td>\n",
443
- " </tr>\n",
444
- " <tr>\n",
445
- " <th>3</th>\n",
446
- " <td>Yes</td>\n",
447
- " <td>0x004c8d4c619dc6b9caa940f5ea7ef699ae85359c</td>\n",
448
- " <td>Will the pro-life activists convicted for 'con...</td>\n",
449
- " </tr>\n",
450
- " <tr>\n",
451
- " <th>4</th>\n",
452
- " <td>Yes</td>\n",
453
- " <td>0x005e3f7a90585acbec807425a750fbba1d0c2b5c</td>\n",
454
- " <td>Will Apple announce the release of a new M4 ch...</td>\n",
455
- " </tr>\n",
456
- " </tbody>\n",
457
- "</table>\n",
458
- "</div>"
459
- ],
460
- "text/plain": [
461
- " currentAnswer id \\\n",
462
- "0 No 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 \n",
463
- "1 No 0x0020d13c89140b47e10db54cbd53852b90bc1391 \n",
464
- "2 No 0x003ae5e007cc38b3f86b0ed7c82f938a1285ac07 \n",
465
- "3 Yes 0x004c8d4c619dc6b9caa940f5ea7ef699ae85359c \n",
466
- "4 Yes 0x005e3f7a90585acbec807425a750fbba1d0c2b5c \n",
467
- "\n",
468
- " title \n",
469
- "0 Will the first floating offshore wind research... \n",
470
- "1 Will the Francis Scott Key Bridge in Baltimore... \n",
471
- "2 Will FC Saarbrucken reach the final of the Ger... \n",
472
- "3 Will the pro-life activists convicted for 'con... \n",
473
- "4 Will Apple announce the release of a new M4 ch... "
474
- ]
475
- },
476
- "execution_count": 6,
477
- "metadata": {},
478
- "output_type": "execute_result"
479
- }
480
- ],
481
- "source": [
482
- "fpmms.head()"
483
- ]
484
- },
485
- {
486
- "cell_type": "code",
487
- "execution_count": 7,
488
- "metadata": {},
489
- "outputs": [
490
- {
491
- "data": {
492
- "text/plain": [
493
- "4251"
494
- ]
495
- },
496
- "execution_count": 7,
497
- "metadata": {},
498
- "output_type": "execute_result"
499
- }
500
- ],
501
- "source": [
502
- "len(fpmms)"
503
- ]
504
- },
505
- {
506
- "cell_type": "code",
507
- "execution_count": 18,
508
- "metadata": {},
509
- "outputs": [
510
- {
511
- "name": "stdout",
512
- "output_type": "stream",
513
- "text": [
514
- "<class 'pandas.core.frame.DataFrame'>\n",
515
- "RangeIndex: 18035 entries, 0 to 18034\n",
516
- "Data columns (total 20 columns):\n",
517
- " # Column Non-Null Count Dtype \n",
518
- "--- ------ -------------- ----- \n",
519
- " 0 trader_address 18035 non-null object \n",
520
- " 1 market_creator 18035 non-null object \n",
521
- " 2 trade_id 18035 non-null object \n",
522
- " 3 creation_timestamp 18035 non-null datetime64[ns, UTC]\n",
523
- " 4 title 18035 non-null object \n",
524
- " 5 market_status 18035 non-null object \n",
525
- " 6 collateral_amount 18035 non-null float64 \n",
526
- " 7 outcome_index 18035 non-null object \n",
527
- " 8 trade_fee_amount 18035 non-null float64 \n",
528
- " 9 outcomes_tokens_traded 18035 non-null float64 \n",
529
- " 10 current_answer 18035 non-null int64 \n",
530
- " 11 is_invalid 18035 non-null bool \n",
531
- " 12 winning_trade 18035 non-null bool \n",
532
- " 13 earnings 18035 non-null float64 \n",
533
- " 14 redeemed 18035 non-null bool \n",
534
- " 15 redeemed_amount 18035 non-null float64 \n",
535
- " 16 num_mech_calls 18035 non-null int64 \n",
536
- " 17 mech_fee_amount 18035 non-null float64 \n",
537
- " 18 net_earnings 18035 non-null float64 \n",
538
- " 19 roi 18035 non-null float64 \n",
539
- "dtypes: bool(3), datetime64[ns, UTC](1), float64(8), int64(2), object(6)\n",
540
- "memory usage: 2.4+ MB\n"
541
- ]
542
- }
543
- ],
544
- "source": [
545
- "prof = pd.read_parquet('../data/all_trades_profitability.parquet')\n",
546
- "prof.info()"
547
- ]
548
- },
549
- {
550
- "cell_type": "code",
551
- "execution_count": 20,
552
- "metadata": {},
553
- "outputs": [
554
- {
555
- "data": {
556
- "text/plain": [
557
- "market_creator\n",
558
- "quickstart 16775\n",
559
- "pearl 1260\n",
560
- "Name: count, dtype: int64"
561
- ]
562
- },
563
- "execution_count": 20,
564
- "metadata": {},
565
- "output_type": "execute_result"
566
- }
567
- ],
568
- "source": [
569
- "prof.market_creator.value_counts()"
570
- ]
571
- },
572
- {
573
- "cell_type": "code",
574
- "execution_count": 12,
575
- "metadata": {},
576
- "outputs": [],
577
- "source": [
578
- "trades = pd.read_parquet(\"../data/fpmmTrades.parquet\")"
579
- ]
580
- },
581
- {
582
- "cell_type": "code",
583
- "execution_count": 22,
584
- "metadata": {},
585
- "outputs": [
586
- {
587
- "data": {
588
- "text/html": [
589
- "<div>\n",
590
- "<style scoped>\n",
591
- " .dataframe tbody tr th:only-of-type {\n",
592
- " vertical-align: middle;\n",
593
- " }\n",
594
- "\n",
595
- " .dataframe tbody tr th {\n",
596
- " vertical-align: top;\n",
597
- " }\n",
598
- "\n",
599
- " .dataframe thead th {\n",
600
- " text-align: right;\n",
601
- " }\n",
602
- "</style>\n",
603
- "<table border=\"1\" class=\"dataframe\">\n",
604
- " <thead>\n",
605
- " <tr style=\"text-align: right;\">\n",
606
- " <th></th>\n",
607
- " <th>request_id</th>\n",
608
- " <th>request_block</th>\n",
609
- " <th>prompt_request</th>\n",
610
- " <th>tool</th>\n",
611
- " <th>nonce</th>\n",
612
- " <th>trader_address</th>\n",
613
- " <th>deliver_block</th>\n",
614
- " <th>error</th>\n",
615
- " <th>error_message</th>\n",
616
- " <th>prompt_response</th>\n",
617
- " <th>mech_address</th>\n",
618
- " <th>p_yes</th>\n",
619
- " <th>p_no</th>\n",
620
- " <th>confidence</th>\n",
621
- " <th>info_utility</th>\n",
622
- " <th>vote</th>\n",
623
- " <th>win_probability</th>\n",
624
- " </tr>\n",
625
- " </thead>\n",
626
- " <tbody>\n",
627
- " </tbody>\n",
628
- "</table>\n",
629
- "</div>"
630
- ],
631
- "text/plain": [
632
- "Empty DataFrame\n",
633
- "Columns: [request_id, request_block, prompt_request, tool, nonce, trader_address, deliver_block, error, error_message, prompt_response, mech_address, p_yes, p_no, confidence, info_utility, vote, win_probability]\n",
634
- "Index: []"
635
- ]
636
- },
637
- "execution_count": 22,
638
- "metadata": {},
639
- "output_type": "execute_result"
640
- }
641
- ],
642
- "source": [
643
- "tools.head()"
644
- ]
645
- },
646
- {
647
- "cell_type": "code",
648
- "execution_count": 13,
649
- "metadata": {},
650
- "outputs": [
651
- {
652
- "data": {
653
- "text/html": [
654
- "<div>\n",
655
- "<style scoped>\n",
656
- " .dataframe tbody tr th:only-of-type {\n",
657
- " vertical-align: middle;\n",
658
- " }\n",
659
- "\n",
660
- " .dataframe tbody tr th {\n",
661
- " vertical-align: top;\n",
662
- " }\n",
663
- "\n",
664
- " .dataframe thead th {\n",
665
- " text-align: right;\n",
666
- " }\n",
667
- "</style>\n",
668
- "<table border=\"1\" class=\"dataframe\">\n",
669
- " <thead>\n",
670
- " <tr style=\"text-align: right;\">\n",
671
- " <th></th>\n",
672
- " <th>collateralAmount</th>\n",
673
- " <th>collateralAmountUSD</th>\n",
674
- " <th>collateralToken</th>\n",
675
- " <th>creationTimestamp</th>\n",
676
- " <th>trader_address</th>\n",
677
- " <th>feeAmount</th>\n",
678
- " <th>id</th>\n",
679
- " <th>oldOutcomeTokenMarginalPrice</th>\n",
680
- " <th>outcomeIndex</th>\n",
681
- " <th>outcomeTokenMarginalPrice</th>\n",
682
- " <th>...</th>\n",
683
- " <th>market_creator</th>\n",
684
- " <th>fpmm.answerFinalizedTimestamp</th>\n",
685
- " <th>fpmm.arbitrationOccurred</th>\n",
686
- " <th>fpmm.currentAnswer</th>\n",
687
- " <th>fpmm.id</th>\n",
688
- " <th>fpmm.isPendingArbitration</th>\n",
689
- " <th>fpmm.openingTimestamp</th>\n",
690
- " <th>fpmm.outcomes</th>\n",
691
- " <th>fpmm.title</th>\n",
692
- " <th>fpmm.condition.id</th>\n",
693
- " </tr>\n",
694
- " </thead>\n",
695
- " <tbody>\n",
696
- " <tr>\n",
697
- " <th>0</th>\n",
698
- " <td>450426474650738688</td>\n",
699
- " <td>0.4504269694034145716308073094168006</td>\n",
700
- " <td>0xe91d153e0b41518a2ce8dd3d7944fa863463a97d</td>\n",
701
- " <td>1724553455</td>\n",
702
- " <td>0x022b36c50b85b8ae7addfb8a35d76c59d5814834</td>\n",
703
- " <td>9008529493014773</td>\n",
704
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x02...</td>\n",
705
- " <td>0.592785210609610270634125335572129</td>\n",
706
- " <td>1</td>\n",
707
- " <td>0.6171295391012242250994586583534301</td>\n",
708
- " <td>...</td>\n",
709
- " <td>quickstart</td>\n",
710
- " <td>1725071760</td>\n",
711
- " <td>False</td>\n",
712
- " <td>0x00000000000000000000000000000000000000000000...</td>\n",
713
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
714
- " <td>False</td>\n",
715
- " <td>1724976000</td>\n",
716
- " <td>[Yes, No]</td>\n",
717
- " <td>Will the first floating offshore wind research...</td>\n",
718
- " <td>0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1...</td>\n",
719
- " </tr>\n",
720
- " <tr>\n",
721
- " <th>1</th>\n",
722
- " <td>610163214546941400</td>\n",
723
- " <td>0.6101636232215150135654007337015298</td>\n",
724
- " <td>0xe91d153e0b41518a2ce8dd3d7944fa863463a97d</td>\n",
725
- " <td>1724811940</td>\n",
726
- " <td>0x034c4ad84f7ac6638bf19300d5bbe7d9b981e736</td>\n",
727
- " <td>12203264290938828</td>\n",
728
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x03...</td>\n",
729
- " <td>0.842992636523755061934822129394812</td>\n",
730
- " <td>1</td>\n",
731
- " <td>0.8523396372892128845826889719620915</td>\n",
732
- " <td>...</td>\n",
733
- " <td>quickstart</td>\n",
734
- " <td>1725071760</td>\n",
735
- " <td>False</td>\n",
736
- " <td>0x00000000000000000000000000000000000000000000...</td>\n",
737
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
738
- " <td>False</td>\n",
739
- " <td>1724976000</td>\n",
740
- " <td>[Yes, No]</td>\n",
741
- " <td>Will the first floating offshore wind research...</td>\n",
742
- " <td>0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1...</td>\n",
743
- " </tr>\n",
744
- " <tr>\n",
745
- " <th>2</th>\n",
746
- " <td>789065092332460672</td>\n",
747
- " <td>0.7890644120527324071908793822796086</td>\n",
748
- " <td>0xe91d153e0b41518a2ce8dd3d7944fa863463a97d</td>\n",
749
- " <td>1724815755</td>\n",
750
- " <td>0x09e9d42a029e8b0c2df3871709a762117a681d92</td>\n",
751
- " <td>15781301846649213</td>\n",
752
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x09...</td>\n",
753
- " <td>0.7983775743712442891104598770339028</td>\n",
754
- " <td>1</td>\n",
755
- " <td>0.8152123711444691659642000374025623</td>\n",
756
- " <td>...</td>\n",
757
- " <td>quickstart</td>\n",
758
- " <td>1725071760</td>\n",
759
- " <td>False</td>\n",
760
- " <td>0x00000000000000000000000000000000000000000000...</td>\n",
761
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
762
- " <td>False</td>\n",
763
- " <td>1724976000</td>\n",
764
- " <td>[Yes, No]</td>\n",
765
- " <td>Will the first floating offshore wind research...</td>\n",
766
- " <td>0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1...</td>\n",
767
- " </tr>\n",
768
- " <tr>\n",
769
- " <th>3</th>\n",
770
- " <td>1000000000000000000</td>\n",
771
- " <td>1.000000605383660329048491794939126</td>\n",
772
- " <td>0xe91d153e0b41518a2ce8dd3d7944fa863463a97d</td>\n",
773
- " <td>1724546620</td>\n",
774
- " <td>0x09e9d42a029e8b0c2df3871709a762117a681d92</td>\n",
775
- " <td>20000000000000000</td>\n",
776
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x09...</td>\n",
777
- " <td>0.5110745907733438805447072252622708</td>\n",
778
- " <td>1</td>\n",
779
- " <td>0.5746805204222762335911904727318937</td>\n",
780
- " <td>...</td>\n",
781
- " <td>quickstart</td>\n",
782
- " <td>1725071760</td>\n",
783
- " <td>False</td>\n",
784
- " <td>0x00000000000000000000000000000000000000000000...</td>\n",
785
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
786
- " <td>False</td>\n",
787
- " <td>1724976000</td>\n",
788
- " <td>[Yes, No]</td>\n",
789
- " <td>Will the first floating offshore wind research...</td>\n",
790
- " <td>0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1...</td>\n",
791
- " </tr>\n",
792
- " <tr>\n",
793
- " <th>4</th>\n",
794
- " <td>100000000000000000</td>\n",
795
- " <td>0.1000004271262862419547394646567906</td>\n",
796
- " <td>0xe91d153e0b41518a2ce8dd3d7944fa863463a97d</td>\n",
797
- " <td>1724771260</td>\n",
798
- " <td>0x0d049dcaece0ecb6fc81a460da7bcc2a4785d6e5</td>\n",
799
- " <td>2000000000000000</td>\n",
800
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x0d...</td>\n",
801
- " <td>0.2713968218662319388988681987389408</td>\n",
802
- " <td>0</td>\n",
803
- " <td>0.2804586217805511523845593360379658</td>\n",
804
- " <td>...</td>\n",
805
- " <td>quickstart</td>\n",
806
- " <td>1725071760</td>\n",
807
- " <td>False</td>\n",
808
- " <td>0x00000000000000000000000000000000000000000000...</td>\n",
809
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
810
- " <td>False</td>\n",
811
- " <td>1724976000</td>\n",
812
- " <td>[Yes, No]</td>\n",
813
- " <td>Will the first floating offshore wind research...</td>\n",
814
- " <td>0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1...</td>\n",
815
- " </tr>\n",
816
- " </tbody>\n",
817
- "</table>\n",
818
- "<p>5 rows × 24 columns</p>\n",
819
- "</div>"
820
- ],
821
- "text/plain": [
822
- " collateralAmount collateralAmountUSD \\\n",
823
- "0 450426474650738688 0.4504269694034145716308073094168006 \n",
824
- "1 610163214546941400 0.6101636232215150135654007337015298 \n",
825
- "2 789065092332460672 0.7890644120527324071908793822796086 \n",
826
- "3 1000000000000000000 1.000000605383660329048491794939126 \n",
827
- "4 100000000000000000 0.1000004271262862419547394646567906 \n",
828
- "\n",
829
- " collateralToken creationTimestamp \\\n",
830
- "0 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1724553455 \n",
831
- "1 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1724811940 \n",
832
- "2 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1724815755 \n",
833
- "3 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1724546620 \n",
834
- "4 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1724771260 \n",
835
- "\n",
836
- " trader_address feeAmount \\\n",
837
- "0 0x022b36c50b85b8ae7addfb8a35d76c59d5814834 9008529493014773 \n",
838
- "1 0x034c4ad84f7ac6638bf19300d5bbe7d9b981e736 12203264290938828 \n",
839
- "2 0x09e9d42a029e8b0c2df3871709a762117a681d92 15781301846649213 \n",
840
- "3 0x09e9d42a029e8b0c2df3871709a762117a681d92 20000000000000000 \n",
841
- "4 0x0d049dcaece0ecb6fc81a460da7bcc2a4785d6e5 2000000000000000 \n",
842
- "\n",
843
- " id \\\n",
844
- "0 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x02... \n",
845
- "1 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x03... \n",
846
- "2 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x09... \n",
847
- "3 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x09... \n",
848
- "4 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x0d... \n",
849
- "\n",
850
- " oldOutcomeTokenMarginalPrice outcomeIndex \\\n",
851
- "0 0.592785210609610270634125335572129 1 \n",
852
- "1 0.842992636523755061934822129394812 1 \n",
853
- "2 0.7983775743712442891104598770339028 1 \n",
854
- "3 0.5110745907733438805447072252622708 1 \n",
855
- "4 0.2713968218662319388988681987389408 0 \n",
856
- "\n",
857
- " outcomeTokenMarginalPrice ... market_creator \\\n",
858
- "0 0.6171295391012242250994586583534301 ... quickstart \n",
859
- "1 0.8523396372892128845826889719620915 ... quickstart \n",
860
- "2 0.8152123711444691659642000374025623 ... quickstart \n",
861
- "3 0.5746805204222762335911904727318937 ... quickstart \n",
862
- "4 0.2804586217805511523845593360379658 ... quickstart \n",
863
- "\n",
864
- " fpmm.answerFinalizedTimestamp fpmm.arbitrationOccurred \\\n",
865
- "0 1725071760 False \n",
866
- "1 1725071760 False \n",
867
- "2 1725071760 False \n",
868
- "3 1725071760 False \n",
869
- "4 1725071760 False \n",
870
- "\n",
871
- " fpmm.currentAnswer \\\n",
872
- "0 0x00000000000000000000000000000000000000000000... \n",
873
- "1 0x00000000000000000000000000000000000000000000... \n",
874
- "2 0x00000000000000000000000000000000000000000000... \n",
875
- "3 0x00000000000000000000000000000000000000000000... \n",
876
- "4 0x00000000000000000000000000000000000000000000... \n",
877
- "\n",
878
- " fpmm.id fpmm.isPendingArbitration \\\n",
879
- "0 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 False \n",
880
- "1 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 False \n",
881
- "2 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 False \n",
882
- "3 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 False \n",
883
- "4 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 False \n",
884
- "\n",
885
- " fpmm.openingTimestamp fpmm.outcomes \\\n",
886
- "0 1724976000 [Yes, No] \n",
887
- "1 1724976000 [Yes, No] \n",
888
- "2 1724976000 [Yes, No] \n",
889
- "3 1724976000 [Yes, No] \n",
890
- "4 1724976000 [Yes, No] \n",
891
- "\n",
892
- " fpmm.title \\\n",
893
- "0 Will the first floating offshore wind research... \n",
894
- "1 Will the first floating offshore wind research... \n",
895
- "2 Will the first floating offshore wind research... \n",
896
- "3 Will the first floating offshore wind research... \n",
897
- "4 Will the first floating offshore wind research... \n",
898
- "\n",
899
- " fpmm.condition.id \n",
900
- "0 0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1... \n",
901
- "1 0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1... \n",
902
- "2 0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1... \n",
903
- "3 0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1... \n",
904
- "4 0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1... \n",
905
- "\n",
906
- "[5 rows x 24 columns]"
907
- ]
908
- },
909
- "execution_count": 13,
910
- "metadata": {},
911
- "output_type": "execute_result"
912
- }
913
- ],
914
- "source": [
915
- "trades.head()"
916
- ]
917
- },
918
- {
919
- "cell_type": "code",
920
- "execution_count": 14,
921
- "metadata": {},
922
- "outputs": [
923
- {
924
- "name": "stdout",
925
- "output_type": "stream",
926
- "text": [
927
- "<class 'pandas.core.frame.DataFrame'>\n",
928
- "RangeIndex: 23455 entries, 0 to 23454\n",
929
- "Data columns (total 24 columns):\n",
930
- " # Column Non-Null Count Dtype \n",
931
- "--- ------ -------------- ----- \n",
932
- " 0 collateralAmount 23455 non-null object\n",
933
- " 1 collateralAmountUSD 23455 non-null object\n",
934
- " 2 collateralToken 23455 non-null object\n",
935
- " 3 creationTimestamp 23455 non-null object\n",
936
- " 4 trader_address 23455 non-null object\n",
937
- " 5 feeAmount 23455 non-null object\n",
938
- " 6 id 23455 non-null object\n",
939
- " 7 oldOutcomeTokenMarginalPrice 23455 non-null object\n",
940
- " 8 outcomeIndex 23455 non-null object\n",
941
- " 9 outcomeTokenMarginalPrice 23455 non-null object\n",
942
- " 10 outcomeTokensTraded 23455 non-null object\n",
943
- " 11 title 23455 non-null object\n",
944
- " 12 transactionHash 23455 non-null object\n",
945
- " 13 type 23455 non-null object\n",
946
- " 14 market_creator 23455 non-null object\n",
947
- " 15 fpmm.answerFinalizedTimestamp 21489 non-null object\n",
948
- " 16 fpmm.arbitrationOccurred 23455 non-null bool \n",
949
- " 17 fpmm.currentAnswer 21489 non-null object\n",
950
- " 18 fpmm.id 23455 non-null object\n",
951
- " 19 fpmm.isPendingArbitration 23455 non-null bool \n",
952
- " 20 fpmm.openingTimestamp 23455 non-null object\n",
953
- " 21 fpmm.outcomes 23455 non-null object\n",
954
- " 22 fpmm.title 23455 non-null object\n",
955
- " 23 fpmm.condition.id 23455 non-null object\n",
956
- "dtypes: bool(2), object(22)\n",
957
- "memory usage: 4.0+ MB\n"
958
- ]
959
- }
960
- ],
961
- "source": [
962
- "trades.info()"
963
- ]
964
- },
965
- {
966
- "cell_type": "code",
967
- "execution_count": 15,
968
- "metadata": {},
969
- "outputs": [
970
- {
971
- "data": {
972
- "text/plain": [
973
- "market_creator\n",
974
- "quickstart 21852\n",
975
- "pearl 1603\n",
976
- "Name: count, dtype: int64"
977
- ]
978
- },
979
- "execution_count": 15,
980
- "metadata": {},
981
- "output_type": "execute_result"
982
- }
983
- ],
984
- "source": [
985
- "trades.market_creator.value_counts()"
986
- ]
987
- },
988
- {
989
- "cell_type": "code",
990
- "execution_count": 33,
991
- "metadata": {},
992
- "outputs": [],
993
- "source": [
994
- "fpmms_trades = pd.read_parquet('../data/fpmmTrades.parquet')"
995
- ]
996
- },
997
- {
998
- "cell_type": "markdown",
999
- "metadata": {},
1000
- "source": [
1001
- "## Adding market creator info"
1002
- ]
1003
- },
1004
- {
1005
- "cell_type": "code",
1006
- "execution_count": 35,
1007
- "metadata": {},
1008
- "outputs": [
1009
- {
1010
- "name": "stdout",
1011
- "output_type": "stream",
1012
- "text": [
1013
- "ERROR getting the market creator of 0xae7b042059b179dcac3169657fac111d7925f8dd\n",
1014
- "ERROR getting the market creator of 0x347e4ef0ff34cf39d1c7e08bc07c68c41a4836d6\n",
1015
- "ERROR getting the market creator of 0x8e03d3a7a3dfd930f73577ba4204deadf12b33f3\n",
1016
- "ERROR getting the market creator of 0x2a0b461417fa0ae8bbeb28ed265fbe3944772435\n",
1017
- "ERROR getting the market creator of 0x8069ea45a18910fa493a6a736438971b0e59ec9b\n",
1018
- "ERROR getting the market creator of 0xdf91eac2a8573646c7e8e95c740877fe3d38f11f\n",
1019
- "ERROR getting the market creator of 0x87f0fcfe810502555f8d1439793155cbfa2eb583\n",
1020
- "ERROR getting the market creator of 0x7bcf0f480e52da1597d7437d5b4a4644b1e7ec23\n",
1021
- "ERROR getting the market creator of 0xcfef6a50bd9439d1e1a15fcfe99068a57e533d95\n",
1022
- "ERROR getting the market creator of 0xaeb8c31302361d42ec806faf406ef0c30b6eba5f\n",
1023
- "ERROR getting the market creator of 0x9db7e7a0c82a229a7f3bb15046ff0c3a778b7291\n",
1024
- "ERROR getting the market creator of 0x85c31bbeaab5468d97900e69d87a459aba997fa5\n",
1025
- "ERROR getting the market creator of 0x36660fec571bb4d5849a433f9ec64622416f1dbb\n",
1026
- "ERROR getting the market creator of 0x5ebe6dcb1ac4470bb71c89cf1e6b9abc48b637ba\n",
1027
- "ERROR getting the market creator of 0xa0acfecc55465870c9baa7c954a0e81165fb112c\n",
1028
- "ERROR getting the market creator of 0xd6d6951a8fa033f91a2227d75fb1eebc139e2e57\n",
1029
- "ERROR getting the market creator of 0x651d04044b780e68f3f952796fb7c06fb0928ad2\n",
1030
- "ERROR getting the market creator of 0xe271378e094db9d64e34c6c14a7492bcccd11dfb\n",
1031
- "ERROR getting the market creator of 0x37c241945001f6c26c886c8d551cc2e6cf34c214\n",
1032
- "ERROR getting the market creator of 0x20b9e32b17360310c633e5676f28430bd723f4bd\n",
1033
- "ERROR getting the market creator of 0x06d873e7465a6680f5487905d7b5daf7f2c6e299\n",
1034
- "ERROR getting the market creator of 0xd28b5e2f2ce950705354cd2ceaf4eab9d23db52b\n",
1035
- "ERROR getting the market creator of 0x4d70e1ac779094e9790c8b74954d15729371e6bc\n",
1036
- "ERROR getting the market creator of 0x81489c0eab196fb88704f08ef34b8a9ed7137c91\n",
1037
- "ERROR getting the market creator of 0x223c99787f25179d51a9934a426b6d1b252bb4bd\n",
1038
- "ERROR getting the market creator of 0xd61b2c4f70645c830bd5af76426d6b22af63c152\n",
1039
- "ERROR getting the market creator of 0xe66e931f7b065361f56e41d61f599adab3b167c2\n",
1040
- "ERROR getting the market creator of 0x5ccf21332df9af6195a5b1ba78d15562db915a35\n",
1041
- "ERROR getting the market creator of 0xf8e68d9f66d2534df36c23db6770467da1c1ff1b\n",
1042
- "ERROR getting the market creator of 0x2b9274ddf2213d8a6b2930a5b82801165df55017\n",
1043
- "ERROR getting the market creator of 0xf9349c5ea0b5559abd8dfa6cdd4e4d5d913e1e61\n",
1044
- "ERROR getting the market creator of 0xad8aa6f927bb6a38af8121418f1b64d4ed8be99c\n",
1045
- "ERROR getting the market creator of 0x3dcc00904249d796a89943de15c85ac11afc5d66\n",
1046
- "ERROR getting the market creator of 0x10ece1553b5017414388fe78f64720814d7f8799\n",
1047
- "ERROR getting the market creator of 0x0930bcc328a695419d596dae380dec7fb43cd715\n",
1048
- "ERROR getting the market creator of 0x956d8bbc930372482a361dec7e4707b15d8b02f4\n",
1049
- "ERROR getting the market creator of 0x14da1cc12b382142ac3e2422162f122a0a31ec45\n",
1050
- "ERROR getting the market creator of 0x28dd86a2c82ce02970eff7f4ea9ebde97750adc8\n",
1051
- "ERROR getting the market creator of 0xb997d5e2fddf39b8a197715c7b200df612d74360\n",
1052
- "ERROR getting the market creator of 0x2064ceecb78a382f4988d41f881abef89b6e785c\n",
1053
- "ERROR getting the market creator of 0xe715cc8f264ab48f75bb1b5c11d7dbaf949d73c5\n",
1054
- "ERROR getting the market creator of 0x5fc7213135962250147030c5dd30b84a80f2ad1e\n",
1055
- "ERROR getting the market creator of 0x10ccffdc8e801ab4fda98371723cda4e30e6d672\n",
1056
- "ERROR getting the market creator of 0x6e5d93fdcc14db02a58ace636c2dcff8db36039d\n",
1057
- "ERROR getting the market creator of 0xf792f6a308525b72b5d47f12798668c140f5968e\n",
1058
- "ERROR getting the market creator of 0x00897abcbbefe4f558956b7a9d1b7819677e4d90\n",
1059
- "ERROR getting the market creator of 0x29448445959cc5045c03b7f316fa3332cc2b37b7\n",
1060
- "ERROR getting the market creator of 0xdb8c2038cd17645216125f323048dcd4c9845826\n",
1061
- "ERROR getting the market creator of 0x32969cce1791f13dc5d500b9e701ffb931baae03\n",
1062
- "ERROR getting the market creator of 0x84aeb93d348c6da1ea4b0016c207aefc26edaa44\n",
1063
- "ERROR getting the market creator of 0xdda87f7ec43aab7080e9ac23ae1550e5bc89d6cc\n",
1064
- "ERROR getting the market creator of 0xafd80421ce35298d3698ca0f4008477a169c9ea2\n",
1065
- "ERROR getting the market creator of 0xffc47cb1ecd41daae58e39fd4193d6fe9a6f5d2e\n",
1066
- "ERROR getting the market creator of 0x351d430d229740f986ee240612c932c66188dd09\n",
1067
- "ERROR getting the market creator of 0xd72455c8d5398a2b3b822bbc7cc0de638ea35519\n",
1068
- "ERROR getting the market creator of 0x2c83cf4bb92e55e35b6e4af6eca6c0a85fb73650\n",
1069
- "ERROR getting the market creator of 0xf2baf410b7d42d7572fb2f39cf216ffae8d4cafe\n",
1070
- "ERROR getting the market creator of 0xb42a955a0e06b3e6bdf229c9abfd2fdad20688a7\n",
1071
- "ERROR getting the market creator of 0x35021fcc0d15c4e87fc1c7fb527f389829dde3d9\n",
1072
- "ERROR getting the market creator of 0xaa19120a9976c75dc569ab2cfcc087cd224db4e2\n",
1073
- "ERROR getting the market creator of 0x6e79766698f58a25d2548b76601de9535c5080d3\n",
1074
- "ERROR getting the market creator of 0x6915dcb7601802ea4a2dd840c44b6ed4473b5ce2\n",
1075
- "ERROR getting the market creator of 0x6957f7ac4a0a09f237a901749e518a678d1a614a\n",
1076
- "ERROR getting the market creator of 0x785a9d3329955ffd7cd24ca7a89ce2da21ac62da\n",
1077
- "ERROR getting the market creator of 0x1e738f7e82102e2f56fef62df473d3f1f1dc53b1\n",
1078
- "ERROR getting the market creator of 0x8e23b89649f22a6e8084b34a1a5de28d9ddf5a88\n",
1079
- "ERROR getting the market creator of 0x31c6b19cae793ba90ee9c70263af773c27df2774\n",
1080
- "ERROR getting the market creator of 0x3a2d7bf095988f30daf308b5484cd74903d82c22\n",
1081
- "ERROR getting the market creator of 0xde10d01d4315cf64d9feeb79e9a593d78da8a50b\n",
1082
- "ERROR getting the market creator of 0xa57b7f04bb813b5a6ded7cc92c5bd56586d8f7d4\n",
1083
- "ERROR getting the market creator of 0x97609769fddc72ea9f45f62cef1f7a9658dd1efe\n",
1084
- "ERROR getting the market creator of 0x7ddbfbebbec1635315f9217cbf9de8afd272c8de\n",
1085
- "ERROR getting the market creator of 0x37cdc93194dc7f46f3cc377cf4350f56455a4f85\n",
1086
- "ERROR getting the market creator of 0x75c10935141d740b71e1763aa6a3139643754655\n",
1087
- "ERROR getting the market creator of 0x0f98789650877b1928960490a5a19769ac1c84b3\n",
1088
- "ERROR getting the market creator of 0x9f87c202db8b3270406a3084817909a9d4afc6ea\n",
1089
- "ERROR getting the market creator of 0x8cfb5af2b0287b34a423755d0481478f0a8f1356\n",
1090
- "ERROR getting the market creator of 0x09244905029648aca18830291bb62634b04d9a46\n",
1091
- "ERROR getting the market creator of 0x4e9a5580ce24dd06ed8d6b1d75a7ccce7abf7361\n",
1092
- "ERROR getting the market creator of 0x8bbeb8a3e1f6fdc9e95aa0d7e80ebc6dc1468b7a\n",
1093
- "ERROR getting the market creator of 0xcb279a4ebb3f0d78cb15817e942cc7aea01b8545\n",
1094
- "ERROR getting the market creator of 0xb36fa15e34dd50b8199c57305573dc48d1271b50\n",
1095
- "ERROR getting the market creator of 0x2198981fc1d8b3c61e7df9a50cf240708c057dfa\n",
1096
- "ERROR getting the market creator of 0x37bab68f9ae4f9c7ce915d9e1f3404e7cd1794cc\n",
1097
- "ERROR getting the market creator of 0x97f59586921ebdcfc07694ba8376f59871db11f9\n",
1098
- "ERROR getting the market creator of 0xc79bf3f6370e8a8002a3093c379752f395a3c291\n",
1099
- "ERROR getting the market creator of 0x178021f40d4e1ed270f2d2125f9f80d3e78a1836\n",
1100
- "ERROR getting the market creator of 0xcca6ccde20a551caec29d6c1318f4f2ec7e6063c\n"
1101
- ]
1102
- }
1103
- ],
1104
- "source": [
1105
- "tools[\"market_creator\"] = \"\"\n",
1106
- "# traverse the list of traders\n",
1107
- "traders_list = list(tools.trader_address.unique())\n",
1108
- "for trader_address in traders_list:\n",
1109
- " market_creator = \"\"\n",
1110
- " try:\n",
1111
- " trades = fpmms_trades[fpmms_trades[\"trader_address\"] == trader_address]\n",
1112
- " market_creator = trades.iloc[0][\"market_creator\"] # first value is enough\n",
1113
- " except Exception:\n",
1114
- " print(f\"ERROR getting the market creator of {trader_address}\")\n",
1115
- " tools_of_the_trader = tools[tools[\"trader_address\"] == trader_address]\n",
1116
- " # update\n",
1117
- " tools.loc[tools[\"trader_address\"] == trader_address, \"market_creator\"] = market_creator"
1118
- ]
1119
- },
1120
- {
1121
- "cell_type": "code",
1122
- "execution_count": 37,
1123
- "metadata": {},
1124
- "outputs": [
1125
- {
1126
- "data": {
1127
- "text/plain": [
1128
- "market_creator\n",
1129
- "quickstart 121106\n",
1130
- "pearl 12729\n",
1131
- " 5182\n",
1132
- "Name: count, dtype: int64"
1133
- ]
1134
- },
1135
- "execution_count": 37,
1136
- "metadata": {},
1137
- "output_type": "execute_result"
1138
- }
1139
- ],
1140
- "source": [
1141
- "tools.market_creator.value_counts()"
1142
- ]
1143
- },
1144
- {
1145
- "cell_type": "code",
1146
- "execution_count": 38,
1147
- "metadata": {},
1148
- "outputs": [
1149
- {
1150
- "data": {
1151
- "text/plain": [
1152
- "139017"
1153
- ]
1154
- },
1155
- "execution_count": 38,
1156
- "metadata": {},
1157
- "output_type": "execute_result"
1158
- }
1159
- ],
1160
- "source": [
1161
- "len(tools)"
1162
- ]
1163
- },
1164
- {
1165
- "cell_type": "code",
1166
- "execution_count": 39,
1167
- "metadata": {},
1168
- "outputs": [
1169
- {
1170
- "data": {
1171
- "text/plain": [
1172
- "0.03727601660228605"
1173
- ]
1174
- },
1175
- "execution_count": 39,
1176
- "metadata": {},
1177
- "output_type": "execute_result"
1178
- }
1179
- ],
1180
- "source": [
1181
- "5182/139017"
1182
- ]
1183
- },
1184
- {
1185
- "cell_type": "code",
1186
- "execution_count": 40,
1187
- "metadata": {},
1188
- "outputs": [],
1189
- "source": [
1190
- "tools = tools.loc[tools[\"market_creator\"] != \"\"]\n",
1191
- "tools.to_parquet(\"../data/tools.parquet\", index=False)"
1192
- ]
1193
- }
1194
- ],
1195
- "metadata": {
1196
- "kernelspec": {
1197
- "display_name": "hf_dashboards",
1198
- "language": "python",
1199
- "name": "python3"
1200
- },
1201
- "language_info": {
1202
- "codemirror_mode": {
1203
- "name": "ipython",
1204
- "version": 3
1205
- },
1206
- "file_extension": ".py",
1207
- "mimetype": "text/x-python",
1208
- "name": "python",
1209
- "nbconvert_exporter": "python",
1210
- "pygments_lexer": "ipython3",
1211
- "version": "3.12.2"
1212
- }
1213
- },
1214
- "nbformat": 4,
1215
- "nbformat_minor": 2
1216
- }

notebooks/weekly_analysis.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/weighted_accuracy_ranking.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
scripts/__init__.py DELETED
File without changes
scripts/active_traders.py DELETED
@@ -1,91 +0,0 @@
1
- import pandas as pd
2
- import pickle
3
- from web3_utils import DATA_DIR, TMP_DIR
4
- from staking import check_list_addresses
5
-
6
-
7
- def get_trader_type(address: str, service_map: dict) -> str:
8
- # check if it is part of any service id on the map
9
- keys = service_map.keys()
10
- last_key = max(keys)
11
-
12
- for key, value in service_map.items():
13
- if value["safe_address"].lower() == address.lower():
14
- # found a service
15
- return "Olas"
16
-
17
- return "non_Olas"
18
-
19
-
20
- def compute_active_traders_dataset():
21
- """Function to prepare the active traders dataset"""
22
- with open(DATA_DIR / "service_map.pkl", "rb") as f:
23
- service_map = pickle.load(f)
24
- # read tools info
25
- tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
26
- # rename the request_month_year_week
27
- tools_df.rename(
28
- columns={"request_month_year_week": "month_year_week"}, inplace=True
29
- )
30
- tool_traders = tools_df.trader_address.unique()
31
- mapping = check_list_addresses(tool_traders)
32
- # add trader type to tools_df
33
- tools_df["trader_type"] = tools_df.trader_address.apply(lambda x: mapping[x])
34
- tools_df = tools_df[
35
- ["month_year_week", "market_creator", "trader_type", "trader_address"]
36
- ]
37
- tools_df.drop_duplicates(inplace=True)
38
- # read trades info
39
- all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
40
-
41
- # read unknown info
42
- unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
43
- unknown_traders["creation_timestamp"] = pd.to_datetime(
44
- unknown_traders["creation_timestamp"]
45
- )
46
- unknown_traders["creation_timestamp"] = unknown_traders[
47
- "creation_timestamp"
48
- ].dt.tz_convert("UTC")
49
- unknown_traders = unknown_traders.sort_values(
50
- by="creation_timestamp", ascending=True
51
- )
52
- unknown_traders["month_year_week"] = (
53
- unknown_traders["creation_timestamp"]
54
- .dt.to_period("W")
55
- .dt.start_time.dt.strftime("%b-%d-%Y")
56
- )
57
- unknown_traders["trader_type"] = "unknown"
58
- unknown_traders = unknown_traders[
59
- ["month_year_week", "trader_type", "market_creator", "trader_address"]
60
- ]
61
- unknown_traders.drop_duplicates(inplace=True)
62
-
63
- all_trades["creation_timestamp"] = pd.to_datetime(all_trades["creation_timestamp"])
64
- all_trades["creation_timestamp"] = all_trades["creation_timestamp"].dt.tz_convert(
65
- "UTC"
66
- )
67
- all_trades = all_trades.sort_values(by="creation_timestamp", ascending=True)
68
- all_trades["month_year_week"] = (
69
- all_trades["creation_timestamp"]
70
- .dt.to_period("W")
71
- .dt.start_time.dt.strftime("%b-%d-%Y")
72
- )
73
- all_trades["trader_type"] = all_trades["staking"].apply(
74
- lambda x: "non_Olas" if x == "non_Olas" else "Olas"
75
- )
76
- all_trades = all_trades[
77
- ["month_year_week", "market_creator", "trader_type", "trader_address"]
78
- ]
79
- all_trades.drop_duplicates(inplace=True)
80
- filtered_traders_data = pd.concat([all_trades, tools_df], axis=0)
81
- filtered_traders_data.drop_duplicates(inplace=True)
82
- if len(unknown_traders) > 0:
83
- # merge
84
- filtered_traders_data = pd.concat(
85
- [filtered_traders_data, unknown_traders], axis=0
86
- )
87
- filtered_traders_data.to_parquet(TMP_DIR / "active_traders.parquet")
88
-
89
-
90
- if __name__ == "__main__":
91
- compute_active_traders_dataset()

scripts/cleaning_old_info.py DELETED
@@ -1,110 +0,0 @@
1
- import pandas as pd
2
- from utils import DATA_DIR, TMP_DIR, transform_to_datetime
3
-
4
-
5
- def clean_old_data_from_parquet_files(cutoff_date: str):
6
- print("Cleaning oldest data")
7
- # Convert the string to datetime64[ns, UTC]
8
- min_date_utc = pd.to_datetime(cutoff_date, format="%Y-%m-%d", utc=True)
9
-
10
- # clean tools.parquet
11
- try:
12
- tools = pd.read_parquet(TMP_DIR / "tools.parquet")
13
-
14
- # make sure creator_address is in the columns
15
- assert "trader_address" in tools.columns, "trader_address column not found"
16
-
17
- # lowercase and strip creator_address
18
- tools["trader_address"] = tools["trader_address"].str.lower().str.strip()
19
-
20
- tools["request_time"] = pd.to_datetime(tools["request_time"], utc=True)
21
-
22
- print(f"length before filtering {len(tools)}")
23
- tools = tools.loc[tools["request_time"] > min_date_utc]
24
- print(f"length after filtering {len(tools)}")
25
- tools.to_parquet(TMP_DIR / "tools.parquet", index=False)
26
-
27
- except Exception as e:
28
- print(f"Error cleaning tools file {e}")
29
-
30
- # clean all_trades_profitability.parquet
31
- try:
32
- all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
33
-
34
- all_trades["creation_timestamp"] = pd.to_datetime(
35
- all_trades["creation_timestamp"], utc=True
36
- )
37
-
38
- print(f"length before filtering {len(all_trades)}")
39
- all_trades = all_trades.loc[all_trades["creation_timestamp"] > min_date_utc]
40
- print(f"length after filtering {len(all_trades)}")
41
- all_trades.to_parquet(
42
- DATA_DIR / "all_trades_profitability.parquet", index=False
43
- )
44
-
45
- except Exception as e:
46
- print(f"Error cleaning all trades profitability file {e}")
47
-
48
- # clean unknown_traders.parquet
49
- try:
50
- unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
51
-
52
- unknown_traders["creation_timestamp"] = pd.to_datetime(
53
- unknown_traders["creation_timestamp"], utc=True
54
- )
55
-
56
- print(f"length unknown traders before filtering {len(unknown_traders)}")
57
- unknown_traders = unknown_traders.loc[
58
- unknown_traders["creation_timestamp"] > min_date_utc
59
- ]
60
- print(f"length unknown traders after filtering {len(unknown_traders)}")
61
- unknown_traders.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
62
-
63
- except Exception as e:
64
- print(f"Error cleaning unknown_traders file {e}")
65
-
66
- # clean fpmmTrades.parquet
67
- try:
68
- fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
69
- try:
70
- fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
71
- lambda x: transform_to_datetime(x)
72
- )
73
- except Exception as e:
74
- print(f"Transformation not needed")
75
- fpmmTrades["creation_timestamp"] = pd.to_datetime(
76
- fpmmTrades["creationTimestamp"]
77
- )
78
- fpmmTrades["creation_timestamp"] = pd.to_datetime(
79
- fpmmTrades["creation_timestamp"], utc=True
80
- )
81
-
82
- print(f"length before filtering {len(fpmmTrades)}")
83
- fpmmTrades = fpmmTrades.loc[fpmmTrades["creation_timestamp"] > min_date_utc]
84
- print(f"length after filtering {len(fpmmTrades)}")
85
- fpmmTrades.to_parquet(TMP_DIR / "fpmmTrades.parquet", index=False)
86
-
87
- except Exception as e:
88
- print(f"Error cleaning fpmmTrades file {e}")
89
-
90
- # clean invalid trades parquet
91
- try:
92
- invalid_trades = pd.read_parquet(DATA_DIR / "invalid_trades.parquet")
93
-
94
- invalid_trades["creation_timestamp"] = pd.to_datetime(
95
- invalid_trades["creation_timestamp"], utc=True
96
- )
97
-
98
- print(f"length before filtering {len(invalid_trades)}")
99
- invalid_trades = invalid_trades.loc[
100
- invalid_trades["creation_timestamp"] > min_date_utc
101
- ]
102
- print(f"length after filtering {len(invalid_trades)}")
103
- invalid_trades.to_parquet(DATA_DIR / "invalid_trades.parquet", index=False)
104
-
105
- except Exception as e:
106
- print(f"Error cleaning fpmmTrades file {e}")
107
-
108
-
109
- if __name__ == "__main__":
110
- clean_old_data_from_parquet_files("2024-10-25")

scripts/cloud_storage.py DELETED
@@ -1,93 +0,0 @@
1
- from minio import Minio
2
- from minio.error import S3Error
3
- import os
4
- import argparse
5
-
6
- from utils import HIST_DIR
7
-
8
- MINIO_ENDPOINT = "minio.autonolas.tech"
9
- ACCESS_KEY = os.environ.get("CLOUD_ACCESS_KEY", None)
10
- SECRET_KEY = os.environ.get("CLOUD_SECRET_KEY", None)
11
- BUCKET_NAME = "weekly-stats"
12
- FOLDER_NAME = "historical_data"
13
-
14
-
15
- def initialize_client():
16
- # Initialize the MinIO client
17
- client = Minio(
18
- MINIO_ENDPOINT,
19
- access_key=ACCESS_KEY,
20
- secret_key=SECRET_KEY,
21
- secure=True, # Set to False if not using HTTPS
22
- )
23
- return client
24
-
25
-
26
- def upload_file(client, filename: str, file_path: str) -> bool:
27
- """Upload a file to the bucket"""
28
- try:
29
- OBJECT_NAME = FOLDER_NAME + "/" + filename
30
- print(
31
- f"filename={filename}, object_name={OBJECT_NAME} and file_path={file_path}"
32
- )
33
- client.fput_object(
34
- BUCKET_NAME, OBJECT_NAME, file_path, part_size=10 * 1024 * 1024
35
- ) # 10MB parts
36
- print(f"File '{file_path}' uploaded as '{OBJECT_NAME}'.")
37
- return True
38
- except S3Error as err:
39
- print(f"Error uploading file: {err}")
40
- return False
41
-
42
-
43
- def download_file(client, filename: str, file_path: str):
44
- """Download the file back"""
45
- try:
46
- OBJECT_NAME = FOLDER_NAME + "/" + filename
47
- client.fget_object(BUCKET_NAME, OBJECT_NAME, "downloaded_" + file_path)
48
- print(f"File '{OBJECT_NAME}' downloaded as 'downloaded_{file_path}'.")
49
- except S3Error as err:
50
- print(f"Error downloading file: {err}")
51
-
52
-
53
- def load_historical_file(client, filename: str) -> bool:
54
- """Function to load one file into the cloud storage"""
55
- file_path = filename
56
- file_path = HIST_DIR / filename
57
- return upload_file(client, filename, file_path)
58
-
59
-
60
- def upload_historical_file(filename: str):
61
- client = initialize_client()
62
- load_historical_file(client=client, filename=filename)
63
-
64
-
65
- def process_historical_files(client):
66
- """Process all parquet files in historical_data folder"""
67
-
68
- # Walk through all files in the folder
69
- for filename in os.listdir(HIST_DIR):
70
- # Check if file is a parquet file
71
- if filename.endswith(".parquet"):
72
- try:
73
- if load_historical_file(client, filename):
74
- print(f"Successfully processed {filename}")
75
- else:
76
- print("Error loading the files")
77
- except Exception as e:
78
- print(f"Error processing {filename}: {str(e)}")
79
-
80
-
81
- if __name__ == "__main__":
82
- # parser = argparse.ArgumentParser(
83
- # description="Load files to the cloud storage for historical data"
84
- # )
85
- # parser.add_argument("param_1", type=str, help="Name of the file to upload")
86
-
87
- # # Parse the arguments
88
- # args = parser.parse_args()
89
- # filename = args.param_1
90
-
91
- client = initialize_client()
92
- # load_historical_file(client, filename)
93
- process_historical_files(client)

scripts/daily_data.py DELETED
@@ -1,61 +0,0 @@
1
- import logging
2
- from utils import measure_execution_time, DATA_DIR, TMP_DIR
3
- from profitability import (
4
- analyse_all_traders,
5
- label_trades_by_staking,
6
- )
7
- import pandas as pd
8
- from nr_mech_calls import (
9
- create_unknown_traders_df,
10
- compute_daily_mech_calls,
11
- transform_to_datetime,
12
- )
13
- from markets import check_current_week_data
14
- from staking import generate_retention_activity_file
15
-
16
- logging.basicConfig(level=logging.INFO)
17
-
18
-
19
- @measure_execution_time
20
- def prepare_live_metrics(
21
- tools_filename="new_tools.parquet", trades_filename="new_fpmmTrades.parquet"
22
- ):
23
- fpmmTrades = pd.read_parquet(TMP_DIR / trades_filename)
24
- tools = pd.read_parquet(TMP_DIR / tools_filename)
25
-
26
- # TODO if monday data of the week is missing in new_fpmmTrades then take it from the general file
27
- try:
28
- fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
29
- lambda x: transform_to_datetime(x)
30
- )
31
- except Exception as e:
32
- print(f"Transformation not needed")
33
- # check missing data from Monday
34
- fpmmTrades = check_current_week_data(fpmmTrades)
35
-
36
- print("Computing the estimated mech calls dataset")
37
- trader_mech_calls = compute_daily_mech_calls(fpmmTrades=fpmmTrades, tools=tools)
38
- print("Analysing trades...")
39
- all_trades_df = analyse_all_traders(fpmmTrades, trader_mech_calls, daily_info=True)
40
-
41
- # staking label
42
- all_trades_df = label_trades_by_staking(all_trades_df)
43
-
44
- # create the unknown traders dataset
45
- unknown_traders_df, all_trades_df = create_unknown_traders_df(
46
- trades_df=all_trades_df
47
- )
48
- unknown_traders_df.to_parquet(
49
- TMP_DIR / "unknown_daily_traders.parquet", index=False
50
- )
51
-
52
- # save into a separate file
53
- all_trades_df.to_parquet(DATA_DIR / "daily_info.parquet", index=False)
54
-
55
- # prepare the retention info file
56
- generate_retention_activity_file()
57
-
58
-
59
- if __name__ == "__main__":
60
- prepare_live_metrics()
61
- # generate_retention_activity_file()

scripts/get_mech_info.py DELETED
@@ -1,322 +0,0 @@
1
- from string import Template
2
- from typing import Any
3
- from datetime import datetime, timedelta, UTC
4
- from utils import (
5
- SUBGRAPH_API_KEY,
6
- measure_execution_time,
7
- DATA_DIR,
8
- TMP_DIR,
9
- NETWORK_SUBGRAPH_URL,
10
- transform_to_datetime,
11
- )
12
- import requests
13
- import pandas as pd
14
- import numpy as np
15
- from mech_request_utils import (
16
- collect_all_mech_delivers,
17
- collect_all_mech_requests,
18
- clean_mech_delivers,
19
- fix_duplicate_requestIds,
20
- merge_requests_delivers,
21
- get_ipfs_data,
22
- merge_json_files,
23
- )
24
-
25
- SUBGRAPH_HEADERS = {
26
- "Accept": "application/json, multipart/mixed",
27
- "Content-Type": "application/json",
28
- }
29
-
30
- QUERY_BATCH_SIZE = 1000
31
- DATETIME_60_DAYS_AGO = datetime.now(UTC) - timedelta(days=60)
32
- DATETIME_10_DAYS_AGO = datetime.now(UTC) - timedelta(days=10)
33
- DATETIME_10_HOURS_AGO = datetime.now(UTC) - timedelta(hours=10)
34
- BLOCK_NUMBER = Template(
35
- """
36
- {
37
- blocks(
38
- first: 1,
39
- orderBy: timestamp,
40
- orderDirection: asc,
41
- where: {
42
- timestamp_gte: "${timestamp_from}",
43
- timestamp_lte: "${timestamp_to}"
44
- }
45
- ){
46
- id,
47
- number,
48
- }
49
- }
50
- """
51
- )
52
-
53
- LATEST_BLOCK_QUERY = """
54
- {
55
- blocks(
56
- first: 1,
57
- orderBy: timestamp,
58
- orderDirection: desc,
59
- ){
60
- id,
61
- number,
62
- }
63
- }
64
- """
65
-
66
-
67
- def fetch_last_block_number() -> dict:
68
- # print(f"Sending query for the subgraph = {query}")
69
- network_subgraph_url = NETWORK_SUBGRAPH_URL.substitute(
70
- subgraph_api_key=SUBGRAPH_API_KEY
71
- )
72
- query = LATEST_BLOCK_QUERY
73
- response = requests.post(
74
- network_subgraph_url,
75
- headers=SUBGRAPH_HEADERS,
76
- json={"query": query},
77
- timeout=300,
78
- )
79
-
80
- result_json = response.json()
81
- print(f"Response of the query={result_json}")
82
- blocks = result_json.get("data", {}).get("blocks", "")
83
- if len(blocks) == 0:
84
- raise ValueError(f"The query {query} did not return any results")
85
- return blocks[0]
86
-
87
-
88
- def fetch_block_number(timestamp_from: int, timestamp_to: int) -> dict:
89
- """Get a block number by its timestamp margins."""
90
-
91
- query = BLOCK_NUMBER.substitute(
92
- timestamp_from=timestamp_from, timestamp_to=timestamp_to
93
- )
94
- # print(f"Sending query for the subgraph = {query}")
95
- network_subgraph_url = NETWORK_SUBGRAPH_URL.substitute(
96
- subgraph_api_key=SUBGRAPH_API_KEY
97
- )
98
- response = requests.post(
99
- network_subgraph_url,
100
- headers=SUBGRAPH_HEADERS,
101
- json={"query": query},
102
- timeout=300,
103
- )
104
- # print(f"block query: {query}")
105
- result_json = response.json()
106
- print(f"Response of the query={result_json}")
107
- blocks = result_json.get("data", {}).get("blocks", "")
108
- if len(blocks) == 0:
109
- raise ValueError(f"The query {query} did not return any results")
110
- return blocks[0]
111
-
112
-
113
- def update_json_files():
114
- merge_json_files("mech_requests.json", "new_mech_requests.json")
115
- merge_json_files("mech_delivers.json", "new_mech_delivers.json")
116
- merge_json_files("merged_requests.json", "new_merged_requests.json")
117
- merge_json_files("tools_info.json", "new_tools_info.json")
118
-
119
-
120
- def update_all_trades_parquet(new_trades_df: pd.DataFrame) -> pd.DataFrame:
121
- # Read old all_trades parquet file
122
- try:
123
- old_trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
124
- except Exception as e:
125
- print(f"Error reading old trades parquet file {e}")
126
- return None
127
- # merge two dataframes
128
- merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)
129
-
130
- # Check for duplicates
131
- print(f"Initial length before removing duplicates in all_trades= {len(merge_df)}")
132
-
133
- # Remove duplicates
134
- merge_df.drop_duplicates("trade_id", inplace=True)
135
- print(f"Final length after removing duplicates in all_trades = {len(merge_df)}")
136
- return merge_df
137
-
138
-
139
- def update_tools_parquet(new_tools_filename: pd.DataFrame):
140
- try:
141
- old_tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
142
- except Exception as e:
143
- print(f"Error reading old tools parquet file {e}")
144
- return None
145
- try:
146
- new_tools_df = pd.read_parquet(DATA_DIR / new_tools_filename)
147
-
148
- except Exception as e:
149
- print(f"Error reading new trades parquet file {e}")
150
- return None
151
-
152
- # merge two dataframes
153
- merge_df = pd.concat([old_tools_df, new_tools_df], ignore_index=True)
154
-
155
- # Check for duplicates
156
- print(f"Initial length before removing duplicates in tools= {len(merge_df)}")
157
-
158
- # Remove duplicates
159
- merge_df.drop_duplicates(
160
- subset=["request_id", "request_time"], keep="last", inplace=True
161
- )
162
- print(f"Final length after removing duplicates in tools= {len(merge_df)}")
163
-
164
- # save the parquet file
165
- merge_df.to_parquet(TMP_DIR / "tools.parquet", index=False)
166
-
167
-
168
- def get_mech_info_2024() -> dict[str, Any]:
169
- """Query the subgraph to get the 2024 information from mech."""
170
-
171
- date = "2024-01-01"
172
- datetime_jan_2024 = datetime.strptime(date, "%Y-%m-%d")
173
- timestamp_jan_2024 = int(datetime_jan_2024.timestamp())
174
- margin = timedelta(seconds=5)
175
- timestamp_jan_2024_plus_margin = int((datetime_jan_2024 + margin).timestamp())
176
-
177
- jan_block_number = fetch_block_number(
178
- timestamp_jan_2024, timestamp_jan_2024_plus_margin
179
- )
180
- # expecting only one block
181
- jan_block_number = jan_block_number.get("number", "")
182
- if jan_block_number.isdigit():
183
- jan_block_number = int(jan_block_number)
184
-
185
- if jan_block_number == "":
186
- raise ValueError(
187
- "Could not find a valid block number for the first of January 2024"
188
- )
189
- MECH_TO_INFO = {
190
- # this block number is when the creator had its first tx ever, and after this mech's creation
191
- "0xff82123dfb52ab75c417195c5fdb87630145ae81": (
192
- "old_mech_abi.json",
193
- jan_block_number,
194
- ),
195
- # this block number is when this mech was created
196
- "0x77af31de935740567cf4ff1986d04b2c964a786a": (
197
- "new_mech_abi.json",
198
- jan_block_number,
199
- ),
200
- }
201
- return MECH_TO_INFO
202
-
203
-
204
- def get_last_block_number() -> int:
205
- last_block_number = fetch_last_block_number()
206
- # expecting only one block
207
- last_block_number = last_block_number.get("number", "")
208
- if last_block_number.isdigit():
209
- last_block_number = int(last_block_number)
210
-
211
- if last_block_number == "":
212
- raise ValueError("Could not find a valid block number for last month data")
213
- return last_block_number
214
-
215
-
216
- def get_last_60_days_block_number() -> int:
217
- timestamp_60_days_ago = int((DATETIME_60_DAYS_AGO).timestamp())
218
- margin = timedelta(seconds=5)
219
- timestamp_60_days_ago_plus_margin = int((DATETIME_60_DAYS_AGO + margin).timestamp())
220
-
221
- last_month_block_number = fetch_block_number(
222
- timestamp_60_days_ago, timestamp_60_days_ago_plus_margin
223
- )
224
- # expecting only one block
225
- last_month_block_number = last_month_block_number.get("number", "")
226
- if last_month_block_number.isdigit():
227
- last_month_block_number = int(last_month_block_number)
228
-
229
- if last_month_block_number == "":
230
- raise ValueError("Could not find a valid block number for last month data")
231
- return last_month_block_number
232
-
233
-
234
- def get_mech_info_last_60_days() -> dict[str, Any]:
235
- """Query the subgraph to get the last 60 days of information from mech."""
236
- last_month_block_number = get_last_60_days_block_number()
237
-
238
- MECH_TO_INFO = {
239
- # this block number is when the creator had its first tx ever, and after this mech's creation
240
- "0xff82123dfb52ab75c417195c5fdb87630145ae81": (
241
- "old_mech_abi.json",
242
- last_month_block_number,
243
- ),
244
- # this block number is when this mech was created
245
- "0x77af31de935740567cf4ff1986d04b2c964a786a": (
246
- "new_mech_abi.json",
247
- last_month_block_number,
248
- ),
249
- }
250
- print(f"last 60 days block number {last_month_block_number}")
251
- return MECH_TO_INFO
252
-
253
-
254
- @measure_execution_time
255
- def get_mech_events_since_last_run(logger):
256
- """Function to download only the new events since the last execution."""
257
-
258
- # Read the latest date from stored data
259
- try:
260
- all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
261
- # latest_timestamp = max(all_trades.creation_timestamp)
262
- cutoff_date = "2025-01-13"
263
- latest_timestamp = pd.Timestamp(
264
- datetime.strptime(cutoff_date, "%Y-%m-%d")
265
- ).tz_localize("UTC")
266
- print(f"Updating data since {latest_timestamp}")
267
- except Exception:
268
- print("Error while reading the profitability parquet file")
269
- return None
270
-
271
- # Get the block number of the latest date
272
- five_seconds = np.timedelta64(5, "s")
273
- last_run_block_number = fetch_block_number(
274
- int(latest_timestamp.timestamp()),
275
- int((latest_timestamp + five_seconds).timestamp()),
276
- )
277
- # expecting only one block
278
- last_run_block_number = last_run_block_number.get("number", "")
279
- if last_run_block_number.isdigit():
280
- last_run_block_number = int(last_run_block_number)
281
-
282
- if last_run_block_number == "":
283
- raise ValueError("Could not find a valid block number for last collected data")
284
- last_block_number = get_last_block_number()
285
-
286
- # mech requests
287
- requests_dict, duplicatedReqId, nr_errors = collect_all_mech_requests(
288
- from_block=last_run_block_number,
289
- to_block=last_block_number,
290
- filename="new_mech_requests.json",
291
- )
292
- print(f"NUMBER OF MECH REQUEST ERRORS={nr_errors}")
293
- # mech delivers
294
- delivers_dict, duplicatedIds, nr_errors = collect_all_mech_delivers(
295
- from_block=last_run_block_number,
296
- to_block=last_block_number,
297
- filename="new_mech_delivers.json",
298
- )
299
- print(f"NUMBER OF MECH DELIVER ERRORS={nr_errors}")
300
- if delivers_dict is None:
301
- return None
302
- # clean delivers
303
- clean_mech_delivers("new_mech_requests.json", "new_mech_delivers.json")
304
-
305
- # solve duplicated requestIds
306
- block_map = fix_duplicate_requestIds(
307
- "new_mech_requests.json", "new_mech_delivers.json"
308
- )
309
- # merge the two files into one source
310
- not_found = merge_requests_delivers(
311
- "new_mech_requests.json", "new_mech_delivers.json", "new_merged_requests.json"
312
- )
313
-
314
- # Add ipfs contents
315
- get_ipfs_data("new_merged_requests.json", "new_tools_info.json", logger)
316
- return latest_timestamp
317
-
318
-
319
- if __name__ == "__main__":
320
- get_mech_events_since_last_run()
321
- # result = get_mech_info_last_60_days()
322
- # print(result)

scripts/gnosis_timestamps.py DELETED
@@ -1,186 +0,0 @@
1
- from web3 import Web3
2
- import os
3
- import requests
4
- import time
5
- import pickle
6
- from datetime import datetime, timezone
7
- from functools import partial
8
- import pandas as pd
9
- import pytz
10
- from tqdm import tqdm
11
- from utils import DATA_DIR, TMP_DIR, measure_execution_time
12
- from concurrent.futures import ThreadPoolExecutor
13
-
14
- GNOSIS_API_INTERVAL = 0.2 # 5 calls in 1 second
15
- GNOSIS_URL = "https://api.gnosisscan.io/api"
16
- GNOSIS_API_KEY = os.environ.get("GNOSIS_API_KEY", None)
17
- # https://api.gnosisscan.io/api?module=account&action=txlist&address=0x1fe2b09de07475b1027b0c73a5bf52693b31a52e&startblock=36626348&endblock=36626348&page=1&offset=10&sort=asc&apikey=${gnosis_api_key}""
18
-
19
- # Connect to Gnosis Chain RPC
20
- w3 = Web3(Web3.HTTPProvider("https://rpc.gnosischain.com"))
21
-
22
-
23
- def parallelize_timestamp_computation(df: pd.DataFrame, function: callable) -> list:
24
- """Parallelize the timestamp conversion."""
25
- tx_hashes = df["tx_hash"].tolist()
26
- with ThreadPoolExecutor(max_workers=10) as executor:
27
- results = list(tqdm(executor.map(function, tx_hashes), total=len(tx_hashes)))
28
- return results
29
-
30
-
31
- def transform_timestamp_to_datetime(timestamp):
32
- dt = datetime.fromtimestamp(timestamp, timezone.utc)
33
- return dt
34
-
35
-
36
- def get_tx_hash(trader_address, request_block):
37
- """Function to get the transaction hash from the address and block number"""
38
- params = {
39
- "module": "account",
40
- "action": "txlist",
41
- "address": trader_address,
42
- "page": 1,
43
- "offset": 100,
44
- "startblock": request_block,
45
- "endblock": request_block,
46
- "sort": "asc",
47
- "apikey": GNOSIS_API_KEY,
48
- }
49
-
50
- try:
51
- response = requests.get(GNOSIS_URL, params=params)
52
- tx_list = response.json()["result"]
53
- time.sleep(GNOSIS_API_INTERVAL)
54
- if len(tx_list) > 1:
55
- raise ValueError("More than one transaction found")
56
- return tx_list[0]["hash"]
57
- except Exception as e:
58
- return None
59
-
60
-
61
- def add_tx_hash_info(filename: str = "tools.parquet"):
62
- """Function to add the hash info to the saved tools parquet file"""
63
- tools = pd.read_parquet(DATA_DIR / filename)
64
- tools["tx_hash"] = None
65
- total_errors = 0
66
- for i, mech_request in tqdm(
67
- tools.iterrows(), total=len(tools), desc="Adding tx hash"
68
- ):
69
- try:
70
- trader_address = mech_request["trader_address"]
71
- block_number = mech_request["request_block"]
72
- tools.at[i, "tx_hash"] = get_tx_hash(
73
- trader_address=trader_address, request_block=block_number
74
- )
75
- except Exception as e:
76
- print(f"Error with mech request {mech_request}")
77
- total_errors += 1
78
- continue
79
-
80
- print(f"Total number of errors = {total_errors}")
81
- tools.to_parquet(DATA_DIR / filename)
82
-
83
-
84
- def get_transaction_timestamp(tx_hash: str, web3: Web3):
85
-
86
- try:
87
- # Get transaction data
88
- tx = web3.eth.get_transaction(tx_hash)
89
- # Get block data
90
- block = web3.eth.get_block(tx["blockNumber"])
91
- # Get timestamp
92
- timestamp = block["timestamp"]
93
-
94
- # Convert to datetime
95
- dt = datetime.fromtimestamp(timestamp, tz=pytz.UTC)
96
-
97
- # return {
98
- # "timestamp": timestamp,
99
- # "datetime": dt,
100
- # "from_address": tx["from"],
101
- # "to_address": tx["to"],
102
- # "success": True,
103
- # }
104
- return dt.strftime("%Y-%m-%d %H:%M:%S")
105
- except Exception as e:
106
- print(f"Error getting the timestamp from {tx_hash}")
107
- return None
108
-
109
-
110
- @measure_execution_time
111
- def compute_request_time(tools_df: pd.DataFrame) -> pd.DataFrame:
112
- """Function to compute the request timestamp from the tx hash"""
113
- # read the local info
114
- try:
115
- gnosis_info = pickle.load(open(TMP_DIR / "gnosis_info.pkl", "rb"))
116
- except Exception:
117
- print("File not found or not created. Creating a new one")
118
- gnosis_info = {}
119
-
120
- # any previous information?
121
- tools_df["request_time"] = tools_df["tx_hash"].map(gnosis_info)
122
-
123
- # Identify tools with missing request_time and fill them
124
- missing_time_indices = tools_df[tools_df["request_time"].isna()].index
125
- print(f"length of missing_time_indices = {len(missing_time_indices)}")
126
- # traverse all tx hashes and get the timestamp of each tx
127
- partial_mech_request_timestamp = partial(get_transaction_timestamp, web3=w3)
128
- missing_timestamps = parallelize_timestamp_computation(
129
- tools_df.loc[missing_time_indices], partial_mech_request_timestamp
130
- )
131
-
132
- # Update the original DataFrame with the missing timestamps
133
- for i, timestamp in zip(missing_time_indices, missing_timestamps):
134
- tools_df.at[i, "request_time"] = timestamp
135
- # creating other time fields
136
- tools_df["request_month_year"] = pd.to_datetime(
137
- tools_df["request_time"]
138
- ).dt.strftime("%Y-%m")
139
- tools_df["request_month_year_week"] = (
140
- pd.to_datetime(tools_df["request_time"])
141
- .dt.to_period("W")
142
- .dt.start_time.dt.strftime("%b-%d-%Y")
143
- )
144
- # Update t_map with new timestamps
145
- new_timestamps = (
146
- tools_df[["tx_hash", "request_time"]]
147
- .dropna()
148
- .set_index("tx_hash")
149
- .to_dict()["request_time"]
150
- )
151
- gnosis_info.update(new_timestamps)
152
- # saving gnosis info
153
- with open(TMP_DIR / "gnosis_info.pkl", "wb") as f:
154
- pickle.dump(gnosis_info, f)
155
- return tools_df
156
-
157
-
158
- def get_account_details(address):
159
- # gnosis_url = GNOSIS_URL.substitute(gnosis_api_key=GNOSIS_API_KEY, tx_hash=tx_hash)
160
-
161
- params = {
162
- "module": "account",
163
- "action": "txlistinternal",
164
- "address": address,
165
- #'page': 1,
166
- #'offset': 100,
167
- #'startblock': 0,
168
- #'endblock': 9999999999,
169
- #'sort': 'asc',
170
- "apikey": GNOSIS_API_KEY,
171
- }
172
-
173
- try:
174
- response = requests.get(GNOSIS_URL, params=params)
175
- return response.json()
176
- except Exception as e:
177
- return {"error": str(e)}
178
-
179
-
180
- if __name__ == "__main__":
181
- # tx_data = "0x783BFA045BDE2D0BCD65280D97A29E7BD9E4FDC10985848690C9797E767140F4"
182
- new_tools = pd.read_parquet(DATA_DIR / "new_tools.parquet")
183
- new_tools = compute_request_time(new_tools)
184
- new_tools.to_parquet(DATA_DIR / "new_tools.parquet")
185
- # result = get_tx_hash("0x1fe2b09de07475b1027b0c73a5bf52693b31a52e", 36626348)
186
- # print(result)

scripts/manage_space_files.py DELETED
@@ -1,40 +0,0 @@
1
- import os
2
- import shutil
3
-
4
- # Define the file names to move
5
- files_to_move = [
6
- "new_tools.parquet",
7
- "new_fpmmTrades.parquet",
8
- "fpmms.parquet",
9
- "fpmmTrades.parquet",
10
- ]
11
-
12
- # Get the current working directory
13
- current_dir = os.getcwd()
14
-
15
- # Define source and destination paths
16
- source_dir = os.path.join(current_dir, "data")
17
- dest_dir = os.path.join(current_dir, "tmp")
18
-
19
-
20
- def move_files():
21
- # Create tmp directory if it doesn't exist
22
- if not os.path.exists(dest_dir):
23
- os.makedirs(dest_dir)
24
- # Move each file
25
- for file_name in files_to_move:
26
- source_file = os.path.join(source_dir, file_name)
27
- dest_file = os.path.join(dest_dir, file_name)
28
-
29
- try:
30
- if os.path.exists(source_file):
31
- shutil.move(source_file, dest_file)
32
- print(f"Moved {file_name} successfully")
33
- else:
34
- print(f"File not found: {file_name}")
35
- except Exception as e:
36
- print(f"Error moving {file_name}: {str(e)}")
37
-
38
-
39
- if __name__ == "__main__":
40
- move_files()

scripts/markets.py DELETED
@@ -1,464 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # ------------------------------------------------------------------------------
3
- #
4
- # Copyright 2023 Valory AG
5
- #
6
- # Licensed under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License.
8
- # You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS,
14
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- # See the License for the specific language governing permissions and
16
- # limitations under the License.
17
- #
18
- # ------------------------------------------------------------------------------
19
-
20
- import functools
21
- import warnings
22
- from datetime import datetime, timedelta
23
- from typing import Optional, Generator, Callable
24
- import pandas as pd
25
- import requests
26
- from tqdm import tqdm
27
- from typing import List, Dict
28
- from utils import SUBGRAPH_API_KEY, DATA_DIR, TMP_DIR, transform_to_datetime
29
- from web3_utils import (
30
- FPMM_QS_CREATOR,
31
- FPMM_PEARL_CREATOR,
32
- query_omen_xdai_subgraph,
33
- OMEN_SUBGRAPH_URL,
34
- )
35
- from queries import (
36
- FPMMS_QUERY,
37
- ID_FIELD,
38
- DATA_FIELD,
39
- ANSWER_FIELD,
40
- QUERY_FIELD,
41
- TITLE_FIELD,
42
- OUTCOMES_FIELD,
43
- ERROR_FIELD,
44
- QUESTION_FIELD,
45
- FPMMS_FIELD,
46
- )
47
-
48
- ResponseItemType = List[Dict[str, str]]
49
- SubgraphResponseType = Dict[str, ResponseItemType]
50
- BATCH_SIZE = 1000
51
- DEFAULT_TO_TIMESTAMP = 2147483647 # around year 2038
52
- DEFAULT_FROM_TIMESTAMP = 0
53
-
54
- MAX_UINT_HEX = "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
55
- DEFAULT_FILENAME = "fpmms.parquet"
56
- market_creators_map = {"quickstart": FPMM_QS_CREATOR, "pearl": FPMM_PEARL_CREATOR}
57
-
58
-
59
- class RetriesExceeded(Exception):
60
- """Exception to raise when retries are exceeded during data-fetching."""
61
-
62
- def __init__(
63
- self, msg="Maximum retries were exceeded while trying to fetch the data!"
64
- ):
65
- super().__init__(msg)
66
-
67
-
68
- def hacky_retry(func: Callable, n_retries: int = 3) -> Callable:
69
- """Create a hacky retry strategy.
70
- Unfortunately, we cannot use `requests.packages.urllib3.util.retry.Retry`,
71
- because the subgraph does not return the appropriate status codes in case of failure.
72
- Instead, it always returns code 200. Thus, we raise exceptions manually inside `make_request`,
73
- catch those exceptions in the hacky retry decorator and try again.
74
- Finally, if the allowed number of retries is exceeded, we raise a custom `RetriesExceeded` exception.
75
-
76
- :param func: the input request function.
77
- :param n_retries: the maximum allowed number of retries.
78
- :return: The request method with the hacky retry strategy applied.
79
- """
80
-
81
- @functools.wraps(func)
82
- def wrapper_hacky_retry(*args, **kwargs) -> SubgraphResponseType:
83
- """The wrapper for the hacky retry.
84
-
85
- :return: a response dictionary.
86
- """
87
- retried = 0
88
-
89
- while retried <= n_retries:
90
- try:
91
- if retried > 0:
92
- warnings.warn(f"Retrying {retried}/{n_retries}...")
93
-
94
- return func(*args, **kwargs)
95
- except (ValueError, ConnectionError) as e:
96
- warnings.warn(e.args[0])
97
- finally:
98
- retried += 1
99
-
100
- raise RetriesExceeded()
101
-
102
- return wrapper_hacky_retry
103
-
104
-
105
- @hacky_retry
106
- def query_subgraph(url: str, query: str, key: str) -> SubgraphResponseType:
107
- """Query a subgraph.
108
-
109
- Args:
110
- url: the subgraph's URL.
111
- query: the query to be used.
112
- key: the key to use in order to access the required data.
113
-
114
- Returns:
115
- a response dictionary.
116
- """
117
- content = {QUERY_FIELD: query}
118
- headers = {
119
- "Accept": "application/json",
120
- "Content-Type": "application/json",
121
- }
122
- res = requests.post(url, json=content, headers=headers)
123
-
124
- if res.status_code != 200:
125
- raise ConnectionError(
126
- "Something went wrong while trying to communicate with the subgraph "
127
- f"(Error: {res.status_code})!\n{res.text}"
128
- )
129
-
130
- body = res.json()
131
- if ERROR_FIELD in body.keys():
132
- raise ValueError(f"The given query is not correct: {body[ERROR_FIELD]}")
133
-
134
- data = body.get(DATA_FIELD, {}).get(key, None)
135
- if data is None:
136
- raise ValueError(f"Unknown error encountered!\nRaw response: \n{body}")
137
-
138
- return data
139
-
140
-
141
- def transform_fpmmTrades(df: pd.DataFrame) -> pd.DataFrame:
142
- print("Transforming trades dataframe")
143
- # convert creator to address
144
- df["creator"] = df["creator"].apply(lambda x: x["id"])
145
-
146
- # normalize fpmm column
147
- fpmm = pd.json_normalize(df["fpmm"])
148
- fpmm.columns = [f"fpmm.{col}" for col in fpmm.columns]
149
- df = pd.concat([df, fpmm], axis=1)
150
-
151
- # drop fpmm column
152
- df.drop(["fpmm"], axis=1, inplace=True)
153
-
154
- # change creator to creator_address
155
- df.rename(columns={"creator": "trader_address"}, inplace=True)
156
- return df
157
-
158
-
159
- def create_fpmmTrades(
160
- from_timestamp: int = DEFAULT_FROM_TIMESTAMP,
161
- to_timestamp: int = DEFAULT_TO_TIMESTAMP,
162
- ):
163
- """Create fpmmTrades for all trades."""
164
- print("Getting trades from Quickstart markets")
165
- # Quickstart trades
166
- qs_trades_json = query_omen_xdai_subgraph(
167
- trader_category="quickstart",
168
- from_timestamp=from_timestamp,
169
- to_timestamp=to_timestamp,
170
- fpmm_from_timestamp=from_timestamp,
171
- fpmm_to_timestamp=to_timestamp,
172
- )
173
-
174
- print(f"length of the qs_trades_json dataset {len(qs_trades_json)}")
175
-
176
- # convert to dataframe
177
- qs_df = pd.DataFrame(qs_trades_json["data"]["fpmmTrades"])
178
- qs_df["market_creator"] = "quickstart"
179
- qs_df = transform_fpmmTrades(qs_df)
180
-
181
- # Pearl trades
182
- print("Getting trades from Pearl markets")
183
- pearl_trades_json = query_omen_xdai_subgraph(
184
- trader_category="pearl",
185
- from_timestamp=from_timestamp,
186
- to_timestamp=to_timestamp,
187
- fpmm_from_timestamp=from_timestamp,
188
- fpmm_to_timestamp=to_timestamp,
189
- )
190
-
191
- print(f"length of the pearl_trades_json dataset {len(pearl_trades_json)}")
192
-
193
- # convert to dataframe
194
- pearl_df = pd.DataFrame(pearl_trades_json["data"]["fpmmTrades"])
195
- pearl_df["market_creator"] = "pearl"
196
- pearl_df = transform_fpmmTrades(pearl_df)
197
-
198
- return pd.concat([qs_df, pearl_df], ignore_index=True)
199
-
200
-
201
- def fpmms_fetcher(trader_category: str) -> Generator[ResponseItemType, int, None]:
202
- """An indefinite fetcher for the FPMMs."""
203
- omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
204
- print(f"omen_subgraph = {omen_subgraph}")
205
-
206
- if trader_category == "pearl":
207
- creator_id = FPMM_PEARL_CREATOR
208
- else: # quickstart
209
- creator_id = FPMM_QS_CREATOR
210
- while True:
211
- fpmm_id = yield
212
- fpmms_query = FPMMS_QUERY.substitute(
213
- creator=creator_id,
214
- fpmm_id=fpmm_id,
215
- fpmms_field=FPMMS_FIELD,
216
- first=BATCH_SIZE,
217
- id_field=ID_FIELD,
218
- answer_field=ANSWER_FIELD,
219
- question_field=QUESTION_FIELD,
220
- outcomes_field=OUTCOMES_FIELD,
221
- title_field=TITLE_FIELD,
222
- )
223
- print(f"markets query = {fpmms_query}")
224
- yield query_subgraph(omen_subgraph, fpmms_query, FPMMS_FIELD)
225
-
226
-
227
- def fetch_qs_fpmms() -> pd.DataFrame:
228
- """Fetch all the fpmms of the creator."""
229
- latest_id = ""
230
- fpmms = []
231
- trader_category = "quickstart"
232
- print(f"Getting markets for {trader_category}")
233
- fetcher = fpmms_fetcher(trader_category)
234
- for _ in tqdm(fetcher, unit="fpmms", unit_scale=BATCH_SIZE):
235
- batch = fetcher.send(latest_id)
236
- if len(batch) == 0:
237
- break
238
-
239
- latest_id = batch[-1].get(ID_FIELD, "")
240
- if latest_id == "":
241
- raise ValueError(f"Unexpected data format retrieved: {batch}")
242
-
243
- fpmms.extend(batch)
244
-
245
- return pd.DataFrame(fpmms)
246
-
247
-
248
- def fetch_pearl_fpmms() -> pd.DataFrame:
249
- """Fetch all the fpmms of the creator."""
250
- latest_id = ""
251
- fpmms = []
252
- trader_category = "pearl"
253
- print(f"Getting markets for {trader_category}")
254
- fetcher = fpmms_fetcher(trader_category)
255
- for _ in tqdm(fetcher, unit="fpmms", unit_scale=BATCH_SIZE):
256
- batch = fetcher.send(latest_id)
257
- if len(batch) == 0:
258
- break
259
-
260
- latest_id = batch[-1].get(ID_FIELD, "")
261
- if latest_id == "":
262
- raise ValueError(f"Unexpected data format retrieved: {batch}")
263
-
264
- fpmms.extend(batch)
265
-
266
- return pd.DataFrame(fpmms)
267
-
268
-
269
- def get_answer(fpmm: pd.Series) -> str:
270
- """Get an answer from its index, using Series of an FPMM."""
271
- return fpmm[QUESTION_FIELD][OUTCOMES_FIELD][fpmm[ANSWER_FIELD]]
272
-
273
-
274
- def transform_fpmms(fpmms: pd.DataFrame) -> pd.DataFrame:
275
- """Transform an FPMMS dataframe."""
276
- transformed = fpmms.dropna()
277
- transformed = transformed.drop_duplicates([ID_FIELD])
278
- transformed = transformed.loc[transformed[ANSWER_FIELD] != MAX_UINT_HEX]
279
- transformed.loc[:, ANSWER_FIELD] = (
280
- transformed[ANSWER_FIELD].str.slice(-1).astype(int)
281
- )
282
- transformed.loc[:, ANSWER_FIELD] = transformed.apply(get_answer, axis=1)
283
- transformed = transformed.drop(columns=[QUESTION_FIELD])
284
-
285
- return transformed
286
-
287
-
288
- def etl(filename: Optional[str] = None) -> pd.DataFrame:
289
- """Fetch, process, store and return the markets as a Dataframe."""
290
- qs_fpmms = fetch_qs_fpmms()
291
- qs_fpmms = transform_fpmms(qs_fpmms)
292
- qs_fpmms["market_creator"] = "quickstart"
293
- print(f"Results for the market creator quickstart. Len = {len(qs_fpmms)}")
294
-
295
- pearl_fpmms = fetch_pearl_fpmms()
296
- pearl_fpmms = transform_fpmms(pearl_fpmms)
297
- pearl_fpmms["market_creator"] = "pearl"
298
- print(f"Results for the market creator pearl. Len = {len(pearl_fpmms)}")
299
- fpmms = pd.concat([qs_fpmms, pearl_fpmms], ignore_index=True)
300
-
301
- if filename:
302
- fpmms.to_parquet(DATA_DIR / filename, index=False)
303
-
304
- return fpmms
305
-
306
-
307
- def read_global_trades_file() -> pd.DataFrame:
308
- try:
309
- trades_filename = "fpmmTrades.parquet"
310
- fpmms_trades = pd.read_parquet(TMP_DIR / trades_filename)
311
- except FileNotFoundError:
312
- print("Error: fpmmTrades.parquet not found. No market creator added")
313
- return
314
- return fpmms_trades
315
-
316
-
317
- def add_market_creator(tools: pd.DataFrame) -> pd.DataFrame:
318
- # Check if fpmmTrades.parquet is in the same directory
319
- fpmms_trades = read_global_trades_file()
320
- tools["market_creator"] = ""
321
- # traverse the list of traders
322
- tools_no_market_creator = 0
323
- traders_list = list(tools.trader_address.unique())
324
- for trader_address in traders_list:
325
- market_creator = ""
326
- try:
327
- trades = fpmms_trades[fpmms_trades["trader_address"] == trader_address]
328
- market_creator = trades.iloc[0]["market_creator"] # first value is enough
329
- except Exception:
330
- print(f"ERROR getting the market creator of {trader_address}")
331
- tools_no_market_creator += 1
332
- continue
333
- # update
334
- tools.loc[tools["trader_address"] == trader_address, "market_creator"] = (
335
- market_creator
336
- )
337
- # filter those tools where we don't have market creator info
338
- tools = tools.loc[tools["market_creator"] != ""]
339
- print(f"Number of tools with no market creator info = {tools_no_market_creator}")
340
- return tools
341
-
342
-
343
- def fpmmTrades_etl(
344
- trades_filename: str, from_timestamp: int, to_timestamp: int = DEFAULT_TO_TIMESTAMP
345
- ) -> pd.DataFrame:
346
- print("Generating the trades file")
347
- try:
348
- fpmmTrades = create_fpmmTrades(
349
- from_timestamp=from_timestamp, to_timestamp=to_timestamp
350
- )
351
- except FileNotFoundError:
352
- print(f"Error creating {trades_filename} file .")
353
-
354
- # make sure trader_address is in the columns
355
- assert "trader_address" in fpmmTrades.columns, "trader_address column not found"
356
-
357
- # lowercase and strip trader_address
358
- fpmmTrades["trader_address"] = fpmmTrades["trader_address"].str.lower().str.strip()
359
- fpmmTrades.to_parquet(DATA_DIR / trades_filename, index=False)
360
- return fpmmTrades
361
-
362
-
363
- def check_current_week_data(trades_df: pd.DataFrame) -> pd.DataFrame:
364
- """Function to check if all current weeks data is present, if not, then add the missing data from previous file"""
365
- # Get current date
366
- now = datetime.now()
367
-
368
- # Get start of the current week (Monday)
369
- start_of_week = now - timedelta(days=now.weekday())
370
- start_of_week = start_of_week.replace(hour=0, minute=0, second=0, microsecond=0)
371
- print(f"start of the week = {start_of_week}")
372
-
373
- trades_df["creation_timestamp"] = pd.to_datetime(trades_df["creationTimestamp"])
374
- trades_df["creation_date"] = trades_df["creation_timestamp"].dt.date
375
- trades_df["creation_date"] = pd.to_datetime(trades_df["creation_date"])
376
- # Check dataframe
377
- min_date = min(trades_df.creation_date)
378
- if min_date > start_of_week:
379
- # missing data of current week in the trades file
380
- fpmms_trades = read_global_trades_file()
381
- # get missing data
382
- missing_data = fpmms_trades[
383
- (fpmms_trades["creation_date"] >= start_of_week)
384
- & (fpmms_trades["creation_date"] < min_date)
385
- ]
386
- merge_df = pd.concat([trades_df, missing_data], ignore_index=True)
387
- merge_df.drop_duplicates("id", keep="last", inplace=True)
388
- return merge_df
389
- # no update needed
390
- return trades_df
391
-
392
-
393
- def update_fpmmTrades_parquet(trades_filename: str) -> pd.DataFrame:
394
- # Read old trades parquet file
395
- try:
396
- old_trades_df = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
397
- except Exception as e:
398
- print(f"Error reading old trades parquet file {e}")
399
- return None
400
-
401
- try:
402
- new_trades_df = pd.read_parquet(DATA_DIR / trades_filename)
403
- except Exception as e:
404
- print(f"Error reading new trades parquet file {e}")
405
- return None
406
-
407
- # lowercase and strip trader_address
408
- new_trades_df["trader_address"] = (
409
- new_trades_df["trader_address"].str.lower().str.strip()
410
- )
411
- # ensure creationTimestamp compatibility
412
- try:
413
- new_trades_df["creationTimestamp"] = new_trades_df["creationTimestamp"].apply(
414
- lambda x: transform_to_datetime(x)
415
- )
416
-
417
- except Exception as e:
418
- print(f"Transformation not needed")
419
- try:
420
- old_trades_df["creationTimestamp"] = old_trades_df["creationTimestamp"].apply(
421
- lambda x: transform_to_datetime(x)
422
- )
423
- except Exception as e:
424
- print(f"Transformation not needed")
425
-
426
- # merge two dataframes
427
- merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)
428
- # avoid numpy objects
429
- merge_df["fpmm.arbitrationOccurred"] = merge_df["fpmm.arbitrationOccurred"].astype(
430
- bool
431
- )
432
- merge_df["fpmm.isPendingArbitration"] = merge_df[
433
- "fpmm.isPendingArbitration"
434
- ].astype(bool)
435
-
436
- # Check for duplicates
437
- print(f"Initial length before removing duplicates in fpmmTrades= {len(merge_df)}")
438
-
439
- # Remove duplicates
440
- # fpmm.outcomes is a numpy array
441
- merge_df.drop_duplicates("id", keep="last", inplace=True)
442
- print(f"Final length after removing duplicates in fpmmTrades= {len(merge_df)}")
443
-
444
- # save the parquet file
445
- merge_df.to_parquet(TMP_DIR / "fpmmTrades.parquet", index=False)
446
-
447
- return merge_df
448
-
449
-
450
- def update_fpmmTrades(from_date: str):
451
-
452
- from_timestamp = pd.Timestamp(datetime.strptime(from_date, "%Y-%m-%d")).tz_localize(
453
- "UTC"
454
- )
455
- fpmmTrades_etl(
456
- trades_filename="new_fpmmTrades.parquet",
457
- from_timestamp=int(from_timestamp.timestamp()),
458
- )
459
- update_fpmmTrades_parquet("new_fpmmTrades.parquet")
460
-
461
-
462
- if __name__ == "__main__":
463
- cutoff_date = "2025-01-13"
464
- update_fpmmTrades(cutoff_date)
 
scripts/mech_request_utils.py DELETED
@@ -1,603 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # ------------------------------------------------------------------------------
3
- #
4
- # Copyright 2024 Valory AG
5
- #
6
- # Licensed under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License.
8
- # You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS,
14
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- # See the License for the specific language governing permissions and
16
- # limitations under the License.
17
- #
18
- # ------------------------------------------------------------------------------
19
-
20
- """Script for retrieving mech requests and their delivers."""
21
- import json
22
- import time
23
- import pickle
24
- from random import uniform
25
- from typing import Any, Dict, Tuple
26
- import requests
27
- from gql import Client, gql
28
- from gql.transport.requests import RequestsHTTPTransport
29
- from tools import (
30
- GET_CONTENTS_BATCH_SIZE,
31
- IRRELEVANT_TOOLS,
32
- create_session,
33
- request,
34
- )
35
- from tqdm import tqdm
36
- from web3_utils import (
37
- FPMM_QS_CREATOR,
38
- FPMM_PEARL_CREATOR,
39
- IPFS_POLL_INTERVAL,
40
- SUBGRAPH_POLL_INTERVAL,
41
- )
42
- from concurrent.futures import ThreadPoolExecutor, as_completed
43
- from utils import (
44
- DATA_DIR,
45
- JSON_DATA_DIR,
46
- MECH_SUBGRAPH_URL,
47
- SUBGRAPH_API_KEY,
48
- IPFS_ADDRESS,
49
- )
50
-
51
- NUM_WORKERS = 10
52
- BLOCKS_CHUNK_SIZE = 10000
53
- TEXT_ALIGNMENT = 30
54
- MINIMUM_WRITE_FILE_DELAY_SECONDS = 20
55
- MECH_FROM_BLOCK_RANGE = 50000
56
-
57
- last_write_time = 0.0
58
-
59
- REQUESTS_QUERY_FILTER = """
60
- query requests_query($sender_not_in: [Bytes!], $id_gt: Bytes, $blockNumber_gte: BigInt, $blockNumber_lte: BigInt) {
61
- requests(where: {sender_not_in: $sender_not_in, id_gt: $id_gt, blockNumber_gte: $blockNumber_gte, blockNumber_lte: $blockNumber_lte}, orderBy: id, first: 1000) {
62
- blockNumber
63
- blockTimestamp
64
- id
65
- ipfsHash
66
- requestId
67
- sender
68
- transactionHash
69
- }
70
- }
71
- """
72
-
73
- DELIVERS_QUERY_NO_FILTER = """
74
- query delivers_query($id_gt: Bytes, $blockNumber_gte: BigInt, $blockNumber_lte: BigInt) {
75
- delivers(where: {id_gt: $id_gt, blockNumber_gte: $blockNumber_gte, blockNumber_lte: $blockNumber_lte}, orderBy: id, first: 1000) {
76
- blockNumber
77
- blockTimestamp
78
- id
79
- ipfsHash
80
- requestId
81
- sender
82
- transactionHash
83
- }
84
- }
85
-
86
- """
87
- DELIVERS_QUERY = """
88
- query delivers_query($requestId: BigInt, $blockNumber_gte: BigInt, $blockNumber_lte: BigInt) {
89
- delivers(where: {requestId: $requestId, blockNumber_gte: $blockNumber_gte, blockNumber_lte: $blockNumber_lte}, orderBy: blockNumber, first: 1000) {
90
- blockNumber
91
- blockTimestamp
92
- id
93
- ipfsHash
94
- requestId
95
- sender
96
- transactionHash
97
- }
98
- }
99
- """
100
-
101
- MISSING_DELIVERS_QUERY = """
102
- query delivers_query($requestId: BigInt, $blockNumber_gte: BigInt, $blockNumber_lte: BigInt) {
103
- delivers(where: {requestId: $requestId, blockNumber_gte: $blockNumber_gte, blockNumber_lte: $blockNumber_lte}, orderBy: blockNumber, first: 1000) {
104
- blockNumber
105
- blockTimestamp
106
- id
107
- ipfsHash
108
- requestId
109
- sender
110
- transactionHash
111
- }
112
- }
113
- """
114
-
115
-
116
- def collect_all_mech_requests(from_block: int, to_block: int, filename: str) -> Tuple:
117
-
118
- print(f"Fetching all mech requests from {from_block} to {to_block}")
119
- mech_requests = {}
120
- duplicated_reqIds = []
121
- mech_subgraph_url = MECH_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
122
- transport = RequestsHTTPTransport(url=mech_subgraph_url)
123
- client = Client(transport=transport, fetch_schema_from_transport=True)
124
-
125
- id_gt = "0x00"
126
- nr_errors = 0
127
- while True:
128
- variables = {
129
- "sender_not_in": [FPMM_QS_CREATOR, FPMM_PEARL_CREATOR],
130
- "id_gt": id_gt,
131
- "blockNumber_gte": str(from_block), # str
132
- "blockNumber_lte": str(to_block), # str
133
- }
134
- try:
135
- response = fetch_with_retry(client, REQUESTS_QUERY_FILTER, variables)
136
-
137
- items = response.get("requests", [])
138
-
139
- if not items:
140
- break
141
-
142
- for mech_request in items:
143
- if mech_request["id"] not in mech_requests:
144
- mech_requests[mech_request["id"]] = mech_request
145
- else:
146
- duplicated_reqIds.append(mech_request["id"])
147
- except Exception as e:
148
- # counter for errors
149
- nr_errors += 1
150
- print(f"Error while getting the response: {e}")
151
-
152
- id_gt = items[-1]["id"]
153
- time.sleep(SUBGRAPH_POLL_INTERVAL)
154
- print(f"New execution for id_gt = {id_gt}")
155
- if len(duplicated_reqIds) > 0:
156
- print(f"Number of duplicated req Ids = {len(duplicated_reqIds)}")
157
- save_json_file(mech_requests, filename)
158
-
159
- print(f"Number of requests = {len(mech_requests)}")
160
- print(f"Number of duplicated req Ids = {len(duplicated_reqIds)}")
161
- save_json_file(mech_requests, filename)
162
- return mech_requests, duplicated_reqIds, nr_errors
163
-
164
-
165
- def fetch_with_retry(client, query, variables, max_retries=5):
166
- for attempt in range(max_retries):
167
- try:
168
- return client.execute(gql(query), variable_values=variables)
169
- except Exception as e:
170
- if attempt == max_retries - 1:
171
- raise e
172
- wait_time = (2**attempt) + uniform(0, 1) # exponential backoff with jitter
173
- time.sleep(wait_time)
174
-
175
-
176
- def collect_all_mech_delivers(from_block: int, to_block: int, filename: str) -> Tuple:
177
-
178
- print(f"Fetching all mech delivers from {from_block} to {to_block}")
179
-
180
- mech_delivers = {}
181
- duplicated_requestIds = []
182
- mech_subgraph_url = MECH_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
183
- transport = RequestsHTTPTransport(url=mech_subgraph_url)
184
- client = Client(transport=transport, fetch_schema_from_transport=True)
185
- to_block = (
186
- to_block + MECH_FROM_BLOCK_RANGE
187
- ) # there is a delay between deliver and request
188
- id_gt = ""
189
- nr_errors = 0
190
- while True:
191
- variables = {
192
- "id_gt": id_gt,
193
- "blockNumber_gte": str(from_block), # str
194
- "blockNumber_lte": str(to_block), # str
195
- }
196
- try:
197
- response = fetch_with_retry(client, DELIVERS_QUERY_NO_FILTER, variables)
198
- items = response.get("delivers", [])
199
-
200
- if not items:
201
- break
202
-
203
- for mech_deliver in items:
204
- if mech_deliver["requestId"] not in mech_delivers:
205
- mech_delivers[mech_deliver["requestId"]] = [mech_deliver]
206
- else:
207
- duplicated_requestIds.append(mech_deliver["requestId"])
208
- # we will handle the duplicated later
209
- except Exception as e:
210
- # counter for errors
211
- nr_errors += 1
212
- print(f"Error while getting the response: {e}")
213
- # return None, None
214
-
215
- id_gt = items[-1]["id"]
216
- time.sleep(SUBGRAPH_POLL_INTERVAL)
217
- print(f"New execution for id_gt = {id_gt}")
218
- if len(duplicated_requestIds) > 0:
219
- print(f"Number of duplicated request id = {len(duplicated_requestIds)}")
220
- save_json_file(mech_delivers, filename)
221
- print(f"Number of delivers = {len(mech_delivers)}")
222
- print(f"Number of duplicated request id = {len(duplicated_requestIds)}")
223
- save_json_file(mech_delivers, filename)
224
- return mech_delivers, duplicated_requestIds, nr_errors
225
-
226
-
227
- def collect_missing_delivers(request_id: int, block_number: int) -> Dict[str, Any]:
228
- to_block = (
229
- block_number + MECH_FROM_BLOCK_RANGE
230
- ) # there is a delay between deliver and request
231
- print(f"Fetching all missing delivers from {block_number} to {to_block}")
232
- mech_delivers = {}
233
- mech_subgraph_url = MECH_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
234
- transport = RequestsHTTPTransport(url=mech_subgraph_url)
235
- client = Client(transport=transport, fetch_schema_from_transport=True)
236
-
237
- variables = {
238
- "requestId": request_id,
239
- "blockNumber_gte": str(block_number), # str
240
- "blockNumber_lte": str(to_block), # str
241
- }
242
- try:
243
- response = fetch_with_retry(client, MISSING_DELIVERS_QUERY, variables)
244
- items = response.get("delivers", [])
245
- # If the user sends requests with the same values (tool, prompt, nonce) it
246
- # will generate the same requestId. Therefore, multiple items can be retrieved
247
- # at this point. We assume the most likely deliver to this request is the
248
- # one with the closest blockNumber among all delivers with the same requestId.
249
- if items:
250
- return items[0]
251
- except Exception as e:
252
- print(f"Error while getting the response: {e}")
253
- # TODO: count how many mech requests without a deliver we have
254
-
255
- return mech_delivers
256
-
257
-
258
- def populate_requests_ipfs_contents(
259
- session: requests.Session, mech_requests: Dict[str, Any], keys_to_traverse: list
260
- ) -> Tuple[dict, int]:
261
- updated_dict = {}
262
- wrong_response_count = 0
263
- for k in tqdm(
264
- keys_to_traverse,
265
- desc="Fetching IPFS contents for requests",
266
- position=1,
267
- unit="results",
268
- ):
269
- mech_request = mech_requests[k]
270
-
271
- if "ipfsContents" not in mech_request:
272
- ipfs_hash = mech_request["ipfsHash"]
273
- url = f"{IPFS_ADDRESS}{ipfs_hash}/metadata.json"
274
- response = request(session, url)
275
- if response is None:
276
- tqdm.write(f"Skipping {mech_request=}. because response was None")
277
- wrong_response_count += 1
278
- continue
279
- try:
280
- contents = response.json()
281
- if contents["tool"] in IRRELEVANT_TOOLS:
282
- continue
283
- mech_request["ipfsContents"] = contents
284
- except requests.exceptions.JSONDecodeError:
285
- tqdm.write(
286
- f"Skipping {mech_request} because of JSONDecodeError when parsing response"
287
- )
288
- wrong_response_count += 1
289
- continue
290
- updated_dict[k] = mech_request
291
- time.sleep(IPFS_POLL_INTERVAL)
292
-
293
- return updated_dict, wrong_response_count
294
-
295
-
296
- def populate_delivers_ipfs_contents(
297
- session: requests.Session, mech_requests: Dict[str, Any], keys_to_traverse: list
298
- ) -> Tuple[dict, int]:
299
- """Function to complete the delivers content info from ipfs"""
300
- updated_dict = {}
301
- errors = 0
302
- for k in tqdm(
303
- keys_to_traverse,
304
- desc="Fetching IPFS contents for delivers",
305
- position=1,
306
- unit="results",
307
- ):
308
- mech_request = mech_requests[k]
309
- if "deliver" not in mech_request or len(mech_request["deliver"]) == 0:
310
- print(f"Skipping mech request {mech_request} because of no delivers info")
311
- continue
312
-
313
- deliver = mech_request["deliver"]
314
- if "ipfsContents" not in deliver:
315
- ipfs_hash = deliver["ipfsHash"]
316
- request_id = deliver["requestId"]
317
- url = f"{IPFS_ADDRESS}{ipfs_hash}/{request_id}"
318
- response = request(session, url)
319
- if response is None:
320
- tqdm.write(f"Skipping {mech_request=}.")
321
- continue
322
- try:
323
- contents = response.json()
324
- metadata = contents.get("metadata", None)
325
- if metadata and contents["metadata"]["tool"] in IRRELEVANT_TOOLS:
326
- continue
327
- contents.pop("cost_dict", None)
328
- deliver["ipfsContents"] = contents
329
- except requests.exceptions.JSONDecodeError:
330
- tqdm.write(f"Skipping {mech_request} because of JSONDecodeError")
331
- continue
332
- except Exception:
333
- errors += 1
334
- tqdm.write(
335
- f"Skipping {mech_request} because of error parsing the response"
336
- )
337
- continue
338
- updated_dict[k] = mech_request
339
- time.sleep(IPFS_POLL_INTERVAL)
340
-
341
- return updated_dict, errors
342
-
343
-
344
- def write_mech_events_to_file(
345
- mech_requests: Dict[str, Any],
346
- filename: str,
347
- force_write: bool = False,
348
- ) -> None:
349
- global last_write_time # pylint: disable=global-statement
350
- now = time.time()
351
-
352
- if len(mech_requests) == 0:
353
- return
354
-
355
- filename_path = DATA_DIR / filename
356
- if force_write or (now - last_write_time) >= MINIMUM_WRITE_FILE_DELAY_SECONDS:
357
- with open(filename_path, "w", encoding="utf-8") as file:
358
- json.dump(mech_requests, file, indent=2)
359
- last_write_time = now
360
-
361
-
362
- def save_json_file(data: Dict[str, Any], filename: str):
363
- """Function to save the content into a json file"""
364
- filename_path = JSON_DATA_DIR / filename
365
- with open(filename_path, "w", encoding="utf-8") as file:
366
- json.dump(data, file, indent=2)
367
-
368
-
369
- def merge_json_files(old_file: str, new_file: str):
370
- # read old file
371
- with open(JSON_DATA_DIR / old_file, "r") as f:
372
- old_data = json.load(f)
373
-
374
- # read the new file
375
- with open(JSON_DATA_DIR / new_file, "r") as f:
376
- new_data = json.load(f)
377
-
378
- # Merge the two JSON files and remove duplicates
379
- old_data.update(new_data)
380
-
381
- # Save the merged JSON file
382
- print(f"{old_file} updated")
383
- save_json_file(old_data, old_file)
384
-
385
-
386
- def clean_mech_delivers(requests_filename: str, delivers_filename: str) -> None:
387
- """Function to remove from the delivers json file the request Ids that are not in the mech requests"""
388
- # read mech requests
389
- with open(JSON_DATA_DIR / requests_filename, "r") as file:
390
- mech_requests = json.load(file)
391
-
392
- list_reqIds = [mech_requests[k].get("requestId") for k in mech_requests.keys()]
393
-
394
- # remove requestIds from delivers that are not in this list
395
- with open(JSON_DATA_DIR / delivers_filename, "r") as file:
396
- mech_delivers = json.load(file)
397
-
398
- print(f"original size of the file {len(mech_delivers)}")
399
- mech_delivers = {
400
- k: v
401
- for k, v in tqdm(
402
- mech_delivers.items(),
403
- total=len(mech_delivers),
404
- desc="Filtering delivers dictionary",
405
- )
406
- if k in set(list_reqIds)
407
- }
408
-
409
- print(f"final size of the file {len(mech_delivers)}")
410
- save_json_file(mech_delivers, delivers_filename)
411
-
412
-
413
- def get_request_block_numbers(
414
- mech_requests: Dict[str, Any], target_req_id: int
415
- ) -> list:
416
- block_numbers = []
417
-
418
- for entry in mech_requests.values():
419
- if entry["requestId"] == target_req_id:
420
- block_numbers.append(entry["blockNumber"])
421
-
422
- return block_numbers
423
-
424
-
425
- def update_block_request_map(block_request_id_map: dict) -> None:
426
- print("Saving block request id map info")
427
- with open(JSON_DATA_DIR / "block_request_id_map.pickle", "wb") as handle:
428
- pickle.dump(block_request_id_map, handle, protocol=pickle.HIGHEST_PROTOCOL)
429
-
430
-
431
- def fix_duplicate_requestIds(requests_filename: str, delivers_filename: str) -> dict:
432
- print("Fix duplicated request Ids")
433
- with open(JSON_DATA_DIR / delivers_filename, "r") as file:
434
- data_delivers = json.load(file)
435
-
436
- with open(JSON_DATA_DIR / requests_filename, "r") as file:
437
- mech_requests = json.load(file)
438
- list_request_Ids = list(data_delivers.keys())
439
-
440
- list_duplicated_reqIds = []
441
- for req_Id in list_request_Ids:
442
- if len(data_delivers.get(req_Id)) > 1:
443
- list_duplicated_reqIds.append(req_Id)
444
-
445
- print(len(list_duplicated_reqIds))
446
- block_request_id_map = {}
447
-
448
- for req_Id in list_duplicated_reqIds:
449
- # get the list of mech request block numbers for that requestId
450
- block_nrs = get_request_block_numbers(mech_requests, req_Id)
451
- # get the list of mech delivers
452
- mech_delivers_list = data_delivers.get(req_Id) # list of dictionaries
453
- if len(block_nrs) > 1:
454
- print("More than one block number was found")
455
- for block_nr in block_nrs:
456
- key = (block_nr, req_Id)
457
- min_difference_request = min(
458
- mech_delivers_list,
459
- key=lambda x: abs(int(x["blockNumber"]) - int(block_nr)),
460
- )
461
- block_request_id_map[key] = min_difference_request
462
-
463
- update_block_request_map(block_request_id_map)
464
-
465
- return block_request_id_map
466
-
467
-
468
- def merge_requests_delivers(
469
- requests_filename: str, delivers_filename: str, filename: str
470
- ) -> None:
471
- print("Merge request delivers")
472
- """Function to map requests and delivers"""
473
- with open(JSON_DATA_DIR / delivers_filename, "r") as file:
474
- mech_delivers = json.load(file)
475
-
476
- with open(JSON_DATA_DIR / requests_filename, "r") as file:
477
- mech_requests = json.load(file)
478
-
479
- # read the block map for duplicated requestIds
480
- with open(JSON_DATA_DIR / "block_request_id_map.pickle", "rb") as handle:
481
- # key = (block_nr, req_Id) value = delivers dictionary
482
- block_request_id_map = pickle.load(handle)
483
- for _, mech_req in tqdm(
484
- mech_requests.items(),
485
- desc=f"Merging delivers data into the mech requests",
486
- ):
487
- if "deliver" in mech_req:
488
- continue
489
-
490
- block_number_req = mech_req["blockNumber"]
491
- req_Id = mech_req["requestId"]
492
- # check if it is in the duplicated map
493
- key = (block_number_req, req_Id)
494
- if key in block_request_id_map.keys():
495
- deliver_dict = block_request_id_map[key]
496
- elif req_Id in mech_delivers.keys():
497
- deliver_dict = mech_delivers.get(req_Id)[0] # the value is a list
498
- else:
499
- print("No deliver entry found for this request Id")
500
- deliver_dict = collect_missing_delivers(
501
- request_id=req_Id, block_number=int(block_number_req)
502
- )
503
-
504
- # extract the info and append it to the original mech request dictionary
505
- mech_req["deliver"] = deliver_dict
506
- save_json_file(mech_requests, filename)
507
- return
508
-
509
-
510
- def get_ipfs_data(input_filename: str, output_filename: str, logger):
511
- with open(JSON_DATA_DIR / input_filename, "r") as file:
512
- mech_requests = json.load(file)
513
-
514
- total_keys_to_traverse = list(mech_requests.keys())
515
- updated_mech_requests = dict()
516
- session = create_session()
517
- logger.info("UPDATING IPFS CONTENTS OF REQUESTS")
518
- # requests
519
- nr_errors = 0
520
- with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
521
- futures = []
522
- for i in range(0, len(mech_requests), GET_CONTENTS_BATCH_SIZE):
523
- futures.append(
524
- executor.submit(
525
- populate_requests_ipfs_contents,
526
- session,
527
- mech_requests,
528
- total_keys_to_traverse[i : i + GET_CONTENTS_BATCH_SIZE],
529
- )
530
- )
531
-
532
- for future in tqdm(
533
- as_completed(futures),
534
- total=len(futures),
535
- desc=f"Fetching all ipfs contents from requests ",
536
- ):
537
- partial_dict, error_counter = future.result()
538
- nr_errors += error_counter
539
- updated_mech_requests.update(partial_dict)
540
-
541
- save_json_file(updated_mech_requests, output_filename)
542
- logger.info(f"NUMBER OF MECH REQUEST IPFS ERRORS={nr_errors}")
543
-
544
- # delivers
545
- nr_deliver_errors = 0
546
- logger.info("UPDATING IPFS CONTENTS OF DELIVERS")
547
- total_keys_to_traverse = list(updated_mech_requests.keys())
548
- final_tools_content = {}
549
- with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
550
- futures = []
551
- for i in range(0, len(updated_mech_requests), GET_CONTENTS_BATCH_SIZE):
552
- futures.append(
553
- executor.submit(
554
- populate_delivers_ipfs_contents,
555
- session,
556
- updated_mech_requests,
557
- total_keys_to_traverse[i : i + GET_CONTENTS_BATCH_SIZE],
558
- )
559
- )
560
-
561
- for future in tqdm(
562
- as_completed(futures),
563
- total=len(futures),
564
- desc=f"Fetching all ipfs contents from delivers ",
565
- ):
566
- partial_dict, error_counter = future.result()
567
- nr_deliver_errors += error_counter
568
- final_tools_content.update(partial_dict)
569
-
570
- save_json_file(final_tools_content, output_filename)
571
- logger.info(f"NUMBER OF MECH DELIVERS IPFS ERRORS={nr_deliver_errors}")
572
-
573
-
574
- def only_delivers_loop():
575
- with open(DATA_DIR / "tools_info.json", "r") as file:
576
- updated_mech_requests = json.load(file)
577
-
578
- # delivers
579
- session = create_session()
580
- print("UPDATING IPFS CONTENTS OF DELIVERS")
581
- total_keys_to_traverse = list(updated_mech_requests.keys())
582
- final_tools_content = {}
583
- with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
584
- futures = []
585
- for i in range(0, len(updated_mech_requests), GET_CONTENTS_BATCH_SIZE):
586
- futures.append(
587
- executor.submit(
588
- populate_delivers_ipfs_contents,
589
- session,
590
- updated_mech_requests,
591
- total_keys_to_traverse[i : i + GET_CONTENTS_BATCH_SIZE],
592
- )
593
- )
594
-
595
- for future in tqdm(
596
- as_completed(futures),
597
- total=len(futures),
598
- desc=f"Fetching all ipfs contents from delivers ",
599
- ):
600
- partial_dict, _ = future.result()
601
- final_tools_content.update(partial_dict)
602
-
603
- save_json_file(final_tools_content, "tools_info.json")
 
scripts/nr_mech_calls.py DELETED
@@ -1,271 +0,0 @@
1
- import pandas as pd
2
- from utils import DATA_DIR, DEFAULT_MECH_FEE, TMP_DIR, transform_to_datetime
3
- from tqdm import tqdm
4
-
5
- from typing import Dict, Any
6
- from collections import defaultdict
7
- from tools import IRRELEVANT_TOOLS
8
- import re
9
-
10
-
11
- def update_roi(row: pd.Series) -> float:
12
- new_value = row.net_earnings / (
13
- row.collateral_amount
14
- + row.trade_fee_amount
15
- + row.num_mech_calls * DEFAULT_MECH_FEE
16
- )
17
- return new_value
18
-
19
-
20
- def get_mech_statistics(mech_requests: Dict[str, Any]) -> Dict[str, Dict[str, int]]:
21
- """Outputs a table with Mech statistics"""
22
-
23
- mech_statistics: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
24
-
25
- for mech_request in mech_requests.values():
26
- if (
27
- "ipfs_contents" not in mech_request
28
- or "tool" not in mech_request["ipfs_contents"]
29
- or "prompt" not in mech_request["ipfs_contents"]
30
- ):
31
- continue
32
-
33
- if mech_request["ipfs_contents"]["tool"] in IRRELEVANT_TOOLS:
34
- continue
35
-
36
- prompt = mech_request["ipfs_contents"]["prompt"]
37
- prompt = prompt.replace("\n", " ")
38
- prompt = prompt.strip()
39
- prompt = re.sub(r"\s+", " ", prompt)
40
- prompt_match = re.search(r"\"(.*)\"", prompt)
41
- if prompt_match:
42
- question = prompt_match.group(1)
43
- else:
44
- question = prompt
45
-
46
- mech_statistics[question]["count"] += 1
47
- mech_statistics[question]["fees"] += mech_request["fee"]
48
-
49
- return mech_statistics
50
-
51
-
52
- def create_unknown_traders_df(trades_df: pd.DataFrame) -> pd.DataFrame:
53
- """filter trades coming from non-Olas traders that are placing no mech calls"""
54
- no_mech_calls_mask = (trades_df["staking"] == "non_Olas") & (
55
- trades_df["num_mech_calls"] == 0
56
- )
57
- no_mech_calls_df = trades_df.loc[no_mech_calls_mask]
58
- trades_df = trades_df.loc[~no_mech_calls_mask]
59
- return no_mech_calls_df, trades_df
60
-
61
-
62
- def update_trade_nr_mech_calls(non_agents: bool = False):
63
- try:
64
- all_trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
65
- tools = pd.read_parquet(DATA_DIR / "tools.parquet")
66
- except Exception as e:
67
- print(f"Error reading the profitability and tools parquet files")
68
-
69
- traders = list(all_trades_df.trader_address.unique())
70
- if non_agents:
71
- traders = list(
72
- all_trades_df.loc[
73
- all_trades_df["staking"] == "non_agent"
74
- ].trader_address.unique()
75
- )
76
-
77
- print("before updating")
78
- print(
79
- all_trades_df.loc[
80
- all_trades_df["staking"] == "non_agent"
81
- ].num_mech_calls.describe()
82
- )
83
- for trader in tqdm(traders, desc=f"Updating Traders mech calls", unit="traders"):
84
- tools_usage = tools[tools["trader_address"] == trader]
85
- if len(tools_usage) == 0:
86
- tqdm.write(f"trader with no tools usage found {trader}")
87
- all_trades_df.loc[
88
- all_trades_df["trader_address"] == trader, "nr_mech_calls"
89
- ] = 0
90
- # update roi
91
- all_trades_df["roi"] = all_trades_df.apply(lambda x: update_roi(x), axis=1)
92
- print("after updating")
93
- print(
94
- all_trades_df.loc[
95
- all_trades_df["staking"] == "non_agent"
96
- ].num_mech_calls.describe()
97
- )
98
-
99
- # saving
100
- all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
101
- # print("Summarising trades...")
102
- # summary_df = summary_analyse(all_trades_df)
103
- # summary_df.to_parquet(DATA_DIR / "summary_profitability.parquet", index=False)
104
-
105
-
106
- def get_daily_mech_calls_estimation(
107
- daily_trades: pd.DataFrame, daily_tools: pd.DataFrame
108
- ) -> list:
109
- # for each market
110
- daily_markets = daily_trades.title.unique()
111
- trader = daily_trades.iloc[0].trader_address
112
- day = daily_trades.iloc[0].creation_date
113
- estimations = []
114
- for market in daily_markets:
115
- estimation_dict = {}
116
- estimation_dict["trader_address"] = trader
117
- estimation_dict["trading_day"] = day
118
- # tools usage of this market
119
- market_requests = daily_tools.loc[daily_tools["title"] == market]
120
- # trades done on this market
121
- market_trades = daily_trades[daily_trades["title"] == market]
122
- mech_calls_estimation = 0
123
- total_trades = len(market_trades)
124
- total_requests = 0
125
- if len(market_requests) > 0:
126
- total_requests = len(market_requests)
127
- mech_calls_estimation = total_requests / total_trades
128
- estimation_dict["total_trades"] = total_trades
129
- estimation_dict["total_mech_requests"] = total_requests
130
- estimation_dict["market"] = market
131
- estimation_dict["mech_calls_per_trade"] = mech_calls_estimation
132
- estimations.append(estimation_dict)
133
- return estimations
134
-
135
-
136
- def compute_daily_mech_calls(
137
- fpmmTrades: pd.DataFrame, tools: pd.DataFrame
138
- ) -> pd.DataFrame:
139
- """Function to compute the daily mech calls at the trader and market level"""
140
- nr_traders = len(fpmmTrades["trader_address"].unique())
141
- fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
142
- fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
143
- fpmmTrades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)
144
- tools["request_time"] = pd.to_datetime(tools["request_time"])
145
- tools["request_date"] = tools["request_time"].dt.date
146
- tools = tools.sort_values(by="request_time", ascending=True)
147
- all_mech_calls = []
148
- for trader in tqdm(
149
- fpmmTrades["trader_address"].unique(),
150
- total=nr_traders,
151
- desc="creating daily mech calls computation",
152
- ):
153
- # compute the mech calls estimations for each trader
154
- all_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
155
- all_tools = tools[tools["trader_address"] == trader]
156
- trading_days = all_trades.creation_date.unique()
157
- for trading_day in trading_days:
158
- daily_trades = all_trades.loc[all_trades["creation_date"] == trading_day]
159
- daily_tools = all_tools.loc[all_tools["request_date"] == trading_day]
160
- trader_entry = {}
161
- trader_entry["trader_address"] = trader
162
- trader_entry["total_trades"] = len(daily_trades)
163
- trader_entry["trading_day"] = trading_day
164
- trader_entry["total_mech_calls"] = len(daily_tools)
165
- all_mech_calls.append(trader_entry)
166
- return pd.DataFrame.from_dict(all_mech_calls, orient="columns")
167
-
168
-
169
- def compute_mech_call_estimations(
170
- fpmmTrades: pd.DataFrame, tools: pd.DataFrame
171
- ) -> pd.DataFrame:
172
- """Function to compute the estimated mech calls needed per trade at the trader and market level"""
173
- nr_traders = len(fpmmTrades["trader_address"].unique())
174
- fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
175
- fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
176
- tools["request_time"] = pd.to_datetime(tools["request_time"])
177
- tools["request_date"] = tools["request_time"].dt.date
178
- all_estimations = []
179
- for trader in tqdm(
180
- fpmmTrades["trader_address"].unique(),
181
- total=nr_traders,
182
- desc="creating mech calls estimation dataframe",
183
- ):
184
- # compute the mech calls estimations for each trader
185
- all_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
186
- all_tools = tools[tools["trader_address"] == trader]
187
- trading_days = all_trades.creation_date.unique()
188
- for trading_day in trading_days:
189
- daily_trades = all_trades.loc[all_trades["creation_date"] == trading_day]
190
- daily_tools = all_tools.loc[all_tools["request_date"] == trading_day]
191
- daily_estimations = get_daily_mech_calls_estimation(
192
- daily_trades=daily_trades, daily_tools=daily_tools
193
- )
194
- all_estimations.extend(daily_estimations)
195
- return pd.DataFrame.from_dict(all_estimations, orient="columns")
196
-
197
-
198
- def compute_timestamp_mech_calls(
199
- all_trades: pd.DataFrame, all_tools: pd.DataFrame
200
- ) -> list:
201
- """Function to compute the mech calls based on timestamps but without repeating mech calls"""
202
- mech_calls_contents = []
203
- request_timestamps_used = {}
204
- # initialize the dict with all markets
205
- all_markets = all_trades.title.unique()
206
- for market in all_markets:
207
- request_timestamps_used[market] = []
208
-
209
- for i, trade in all_trades.iterrows():
210
- trader = trade["trader_address"]
211
- trade_id = trade["id"]
212
- market = trade["title"]
213
- trade_ts = trade["creation_timestamp"]
214
- market_requests = all_tools.loc[
215
- (all_tools["trader_address"] == trader) & (all_tools["title"] == market)
216
- ]
217
- # traverse market requests
218
- total_mech_calls = 0
219
- for i, mech_request in market_requests.iterrows():
220
- # check timestamp (before the trade)
221
- request_ts = mech_request["request_time"]
222
- if request_ts < trade_ts:
223
- # check the timestamp has not been used in a previous trade
224
- used_timestamps = request_timestamps_used[market]
225
- if request_ts not in used_timestamps:
226
- request_timestamps_used[market].append(request_ts)
227
- total_mech_calls += 1
228
- # create an entry for the dataframe
229
- mech_call_entry = {}
230
- mech_call_entry["trader_address"] = trader
231
- mech_call_entry["market"] = market
232
- mech_call_entry["trade_id"] = trade_id
233
- mech_call_entry["total_mech_calls"] = total_mech_calls
234
- mech_calls_contents.append(mech_call_entry)
235
- return mech_calls_contents
236
-
237
-
238
- def compute_mech_calls_based_on_timestamps(
239
- fpmmTrades: pd.DataFrame, tools: pd.DataFrame
240
- ) -> pd.DataFrame:
241
- """Function to compute the mech calls needed per trade at the trader and market level using timestamps"""
242
- nr_traders = len(fpmmTrades["trader_address"].unique())
243
- fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
244
- fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
245
- fpmmTrades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)
246
- tools["request_time"] = pd.to_datetime(tools["request_time"])
247
- tools["request_date"] = tools["request_time"].dt.date
248
- tools = tools.sort_values(by="request_time", ascending=True)
249
- all_mech_calls = []
250
- for trader in tqdm(
251
- fpmmTrades["trader_address"].unique(),
252
- total=nr_traders,
253
- desc="creating mech calls count based on timestamps",
254
- ):
255
- # compute the mech calls for each trader
256
- all_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
257
- all_tools = tools[tools["trader_address"] == trader]
258
- trader_mech_calls = compute_timestamp_mech_calls(all_trades, all_tools)
259
- all_mech_calls.extend(trader_mech_calls)
260
- return pd.DataFrame.from_dict(all_mech_calls, orient="columns")
261
-
262
-
263
- if __name__ == "__main__":
264
- # update_trade_nr_mech_calls(non_agents=True)
265
- tools = pd.read_parquet(TMP_DIR / "tools.parquet")
266
- fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
267
- fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
268
- lambda x: transform_to_datetime(x)
269
- )
270
- result = compute_mech_calls_based_on_timestamps(fpmmTrades=fpmmTrades, tools=tools)
271
- result.to_parquet(TMP_DIR / "result_df.parquet", index=False)
 
scripts/profitability.py DELETED
@@ -1,530 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # ------------------------------------------------------------------------------
3
- #
4
- # Copyright 2023 Valory AG
5
- #
6
- # Licensed under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License.
8
- # You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS,
14
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- # See the License for the specific language governing permissions and
16
- # limitations under the License.
17
- #
18
- # ------------------------------------------------------------------------------
19
-
20
- import time
21
- import pandas as pd
22
- from typing import Any
23
- from enum import Enum
24
- from tqdm import tqdm
25
- import numpy as np
26
- import os
27
- from web3_utils import query_conditional_tokens_gc_subgraph
28
- from get_mech_info import (
29
- DATETIME_60_DAYS_AGO,
30
- update_tools_parquet,
31
- update_all_trades_parquet,
32
- )
33
- from utils import (
34
- wei_to_unit,
35
- convert_hex_to_int,
36
- JSON_DATA_DIR,
37
- DATA_DIR,
38
- DEFAULT_MECH_FEE,
39
- TMP_DIR,
40
- measure_execution_time,
41
- )
42
- from staking import label_trades_by_staking
43
- from nr_mech_calls import (
44
- create_unknown_traders_df,
45
- transform_to_datetime,
46
- compute_mech_calls_based_on_timestamps,
47
- )
48
-
49
- DUST_THRESHOLD = 10000000000000
50
- INVALID_ANSWER = -1
51
- DEFAULT_60_DAYS_AGO_TIMESTAMP = (DATETIME_60_DAYS_AGO).timestamp()
52
- WXDAI_CONTRACT_ADDRESS = "0xe91D153E0b41518A2Ce8Dd3D7944Fa863463a97d"
53
- DUST_THRESHOLD = 10000000000000
54
-
55
-
56
- class MarketState(Enum):
57
- """Market state"""
58
-
59
- OPEN = 1
60
- PENDING = 2
61
- FINALIZING = 3
62
- ARBITRATING = 4
63
- CLOSED = 5
64
-
65
- def __str__(self) -> str:
66
- """Prints the market status."""
67
- return self.name.capitalize()
68
-
69
-
70
- class MarketAttribute(Enum):
71
- """Attribute"""
72
-
73
- NUM_TRADES = "Num_trades"
74
- WINNER_TRADES = "Winner_trades"
75
- NUM_REDEEMED = "Num_redeemed"
76
- INVESTMENT = "Investment"
77
- FEES = "Fees"
78
- MECH_CALLS = "Mech_calls"
79
- MECH_FEES = "Mech_fees"
80
- EARNINGS = "Earnings"
81
- NET_EARNINGS = "Net_earnings"
82
- REDEMPTIONS = "Redemptions"
83
- ROI = "ROI"
84
-
85
- def __str__(self) -> str:
86
- """Prints the attribute."""
87
- return self.value
88
-
89
- def __repr__(self) -> str:
90
- """Prints the attribute representation."""
91
- return self.name
92
-
93
- @staticmethod
94
- def argparse(s: str) -> "MarketAttribute":
95
- """Performs string conversion to MarketAttribute."""
96
- try:
97
- return MarketAttribute[s.upper()]
98
- except KeyError as e:
99
- raise ValueError(f"Invalid MarketAttribute: {s}") from e
100
-
101
-
102
- ALL_TRADES_STATS_DF_COLS = [
103
- "trader_address",
104
- "market_creator",
105
- "trade_id",
106
- "creation_timestamp",
107
- "title",
108
- "market_status",
109
- "collateral_amount",
110
- "outcome_index",
111
- "trade_fee_amount",
112
- "outcomes_tokens_traded",
113
- "current_answer",
114
- "is_invalid",
115
- "winning_trade",
116
- "earnings",
117
- "redeemed",
118
- "redeemed_amount",
119
- "num_mech_calls",
120
- "mech_fee_amount",
121
- "net_earnings",
122
- "roi",
123
- ]
124
-
125
-
126
- def _is_redeemed(user_json: dict[str, Any], fpmmTrade: dict[str, Any]) -> bool:
127
- """Returns whether the user has redeemed the position."""
128
- user_positions = user_json["data"]["user"]["userPositions"]
129
- condition_id = fpmmTrade["fpmm.condition.id"]
130
- for position in user_positions:
131
- position_condition_ids = position["position"]["conditionIds"]
132
- balance = int(position["balance"])
133
-
134
- if condition_id in position_condition_ids:
135
- if balance == 0:
136
- return True
137
- # return early
138
- return False
139
- return False
140
-
141
-
142
- def prepare_profitalibity_data(
143
- tools_filename: str,
144
- trades_filename: str,
145
- tmp_dir: bool = False,
146
- ) -> pd.DataFrame:
147
- """Prepare data for profitalibity analysis."""
148
-
149
- # Check if tools.parquet is in the same directory
150
- try:
151
- if tmp_dir:
152
- tools = pd.read_parquet(TMP_DIR / tools_filename)
153
- else:
154
- tools = pd.read_parquet(DATA_DIR / tools_filename)
155
-
156
- # make sure trader_address is in the columns
157
- assert "trader_address" in tools.columns, "trader_address column not found"
158
-
159
- # lowercase and strip trader_address
160
- tools["trader_address"] = tools["trader_address"].str.lower().str.strip()
161
-
162
- tools.drop_duplicates(
163
- subset=["request_id", "request_block"], keep="last", inplace=True
164
- )
165
- tools.to_parquet(DATA_DIR / tools_filename)
166
- print(f"{tools_filename} loaded")
167
- except FileNotFoundError:
168
- print(f"{tools_filename} not found.")
169
- return
170
-
171
- # Check if fpmmTrades.parquet is in the same directory
172
- print("Reading the new trades file")
173
- try:
174
- if tmp_dir:
175
- fpmmTrades = pd.read_parquet(TMP_DIR / trades_filename)
176
- else:
177
- fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
178
- except FileNotFoundError:
179
- print(f"Error reading {trades_filename} file.")
180
-
181
- # make sure trader_address is in the columns
182
- assert "trader_address" in fpmmTrades.columns, "trader_address column not found"
183
-
184
- # lowercase and strip trader_address
185
- fpmmTrades["trader_address"] = fpmmTrades["trader_address"].str.lower().str.strip()
186
-
187
- return fpmmTrades
188
-
189
-
190
- def determine_market_status(trade, current_answer):
191
- """Determine the market status of a trade."""
192
- if (current_answer is np.nan or current_answer is None) and time.time() >= int(
193
- trade["fpmm.openingTimestamp"]
194
- ):
195
- return MarketState.PENDING
196
- elif current_answer is np.nan or current_answer is None:
197
- return MarketState.OPEN
198
- elif trade["fpmm.isPendingArbitration"]:
199
- return MarketState.ARBITRATING
200
- elif time.time() < int(trade["fpmm.answerFinalizedTimestamp"]):
201
- return MarketState.FINALIZING
202
- return MarketState.CLOSED
203
-
204
-
205
- def analyse_trader(
206
- trader_address: str,
207
- fpmmTrades: pd.DataFrame,
208
- trader_estimated_mech_calls: pd.DataFrame,
209
- daily_info: bool = False,
210
- ) -> pd.DataFrame:
211
- """Analyse a trader's trades"""
212
- fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
213
- fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
214
- # Filter trades and tools for the given trader
215
- trades = fpmmTrades[fpmmTrades["trader_address"] == trader_address]
216
-
217
- # Prepare the DataFrame
218
- trades_df = pd.DataFrame(columns=ALL_TRADES_STATS_DF_COLS)
219
- if trades.empty:
220
- return trades_df
221
-
222
- # Fetch user's conditional tokens gc graph
223
- try:
224
- user_json = query_conditional_tokens_gc_subgraph(trader_address)
225
- except Exception as e:
226
- print(f"Error fetching user data: {e}")
227
- return trades_df
228
-
229
- # Iterate over the trades
230
- trades_answer_nan = 0
231
- trades_no_closed_market = 0
232
- for i, trade in tqdm(trades.iterrows(), total=len(trades), desc="Analysing trades"):
233
- try:
234
- market_answer = trade["fpmm.currentAnswer"]
235
- trading_day = trade["creation_date"]
236
- trade_id = trade["id"]
237
- if not daily_info and not market_answer:
238
- # print(f"Skipping trade {i} because currentAnswer is NaN")
239
- trades_answer_nan += 1
240
- continue
241
- # Parsing and computing shared values
242
- collateral_amount = wei_to_unit(float(trade["collateralAmount"]))
243
- fee_amount = wei_to_unit(float(trade["feeAmount"]))
244
- outcome_tokens_traded = wei_to_unit(float(trade["outcomeTokensTraded"]))
245
- earnings, winner_trade = (0, False)
246
- redemption = _is_redeemed(user_json, trade)
247
- current_answer = market_answer if market_answer else None
248
- market_creator = trade["market_creator"]
249
-
250
- # Determine market status
251
- market_status = determine_market_status(trade, current_answer)
252
-
253
- # Skip non-closed markets
254
- if not daily_info and market_status != MarketState.CLOSED:
255
- # print(
256
- # f"Skipping trade {i} because market is not closed. Market Status: {market_status}"
257
- # )
258
- trades_no_closed_market += 1
259
- continue
260
- if current_answer is not None:
261
- current_answer = convert_hex_to_int(current_answer)
262
-
263
- # Compute invalidity
264
- is_invalid = current_answer == INVALID_ANSWER
265
-
266
- # Compute earnings and winner trade status
267
- if current_answer is None:
268
- earnings = 0.0
269
- winner_trade = None
270
- elif is_invalid:
271
- earnings = collateral_amount
272
- winner_trade = False
273
- elif int(trade["outcomeIndex"]) == current_answer:
274
- earnings = outcome_tokens_traded
275
- winner_trade = True
276
-
277
- # Compute mech calls using the title, and trade id
278
- if daily_info:
279
- total_mech_calls = trader_estimated_mech_calls.loc[
280
- (trader_estimated_mech_calls["trading_day"] == trading_day),
281
- "total_mech_calls",
282
- ].iloc[0]
283
- else:
284
- total_mech_calls = trader_estimated_mech_calls.loc[
285
- (trader_estimated_mech_calls["market"] == trade["title"])
286
- & (trader_estimated_mech_calls["trade_id"] == trade_id),
287
- "total_mech_calls",
288
- ].iloc[0]
289
-
290
- net_earnings = (
291
- earnings
292
- - fee_amount
293
- - (total_mech_calls * DEFAULT_MECH_FEE)
294
- - collateral_amount
295
- )
296
-
297
- # Assign values to DataFrame
298
- trades_df.loc[i] = {
299
- "trader_address": trader_address,
300
- "market_creator": market_creator,
301
- "trade_id": trade["id"],
302
- "market_status": market_status.name,
303
- "creation_timestamp": trade["creationTimestamp"],
304
- "title": trade["title"],
305
- "collateral_amount": collateral_amount,
306
- "outcome_index": trade["outcomeIndex"],
307
- "trade_fee_amount": fee_amount,
308
- "outcomes_tokens_traded": outcome_tokens_traded,
309
- "current_answer": current_answer,
310
- "is_invalid": is_invalid,
311
- "winning_trade": winner_trade,
312
- "earnings": earnings,
313
- "redeemed": redemption,
314
- "redeemed_amount": earnings if redemption else 0,
315
- "num_mech_calls": total_mech_calls,
316
- "mech_fee_amount": total_mech_calls * DEFAULT_MECH_FEE,
317
- "net_earnings": net_earnings,
318
- "roi": net_earnings
319
- / (
320
- collateral_amount + fee_amount + total_mech_calls * DEFAULT_MECH_FEE
321
- ),
322
- }
323
-
324
- except Exception as e:
325
- print(f"Error processing trade {i}: {e}")
326
- print(trade)
327
- continue
328
-
329
- print(f"Number of trades where currentAnswer is NaN = {trades_answer_nan}")
330
- print(
331
- f"Number of trades where the market is not closed = {trades_no_closed_market}"
332
- )
333
- return trades_df
334
-
335
-
336
- def analyse_all_traders(
337
- trades: pd.DataFrame,
338
- estimated_mech_calls: pd.DataFrame,
339
- daily_info: bool = False,
340
- ) -> pd.DataFrame:
341
- """Analyse all creators."""
342
-
343
- all_traders = []
344
- for trader in tqdm(
345
- trades["trader_address"].unique(),
346
- total=len(trades["trader_address"].unique()),
347
- desc="Analysing creators",
348
- ):
349
- trader_estimated_mech_calls = estimated_mech_calls.loc[
350
- estimated_mech_calls["trader_address"] == trader
351
- ]
352
- all_traders.append(
353
- analyse_trader(trader, trades, trader_estimated_mech_calls, daily_info)
354
- )
355
-
356
- # concat all creators
357
- all_creators_df = pd.concat(all_traders)
358
-
359
- return all_creators_df
360
-
361
-
362
- @measure_execution_time
363
- def run_profitability_analysis(
364
- tools_filename: str,
365
- trades_filename: str,
366
- merge: bool = False,
367
- tmp_dir: bool = False,
368
- ):
369
- """Create all trades analysis."""
370
- print(f"Preparing data with {tools_filename} and {trades_filename}")
371
- fpmmTrades = prepare_profitalibity_data(
372
- tools_filename, trades_filename, tmp_dir=tmp_dir
373
- )
374
-
375
- if merge:
376
- update_tools_parquet(tools_filename)
377
-
378
- tools = pd.read_parquet(TMP_DIR / "tools.parquet")
379
-
380
- try:
381
- fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
382
- lambda x: transform_to_datetime(x)
383
- )
384
- except Exception as e:
385
- print(f"Transformation not needed")
386
-
387
- print("Computing the estimated mech calls dataset")
388
- trade_mech_calls = compute_mech_calls_based_on_timestamps(
389
- fpmmTrades=fpmmTrades, tools=tools
390
- )
391
- trade_mech_calls.to_parquet(TMP_DIR / "trade_mech_calls.parquet")
392
-
393
- print(trade_mech_calls.total_mech_calls.describe())
394
- print("Analysing trades...")
395
- all_trades_df = analyse_all_traders(fpmmTrades, trade_mech_calls)
396
-
397
- # # merge previous files if requested
398
- if merge:
399
- all_trades_df = update_all_trades_parquet(all_trades_df)
400
-
401
- # debugging purposes
402
- all_trades_df.to_parquet(JSON_DATA_DIR / "all_trades_df.parquet", index=False)
403
- # all_trades_df = pd.read_parquet(JSON_DATA_DIR / "all_trades_df.parquet")
404
-
405
- # filter invalid markets. Condition: "is_invalid" is True
406
- invalid_trades = all_trades_df.loc[all_trades_df["is_invalid"] == True]
407
- if len(invalid_trades) == 0:
408
- print("No new invalid trades")
409
- else:
410
- if merge:
411
- try:
412
- print("Merging invalid trades parquet file")
413
- old_invalid_trades = pd.read_parquet(
414
- DATA_DIR / "invalid_trades.parquet"
415
- )
416
- merge_df = pd.concat(
417
- [old_invalid_trades, invalid_trades], ignore_index=True
418
- )
419
- invalid_trades = merge_df.drop_duplicates()
420
- except Exception as e:
421
- print(f"Error updating the invalid trades parquet {e}")
422
- invalid_trades.to_parquet(DATA_DIR / "invalid_trades.parquet", index=False)
423
-
424
- all_trades_df = all_trades_df.loc[all_trades_df["is_invalid"] == False]
425
-
426
- all_trades_df = label_trades_by_staking(trades_df=all_trades_df)
427
-
428
- print("Creating unknown traders dataset")
429
- unknown_traders_df, all_trades_df = create_unknown_traders_df(
430
- trades_df=all_trades_df
431
- )
432
- # merge with previous unknown traders dataset
433
- previous_unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
434
-
435
- unknown_traders_df: pd.DataFrame = pd.concat(
436
- [unknown_traders_df, previous_unknown_traders], ignore_index=True
437
- )
438
- unknown_traders_df.drop_duplicates("trade_id", keep="last", inplace=True)
439
- unknown_traders_df.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
440
-
441
- # save to parquet
442
- all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
443
-
444
- print("Done!")
445
-
446
- return all_trades_df
447
-
448
-
449
- def add_trades_profitability(trades_filename: str):
450
- print("Reading the trades file")
451
- try:
452
- fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
453
- except FileNotFoundError:
454
- print(f"Error reading {trades_filename} file .")
455
-
456
- # make sure trader_address is in the columns
457
- assert "trader_address" in fpmmTrades.columns, "trader_address column not found"
458
-
459
- # lowercase and strip creator_address
460
- fpmmTrades["trader_address"] = fpmmTrades["trader_address"].str.lower().str.strip()
461
-
462
- print("Reading tools parquet file")
463
- tools = pd.read_parquet(TMP_DIR / "tools.parquet")
464
-
465
- try:
466
- fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
467
- lambda x: transform_to_datetime(x)
468
- )
469
- except Exception as e:
470
- print(f"Transformation not needed")
471
-
472
- print("Computing the estimated mech calls dataset")
473
- trade_mech_calls = compute_mech_calls_based_on_timestamps(
474
- fpmmTrades=fpmmTrades, tools=tools
475
- )
476
- print(trade_mech_calls.total_mech_calls.describe())
477
- print("Analysing trades...")
478
- all_trades_df = analyse_all_traders(fpmmTrades, trade_mech_calls)
479
-
480
- # debugging purposes
481
- all_trades_df.to_parquet(JSON_DATA_DIR / "missing_trades_df.parquet", index=False)
482
- # filter invalid markets. Condition: "is_invalid" is True
483
- print("Checking invalid trades")
484
- invalid_trades = all_trades_df.loc[all_trades_df["is_invalid"] == True]
485
- if len(invalid_trades) > 0:
486
- try:
487
- print("Merging invalid trades parquet file")
488
- old_invalid_trades = pd.read_parquet(DATA_DIR / "invalid_trades.parquet")
489
- merge_df = pd.concat(
490
- [old_invalid_trades, invalid_trades], ignore_index=True
491
- )
492
- invalid_trades = merge_df.drop_duplicates("trade_id")
493
- except Exception as e:
494
- print(f"Error updating the invalid trades parquet {e}")
495
- invalid_trades.to_parquet(DATA_DIR / "invalid_trades.parquet", index=False)
496
- all_trades_df = all_trades_df.loc[all_trades_df["is_invalid"] == False]
497
-
498
- print("Adding staking labels")
499
- all_trades_df = label_trades_by_staking(trades_df=all_trades_df)
500
- print("Creating unknown traders dataset")
501
- unknown_traders_df, all_trades_df = create_unknown_traders_df(
502
- trades_df=all_trades_df
503
- )
504
- if len(unknown_traders_df) > 0:
505
- print("Merging unknown traders info")
506
- # merge with previous unknown traders dataset
507
- previous_unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
508
-
509
- unknown_traders_df: pd.DataFrame = pd.concat(
510
- [unknown_traders_df, previous_unknown_traders], ignore_index=True
511
- )
512
- unknown_traders_df.drop_duplicates("trade_id", keep="last", inplace=True)
513
- unknown_traders_df.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
514
-
515
- print("merge with previous all_trades_profitability")
516
- old_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
517
- all_trades_df: pd.DataFrame = pd.concat(
518
- [all_trades_df, old_trades], ignore_index=True
519
- )
520
- all_trades_df.drop_duplicates("trade_id", keep="last", inplace=True)
521
- all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
522
-
523
-
524
- if __name__ == "__main__":
525
- run_profitability_analysis(
526
- tools_filename="tools.parquet",
527
- trades_filename="fpmmTrades.parquet",
528
- merge=False,
529
- tmp_dir=True,
530
- )
scripts/pull_data.py DELETED
@@ -1,173 +0,0 @@
1
- import logging
2
- from datetime import datetime
3
- import pandas as pd
4
- from markets import (
5
- etl as mkt_etl,
6
- DEFAULT_FILENAME as MARKETS_FILENAME,
7
- fpmmTrades_etl,
8
- update_fpmmTrades_parquet,
9
- )
10
- from tools import generate_tools_file
11
- from profitability import run_profitability_analysis, add_trades_profitability
12
- from utils import (
13
- get_question,
14
- current_answer,
15
- RPC,
16
- measure_execution_time,
17
- DATA_DIR,
18
- HIST_DIR,
19
- TMP_DIR,
20
- )
21
- from get_mech_info import (
22
- get_mech_events_since_last_run,
23
- update_json_files,
24
- )
25
- from update_tools_accuracy import compute_tools_accuracy
26
- from cleaning_old_info import clean_old_data_from_parquet_files
27
- from web3_utils import updating_timestamps
28
- from manage_space_files import move_files
29
- from cloud_storage import upload_historical_file
30
- from tools_metrics import compute_tools_based_datasets
31
-
32
-
33
- logging.basicConfig(
34
- level=logging.INFO,
35
- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
36
- datefmt="%Y-%m-%d %H:%M:%S",
37
- )
38
- logger = logging.getLogger(__name__)
39
-
40
-
41
- def add_current_answer(tools_filename: str):
42
- # Get currentAnswer from FPMMS
43
- fpmms = pd.read_parquet(DATA_DIR / MARKETS_FILENAME)
44
- tools = pd.read_parquet(DATA_DIR / tools_filename)
45
-
46
- # Get the question from the tools
47
- logging.info("Getting the question and current answer for the tools")
48
- tools["title"] = tools["prompt_request"].apply(lambda x: get_question(x))
49
- tools["currentAnswer"] = tools["title"].apply(lambda x: current_answer(x, fpmms))
50
-
51
- tools["currentAnswer"] = tools["currentAnswer"].str.replace("yes", "Yes")
52
- tools["currentAnswer"] = tools["currentAnswer"].str.replace("no", "No")
53
- # Save the tools data after the updates on the content
54
- tools.to_parquet(DATA_DIR / tools_filename, index=False)
55
- del fpmms
56
-
57
-
58
- def save_historical_data():
59
- """Function to save a copy of the main trades and tools file
60
- into the historical folder"""
61
- print("Saving historical data copies")
62
- current_datetime = datetime.now()
63
-
64
- timestamp = current_datetime.strftime("%Y%m%d_%H%M%S")
65
-
66
- try:
67
- tools = pd.read_parquet(TMP_DIR / "tools.parquet")
68
- filename = f"tools_{timestamp}.parquet"
69
- tools.to_parquet(HIST_DIR / filename, index=False)
70
- # save into cloud storage
71
- upload_historical_file(filename)
72
- except Exception as e:
73
- print(f"Error saving tools file in the historical folder {e}")
74
-
75
- try:
76
- all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
77
- filename = f"all_trades_profitability_{timestamp}.parquet"
78
- all_trades.to_parquet(HIST_DIR / filename, index=False)
79
- # save into cloud storage
80
- upload_historical_file(filename)
81
-
82
- except Exception as e:
83
- print(
84
- f"Error saving all_trades_profitability file in the historical folder {e}"
85
- )
86
-
87
-
88
- @measure_execution_time
89
- def only_new_weekly_analysis():
90
- """Run weekly analysis for the FPMMS project."""
91
- rpc = RPC
92
- # Run markets ETL
93
- logging.info("Running markets ETL")
94
- mkt_etl(MARKETS_FILENAME)
95
- logging.info("Markets ETL completed")
96
-
97
- # Mech events ETL
98
- logging.info("Generating the mech json files")
99
- # get only new data
100
- latest_timestamp = get_mech_events_since_last_run(logger)
101
- if latest_timestamp == None:
102
- print("Error while getting the mech events")
103
- return
104
- logging.info(f"Finished generating the mech json files from {latest_timestamp}")
105
-
106
- # FpmmTrades ETL
107
- fpmmTrades_etl(
108
- trades_filename="new_fpmmTrades.parquet",
109
- from_timestamp=int(latest_timestamp.timestamp()),
110
- )
111
- # merge with previous file
112
- print("Merging with previous fpmmTrades file")
113
- update_fpmmTrades_parquet(trades_filename="new_fpmmTrades.parquet")
114
-
115
- # Run tools ETL
116
- logging.info("Generate and parse the tools content")
117
- # generate only new file
118
- generate_tools_file("new_tools_info.json", "new_tools.parquet")
119
- logging.info("Tools ETL completed")
120
-
121
- add_current_answer("new_tools.parquet")
122
-
123
- # # Run profitability analysis
124
- logging.info("Running profitability analysis")
125
- run_profitability_analysis(
126
- tools_filename="new_tools.parquet",
127
- trades_filename="new_fpmmTrades.parquet",
128
- merge=True,
129
- )
130
-
131
- logging.info("Profitability analysis completed")
132
-
133
- # merge new json files with old json files
134
- update_json_files()
135
-
136
- save_historical_data()
137
- try:
138
- clean_old_data_from_parquet_files("2024-11-26")
139
- except Exception as e:
140
- print("Error cleaning the oldest information from parquet files")
141
- print(f"reason = {e}")
142
- compute_tools_accuracy()
143
- compute_tools_based_datasets()
144
- # # move to tmp folder the new generated files
145
- move_files()
146
- logging.info("Weekly analysis files generated and saved")
147
-
148
-
149
- def restoring_trades_data(from_date: str, to_date: str):
150
- # Convert the string to datetime64[ns, UTC]
151
- min_date_utc = pd.to_datetime(from_date, format="%Y-%m-%d", utc=True)
152
- max_date_utc = pd.to_datetime(to_date, format="%Y-%m-%d", utc=True)
153
- logging.info("Running markets ETL")
154
- mkt_etl(MARKETS_FILENAME)
155
- logging.info("Markets ETL completed")
156
-
157
- fpmmTrades_etl(
158
- trades_filename="missing_fpmmTrades.parquet",
159
- from_timestamp=int(min_date_utc.timestamp()),
160
- to_timestamp=int(max_date_utc.timestamp()),
161
- )
162
-
163
- # merge with the old file
164
- print("Merging with previous fpmmTrades file")
165
- update_fpmmTrades_parquet(trades_filename="missing_fpmmTrades.parquet")
166
-
167
- # adding tools information
168
- add_trades_profitability(trades_filename="missing_fpmmTrades.parquet")
169
-
170
-
171
- if __name__ == "__main__":
172
- only_new_weekly_analysis()
173
- # restoring_trades_data("2024-12-28", "2025-01-07")
scripts/queries.py DELETED
@@ -1,161 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # ------------------------------------------------------------------------------
3
- #
4
- # Copyright 2024 Valory AG
5
- #
6
- # Licensed under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License.
8
- # You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS,
14
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- # See the License for the specific language governing permissions and
16
- # limitations under the License.
17
- #
18
- # ------------------------------------------------------------------------------
19
-
20
- from string import Template
21
-
22
- FPMMS_FIELD = "fixedProductMarketMakers"
23
- QUERY_FIELD = "query"
24
- ERROR_FIELD = "errors"
25
- DATA_FIELD = "data"
26
- ID_FIELD = "id"
27
- ANSWER_FIELD = "currentAnswer"
28
- QUESTION_FIELD = "question"
29
- OUTCOMES_FIELD = "outcomes"
30
- TITLE_FIELD = "title"
31
- ANSWER_TIMESTAMP_FIELD = "currentAnswerTimestamp"
32
- OPENING_TIMESTAMP_FIELD = "openingTimestamp"
33
- RESOLUTION_TIMESTAMP_FIELD = "resolutionTimestamp"
34
- CREATION_TIMESTAMP_FIELD = "creationTimestamp"
35
- LIQUIDITY_FIELD = "liquidityParameter"
36
- LIQUIDIY_MEASURE_FIELD = "liquidityMeasure"
37
- TOKEN_AMOUNTS_FIELD = "outcomeTokenAmounts"
38
-
39
- FPMMS_QUERY = Template(
40
- """
41
- {
42
- ${fpmms_field}(
43
- where: {
44
- creator: "${creator}",
45
- id_gt: "${fpmm_id}",
46
- isPendingArbitration: false
47
- },
48
- orderBy: ${id_field}
49
- first: ${first}
50
- ){
51
- ${id_field}
52
- ${answer_field}
53
- ${question_field} {
54
- ${outcomes_field}
55
- }
56
- ${title_field}
57
- }
58
- }
59
- """
60
- )
61
-
62
- omen_xdai_trades_query = Template(
63
- """
64
- {
65
- fpmmTrades(
66
- where: {
67
- type: Buy,
68
- fpmm_: {
69
- creator: "${fpmm_creator}"
70
- creationTimestamp_gte: "${fpmm_creationTimestamp_gte}",
71
- creationTimestamp_lt: "${fpmm_creationTimestamp_lte}"
72
- },
73
- creationTimestamp_gte: "${creationTimestamp_gte}",
74
- creationTimestamp_lte: "${creationTimestamp_lte}"
75
- id_gt: "${id_gt}"
76
- }
77
- first: ${first}
78
- orderBy: id
79
- orderDirection: asc
80
- ) {
81
- id
82
- title
83
- collateralToken
84
- outcomeTokenMarginalPrice
85
- oldOutcomeTokenMarginalPrice
86
- type
87
- creator {
88
- id
89
- }
90
- creationTimestamp
91
- collateralAmount
92
- collateralAmountUSD
93
- feeAmount
94
- outcomeIndex
95
- outcomeTokensTraded
96
- transactionHash
97
- fpmm {
98
- id
99
- outcomes
100
- title
101
- answerFinalizedTimestamp
102
- currentAnswer
103
- isPendingArbitration
104
- arbitrationOccurred
105
- openingTimestamp
106
- condition {
107
- id
108
- }
109
- }
110
- }
111
- }
112
- """
113
- )
114
-
115
- conditional_tokens_gc_user_query = Template(
116
- """
117
- {
118
- user(id: "${id}") {
119
- userPositions(
120
- first: ${first}
121
- where: {
122
- id_gt: "${userPositions_id_gt}"
123
- }
124
- orderBy: id
125
- ) {
126
- balance
127
- id
128
- position {
129
- id
130
- conditionIds
131
- }
132
- totalBalance
133
- wrappedBalance
134
- }
135
- }
136
- }
137
- """
138
- )
139
-
140
-
141
- TRADES_QUERY = """
142
- query fpmms_query($fpmm: String, $id_gt: ID) {
143
- fpmmTrades(
144
- where: {fpmm: $fpmm, id_gt: $id_gt, type: Buy}
145
- orderBy: id
146
- orderDirection: asc
147
- first: 1000
148
- ) {
149
- collateralAmount
150
- outcomeIndex
151
- outcomeTokensTraded
152
- id
153
- oldOutcomeTokenMarginalPrice
154
- outcomeTokenMarginalPrice
155
- type
156
- collateralAmountUSD
157
- creationTimestamp
158
- feeAmount
159
- }
160
- }
161
- """
scripts/staking.py DELETED
@@ -1,302 +0,0 @@
1
- import json
2
- import sys
3
- from typing import Any, List
4
- from utils import RPC, DATA_DIR, TMP_DIR, JSON_DATA_DIR
5
- import requests
6
- from tqdm import tqdm
7
- from web3 import Web3
8
- import pandas as pd
9
- import pickle
10
- import os
11
- from concurrent.futures import ThreadPoolExecutor, as_completed
12
-
13
- NUM_WORKERS = 10
14
- DEPRECATED_STAKING_PROGRAMS = {
15
- "quickstart_alpha_everest": "0x5add592ce0a1B5DceCebB5Dcac086Cd9F9e3eA5C",
16
- "quickstart_alpha_alpine": "0x2Ef503950Be67a98746F484DA0bBAdA339DF3326",
17
- "quickstart_alpha_coastal": "0x43fB32f25dce34EB76c78C7A42C8F40F84BCD237",
18
- }
19
- STAKING_PROGRAMS_QS = {
20
- "quickstart_beta_hobbyist": "0x389B46c259631Acd6a69Bde8B6cEe218230bAE8C",
21
- "quickstart_beta_hobbyist_2": "0x238EB6993b90a978ec6AAD7530d6429c949C08DA",
22
- "quickstart_beta_expert": "0x5344B7DD311e5d3DdDd46A4f71481bD7b05AAA3e",
23
- "quickstart_beta_expert_2": "0xb964e44c126410df341ae04B13aB10A985fE3513",
24
- "quickstart_beta_expert_3": "0x80faD33Cadb5F53f9D29F02Db97D682E8b101618",
25
- "quickstart_beta_expert_4": "0xaD9d891134443B443D7F30013c7e14Fe27F2E029",
26
- "quickstart_beta_expert_5": "0xE56dF1E563De1B10715cB313D514af350D207212",
27
- "quickstart_beta_expert_6": "0x2546214aEE7eEa4bEE7689C81231017CA231Dc93",
28
- "quickstart_beta_expert_7": "0xD7A3C8b975f71030135f1a66e9e23164d54fF455",
29
- "quickstart_beta_expert_8": "0x356C108D49C5eebd21c84c04E9162de41933030c",
30
- "quickstart_beta_expert_9": "0x17dBAe44BC5618Cc254055b386A29576b4F87015",
31
- "quickstart_beta_expert_10": "0xB0ef657b8302bd2c74B6E6D9B2b4b39145b19c6f",
32
- "quickstart_beta_expert_11": "0x3112c1613eAC3dBAE3D4E38CeF023eb9E2C91CF7",
33
- "quickstart_beta_expert_12": "0xF4a75F476801B3fBB2e7093aCDcc3576593Cc1fc",
34
- }
35
-
36
- STAKING_PROGRAMS_PEARL = {
37
- "pearl_alpha": "0xEE9F19b5DF06c7E8Bfc7B28745dcf944C504198A",
38
- "pearl_beta": "0xeF44Fb0842DDeF59D37f85D61A1eF492bbA6135d",
39
- "pearl_beta_2": "0x1c2F82413666d2a3fD8bC337b0268e62dDF67434",
40
- "pearl_beta_3": "0xBd59Ff0522aA773cB6074ce83cD1e4a05A457bc1",
41
- "pearl_beta_4": "0x3052451e1eAee78e62E169AfdF6288F8791F2918",
42
- "pearl_beta_5": "0x4Abe376Fda28c2F43b84884E5f822eA775DeA9F4",
43
- }
44
-
45
-
46
- SERVICE_REGISTRY_ADDRESS = "0x9338b5153AE39BB89f50468E608eD9d764B755fD"
47
-
48
-
49
- def _get_contract(address: str) -> Any:
50
- w3 = Web3(Web3.HTTPProvider(RPC))
51
- abi = _get_abi(address)
52
- contract = w3.eth.contract(address=Web3.to_checksum_address(address), abi=abi)
53
- return contract
54
-
55
-
56
- def _get_abi(address: str) -> List:
57
- contract_abi_url = (
58
- "https://gnosis.blockscout.com/api/v2/smart-contracts/{contract_address}"
59
- )
60
- response = requests.get(contract_abi_url.format(contract_address=address)).json()
61
-
62
- if "result" in response:
63
- result = response["result"]
64
- try:
65
- abi = json.loads(result)
66
- except json.JSONDecodeError:
67
- print("Error: Failed to parse 'result' field as JSON")
68
- sys.exit(1)
69
- else:
70
- abi = response.get("abi")
71
-
72
- return abi if abi else []
73
-
74
-
75
- def get_service_safe(service_id: int) -> str:
76
- """Gets the service Safe"""
77
- service_registry = _get_contract(SERVICE_REGISTRY_ADDRESS)
78
- service_safe_address = service_registry.functions.getService(service_id).call()[1]
79
- return service_safe_address
80
-
81
-
82
- def list_contract_functions(contract):
83
- function_names = []
84
- for item in contract.abi:
85
- if item.get("type") == "function":
86
- function_names.append(item.get("name"))
87
- return function_names
88
-
89
-
90
- def get_service_data(service_registry: Any, service_id: int) -> dict:
91
- tmp_map = {}
92
- # Get the list of addresses
93
- # print(f"getting addresses from service id ={service_id}")
94
-
95
- # available_functions = list_contract_functions(service_registry)
96
- # print("Available Contract Functions:")
97
- # for func in available_functions:
98
- # print(f"- {func}")
99
-
100
- data = service_registry.functions.getService(service_id).call()
101
- try:
102
- owner_data = service_registry.functions.ownerOf(service_id).call()
103
- except Exception as e:
104
- tqdm.write(f"Error: no owner data infor from {service_id}")
105
- return None
106
- # print(f"owner data = {owner_data}")
107
- address = data[1]
108
- state = data[-1]
109
- # print(f"address = {address}")
110
- # print(f"state={state}")
111
- # PEARL trade
112
-
113
- if address != "0x0000000000000000000000000000000000000000":
114
- tmp_map[service_id] = {
115
- "safe_address": address,
116
- "state": state,
117
- "owner_address": owner_data,
118
- }
119
- return tmp_map
120
-
121
-
122
- def update_service_map(start: int = 1, end: int = 2000):
123
- if os.path.exists(DATA_DIR / "service_map.pkl"):
124
- with open(DATA_DIR / "service_map.pkl", "rb") as f:
125
- service_map = pickle.load(f)
126
- else:
127
- service_map = {}
128
- print(f"updating service map from service id={start}")
129
- # we do not know which is the last service id right now
130
- service_registry = _get_contract(SERVICE_REGISTRY_ADDRESS)
131
- with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
132
- futures = []
133
- for service_id in range(start, end):
134
- futures.append(
135
- executor.submit(
136
- get_service_data,
137
- service_registry,
138
- service_id,
139
- )
140
- )
141
-
142
- for future in tqdm(
143
- as_completed(futures),
144
- total=len(futures),
145
- desc=f"Fetching all service data from contracts",
146
- ):
147
- partial_dict = future.result()
148
- if partial_dict:
149
- service_map.update(partial_dict)
150
-
151
- print(f"length of service map {len(service_map)}")
152
- with open(DATA_DIR / "service_map.pkl", "wb") as f:
153
- pickle.dump(service_map, f)
154
-
155
-
156
- def check_owner_staking_contract(owner_address: str) -> str:
157
- staking = "non_staking"
158
- owner_address = owner_address.lower()
159
- # check quickstart staking contracts
160
- qs_list = [x.lower() for x in STAKING_PROGRAMS_QS.values()]
161
- if owner_address in qs_list:
162
- return "quickstart"
163
-
164
- # check pearl staking contracts
165
- pearl_list = [x.lower() for x in STAKING_PROGRAMS_PEARL.values()]
166
- if owner_address in pearl_list:
167
- return "pearl"
168
-
169
- # check legacy staking contracts
170
- deprec_list = [x.lower() for x in DEPRECATED_STAKING_PROGRAMS.values()]
171
- if owner_address in deprec_list:
172
- return "quickstart"
173
-
174
- return staking
175
-
176
-
177
- def get_trader_address_staking(trader_address: str, service_map: dict) -> str:
178
- # check if there is any service id linked with that trader address
179
-
180
- found_key = -1
181
- for key, value in service_map.items():
182
- if value["safe_address"].lower() == trader_address.lower():
183
- # found a service
184
- found_key = key
185
- break
186
-
187
- if found_key == -1:
188
- return "non_Olas"
189
- owner = service_map[found_key]["owner_address"]
190
- return check_owner_staking_contract(owner_address=owner)
191
-
192
-
193
- def label_trades_by_staking(trades_df: pd.DataFrame, start: int = None) -> None:
194
- with open(DATA_DIR / "service_map.pkl", "rb") as f:
195
- service_map = pickle.load(f)
196
- # get the last service id
197
- keys = service_map.keys()
198
- if start is None:
199
- last_key = max(keys)
200
- else:
201
- last_key = start
202
- print(f"last service key = {last_key}")
203
- update_service_map(start=last_key)
204
- all_traders = trades_df.trader_address.unique()
205
- trades_df["staking"] = ""
206
- for trader in tqdm(all_traders, desc="Labeling traders by staking", unit="trader"):
207
- # tqdm.write(f"checking trader {trader}")
208
- staking_label = get_trader_address_staking(trader, service_map)
209
- if staking_label:
210
- trades_df.loc[trades_df["trader_address"] == trader, "staking"] = (
211
- staking_label
212
- )
213
- # tqdm.write(f"statking label {staking_label}")
214
- return trades_df
215
-
216
-
217
- def generate_retention_activity_file():
218
- tools = pd.read_parquet(TMP_DIR / "tools.parquet")
219
- tools["request_time"] = pd.to_datetime(tools["request_time"])
220
- tools["request_date"] = tools["request_time"].dt.date
221
- tools = tools.sort_values(by="request_time", ascending=True)
222
- reduced_tools_df = tools[
223
- ["trader_address", "request_time", "market_creator", "request_date"]
224
- ]
225
- print(f"length of reduced tools before labeling = {len(reduced_tools_df)}")
226
- reduced_tools_df = label_trades_by_staking(trades_df=reduced_tools_df)
227
- print(f"labeling of tools activity. {reduced_tools_df.staking.value_counts()}")
228
- print(f"length of reduced tools after labeling = {len(reduced_tools_df)}")
229
- reduced_tools_df = reduced_tools_df.sort_values(by="request_time", ascending=True)
230
- reduced_tools_df["month_year_week"] = (
231
- pd.to_datetime(tools["request_time"])
232
- .dt.to_period("W")
233
- .dt.start_time.dt.strftime("%b-%d-%Y")
234
- )
235
- reduced_tools_df.to_parquet(TMP_DIR / "retention_activity.parquet")
236
- return True
237
-
238
-
239
- def check_list_addresses(address_list: list):
240
- with open(DATA_DIR / "service_map.pkl", "rb") as f:
241
- service_map = pickle.load(f)
242
- # check if it is part of any service id on the map
243
- mapping = {}
244
- print(f"length of service map={len(service_map)}")
245
- keys = service_map.keys()
246
- last_key = max(keys)
247
-
248
- print(f"last service key = {last_key}")
249
- update_service_map(start=last_key)
250
- found_key = -1
251
- trader_types = []
252
- for trader_address in address_list:
253
- for key, value in service_map.items():
254
- if value["safe_address"].lower() == trader_address.lower():
255
- # found a service
256
- found_key = key
257
- mapping[trader_address] = "Olas"
258
- trader_types.append("Olas")
259
- break
260
-
261
- if found_key == -1:
262
- mapping[trader_address] = "non_Olas"
263
- trader_types.append("non_Olas")
264
- return mapping
265
-
266
-
267
- def check_service_map():
268
- with open(DATA_DIR / "service_map.pkl", "rb") as f:
269
- service_map = pickle.load(f)
270
- # check if it is part of any service id on the map
271
- mapping = {}
272
- print(f"length of service map={len(service_map)}")
273
- keys = service_map.keys()
274
- last_key = max(keys)
275
- print(f"last key ={last_key}")
276
- missing_keys = 0
277
- for i in range(1, last_key):
278
- if i not in keys:
279
- missing_keys += 1
280
- print(f"missing key = {i}")
281
- print(f"total missing keys = {missing_keys}")
282
-
283
-
284
- if __name__ == "__main__":
285
- # create_service_map()
286
- trades_df = pd.read_parquet(JSON_DATA_DIR / "all_trades_df.parquet")
287
- trades_df = trades_df.loc[trades_df["is_invalid"] == False]
288
-
289
- trades_df = label_trades_by_staking(trades_df=trades_df)
290
- print(trades_df.staking.value_counts())
291
- # trades_df.to_parquet(TMP_DIR / "result_staking.parquet", index=False)
292
- # generate_retention_activity_file()
293
- # a_list = [
294
- # "0x027592700fafc4db3221bb662d7bdc7f546a2bb5",
295
- # "0x0845f4ad01a2f41da618848c7a9e56b64377965e",
296
- # ]
297
- # check_list_addresses(address_list=a_list)
298
- # update_service_map()
299
- # check_service_map()
300
- # unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
301
- # unknown_traders = label_trades_by_staking(trades_df=unknown_traders)
302
- # unknown_traders.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
scripts/tools.py DELETED
@@ -1,320 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # ------------------------------------------------------------------------------
3
- #
4
- # Copyright 2023 Valory AG
5
- #
6
- # Licensed under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License.
8
- # You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS,
14
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- # See the License for the specific language governing permissions and
16
- # limitations under the License.
17
- #
18
- # ------------------------------------------------------------------------------
19
-
20
- import json
21
- from typing import (
22
- Optional,
23
- List,
24
- Dict,
25
- Union,
26
- Any,
27
- )
28
- import pandas as pd
29
- import requests
30
- from datetime import datetime
31
- from gnosis_timestamps import transform_timestamp_to_datetime
32
- from requests.adapters import HTTPAdapter
33
- from tqdm import tqdm
34
- from urllib3 import Retry
35
- from markets import add_market_creator
36
- from concurrent.futures import ThreadPoolExecutor, as_completed
37
- from web3_utils import (
38
- N_IPFS_RETRIES,
39
- )
40
- from utils import (
41
- clean,
42
- BLOCK_FIELD,
43
- limit_text,
44
- DATA_DIR,
45
- JSON_DATA_DIR,
46
- MechEvent,
47
- MechEventName,
48
- MechRequest,
49
- MechResponse,
50
- EVENT_TO_MECH_STRUCT,
51
- REQUEST_ID,
52
- HTTP,
53
- HTTPS,
54
- get_result_values,
55
- get_vote,
56
- get_win_probability,
57
- get_prediction_values,
58
- )
59
-
60
- CONTRACTS_PATH = "contracts"
61
- MECH_TO_INFO = {
62
- # this block number is when the creator had its first tx ever, and after this mech's creation
63
- "0xff82123dfb52ab75c417195c5fdb87630145ae81": ("old_mech_abi.json", 28911547),
64
- # this block number is when this mech was created
65
- "0x77af31de935740567cf4ff1986d04b2c964a786a": ("new_mech_abi.json", 30776879),
66
- }
67
- # optionally set the latest block to stop searching for the delivered events
68
-
69
- EVENT_ARGUMENTS = "args"
70
- DATA = "data"
71
- IPFS_LINKS_SERIES_NAME = "ipfs_links"
72
- BACKOFF_FACTOR = 1
73
- STATUS_FORCELIST = [404, 500, 502, 503, 504]
74
- DEFAULT_FILENAME = "tools.parquet"
75
- ABI_ERROR = "The event signature did not match the provided ABI"
76
- # HTTP_TIMEOUT = 10
77
- # Increasing when ipfs is slow
78
- HTTP_TIMEOUT = 15
79
-
80
- IRRELEVANT_TOOLS = [
81
- "openai-text-davinci-002",
82
- "openai-text-davinci-003",
83
- "openai-gpt-3.5-turbo",
84
- "openai-gpt-4",
85
- "stabilityai-stable-diffusion-v1-5",
86
- "stabilityai-stable-diffusion-xl-beta-v2-2-2",
87
- "stabilityai-stable-diffusion-512-v2-1",
88
- "stabilityai-stable-diffusion-768-v2-1",
89
- "deepmind-optimization-strong",
90
- "deepmind-optimization",
91
- ]
92
- # this is how frequently we will keep a snapshot of the progress so far in terms of blocks' batches
93
- # for example, the value 1 means that for every `BLOCKS_CHUNK_SIZE` blocks that we search,
94
- # we also store the snapshot
95
- SNAPSHOT_RATE = 10
96
- NUM_WORKERS = 10
97
- GET_CONTENTS_BATCH_SIZE = 1000
98
-
99
-
100
- class TimestampedRetry(Retry):
101
- def increment(self, *args, **kwargs):
102
- print(f"Retry attempt at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
103
- return super().increment(*args, **kwargs)
104
-
105
-
106
- def create_session() -> requests.Session:
107
- """Create a session with a retry strategy."""
108
- session = requests.Session()
109
- retry_strategy = TimestampedRetry(
110
- total=N_IPFS_RETRIES,
111
- backoff_factor=BACKOFF_FACTOR,
112
- status_forcelist=STATUS_FORCELIST,
113
- )
114
- adapter = HTTPAdapter(max_retries=retry_strategy)
115
- for protocol in (HTTP, HTTPS):
116
- session.mount(protocol, adapter)
117
-
118
- return session
119
-
120
-
121
- def request(
122
- session: requests.Session, url: str, timeout: int = HTTP_TIMEOUT
123
- ) -> Optional[requests.Response]:
124
- """Perform a request with a session."""
125
- try:
126
- response = session.get(url, timeout=timeout)
127
- response.raise_for_status()
128
- except requests.exceptions.HTTPError as exc:
129
- tqdm.write(f"HTTP error occurred: {exc}.")
130
- except Exception as exc:
131
- tqdm.write(f"Unexpected error occurred: {exc}.")
132
- else:
133
- return response
134
- return None
135
-
136
-
137
- def parse_ipfs_response(
138
- session: requests.Session,
139
- url: str,
140
- event: MechEvent,
141
- event_name: MechEventName,
142
- response: requests.Response,
143
- ) -> Optional[Dict[str, str]]:
144
- """Parse a response from IPFS."""
145
- try:
146
- return response.json()
147
- except requests.exceptions.JSONDecodeError:
148
- # this is a workaround because the `metadata.json` file was introduced and removed multiple times
149
- if event_name == MechEvent.REQUEST and url != event.ipfs_request_link:
150
- url = event.ipfs_request_link
151
- response = request(session, url)
152
- if response is None:
153
- tqdm.write(f"Skipping {event=}.")
154
- return None
155
-
156
- try:
157
- return response.json()
158
- except requests.exceptions.JSONDecodeError:
159
- pass
160
-
161
- tqdm.write(f"Failed to parse response into json for {url=}.")
162
- return None
163
-
164
-
165
- def parse_ipfs_tools_content(
166
- raw_content: Dict[str, str], event: MechEvent, event_name: MechEventName
167
- ) -> Optional[Union[MechRequest, MechResponse]]:
168
- """Parse tools content from IPFS."""
169
- struct = EVENT_TO_MECH_STRUCT.get(event_name)
170
- raw_content[REQUEST_ID] = str(event.requestId)
171
- raw_content[BLOCK_FIELD] = str(event.for_block)
172
- raw_content["sender"] = str(event.sender)
173
-
174
- try:
175
- mech_response = struct(**raw_content)
176
- except (ValueError, TypeError, KeyError):
177
- tqdm.write(f"Could not parse {limit_text(str(raw_content))}")
178
- return None
179
-
180
- if event_name == MechEventName.REQUEST and mech_response.tool in IRRELEVANT_TOOLS:
181
- return None
182
-
183
- return mech_response
184
-
185
-
186
- def parse_json_events(json_events: dict, keys_to_traverse: List[int]) -> pd.DataFrame:
187
- """Function to parse the mech info in a json format"""
188
- all_records = []
189
- for key in keys_to_traverse:
190
- try:
191
- json_input = json_events[key]
192
- output = {}
193
- output["request_id"] = json_input["requestId"]
194
- output["request_block"] = json_input["blockNumber"]
195
- output["request_time"] = transform_timestamp_to_datetime(
196
- int(json_input["blockTimestamp"])
197
- )
198
- output["tx_hash"] = json_input["transactionHash"]
199
- output["prompt_request"] = json_input["ipfsContents"]["prompt"]
200
- output["tool"] = json_input["ipfsContents"]["tool"]
201
- output["nonce"] = json_input["ipfsContents"]["nonce"]
202
- output["trader_address"] = json_input["sender"]
203
- output["deliver_block"] = json_input["deliver"]["blockNumber"]
204
- error_value, error_message, prediction_params = get_result_values(
205
- json_input["deliver"]["ipfsContents"]["result"]
206
- )
207
- error_message_value = json_input.get("error_message", error_message)
208
- output["error"] = error_value
209
- output["error_message"] = error_message_value
210
- output["prompt_response"] = json_input["deliver"]["ipfsContents"]["prompt"]
211
- output["mech_address"] = json_input["deliver"]["sender"]
212
- p_yes_value, p_no_value, confidence_value, info_utility_value = (
213
- get_prediction_values(prediction_params)
214
- )
215
- output["p_yes"] = p_yes_value
216
- output["p_no"] = p_no_value
217
- output["confidence"] = confidence_value
218
- output["info_utility"] = info_utility_value
219
- output["vote"] = get_vote(p_yes_value, p_no_value)
220
- output["win_probability"] = get_win_probability(p_yes_value, p_no_value)
221
- all_records.append(output)
222
- except Exception as e:
223
- print(e)
224
- print(f"Error parsing the key ={key}. Noted as error")
225
- output["error"] = 1
226
- output["error_message"] = "Response parsing error"
227
- output["p_yes"] = None
228
- output["p_no"] = None
229
- output["confidence"] = None
230
- output["info_utility"] = None
231
- output["vote"] = None
232
- output["win_probability"] = None
233
- all_records.append(output)
234
-
235
- return pd.DataFrame.from_dict(all_records, orient="columns")
236
-
237
-
238
- def transform_request(contents: pd.DataFrame) -> pd.DataFrame:
239
- """Transform the requests dataframe."""
240
- return clean(contents)
241
-
242
-
243
- def transform_deliver(contents: pd.DataFrame) -> pd.DataFrame:
244
- """Transform the delivers dataframe."""
245
- unpacked_result = pd.json_normalize(contents.result)
246
- # # drop result column if it exists
247
- if "result" in unpacked_result.columns:
248
- unpacked_result.drop(columns=["result"], inplace=True)
249
-
250
- # drop prompt column if it exists
251
- if "prompt" in unpacked_result.columns:
252
- unpacked_result.drop(columns=["prompt"], inplace=True)
253
-
254
- # rename prompt column to prompt_deliver
255
- unpacked_result.rename(columns={"prompt": "prompt_deliver"}, inplace=True)
256
- contents = pd.concat((contents, unpacked_result), axis=1)
257
-
258
- if "result" in contents.columns:
259
- contents.drop(columns=["result"], inplace=True)
260
-
261
- if "prompt" in contents.columns:
262
- contents.drop(columns=["prompt"], inplace=True)
263
-
264
- return clean(contents)
265
-
266
-
267
- def parse_store_json_events_parallel(json_events: Dict[str, Any], output_filename: str):
268
- total_nr_events = len(json_events)
269
- ids_to_traverse = list(json_events.keys())
270
- print(f"Parsing {total_nr_events} events")
271
- contents = []
272
- with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
273
- futures = []
274
- for i in range(0, total_nr_events, GET_CONTENTS_BATCH_SIZE):
275
- futures.append(
276
- executor.submit(
277
- parse_json_events,
278
- json_events,
279
- ids_to_traverse[i : i + GET_CONTENTS_BATCH_SIZE],
280
- )
281
- )
282
-
283
- for future in tqdm(
284
- as_completed(futures),
285
- total=len(futures),
286
- desc=f"Fetching json contents",
287
- ):
288
- current_mech_contents = future.result()
289
- contents.append(current_mech_contents)
290
-
291
- tools = pd.concat(contents, ignore_index=True)
292
- print(f"Adding market creators info. Length of the tools file = {len(tools)}")
293
- tools = add_market_creator(tools)
294
- print(
295
- f"Length of the tools dataframe after adding market creators info= {len(tools)}"
296
- )
297
- print(tools.info())
298
- try:
299
- if "result" in tools.columns:
300
- tools = tools.drop(columns=["result"])
301
- tools.to_parquet(DATA_DIR / output_filename, index=False)
302
- except Exception as e:
303
- print(f"Failed to write tools data: {e}")
304
-
305
- return tools
306
-
307
-
308
- def generate_tools_file(input_filename: str, output_filename: str):
309
- """Function to parse the json mech events and generate the parquet tools file"""
310
- try:
311
- with open(JSON_DATA_DIR / input_filename, "r") as file:
312
- file_contents = json.load(file)
313
- parse_store_json_events_parallel(file_contents, output_filename)
314
- except Exception as e:
315
- print(f"An Exception happened while parsing the json events {e}")
316
-
317
-
318
- if __name__ == "__main__":
319
-
320
- generate_tools_file()
scripts/tools_metrics.py DELETED
@@ -1,95 +0,0 @@
1
- import pandas as pd
2
- from typing import List
3
- from utils import TMP_DIR, INC_TOOLS, DATA_DIR
4
-
5
-
6
- def get_error_data_by_market(
7
- tools_df: pd.DataFrame, inc_tools: List[str]
8
- ) -> pd.DataFrame:
9
- """Gets the error data for the given tools and calculates the error percentage."""
10
- tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
11
- error = (
12
- tools_inc.groupby(
13
- ["tool", "request_month_year_week", "market_creator", "error"], sort=False
14
- )
15
- .size()
16
- .unstack()
17
- .fillna(0)
18
- .reset_index()
19
- )
20
- error["error_perc"] = (error[1] / (error[0] + error[1])) * 100
21
- error["total_requests"] = error[0] + error[1]
22
- return error
23
-
24
-
25
- def get_tool_winning_rate_by_market(
26
- tools_df: pd.DataFrame, inc_tools: List[str]
27
- ) -> pd.DataFrame:
28
- """Gets the tool winning rate data for the given tools by market and calculates the winning percentage."""
29
- tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
30
- tools_non_error = tools_inc[tools_inc["error"] != 1]
31
- tools_non_error.loc[:, "currentAnswer"] = tools_non_error["currentAnswer"].replace(
32
- {"no": "No", "yes": "Yes"}
33
- )
34
- tools_non_error = tools_non_error[
35
- tools_non_error["currentAnswer"].isin(["Yes", "No"])
36
- ]
37
- tools_non_error = tools_non_error[tools_non_error["vote"].isin(["Yes", "No"])]
38
- tools_non_error["win"] = (
39
- tools_non_error["currentAnswer"] == tools_non_error["vote"]
40
- ).astype(int)
41
- tools_non_error.columns = tools_non_error.columns.astype(str)
42
- wins = (
43
- tools_non_error.groupby(
44
- ["tool", "request_month_year_week", "market_creator", "win"], sort=False
45
- )
46
- .size()
47
- .unstack()
48
- .fillna(0)
49
- )
50
- wins["win_perc"] = (wins[1] / (wins[0] + wins[1])) * 100
51
- wins.reset_index(inplace=True)
52
- wins["total_request"] = wins[0] + wins[1]
53
- wins.columns = wins.columns.astype(str)
54
- # Convert request_month_year_week to string and explicitly set type for Altair
55
- # wins["request_month_year_week"] = wins["request_month_year_week"].astype(str)
56
- return wins
57
-
58
-
59
- def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
60
- tools["request_time"] = pd.to_datetime(tools["request_time"])
61
- tools = tools.sort_values(by="request_time", ascending=True)
62
-
63
- tools["request_month_year_week"] = (
64
- pd.to_datetime(tools["request_time"])
65
- .dt.to_period("W")
66
- .dt.start_time.dt.strftime("%b-%d-%Y")
67
- )
68
- # preparing the tools graph
69
- # adding the total
70
- tools_all = tools.copy(deep=True)
71
- tools_all["market_creator"] = "all"
72
- # merging both dataframes
73
- tools = pd.concat([tools, tools_all], ignore_index=True)
74
- tools = tools.sort_values(by="request_time", ascending=True)
75
- return tools
76
-
77
-
78
- def compute_tools_based_datasets():
79
- try:
80
- tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
81
- tools_df = prepare_tools(tools_df)
82
- except Exception as e:
83
- print(f"Error reading old tools parquet file {e}")
84
- return None
85
- # error by markets
86
- error_by_markets = get_error_data_by_market(tools_df=tools_df, inc_tools=INC_TOOLS)
87
- error_by_markets.to_parquet(DATA_DIR / "error_by_markets.parquet", index=False)
88
- try:
89
- tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
90
- tools_df = prepare_tools(tools_df)
91
- except Exception as e:
92
- print(f"Error reading old tools parquet file {e}")
93
- return None
94
- winning_df = get_tool_winning_rate_by_market(tools_df, inc_tools=INC_TOOLS)
95
- winning_df.to_parquet(DATA_DIR / "winning_df.parquet", index=False)
scripts/update_tools_accuracy.py DELETED
@@ -1,120 +0,0 @@
1
- import os
2
- import pandas as pd
3
- import ipfshttpclient
4
- from utils import INC_TOOLS
5
- from typing import List
6
- from utils import TMP_DIR, DATA_DIR
7
-
8
- ACCURACY_FILENAME = "tools_accuracy.csv"
9
- OLD_IPFS_SERVER = "/dns/registry.autonolas.tech/tcp/443/https"
10
- IPFS_SERVER = "/dns/registry.gcp.autonolas.tech/tcp/443/https"
11
-
12
-
13
- def update_tools_accuracy(
14
- tools_acc: pd.DataFrame, tools_df: pd.DataFrame, inc_tools: List[str]
15
- ) -> pd.DataFrame:
16
- """To compute/update the latest accuracy information for the different mech tools"""
17
-
18
- # computation of the accuracy information
19
- tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
20
- # filtering errors
21
- tools_non_error = tools_inc[tools_inc["error"] != 1]
22
- tools_non_error.loc[:, "currentAnswer"] = tools_non_error["currentAnswer"].replace(
23
- {"no": "No", "yes": "Yes"}
24
- )
25
- tools_non_error = tools_non_error[
26
- tools_non_error["currentAnswer"].isin(["Yes", "No"])
27
- ]
28
- tools_non_error = tools_non_error[tools_non_error["vote"].isin(["Yes", "No"])]
29
- tools_non_error["win"] = (
30
- tools_non_error["currentAnswer"] == tools_non_error["vote"]
31
- ).astype(int)
32
- tools_non_error.columns = tools_non_error.columns.astype(str)
33
-
34
- wins = tools_non_error.groupby(["tool", "win"]).size().unstack().fillna(0)
35
- wins["tool_accuracy"] = (wins[1] / (wins[0] + wins[1])) * 100
36
- wins.reset_index(inplace=True)
37
- wins["total_requests"] = wins[0] + wins[1]
38
- wins.columns = wins.columns.astype(str)
39
- wins = wins[["tool", "tool_accuracy", "total_requests"]]
40
-
41
- no_timeline_info = False
42
- try:
43
- timeline = tools_non_error.groupby(["tool"])["request_time"].agg(["min", "max"])
44
- print("timeline dataset")
45
- print(timeline.head())
46
- acc_info = wins.merge(timeline, how="left", on="tool")
47
- except:
48
- print("NO REQUEST TIME INFORMATION AVAILABLE")
49
- no_timeline_info = True
50
- acc_info = wins
51
-
52
- if tools_acc is None:
53
- print("Creating accuracy file for the first time")
54
- return acc_info
55
-
56
- # update the old information
57
- print("Updating accuracy information")
58
- tools_to_update = list(acc_info["tool"].values)
59
- print("tools to update")
60
- print(tools_to_update)
61
- existing_tools = list(tools_acc["tool"].values)
62
- # dt.strftime("%Y-%m-%d %H:%M:%S")
63
- acc_info["min"] = acc_info["min"].dt.strftime("%Y-%m-%d %H:%M:%S")
64
- acc_info["max"] = acc_info["max"].dt.strftime("%Y-%m-%d %H:%M:%S")
65
- for tool in tools_to_update:
66
- new_accuracy = acc_info[acc_info["tool"] == tool]["tool_accuracy"].values[0]
67
- new_volume = acc_info[acc_info["tool"] == tool]["total_requests"].values[0]
68
- if no_timeline_info:
69
- new_min_timeline = None
70
- new_max_timeline = None
71
- else:
72
- new_min_timeline = acc_info[acc_info["tool"] == tool]["min"].values[0]
73
- new_max_timeline = acc_info[acc_info["tool"] == tool]["max"].values[0]
74
- if tool in existing_tools:
75
-
76
- tools_acc.loc[tools_acc["tool"] == tool, "tool_accuracy"] = new_accuracy
77
- tools_acc.loc[tools_acc["tool"] == tool, "total_requests"] = new_volume
78
- tools_acc.loc[tools_acc["tool"] == tool, "min"] = new_min_timeline
79
- tools_acc.loc[tools_acc["tool"] == tool, "max"] = new_max_timeline
80
- else:
81
- # new tool to add to the file
82
- # tool,tool_accuracy,total_requests,min,max
83
- new_row = {
84
- "tool": tool,
85
- "tool_accuracy": new_accuracy,
86
- "total_requests": new_volume,
87
- "min": new_min_timeline,
88
- "max": new_max_timeline,
89
- }
90
- tools_acc = pd.concat([tools_acc, pd.DataFrame(new_row)], ignore_index=True)
91
-
92
- print(tools_acc)
93
- return tools_acc
94
-
95
-
96
- def compute_tools_accuracy():
97
- print("Computing accuracy of tools")
98
- print("Reading tools parquet file")
99
- tools = pd.read_parquet(TMP_DIR / "tools.parquet")
100
- # Computing tools accuracy information
101
- print("Computing tool accuracy information")
102
- # Check if the file exists
103
- acc_data = None
104
- if os.path.exists(DATA_DIR / ACCURACY_FILENAME):
105
- acc_data = pd.read_csv(DATA_DIR / ACCURACY_FILENAME)
106
- acc_data = update_tools_accuracy(acc_data, tools, INC_TOOLS)
107
-
108
- # save acc_data into a CSV file
109
- print("Saving into a csv file")
110
- acc_data.to_csv(DATA_DIR / ACCURACY_FILENAME, index=False)
111
- print(acc_data.head())
112
-
113
- # save the data into IPFS
114
- client = ipfshttpclient.connect(IPFS_SERVER)
115
- result = client.add(DATA_DIR / ACCURACY_FILENAME)
116
- print(f"HASH of the tools accuracy file: {result['Hash']}")
117
-
118
-
119
- if __name__ == "__main__":
120
- compute_tools_accuracy()
scripts/utils.py DELETED
@@ -1,431 +0,0 @@
1
- import json
2
- import os
3
- import time
4
- from typing import List, Any, Optional, Union, Tuple
5
- import numpy as np
6
- import pandas as pd
7
- import gc
8
- import re
9
- from dataclasses import dataclass
10
- from datetime import datetime, timezone
11
- from pathlib import Path
12
- from enum import Enum
13
- from string import Template
14
- from json.decoder import JSONDecodeError
15
-
16
- DEFAULT_MECH_FEE = 0.01
17
- REDUCE_FACTOR = 0.25
18
- SLEEP = 0.5
19
- REQUEST_ID_FIELD = "request_id"
20
- SCRIPTS_DIR = Path(__file__).parent
21
- ROOT_DIR = SCRIPTS_DIR.parent
22
- DATA_DIR = ROOT_DIR / "data"
23
- JSON_DATA_DIR = ROOT_DIR / "json_data"
24
- HIST_DIR = ROOT_DIR / "historical_data"
25
- TMP_DIR = ROOT_DIR / "tmp"
26
- BLOCK_FIELD = "block"
27
- CID_PREFIX = "f01701220"
28
- REQUEST_ID = "requestId"
29
- REQUEST_SENDER = "sender"
30
- PROMPT_FIELD = "prompt"
31
- HTTP = "http://"
32
- HTTPS = HTTP[:4] + "s" + HTTP[4:]
33
- FORMAT_UPDATE_BLOCK_NUMBER = 30411638
34
- INVALID_ANSWER_HEX = (
35
- "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
36
- )
37
- OLD_IPFS_ADDRESS = "https://gateway.autonolas.tech/ipfs/"
38
- IPFS_ADDRESS = "https://gateway.gcp.autonolas.tech/ipfs/"
39
-
40
- INC_TOOLS = [
41
- "prediction-online",
42
- "prediction-offline",
43
- "claude-prediction-online",
44
- "claude-prediction-offline",
45
- "prediction-offline-sme",
46
- "prediction-online-sme",
47
- "prediction-request-rag",
48
- "prediction-request-reasoning",
49
- "prediction-url-cot-claude",
50
- "prediction-request-rag-claude",
51
- "prediction-request-reasoning-claude",
52
- "superforcaster",
53
- ]
54
- SUBGRAPH_URL = Template(
55
- """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/7s9rGBffUTL8kDZuxvvpuc46v44iuDarbrADBFw5uVp2"""
56
- )
57
- OMEN_SUBGRAPH_URL = Template(
58
- """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"""
59
- )
60
- NETWORK_SUBGRAPH_URL = Template(
61
- """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/FxV6YUix58SpYmLBwc9gEHkwjfkqwe1X5FJQjn8nKPyA"""
62
- )
63
- # THEGRAPH_ENDPOINT = (
64
- # "https://api.studio.thegraph.com/query/78829/mech-predict/version/latest"
65
- # )
66
- MECH_SUBGRAPH_URL = Template(
67
- """https://gateway.thegraph.com/api/${subgraph_api_key}/subgraphs/id/4YGoX3iXUni1NBhWJS5xyKcntrAzssfytJK7PQxxQk5g"""
68
- )
69
-
70
- SUBGRAPH_API_KEY = os.environ.get("SUBGRAPH_API_KEY", None)
71
- RPC = os.environ.get("RPC", None)
72
-
73
-
74
- class MechEventName(Enum):
75
- """The mech's event names."""
76
-
77
- REQUEST = "Request"
78
- DELIVER = "Deliver"
79
-
80
-
81
- @dataclass
82
- class MechEvent:
83
- """A mech's on-chain event representation."""
84
-
85
- for_block: int
86
- requestId: int
87
- data: bytes
88
- sender: str
89
-
90
- def _ipfs_link(self) -> Optional[str]:
91
- """Get the ipfs link for the data."""
92
- return f"{IPFS_ADDRESS}{CID_PREFIX}{self.data.hex()}"
93
-
94
- @property
95
- def ipfs_request_link(self) -> Optional[str]:
96
- """Get the IPFS link for the request."""
97
- return f"{self._ipfs_link()}/metadata.json"
98
-
99
- @property
100
- def ipfs_deliver_link(self) -> Optional[str]:
101
- """Get the IPFS link for the deliver."""
102
- if self.requestId is None:
103
- return None
104
- return f"{self._ipfs_link()}/{self.requestId}"
105
-
106
- def ipfs_link(self, event_name: MechEventName) -> Optional[str]:
107
- """Get the ipfs link based on the event."""
108
- if event_name == MechEventName.REQUEST:
109
- if self.for_block < FORMAT_UPDATE_BLOCK_NUMBER:
110
- return self._ipfs_link()
111
- return self.ipfs_request_link
112
- if event_name == MechEventName.DELIVER:
113
- return self.ipfs_deliver_link
114
- return None
115
-
116
-
117
- @dataclass(init=False)
118
- class MechRequest:
119
- """A structure for a request to a mech."""
120
-
121
- request_id: Optional[int]
122
- request_block: Optional[int]
123
- prompt_request: Optional[str]
124
- tool: Optional[str]
125
- nonce: Optional[str]
126
- trader_address: Optional[str]
127
-
128
- def __init__(self, **kwargs: Any) -> None:
129
- """Initialize the request ignoring extra keys."""
130
- self.request_id = int(kwargs.pop(REQUEST_ID, 0))
131
- self.request_block = int(kwargs.pop(BLOCK_FIELD, 0))
132
- self.prompt_request = kwargs.pop(PROMPT_FIELD, None)
133
- self.tool = kwargs.pop("tool", None)
134
- self.nonce = kwargs.pop("nonce", None)
135
- self.trader_address = kwargs.pop("sender", None)
136
-
137
-
138
- @dataclass(init=False)
139
- class PredictionResponse:
140
- """A response of a prediction."""
141
-
142
- p_yes: float
143
- p_no: float
144
- confidence: float
145
- info_utility: float
146
- vote: Optional[str]
147
- win_probability: Optional[float]
148
-
149
- def __init__(self, **kwargs: Any) -> None:
150
- """Initialize the mech's prediction ignoring extra keys."""
151
- try:
152
- self.p_yes = float(kwargs.pop("p_yes"))
153
- self.p_no = float(kwargs.pop("p_no"))
154
- self.confidence = float(kwargs.pop("confidence"))
155
- self.info_utility = float(kwargs.pop("info_utility"))
156
- self.win_probability = 0
157
-
158
- # Validate probabilities
159
- probabilities = {
160
- "p_yes": self.p_yes,
161
- "p_no": self.p_no,
162
- "confidence": self.confidence,
163
- "info_utility": self.info_utility,
164
- }
165
-
166
- for name, prob in probabilities.items():
167
- if not 0 <= prob <= 1:
168
- raise ValueError(f"{name} probability is out of bounds: {prob}")
169
-
170
- if self.p_yes + self.p_no != 1:
171
- raise ValueError(
172
- f"Sum of p_yes and p_no is not 1: {self.p_yes} + {self.p_no}"
173
- )
174
-
175
- self.vote = self.get_vote()
176
- self.win_probability = self.get_win_probability()
177
-
178
- except KeyError as e:
179
- raise KeyError(f"Missing key in PredictionResponse: {e}")
180
- except ValueError as e:
181
- raise ValueError(f"Invalid value in PredictionResponse: {e}")
182
-
183
- def get_vote(self) -> Optional[str]:
184
- """Return the vote."""
185
- if self.p_no == self.p_yes:
186
- return None
187
- if self.p_no > self.p_yes:
188
- return "No"
189
- return "Yes"
190
-
191
- def get_win_probability(self) -> Optional[float]:
192
- """Return the probability estimation for winning with vote."""
193
- return max(self.p_no, self.p_yes)
194
-
195
-
196
- @dataclass(init=False)
197
- class MechResponse:
198
- """A structure for the response of a mech."""
199
-
200
- request_id: int
201
- deliver_block: Optional[int]
202
- result: Optional[PredictionResponse]
203
- error: Optional[str]
204
- error_message: Optional[str]
205
- prompt_response: Optional[str]
206
- mech_address: Optional[str]
207
-
208
- def __init__(self, **kwargs: Any) -> None:
209
- """Initialize the mech's response ignoring extra keys."""
210
- self.error = kwargs.get("error", None)
211
- self.request_id = int(kwargs.get(REQUEST_ID, 0))
212
- self.deliver_block = int(kwargs.get(BLOCK_FIELD, 0))
213
- self.result = kwargs.get("result", None)
214
- self.prompt_response = kwargs.get(PROMPT_FIELD, None)
215
- self.mech_address = kwargs.get("sender", None)
216
-
217
- if self.result != "Invalid response":
218
- self.error_message = kwargs.get("error_message", None)
219
-
220
- try:
221
- if isinstance(self.result, str):
222
- kwargs = json.loads(self.result)
223
- self.result = PredictionResponse(**kwargs)
224
- self.error = 0
225
-
226
- except JSONDecodeError:
227
- self.error_message = "Response parsing error"
228
- self.error = 1
229
-
230
- except Exception as e:
231
- self.error_message = str(e)
232
- self.error = 1
233
-
234
- else:
235
- self.error_message = "Invalid response from tool"
236
- self.error = 1
237
- self.result = None
238
-
239
-
240
- EVENT_TO_MECH_STRUCT = {
241
- MechEventName.REQUEST: MechRequest,
242
- MechEventName.DELIVER: MechResponse,
243
- }
244
-
245
-
246
- def transform_to_datetime(x):
247
- return datetime.fromtimestamp(int(x), tz=timezone.utc)
248
-
249
-
250
- def measure_execution_time(func):
251
- def wrapper(*args, **kwargs):
252
- start_time = time.time()
253
- result = func(*args, **kwargs)
254
- end_time = time.time()
255
- execution_time = end_time - start_time
256
- print(f"Execution time: {execution_time:.6f} seconds")
257
- return result
258
-
259
- return wrapper
260
-
261
-
262
- def limit_text(text: str, limit: int = 200) -> str:
263
- """Limit the given text"""
264
- if len(text) > limit:
265
- return f"{text[:limit]}..."
266
- return text
267
-
268
-
269
- def check_for_dicts(df: pd.DataFrame) -> List[str]:
270
- """Check for columns that contain dictionaries."""
271
- dict_columns = []
272
- for column in df.columns:
273
- if df[column].apply(lambda x: isinstance(x, dict)).any():
274
- dict_columns.append(column)
275
- return dict_columns
276
-
277
-
278
- def drop_dict_rows(df: pd.DataFrame, dict_columns: List[str]) -> pd.DataFrame:
279
- """Drop rows that contain dictionaries."""
280
- for column in dict_columns:
281
- df = df[~df[column].apply(lambda x: isinstance(x, dict))]
282
- return df
283
-
284
-
285
- def clean(df: pd.DataFrame) -> pd.DataFrame:
286
- """Clean the dataframe."""
287
- dict_columns = check_for_dicts(df)
288
- df = drop_dict_rows(df, dict_columns)
289
- cleaned = df.drop_duplicates()
290
- cleaned[REQUEST_ID_FIELD] = cleaned[REQUEST_ID_FIELD].astype("str")
291
- return cleaned
292
-
293
-
294
- def gen_event_filename(event_name: MechEventName) -> str:
295
- """Generate the filename of an event."""
296
- return f"{event_name.value.lower()}s.parquet"
297
-
298
-
299
- def read_n_last_lines(filename: str, n: int = 1) -> str:
300
- """Return the `n` last lines' content of a file."""
301
- num_newlines = 0
302
- with open(filename, "rb") as f:
303
- try:
304
- f.seek(-2, os.SEEK_END)
305
- while num_newlines < n:
306
- f.seek(-2, os.SEEK_CUR)
307
- if f.read(1) == b"\n":
308
- num_newlines += 1
309
- except OSError:
310
- f.seek(0)
311
- last_line = f.readline().decode()
312
- return last_line
313
-
314
-
315
- def get_question(text: str) -> str:
316
- """Get the question from a text."""
317
- # Regex to find text within double quotes
318
- pattern = r'"([^"]*)"'
319
-
320
- # Find all occurrences
321
- questions = re.findall(pattern, text)
322
-
323
- # Assuming you want the first question if there are multiple
324
- question = questions[0] if questions else None
325
-
326
- return question
327
-
328
-
329
- def current_answer(text: str, fpmms: pd.DataFrame) -> Optional[str]:
330
- """Get the current answer for a question."""
331
- row = fpmms[fpmms["title"] == text]
332
- if row.shape[0] == 0:
333
- return None
334
- return row["currentAnswer"].values[0]
335
-
336
-
337
- def convert_hex_to_int(x: Union[str, float]) -> Union[int, float]:
338
- """Convert hex to int"""
339
- if isinstance(x, float):
340
- return np.nan
341
- if isinstance(x, str):
342
- if x == INVALID_ANSWER_HEX:
343
- return -1
344
- return int(x, 16)
345
-
346
-
347
- def wei_to_unit(wei: int) -> float:
348
- """Converts wei to currency unit."""
349
- return wei / 10**18
350
-
351
-
352
- def get_vote(p_yes, p_no) -> Optional[str]:
353
- """Return the vote."""
354
- if p_no == p_yes:
355
- return None
356
- if p_no > p_yes:
357
- return "No"
358
- return "Yes"
359
-
360
-
361
- def get_win_probability(p_yes, p_no) -> Optional[float]:
362
- """Return the probability estimation for winning with vote."""
363
- return max(p_no, p_yes)
364
-
365
-
366
- def get_result_values(result: str) -> Tuple:
367
- if result == "Invalid response":
368
- return 1, "Invalid response from tool", None
369
- error_message = None
370
- params = None
371
- try:
372
- if isinstance(result, str):
373
- params = json.loads(result)
374
- error_value = 0
375
-
376
- except JSONDecodeError:
377
- error_message = "Response parsing error"
378
- error_value = 1
379
-
380
- except Exception as e:
381
- error_message = str(e)
382
- error_value = 1
383
- return error_value, error_message, params
384
-
385
-
386
- def get_prediction_values(params: dict) -> Tuple:
387
- p_yes = float(params.pop("p_yes"))
388
- p_no = float(params.pop("p_no"))
389
- confidence = float(params.pop("confidence"))
390
- info_utility = float(params.pop("info_utility"))
391
- return p_yes, p_no, confidence, info_utility
392
-
393
-
394
- def to_content(q: str) -> dict[str, Any]:
395
- """Wrap the given query string into a GraphQL request payload under the `query` key."""
396
- finalized_query = {
397
- "query": q,
398
- "variables": None,
399
- "extensions": {"headers": None},
400
- }
401
- return finalized_query
402
-
403
-
404
- def read_parquet_files(tools_filename: str, trades_filename: str):
405
- # Check if tools.parquet is in the same directory
406
- try:
407
- tools = pd.read_parquet(DATA_DIR / tools_filename)
408
-
409
- # make sure trader_address is in the columns
410
- assert "trader_address" in tools.columns, "trader_address column not found"
411
-
412
- # lowercase and strip trader_address
413
- tools["trader_address"] = tools["trader_address"].str.lower().str.strip()
414
-
415
- # drop duplicates
416
- tools.drop_duplicates(inplace=True)
417
-
418
- print(f"{tools_filename} loaded")
419
- except FileNotFoundError:
420
- print(f"{tools_filename} not found. Please run tools.py first.")
421
- return
422
- try:
423
- fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
424
- fpmmTrades["trader_address"] = (
425
- fpmmTrades["trader_address"].str.lower().str.strip()
426
- )
427
- except FileNotFoundError:
428
- print(f"{trades_filename} not found.")
429
- return
430
-
431
- return tools, fpmmTrades
 
 
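The prediction helpers removed above (`get_result_values`, `get_prediction_values`, `get_vote`, `get_win_probability`) all operate on the JSON payload a tool delivers. A minimal usage sketch, assuming those helpers are in scope and using an illustrative payload with the four standard fields:

```python
import json

# Illustrative deliver payload (values are made up; p_yes + p_no must sum to 1).
raw_result = json.dumps(
    {"p_yes": 0.75, "p_no": 0.25, "confidence": 0.8, "info_utility": 0.5}
)

# Parse the raw string and get the error flag, error message and parsed params.
error_value, error_message, params = get_result_values(raw_result)
assert error_value == 0 and error_message is None

# Extract the four prediction fields and derive the vote and win probability.
p_yes, p_no, confidence, info_utility = get_prediction_values(params)
print(get_vote(p_yes, p_no))             # -> "Yes"
print(get_win_probability(p_yes, p_no))  # -> 0.75
```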
scripts/web3_utils.py DELETED
@@ -1,276 +0,0 @@
1
- import sys
2
- import pickle
3
- import gc
4
- import time
5
- import requests
6
- from functools import partial
7
- from string import Template
8
- from datetime import datetime
9
- from concurrent.futures import ThreadPoolExecutor
10
- from collections import defaultdict
11
- from tqdm import tqdm
12
- from web3 import Web3
13
- from typing import Any, Optional
14
- from web3.types import BlockParams
15
- from utils import (
16
- JSON_DATA_DIR,
17
- DATA_DIR,
18
- SUBGRAPH_API_KEY,
19
- to_content,
20
- SUBGRAPH_URL,
21
- HIST_DIR,
22
- TMP_DIR,
23
- )
24
- from queries import conditional_tokens_gc_user_query, omen_xdai_trades_query
25
- import pandas as pd
26
-
27
- REDUCE_FACTOR = 0.25
28
- SLEEP = 0.5
29
- QUERY_BATCH_SIZE = 1000
30
- FPMM_QS_CREATOR = "0x89c5cc945dd550bcffb72fe42bff002429f46fec"
31
- FPMM_PEARL_CREATOR = "0xFfc8029154ECD55ABED15BD428bA596E7D23f557"
32
- LATEST_BLOCK: Optional[int] = None
33
- LATEST_BLOCK_NAME: BlockParams = "latest"
34
- BLOCK_DATA_NUMBER = "number"
35
- BLOCKS_CHUNK_SIZE = 10_000
36
- N_IPFS_RETRIES = 4
37
- N_RPC_RETRIES = 100
38
- RPC_POLL_INTERVAL = 0.05
39
- SUBGRAPH_POLL_INTERVAL = 0.05
40
- IPFS_POLL_INTERVAL = 0.2 # 5 calls per second
41
- OMEN_SUBGRAPH_URL = Template(
42
- """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"""
43
- )
44
-
45
- headers = {
46
- "Accept": "application/json, multipart/mixed",
47
- "Content-Type": "application/json",
48
- }
49
-
50
-
51
- def parse_args() -> str:
52
- """Parse the arguments and return the RPC."""
53
- if len(sys.argv) != 2:
54
- raise ValueError("Expected the RPC as a positional argument.")
55
- return sys.argv[1]
56
-
57
-
58
- def read_abi(abi_path: str) -> str:
59
- """Read and return the contents of the given contract ABI file."""
60
- with open(abi_path) as abi_file:
61
- return abi_file.read()
62
-
63
-
64
- def update_block_request_map(block_request_id_map: dict) -> None:
65
- print("Saving block request id map info")
66
- with open(JSON_DATA_DIR / "block_request_id_map.pickle", "wb") as handle:
67
- pickle.dump(block_request_id_map, handle, protocol=pickle.HIGHEST_PROTOCOL)
68
-
69
-
70
- def reduce_window(contract_instance, event, from_block, batch_size, latest_block):
71
- """Dynamically reduce the batch size window."""
72
- keep_fraction = 1 - REDUCE_FACTOR
73
- events_filter = contract_instance.events[event].build_filter()
74
- events_filter.fromBlock = from_block
75
- batch_size = int(batch_size * keep_fraction)
76
- events_filter.toBlock = min(from_block + batch_size, latest_block)
77
- tqdm.write(f"RPC timed out! Resizing batch size to {batch_size}.")
78
- time.sleep(SLEEP)
79
- return events_filter, batch_size
80
-
81
-
82
- def block_number_to_timestamp(block_number: int, web3: Web3) -> str:
83
- """Convert a block number to a timestamp."""
84
- block = web3.eth.get_block(block_number)
85
- timestamp = datetime.utcfromtimestamp(block["timestamp"])
86
- try:
87
- timestamp_str = timestamp.strftime("%Y-%m-%d %H:%M:%S")
88
- timestamp = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
89
- except Exception as e:
90
- timestamp = datetime.utcfromtimestamp(block["timestamp"])
91
- return timestamp.strftime("%Y-%m-%d %H:%M:%S")
92
-
93
-
94
- def parallelize_timestamp_conversion(df: pd.DataFrame, function: callable) -> list:
95
- """Parallelize the timestamp conversion."""
96
- block_numbers = df["request_block"].tolist()
97
- with ThreadPoolExecutor(max_workers=10) as executor:
98
- results = list(
99
- tqdm(executor.map(function, block_numbers), total=len(block_numbers))
100
- )
101
- return results
102
-
103
-
104
- def updating_timestamps(rpc: str, tools_filename: str):
105
- web3 = Web3(Web3.HTTPProvider(rpc))
106
-
107
- tools = pd.read_parquet(TMP_DIR / tools_filename)
108
-
109
- # Convert block number to timestamp
110
- print("Converting block number to timestamp")
111
- t_map = pickle.load(open(TMP_DIR / "t_map.pkl", "rb"))
112
- tools["request_time"] = tools["request_block"].map(t_map)
113
-
114
- no_data = tools["request_time"].isna().sum()
115
- print(f"Total rows with no request time info = {no_data}")
116
-
117
- # Identify tools with missing request_time and fill them
118
- missing_time_indices = tools[tools["request_time"].isna()].index
119
- if not missing_time_indices.empty:
120
- partial_block_number_to_timestamp = partial(
121
- block_number_to_timestamp, web3=web3
122
- )
123
- missing_timestamps = parallelize_timestamp_conversion(
124
- tools.loc[missing_time_indices], partial_block_number_to_timestamp
125
- )
126
-
127
- # Update the original DataFrame with the missing timestamps
128
- for i, timestamp in zip(missing_time_indices, missing_timestamps):
129
- tools.at[i, "request_time"] = timestamp
130
-
131
- tools["request_month_year"] = pd.to_datetime(tools["request_time"]).dt.strftime(
132
- "%Y-%m"
133
- )
134
- tools["request_month_year_week"] = (
135
- pd.to_datetime(tools["request_time"])
136
- .dt.to_period("W")
137
- .dt.start_time.dt.strftime("%b-%d-%Y")
138
- )
139
-
140
- # Save the tools data after the updates on the content
141
- print(f"Updating file {tools_filename} with timestamps")
142
- tools.to_parquet(TMP_DIR / tools_filename, index=False)
143
-
144
- # Update t_map with new timestamps
145
- new_timestamps = (
146
- tools[["request_block", "request_time"]]
147
- .dropna()
148
- .set_index("request_block")
149
- .to_dict()["request_time"]
150
- )
151
- t_map.update(new_timestamps)
152
-
153
- # filtering old timestamps
154
- cutoff_date = datetime(2024, 9, 9)
155
- filtered_map = {
156
- k: v
157
- for k, v in t_map.items()
158
- if datetime.strptime(v, "%Y-%m-%d %H:%M:%S") < cutoff_date
159
- }
160
-
161
- with open(DATA_DIR / "t_map.pkl", "wb") as f:
162
- pickle.dump(filtered_map, f)
163
-
164
- # clean and release all memory
165
- del tools
166
- del t_map
167
- gc.collect()
168
-
169
-
170
- def query_conditional_tokens_gc_subgraph(creator: str) -> dict[str, Any]:
171
- """Query the subgraph."""
172
-
173
- subgraph = SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
174
- all_results: dict[str, Any] = {"data": {"user": {"userPositions": []}}}
175
- userPositions_id_gt = ""
176
- while True:
177
- query = conditional_tokens_gc_user_query.substitute(
178
- id=creator.lower(),
179
- first=QUERY_BATCH_SIZE,
180
- userPositions_id_gt=userPositions_id_gt,
181
- )
182
- content_json = {"query": query}
183
- # print("sending query to subgraph")
184
- res = requests.post(subgraph, headers=headers, json=content_json)
185
- result_json = res.json()
186
- # print(f"result = {result_json}")
187
- user_data = result_json.get("data", {}).get("user", {})
188
-
189
- if not user_data:
190
- break
191
-
192
- user_positions = user_data.get("userPositions", [])
193
-
194
- if user_positions:
195
- all_results["data"]["user"]["userPositions"].extend(user_positions)
196
- userPositions_id_gt = user_positions[len(user_positions) - 1]["id"]
197
- else:
198
- break
199
-
200
- if len(all_results["data"]["user"]["userPositions"]) == 0:
201
- return {"data": {"user": None}}
202
-
203
- return all_results
204
-
205
-
206
- def query_omen_xdai_subgraph(
207
- trader_category: str,
208
- from_timestamp: float,
209
- to_timestamp: float,
210
- fpmm_from_timestamp: float,
211
- fpmm_to_timestamp: float,
212
- ) -> dict[str, Any]:
213
- """Query the subgraph."""
214
-
215
- omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
216
- print(f"omen_subgraph = {omen_subgraph}")
217
- grouped_results = defaultdict(list)
218
- id_gt = ""
219
- if trader_category == "quickstart":
220
- creator_id = FPMM_QS_CREATOR.lower()
221
- else: # pearl
222
- creator_id = FPMM_PEARL_CREATOR.lower()
223
-
224
- while True:
225
- query = omen_xdai_trades_query.substitute(
226
- fpmm_creator=creator_id,
227
- creationTimestamp_gte=int(from_timestamp),
228
- creationTimestamp_lte=int(to_timestamp),
229
- fpmm_creationTimestamp_gte=int(fpmm_from_timestamp),
230
- fpmm_creationTimestamp_lte=int(fpmm_to_timestamp),
231
- first=QUERY_BATCH_SIZE,
232
- id_gt=id_gt,
233
- )
234
- print(f"omen query={query}")
235
- content_json = to_content(query)
236
-
237
- res = requests.post(omen_subgraph, headers=headers, json=content_json)
238
- result_json = res.json()
239
- # print(f"result = {result_json}")
240
- user_trades = result_json.get("data", {}).get("fpmmTrades", [])
241
-
242
- if not user_trades:
243
- break
244
-
245
- for trade in user_trades:
246
- fpmm_id = trade.get("fpmm", {}).get("id")
247
- grouped_results[fpmm_id].append(trade)
248
-
249
- id_gt = user_trades[len(user_trades) - 1]["id"]
250
-
251
- all_results = {
252
- "data": {
253
- "fpmmTrades": [
254
- trade
255
- for trades_list in grouped_results.values()
256
- for trade in trades_list
257
- ]
258
- }
259
- }
260
-
261
- return all_results
262
-
263
-
264
- # def get_earliest_block(event_name: MechEventName) -> int:
265
- # """Get the earliest block number to use when filtering for events."""
266
- # filename = gen_event_filename(event_name)
267
- # if not os.path.exists(DATA_DIR / filename):
268
- # return 0
269
-
270
- # df = pd.read_parquet(DATA_DIR / filename)
271
- # block_field = f"{event_name.value.lower()}_{BLOCK_FIELD}"
272
- # earliest_block = int(df[block_field].max())
273
- # # clean and release all memory
274
- # del df
275
- # gc.collect()
276
- # return earliest_block
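Both subgraph helpers in scripts/web3_utils.py above (`query_conditional_tokens_gc_subgraph` and `query_omen_xdai_subgraph`) page through results with the same cursor loop: post a query with an `id_gt` filter, extend the accumulator, advance the cursor to the last id returned, and stop on an empty batch. A generic sketch of that pattern, with a placeholder endpoint and query template rather than the real gateway URLs and query strings used above:

```python
import requests
from string import Template

# Placeholder endpoint and query, used only to illustrate the id_gt cursor loop.
ENDPOINT = "https://example.com/subgraph"
PAGE_QUERY = Template(
    """{ items(first: $first, where: {id_gt: "$id_gt"}) { id } }"""
)
HEADERS = {"Accept": "application/json", "Content-Type": "application/json"}


def fetch_all(batch_size: int = 1000) -> list:
    """Page through a subgraph collection until an empty batch is returned."""
    results = []
    id_gt = ""
    while True:
        query = PAGE_QUERY.substitute(first=batch_size, id_gt=id_gt)
        res = requests.post(
            ENDPOINT, headers=HEADERS, json={"query": query}, timeout=30
        )
        items = res.json().get("data", {}).get("items", [])
        if not items:
            break
        results.extend(items)
        id_gt = items[-1]["id"]  # advance the cursor past the last id seen
    return results
```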