rosacastillo committed on
Commit 82a220c · 1 Parent(s): 6d1c646

cleaning old folders not used anymore here

app.py CHANGED
@@ -1,6 +1,5 @@
 import gradio as gr
 import pandas as pd
-import duckdb
 import gzip
 import shutil
 import os
@@ -98,6 +97,7 @@ def load_all_data():
 
     # Now read the decompressed parquet file
    df2 = pd.read_parquet(parquet_file_path)
+    os.remove(parquet_file_path)
 
     # tools_accuracy
     tools_accuracy = pd.read_csv(
@@ -135,59 +135,6 @@ def load_all_data():
     return df1, df2, df3, df4, df5, df6
 
 
-def get_all_data():
-    """
-    Get all data from the parquet files
-    """
-    logger.info("Getting all data")
-
-    con = duckdb.connect(":memory:")
-    query6 = f"""
-    SELECT *
-    FROM read_parquet('./data/winning_df.parquet')
-    """
-    df6 = con.execute(query6).fetchdf()
-
-    query5 = f"""
-    SELECT *
-    FROM read_parquet('./data/unknown_traders.parquet')
-    """
-    df5 = con.execute(query5).fetchdf()
-
-    # Query to fetch invalid trades data
-    query4 = f"""
-    SELECT *
-    FROM read_parquet('./data/invalid_trades.parquet')
-    """
-    df4 = con.execute(query4).fetchdf()
-
-    # Query to fetch tools accuracy data
-    query3 = f"""
-    SELECT *
-    FROM read_csv('./data/tools_accuracy.csv')
-    """
-    df3 = con.execute(query3).fetchdf()
-
-    # Query to fetch data from all_trades_profitability.parquet
-    query2 = f"""
-    SELECT *
-    FROM read_parquet('./data/all_trades_profitability.parquet')
-    """
-    df2 = con.execute(query2).fetchdf()
-    logger.info("Got all data from all_trades_profitability.parquet")
-
-    query1 = f"""
-    SELECT *
-    FROM read_parquet('./data/error_by_markets.parquet')
-    """
-    df1 = con.execute(query1).fetchdf()
-    logger.info("Got all data from error_by_markets.parquet")
-
-    con.close()
-
-    return df1, df2, df3, df4, df5, df6
-
-
 def prepare_data():
     """
     Prepare the data for the dashboard
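
The net effect of the app.py change: duckdb is dropped as a dependency, the dashboard inputs are read directly with pandas, and the parquet file produced by decompressing the gzipped archive is removed once it has been loaded. Since load_all_data() already returns df1–df6 (visible in the unchanged return statement), the duckdb-based get_all_data() duplicate can be deleted. Below is a minimal sketch of the resulting load step, assuming load_all_data() decompresses a gzipped parquet with gzip/shutil as the imports suggest; the .gz path and function name here are illustrative, not taken from the repo.

import gzip
import os
import shutil

import pandas as pd


def load_profitability_df(
    compressed_path: str = "./data/all_trades_profitability.parquet.gz",  # assumed name
    parquet_file_path: str = "./data/all_trades_profitability.parquet",
) -> pd.DataFrame:
    """Decompress the gzipped parquet, read it, then drop the temporary copy."""
    # Decompress the archive next to the original file
    with gzip.open(compressed_path, "rb") as f_in, open(parquet_file_path, "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)

    # Now read the decompressed parquet file
    df2 = pd.read_parquet(parquet_file_path)

    # Added in this commit: remove the decompressed copy once it is in memory
    os.remove(parquet_file_path)
    return df2
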
contracts/new_mech_abi.json DELETED
@@ -1,718 +0,0 @@
1
- [
2
- {
3
- "inputs": [
4
- {
5
- "internalType": "address",
6
- "name": "_token",
7
- "type": "address"
8
- },
9
- {
10
- "internalType": "uint256",
11
- "name": "_tokenId",
12
- "type": "uint256"
13
- },
14
- {
15
- "internalType": "uint256",
16
- "name": "_price",
17
- "type": "uint256"
18
- }
19
- ],
20
- "stateMutability": "nonpayable",
21
- "type": "constructor"
22
- },
23
- {
24
- "inputs": [
25
- {
26
- "internalType": "uint256",
27
- "name": "agentId",
28
- "type": "uint256"
29
- }
30
- ],
31
- "name": "AgentNotFound",
32
- "type": "error"
33
- },
34
- {
35
- "inputs": [
36
- {
37
- "internalType": "uint256",
38
- "name": "provided",
39
- "type": "uint256"
40
- },
41
- {
42
- "internalType": "uint256",
43
- "name": "expected",
44
- "type": "uint256"
45
- }
46
- ],
47
- "name": "NotEnoughPaid",
48
- "type": "error"
49
- },
50
- {
51
- "inputs": [
52
- {
53
- "internalType": "uint256",
54
- "name": "provided",
55
- "type": "uint256"
56
- },
57
- {
58
- "internalType": "uint256",
59
- "name": "max",
60
- "type": "uint256"
61
- }
62
- ],
63
- "name": "Overflow",
64
- "type": "error"
65
- },
66
- {
67
- "inputs": [
68
- {
69
- "internalType": "uint256",
70
- "name": "requestId",
71
- "type": "uint256"
72
- }
73
- ],
74
- "name": "RequestIdNotFound",
75
- "type": "error"
76
- },
77
- {
78
- "inputs": [],
79
- "name": "ZeroAddress",
80
- "type": "error"
81
- },
82
- {
83
- "anonymous": false,
84
- "inputs": [
85
- {
86
- "indexed": true,
87
- "internalType": "address",
88
- "name": "sender",
89
- "type": "address"
90
- },
91
- {
92
- "indexed": false,
93
- "internalType": "uint256",
94
- "name": "requestId",
95
- "type": "uint256"
96
- },
97
- {
98
- "indexed": false,
99
- "internalType": "bytes",
100
- "name": "data",
101
- "type": "bytes"
102
- }
103
- ],
104
- "name": "Deliver",
105
- "type": "event"
106
- },
107
- {
108
- "anonymous": false,
109
- "inputs": [
110
- {
111
- "indexed": false,
112
- "internalType": "uint256",
113
- "name": "price",
114
- "type": "uint256"
115
- }
116
- ],
117
- "name": "PriceUpdated",
118
- "type": "event"
119
- },
120
- {
121
- "anonymous": false,
122
- "inputs": [
123
- {
124
- "indexed": true,
125
- "internalType": "address",
126
- "name": "sender",
127
- "type": "address"
128
- },
129
- {
130
- "indexed": false,
131
- "internalType": "uint256",
132
- "name": "requestId",
133
- "type": "uint256"
134
- },
135
- {
136
- "indexed": false,
137
- "internalType": "bytes",
138
- "name": "data",
139
- "type": "bytes"
140
- }
141
- ],
142
- "name": "Request",
143
- "type": "event"
144
- },
145
- {
146
- "inputs": [
147
- {
148
- "internalType": "uint256",
149
- "name": "requestId",
150
- "type": "uint256"
151
- },
152
- {
153
- "internalType": "bytes",
154
- "name": "data",
155
- "type": "bytes"
156
- }
157
- ],
158
- "name": "deliver",
159
- "outputs": [],
160
- "stateMutability": "nonpayable",
161
- "type": "function"
162
- },
163
- {
164
- "inputs": [],
165
- "name": "entryPoint",
166
- "outputs": [
167
- {
168
- "internalType": "contract IEntryPoint",
169
- "name": "",
170
- "type": "address"
171
- }
172
- ],
173
- "stateMutability": "view",
174
- "type": "function"
175
- },
176
- {
177
- "inputs": [
178
- {
179
- "internalType": "address",
180
- "name": "to",
181
- "type": "address"
182
- },
183
- {
184
- "internalType": "uint256",
185
- "name": "value",
186
- "type": "uint256"
187
- },
188
- {
189
- "internalType": "bytes",
190
- "name": "data",
191
- "type": "bytes"
192
- },
193
- {
194
- "internalType": "enum Enum.Operation",
195
- "name": "operation",
196
- "type": "uint8"
197
- },
198
- {
199
- "internalType": "uint256",
200
- "name": "txGas",
201
- "type": "uint256"
202
- }
203
- ],
204
- "name": "exec",
205
- "outputs": [
206
- {
207
- "internalType": "bytes",
208
- "name": "returnData",
209
- "type": "bytes"
210
- }
211
- ],
212
- "stateMutability": "nonpayable",
213
- "type": "function"
214
- },
215
- {
216
- "inputs": [
217
- {
218
- "internalType": "address",
219
- "name": "account",
220
- "type": "address"
221
- },
222
- {
223
- "internalType": "bytes",
224
- "name": "data",
225
- "type": "bytes"
226
- }
227
- ],
228
- "name": "getRequestId",
229
- "outputs": [
230
- {
231
- "internalType": "uint256",
232
- "name": "requestId",
233
- "type": "uint256"
234
- }
235
- ],
236
- "stateMutability": "pure",
237
- "type": "function"
238
- },
239
- {
240
- "inputs": [
241
- {
242
- "internalType": "address",
243
- "name": "account",
244
- "type": "address"
245
- }
246
- ],
247
- "name": "getRequestsCount",
248
- "outputs": [
249
- {
250
- "internalType": "uint256",
251
- "name": "requestsCount",
252
- "type": "uint256"
253
- }
254
- ],
255
- "stateMutability": "view",
256
- "type": "function"
257
- },
258
- {
259
- "inputs": [
260
- {
261
- "internalType": "uint256",
262
- "name": "size",
263
- "type": "uint256"
264
- },
265
- {
266
- "internalType": "uint256",
267
- "name": "offset",
268
- "type": "uint256"
269
- }
270
- ],
271
- "name": "getUndeliveredRequestIds",
272
- "outputs": [
273
- {
274
- "internalType": "uint256[]",
275
- "name": "requestIds",
276
- "type": "uint256[]"
277
- }
278
- ],
279
- "stateMutability": "view",
280
- "type": "function"
281
- },
282
- {
283
- "inputs": [
284
- {
285
- "internalType": "address",
286
- "name": "signer",
287
- "type": "address"
288
- }
289
- ],
290
- "name": "isOperator",
291
- "outputs": [
292
- {
293
- "internalType": "bool",
294
- "name": "",
295
- "type": "bool"
296
- }
297
- ],
298
- "stateMutability": "view",
299
- "type": "function"
300
- },
301
- {
302
- "inputs": [
303
- {
304
- "internalType": "bytes32",
305
- "name": "hash",
306
- "type": "bytes32"
307
- },
308
- {
309
- "internalType": "bytes",
310
- "name": "signature",
311
- "type": "bytes"
312
- }
313
- ],
314
- "name": "isValidSignature",
315
- "outputs": [
316
- {
317
- "internalType": "bytes4",
318
- "name": "magicValue",
319
- "type": "bytes4"
320
- }
321
- ],
322
- "stateMutability": "view",
323
- "type": "function"
324
- },
325
- {
326
- "inputs": [
327
- {
328
- "internalType": "uint256",
329
- "name": "",
330
- "type": "uint256"
331
- },
332
- {
333
- "internalType": "uint256",
334
- "name": "",
335
- "type": "uint256"
336
- }
337
- ],
338
- "name": "mapRequestIds",
339
- "outputs": [
340
- {
341
- "internalType": "uint256",
342
- "name": "",
343
- "type": "uint256"
344
- }
345
- ],
346
- "stateMutability": "view",
347
- "type": "function"
348
- },
349
- {
350
- "inputs": [
351
- {
352
- "internalType": "address",
353
- "name": "",
354
- "type": "address"
355
- }
356
- ],
357
- "name": "mapRequestsCounts",
358
- "outputs": [
359
- {
360
- "internalType": "uint256",
361
- "name": "",
362
- "type": "uint256"
363
- }
364
- ],
365
- "stateMutability": "view",
366
- "type": "function"
367
- },
368
- {
369
- "inputs": [],
370
- "name": "nonce",
371
- "outputs": [
372
- {
373
- "internalType": "uint256",
374
- "name": "",
375
- "type": "uint256"
376
- }
377
- ],
378
- "stateMutability": "view",
379
- "type": "function"
380
- },
381
- {
382
- "inputs": [],
383
- "name": "numUndeliveredRequests",
384
- "outputs": [
385
- {
386
- "internalType": "uint256",
387
- "name": "",
388
- "type": "uint256"
389
- }
390
- ],
391
- "stateMutability": "view",
392
- "type": "function"
393
- },
394
- {
395
- "inputs": [
396
- {
397
- "internalType": "address",
398
- "name": "",
399
- "type": "address"
400
- },
401
- {
402
- "internalType": "address",
403
- "name": "",
404
- "type": "address"
405
- },
406
- {
407
- "internalType": "uint256[]",
408
- "name": "",
409
- "type": "uint256[]"
410
- },
411
- {
412
- "internalType": "uint256[]",
413
- "name": "",
414
- "type": "uint256[]"
415
- },
416
- {
417
- "internalType": "bytes",
418
- "name": "",
419
- "type": "bytes"
420
- }
421
- ],
422
- "name": "onERC1155BatchReceived",
423
- "outputs": [
424
- {
425
- "internalType": "bytes4",
426
- "name": "",
427
- "type": "bytes4"
428
- }
429
- ],
430
- "stateMutability": "pure",
431
- "type": "function"
432
- },
433
- {
434
- "inputs": [
435
- {
436
- "internalType": "address",
437
- "name": "",
438
- "type": "address"
439
- },
440
- {
441
- "internalType": "address",
442
- "name": "",
443
- "type": "address"
444
- },
445
- {
446
- "internalType": "uint256",
447
- "name": "",
448
- "type": "uint256"
449
- },
450
- {
451
- "internalType": "uint256",
452
- "name": "",
453
- "type": "uint256"
454
- },
455
- {
456
- "internalType": "bytes",
457
- "name": "",
458
- "type": "bytes"
459
- }
460
- ],
461
- "name": "onERC1155Received",
462
- "outputs": [
463
- {
464
- "internalType": "bytes4",
465
- "name": "",
466
- "type": "bytes4"
467
- }
468
- ],
469
- "stateMutability": "pure",
470
- "type": "function"
471
- },
472
- {
473
- "inputs": [
474
- {
475
- "internalType": "address",
476
- "name": "",
477
- "type": "address"
478
- },
479
- {
480
- "internalType": "address",
481
- "name": "",
482
- "type": "address"
483
- },
484
- {
485
- "internalType": "uint256",
486
- "name": "",
487
- "type": "uint256"
488
- },
489
- {
490
- "internalType": "bytes",
491
- "name": "",
492
- "type": "bytes"
493
- }
494
- ],
495
- "name": "onERC721Received",
496
- "outputs": [
497
- {
498
- "internalType": "bytes4",
499
- "name": "",
500
- "type": "bytes4"
501
- }
502
- ],
503
- "stateMutability": "pure",
504
- "type": "function"
505
- },
506
- {
507
- "inputs": [],
508
- "name": "price",
509
- "outputs": [
510
- {
511
- "internalType": "uint256",
512
- "name": "",
513
- "type": "uint256"
514
- }
515
- ],
516
- "stateMutability": "view",
517
- "type": "function"
518
- },
519
- {
520
- "inputs": [
521
- {
522
- "internalType": "bytes",
523
- "name": "data",
524
- "type": "bytes"
525
- }
526
- ],
527
- "name": "request",
528
- "outputs": [
529
- {
530
- "internalType": "uint256",
531
- "name": "requestId",
532
- "type": "uint256"
533
- }
534
- ],
535
- "stateMutability": "payable",
536
- "type": "function"
537
- },
538
- {
539
- "inputs": [
540
- {
541
- "internalType": "uint256",
542
- "name": "newPrice",
543
- "type": "uint256"
544
- }
545
- ],
546
- "name": "setPrice",
547
- "outputs": [],
548
- "stateMutability": "nonpayable",
549
- "type": "function"
550
- },
551
- {
552
- "inputs": [
553
- {
554
- "internalType": "bytes",
555
- "name": "initParams",
556
- "type": "bytes"
557
- }
558
- ],
559
- "name": "setUp",
560
- "outputs": [],
561
- "stateMutability": "nonpayable",
562
- "type": "function"
563
- },
564
- {
565
- "inputs": [],
566
- "name": "token",
567
- "outputs": [
568
- {
569
- "internalType": "contract IERC721",
570
- "name": "",
571
- "type": "address"
572
- }
573
- ],
574
- "stateMutability": "view",
575
- "type": "function"
576
- },
577
- {
578
- "inputs": [],
579
- "name": "tokenId",
580
- "outputs": [
581
- {
582
- "internalType": "uint256",
583
- "name": "",
584
- "type": "uint256"
585
- }
586
- ],
587
- "stateMutability": "view",
588
- "type": "function"
589
- },
590
- {
591
- "inputs": [
592
- {
593
- "internalType": "address",
594
- "name": "",
595
- "type": "address"
596
- },
597
- {
598
- "internalType": "address",
599
- "name": "",
600
- "type": "address"
601
- },
602
- {
603
- "internalType": "address",
604
- "name": "",
605
- "type": "address"
606
- },
607
- {
608
- "internalType": "uint256",
609
- "name": "",
610
- "type": "uint256"
611
- },
612
- {
613
- "internalType": "bytes",
614
- "name": "",
615
- "type": "bytes"
616
- },
617
- {
618
- "internalType": "bytes",
619
- "name": "",
620
- "type": "bytes"
621
- }
622
- ],
623
- "name": "tokensReceived",
624
- "outputs": [],
625
- "stateMutability": "pure",
626
- "type": "function"
627
- },
628
- {
629
- "inputs": [
630
- {
631
- "components": [
632
- {
633
- "internalType": "address",
634
- "name": "sender",
635
- "type": "address"
636
- },
637
- {
638
- "internalType": "uint256",
639
- "name": "nonce",
640
- "type": "uint256"
641
- },
642
- {
643
- "internalType": "bytes",
644
- "name": "initCode",
645
- "type": "bytes"
646
- },
647
- {
648
- "internalType": "bytes",
649
- "name": "callData",
650
- "type": "bytes"
651
- },
652
- {
653
- "internalType": "uint256",
654
- "name": "callGasLimit",
655
- "type": "uint256"
656
- },
657
- {
658
- "internalType": "uint256",
659
- "name": "verificationGasLimit",
660
- "type": "uint256"
661
- },
662
- {
663
- "internalType": "uint256",
664
- "name": "preVerificationGas",
665
- "type": "uint256"
666
- },
667
- {
668
- "internalType": "uint256",
669
- "name": "maxFeePerGas",
670
- "type": "uint256"
671
- },
672
- {
673
- "internalType": "uint256",
674
- "name": "maxPriorityFeePerGas",
675
- "type": "uint256"
676
- },
677
- {
678
- "internalType": "bytes",
679
- "name": "paymasterAndData",
680
- "type": "bytes"
681
- },
682
- {
683
- "internalType": "bytes",
684
- "name": "signature",
685
- "type": "bytes"
686
- }
687
- ],
688
- "internalType": "struct UserOperation",
689
- "name": "userOp",
690
- "type": "tuple"
691
- },
692
- {
693
- "internalType": "bytes32",
694
- "name": "userOpHash",
695
- "type": "bytes32"
696
- },
697
- {
698
- "internalType": "uint256",
699
- "name": "missingAccountFunds",
700
- "type": "uint256"
701
- }
702
- ],
703
- "name": "validateUserOp",
704
- "outputs": [
705
- {
706
- "internalType": "uint256",
707
- "name": "validationData",
708
- "type": "uint256"
709
- }
710
- ],
711
- "stateMutability": "nonpayable",
712
- "type": "function"
713
- },
714
- {
715
- "stateMutability": "payable",
716
- "type": "receive"
717
- }
718
- ]
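
For reference, an ABI file like the deleted contracts/new_mech_abi.json is normally consumed by instantiating a contract object (for example with web3.py) and calling the view functions the ABI declares, such as price or numUndeliveredRequests. The sketch below is illustrative only: the RPC endpoint and mech address are placeholders, web3.py is an assumed client, and this commit removes the file precisely because nothing in the repo reads it anymore.

import json

from web3 import Web3

# Placeholder values -- not taken from the repository
GNOSIS_RPC = "https://rpc.gnosischain.com"
MECH_ADDRESS = "0x0000000000000000000000000000000000000000"

w3 = Web3(Web3.HTTPProvider(GNOSIS_RPC))

# Load the ABI that used to live in contracts/new_mech_abi.json
with open("contracts/new_mech_abi.json") as f:
    mech_abi = json.load(f)

mech = w3.eth.contract(address=MECH_ADDRESS, abi=mech_abi)

# Two of the view functions declared in the ABI above
print(mech.functions.price().call())
print(mech.functions.numUndeliveredRequests().call())
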
contracts/old_mech_abi.json DELETED
@@ -1,605 +0,0 @@
1
- [
2
- {
3
- "inputs": [
4
- {
5
- "internalType": "address",
6
- "name": "_token",
7
- "type": "address"
8
- },
9
- {
10
- "internalType": "uint256",
11
- "name": "_tokenId",
12
- "type": "uint256"
13
- },
14
- {
15
- "internalType": "uint256",
16
- "name": "_price",
17
- "type": "uint256"
18
- }
19
- ],
20
- "stateMutability": "nonpayable",
21
- "type": "constructor"
22
- },
23
- {
24
- "inputs": [
25
- {
26
- "internalType": "uint256",
27
- "name": "agentId",
28
- "type": "uint256"
29
- }
30
- ],
31
- "name": "AgentNotFound",
32
- "type": "error"
33
- },
34
- {
35
- "inputs": [
36
- {
37
- "internalType": "uint256",
38
- "name": "provided",
39
- "type": "uint256"
40
- },
41
- {
42
- "internalType": "uint256",
43
- "name": "expected",
44
- "type": "uint256"
45
- }
46
- ],
47
- "name": "NotEnoughPaid",
48
- "type": "error"
49
- },
50
- {
51
- "inputs": [],
52
- "name": "ZeroAddress",
53
- "type": "error"
54
- },
55
- {
56
- "anonymous": false,
57
- "inputs": [
58
- {
59
- "indexed": false,
60
- "internalType": "uint256",
61
- "name": "requestId",
62
- "type": "uint256"
63
- },
64
- {
65
- "indexed": false,
66
- "internalType": "bytes",
67
- "name": "data",
68
- "type": "bytes"
69
- }
70
- ],
71
- "name": "Deliver",
72
- "type": "event"
73
- },
74
- {
75
- "anonymous": false,
76
- "inputs": [
77
- {
78
- "indexed": true,
79
- "internalType": "address",
80
- "name": "sender",
81
- "type": "address"
82
- },
83
- {
84
- "indexed": false,
85
- "internalType": "bytes32",
86
- "name": "taskHash",
87
- "type": "bytes32"
88
- }
89
- ],
90
- "name": "Perform",
91
- "type": "event"
92
- },
93
- {
94
- "anonymous": false,
95
- "inputs": [
96
- {
97
- "indexed": false,
98
- "internalType": "uint256",
99
- "name": "price",
100
- "type": "uint256"
101
- }
102
- ],
103
- "name": "PriceUpdated",
104
- "type": "event"
105
- },
106
- {
107
- "anonymous": false,
108
- "inputs": [
109
- {
110
- "indexed": true,
111
- "internalType": "address",
112
- "name": "sender",
113
- "type": "address"
114
- },
115
- {
116
- "indexed": false,
117
- "internalType": "uint256",
118
- "name": "requestId",
119
- "type": "uint256"
120
- },
121
- {
122
- "indexed": false,
123
- "internalType": "bytes",
124
- "name": "data",
125
- "type": "bytes"
126
- }
127
- ],
128
- "name": "Request",
129
- "type": "event"
130
- },
131
- {
132
- "inputs": [
133
- {
134
- "internalType": "uint256",
135
- "name": "requestId",
136
- "type": "uint256"
137
- },
138
- {
139
- "internalType": "bytes",
140
- "name": "data",
141
- "type": "bytes"
142
- }
143
- ],
144
- "name": "deliver",
145
- "outputs": [],
146
- "stateMutability": "nonpayable",
147
- "type": "function"
148
- },
149
- {
150
- "inputs": [],
151
- "name": "entryPoint",
152
- "outputs": [
153
- {
154
- "internalType": "contract IEntryPoint",
155
- "name": "",
156
- "type": "address"
157
- }
158
- ],
159
- "stateMutability": "view",
160
- "type": "function"
161
- },
162
- {
163
- "inputs": [
164
- {
165
- "internalType": "address",
166
- "name": "to",
167
- "type": "address"
168
- },
169
- {
170
- "internalType": "uint256",
171
- "name": "value",
172
- "type": "uint256"
173
- },
174
- {
175
- "internalType": "bytes",
176
- "name": "data",
177
- "type": "bytes"
178
- },
179
- {
180
- "internalType": "enum Enum.Operation",
181
- "name": "operation",
182
- "type": "uint8"
183
- },
184
- {
185
- "internalType": "uint256",
186
- "name": "txGas",
187
- "type": "uint256"
188
- }
189
- ],
190
- "name": "exec",
191
- "outputs": [
192
- {
193
- "internalType": "bytes",
194
- "name": "returnData",
195
- "type": "bytes"
196
- }
197
- ],
198
- "stateMutability": "nonpayable",
199
- "type": "function"
200
- },
201
- {
202
- "inputs": [
203
- {
204
- "internalType": "address",
205
- "name": "account",
206
- "type": "address"
207
- },
208
- {
209
- "internalType": "bytes",
210
- "name": "data",
211
- "type": "bytes"
212
- }
213
- ],
214
- "name": "getRequestId",
215
- "outputs": [
216
- {
217
- "internalType": "uint256",
218
- "name": "requestId",
219
- "type": "uint256"
220
- }
221
- ],
222
- "stateMutability": "pure",
223
- "type": "function"
224
- },
225
- {
226
- "inputs": [
227
- {
228
- "internalType": "address",
229
- "name": "signer",
230
- "type": "address"
231
- }
232
- ],
233
- "name": "isOperator",
234
- "outputs": [
235
- {
236
- "internalType": "bool",
237
- "name": "",
238
- "type": "bool"
239
- }
240
- ],
241
- "stateMutability": "view",
242
- "type": "function"
243
- },
244
- {
245
- "inputs": [
246
- {
247
- "internalType": "bytes32",
248
- "name": "hash",
249
- "type": "bytes32"
250
- },
251
- {
252
- "internalType": "bytes",
253
- "name": "signature",
254
- "type": "bytes"
255
- }
256
- ],
257
- "name": "isValidSignature",
258
- "outputs": [
259
- {
260
- "internalType": "bytes4",
261
- "name": "magicValue",
262
- "type": "bytes4"
263
- }
264
- ],
265
- "stateMutability": "view",
266
- "type": "function"
267
- },
268
- {
269
- "inputs": [],
270
- "name": "nonce",
271
- "outputs": [
272
- {
273
- "internalType": "uint256",
274
- "name": "",
275
- "type": "uint256"
276
- }
277
- ],
278
- "stateMutability": "view",
279
- "type": "function"
280
- },
281
- {
282
- "inputs": [
283
- {
284
- "internalType": "address",
285
- "name": "",
286
- "type": "address"
287
- },
288
- {
289
- "internalType": "address",
290
- "name": "",
291
- "type": "address"
292
- },
293
- {
294
- "internalType": "uint256[]",
295
- "name": "",
296
- "type": "uint256[]"
297
- },
298
- {
299
- "internalType": "uint256[]",
300
- "name": "",
301
- "type": "uint256[]"
302
- },
303
- {
304
- "internalType": "bytes",
305
- "name": "",
306
- "type": "bytes"
307
- }
308
- ],
309
- "name": "onERC1155BatchReceived",
310
- "outputs": [
311
- {
312
- "internalType": "bytes4",
313
- "name": "",
314
- "type": "bytes4"
315
- }
316
- ],
317
- "stateMutability": "pure",
318
- "type": "function"
319
- },
320
- {
321
- "inputs": [
322
- {
323
- "internalType": "address",
324
- "name": "",
325
- "type": "address"
326
- },
327
- {
328
- "internalType": "address",
329
- "name": "",
330
- "type": "address"
331
- },
332
- {
333
- "internalType": "uint256",
334
- "name": "",
335
- "type": "uint256"
336
- },
337
- {
338
- "internalType": "uint256",
339
- "name": "",
340
- "type": "uint256"
341
- },
342
- {
343
- "internalType": "bytes",
344
- "name": "",
345
- "type": "bytes"
346
- }
347
- ],
348
- "name": "onERC1155Received",
349
- "outputs": [
350
- {
351
- "internalType": "bytes4",
352
- "name": "",
353
- "type": "bytes4"
354
- }
355
- ],
356
- "stateMutability": "pure",
357
- "type": "function"
358
- },
359
- {
360
- "inputs": [
361
- {
362
- "internalType": "address",
363
- "name": "",
364
- "type": "address"
365
- },
366
- {
367
- "internalType": "address",
368
- "name": "",
369
- "type": "address"
370
- },
371
- {
372
- "internalType": "uint256",
373
- "name": "",
374
- "type": "uint256"
375
- },
376
- {
377
- "internalType": "bytes",
378
- "name": "",
379
- "type": "bytes"
380
- }
381
- ],
382
- "name": "onERC721Received",
383
- "outputs": [
384
- {
385
- "internalType": "bytes4",
386
- "name": "",
387
- "type": "bytes4"
388
- }
389
- ],
390
- "stateMutability": "pure",
391
- "type": "function"
392
- },
393
- {
394
- "inputs": [],
395
- "name": "price",
396
- "outputs": [
397
- {
398
- "internalType": "uint256",
399
- "name": "",
400
- "type": "uint256"
401
- }
402
- ],
403
- "stateMutability": "view",
404
- "type": "function"
405
- },
406
- {
407
- "inputs": [
408
- {
409
- "internalType": "bytes",
410
- "name": "data",
411
- "type": "bytes"
412
- }
413
- ],
414
- "name": "request",
415
- "outputs": [
416
- {
417
- "internalType": "uint256",
418
- "name": "requestId",
419
- "type": "uint256"
420
- }
421
- ],
422
- "stateMutability": "payable",
423
- "type": "function"
424
- },
425
- {
426
- "inputs": [
427
- {
428
- "internalType": "uint256",
429
- "name": "newPrice",
430
- "type": "uint256"
431
- }
432
- ],
433
- "name": "setPrice",
434
- "outputs": [],
435
- "stateMutability": "nonpayable",
436
- "type": "function"
437
- },
438
- {
439
- "inputs": [
440
- {
441
- "internalType": "bytes",
442
- "name": "initParams",
443
- "type": "bytes"
444
- }
445
- ],
446
- "name": "setUp",
447
- "outputs": [],
448
- "stateMutability": "nonpayable",
449
- "type": "function"
450
- },
451
- {
452
- "inputs": [],
453
- "name": "token",
454
- "outputs": [
455
- {
456
- "internalType": "contract IERC721",
457
- "name": "",
458
- "type": "address"
459
- }
460
- ],
461
- "stateMutability": "view",
462
- "type": "function"
463
- },
464
- {
465
- "inputs": [],
466
- "name": "tokenId",
467
- "outputs": [
468
- {
469
- "internalType": "uint256",
470
- "name": "",
471
- "type": "uint256"
472
- }
473
- ],
474
- "stateMutability": "view",
475
- "type": "function"
476
- },
477
- {
478
- "inputs": [
479
- {
480
- "internalType": "address",
481
- "name": "",
482
- "type": "address"
483
- },
484
- {
485
- "internalType": "address",
486
- "name": "",
487
- "type": "address"
488
- },
489
- {
490
- "internalType": "address",
491
- "name": "",
492
- "type": "address"
493
- },
494
- {
495
- "internalType": "uint256",
496
- "name": "",
497
- "type": "uint256"
498
- },
499
- {
500
- "internalType": "bytes",
501
- "name": "",
502
- "type": "bytes"
503
- },
504
- {
505
- "internalType": "bytes",
506
- "name": "",
507
- "type": "bytes"
508
- }
509
- ],
510
- "name": "tokensReceived",
511
- "outputs": [],
512
- "stateMutability": "pure",
513
- "type": "function"
514
- },
515
- {
516
- "inputs": [
517
- {
518
- "components": [
519
- {
520
- "internalType": "address",
521
- "name": "sender",
522
- "type": "address"
523
- },
524
- {
525
- "internalType": "uint256",
526
- "name": "nonce",
527
- "type": "uint256"
528
- },
529
- {
530
- "internalType": "bytes",
531
- "name": "initCode",
532
- "type": "bytes"
533
- },
534
- {
535
- "internalType": "bytes",
536
- "name": "callData",
537
- "type": "bytes"
538
- },
539
- {
540
- "internalType": "uint256",
541
- "name": "callGasLimit",
542
- "type": "uint256"
543
- },
544
- {
545
- "internalType": "uint256",
546
- "name": "verificationGasLimit",
547
- "type": "uint256"
548
- },
549
- {
550
- "internalType": "uint256",
551
- "name": "preVerificationGas",
552
- "type": "uint256"
553
- },
554
- {
555
- "internalType": "uint256",
556
- "name": "maxFeePerGas",
557
- "type": "uint256"
558
- },
559
- {
560
- "internalType": "uint256",
561
- "name": "maxPriorityFeePerGas",
562
- "type": "uint256"
563
- },
564
- {
565
- "internalType": "bytes",
566
- "name": "paymasterAndData",
567
- "type": "bytes"
568
- },
569
- {
570
- "internalType": "bytes",
571
- "name": "signature",
572
- "type": "bytes"
573
- }
574
- ],
575
- "internalType": "struct UserOperation",
576
- "name": "userOp",
577
- "type": "tuple"
578
- },
579
- {
580
- "internalType": "bytes32",
581
- "name": "userOpHash",
582
- "type": "bytes32"
583
- },
584
- {
585
- "internalType": "uint256",
586
- "name": "missingAccountFunds",
587
- "type": "uint256"
588
- }
589
- ],
590
- "name": "validateUserOp",
591
- "outputs": [
592
- {
593
- "internalType": "uint256",
594
- "name": "validationData",
595
- "type": "uint256"
596
- }
597
- ],
598
- "stateMutability": "nonpayable",
599
- "type": "function"
600
- },
601
- {
602
- "stateMutability": "payable",
603
- "type": "receive"
604
- }
605
- ]
notebooks/staking.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/tools_accuracy.ipynb DELETED
@@ -1,1216 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "import pandas as pd\n",
10
- "import matplotlib.pyplot as plt\n",
11
- "import seaborn as sns\n",
12
- "import json\n",
13
- "sns.set_style(\"darkgrid\")"
14
- ]
15
- },
16
- {
17
- "cell_type": "code",
18
- "execution_count": 2,
19
- "metadata": {},
20
- "outputs": [],
21
- "source": [
22
- "tools = pd.read_parquet('../data/tools.parquet')"
23
- ]
24
- },
25
- {
26
- "cell_type": "code",
27
- "execution_count": 3,
28
- "metadata": {},
29
- "outputs": [
30
- {
31
- "data": {
32
- "text/plain": [
33
- "Timestamp('2024-12-10 07:50:55+0000', tz='UTC')"
34
- ]
35
- },
36
- "execution_count": 3,
37
- "metadata": {},
38
- "output_type": "execute_result"
39
- }
40
- ],
41
- "source": [
42
- "max(tools.request_time)"
43
- ]
44
- },
45
- {
46
- "cell_type": "code",
47
- "execution_count": 4,
48
- "metadata": {},
49
- "outputs": [
50
- {
51
- "data": {
52
- "text/plain": [
53
- "Timestamp('2024-10-13 00:00:30+0000', tz='UTC')"
54
- ]
55
- },
56
- "execution_count": 4,
57
- "metadata": {},
58
- "output_type": "execute_result"
59
- }
60
- ],
61
- "source": [
62
- "min(tools.request_time)"
63
- ]
64
- },
65
- {
66
- "cell_type": "code",
67
- "execution_count": 4,
68
- "metadata": {},
69
- "outputs": [
70
- {
71
- "name": "stdout",
72
- "output_type": "stream",
73
- "text": [
74
- "<class 'pandas.core.frame.DataFrame'>\n",
75
- "RangeIndex: 358454 entries, 0 to 358453\n",
76
- "Data columns (total 23 columns):\n",
77
- " # Column Non-Null Count Dtype \n",
78
- "--- ------ -------------- ----- \n",
79
- " 0 request_id 358454 non-null object \n",
80
- " 1 request_block 358454 non-null object \n",
81
- " 2 prompt_request 358454 non-null object \n",
82
- " 3 tool 358454 non-null object \n",
83
- " 4 nonce 358454 non-null object \n",
84
- " 5 trader_address 358454 non-null object \n",
85
- " 6 deliver_block 358454 non-null object \n",
86
- " 7 error 358454 non-null int64 \n",
87
- " 8 error_message 3772 non-null object \n",
88
- " 9 prompt_response 357509 non-null object \n",
89
- " 10 mech_address 357601 non-null object \n",
90
- " 11 p_yes 354682 non-null float64\n",
91
- " 12 p_no 354682 non-null float64\n",
92
- " 13 confidence 354682 non-null float64\n",
93
- " 14 info_utility 354682 non-null float64\n",
94
- " 15 vote 261707 non-null object \n",
95
- " 16 win_probability 354682 non-null float64\n",
96
- " 17 market_creator 358454 non-null object \n",
97
- " 18 title 358454 non-null object \n",
98
- " 19 currentAnswer 287126 non-null object \n",
99
- " 20 request_time 358454 non-null object \n",
100
- " 21 request_month_year 358454 non-null object \n",
101
- " 22 request_month_year_week 358454 non-null object \n",
102
- "dtypes: float64(5), int64(1), object(17)\n",
103
- "memory usage: 62.9+ MB\n"
104
- ]
105
- }
106
- ],
107
- "source": [
108
- "tools.info()"
109
- ]
110
- },
111
- {
112
- "cell_type": "code",
113
- "execution_count": 5,
114
- "metadata": {},
115
- "outputs": [
116
- {
117
- "data": {
118
- "text/plain": [
119
- "Index(['request_id', 'request_block', 'prompt_request', 'tool', 'nonce',\n",
120
- " 'trader_address', 'deliver_block', 'error', 'error_message',\n",
121
- " 'prompt_response', 'mech_address', 'p_yes', 'p_no', 'confidence',\n",
122
- " 'info_utility', 'vote', 'win_probability', 'market_creator', 'title',\n",
123
- " 'currentAnswer', 'request_time', 'request_month_year',\n",
124
- " 'request_month_year_week'],\n",
125
- " dtype='object')"
126
- ]
127
- },
128
- "execution_count": 5,
129
- "metadata": {},
130
- "output_type": "execute_result"
131
- }
132
- ],
133
- "source": [
134
- "tools.columns"
135
- ]
136
- },
137
- {
138
- "cell_type": "code",
139
- "execution_count": 8,
140
- "metadata": {},
141
- "outputs": [
142
- {
143
- "data": {
144
- "text/plain": [
145
- "str"
146
- ]
147
- },
148
- "execution_count": 8,
149
- "metadata": {},
150
- "output_type": "execute_result"
151
- }
152
- ],
153
- "source": [
154
- "type(tools.iloc[0].request_time)"
155
- ]
156
- },
157
- {
158
- "cell_type": "code",
159
- "execution_count": 23,
160
- "metadata": {},
161
- "outputs": [
162
- {
163
- "data": {
164
- "text/plain": [
165
- "dict"
166
- ]
167
- },
168
- "execution_count": 23,
169
- "metadata": {},
170
- "output_type": "execute_result"
171
- }
172
- ],
173
- "source": [
174
- "import pickle\n",
175
- "t_map = pickle.load(open(\"../data/t_map.pkl\", \"rb\"))\n",
176
- "type(t_map)"
177
- ]
178
- },
179
- {
180
- "cell_type": "code",
181
- "execution_count": 24,
182
- "metadata": {},
183
- "outputs": [
184
- {
185
- "data": {
186
- "text/plain": [
187
- "475329"
188
- ]
189
- },
190
- "execution_count": 24,
191
- "metadata": {},
192
- "output_type": "execute_result"
193
- }
194
- ],
195
- "source": [
196
- "len(t_map)"
197
- ]
198
- },
199
- {
200
- "cell_type": "code",
201
- "execution_count": 25,
202
- "metadata": {},
203
- "outputs": [
204
- {
205
- "name": "stdout",
206
- "output_type": "stream",
207
- "text": [
208
- "29624577\n"
209
- ]
210
- }
211
- ],
212
- "source": [
213
- "for item in t_map:\n",
214
- " print(item)\n",
215
- " break"
216
- ]
217
- },
218
- {
219
- "cell_type": "code",
220
- "execution_count": 27,
221
- "metadata": {},
222
- "outputs": [
223
- {
224
- "data": {
225
- "text/plain": [
226
- "'2023-07-12 11:58:40'"
227
- ]
228
- },
229
- "execution_count": 27,
230
- "metadata": {},
231
- "output_type": "execute_result"
232
- }
233
- ],
234
- "source": [
235
- "min(t_map.values())"
236
- ]
237
- },
238
- {
239
- "cell_type": "code",
240
- "execution_count": 26,
241
- "metadata": {},
242
- "outputs": [
243
- {
244
- "data": {
245
- "text/plain": [
246
- "'2023-08-24 16:04:50'"
247
- ]
248
- },
249
- "execution_count": 26,
250
- "metadata": {},
251
- "output_type": "execute_result"
252
- }
253
- ],
254
- "source": [
255
- "t_map[29624577]"
256
- ]
257
- },
258
- {
259
- "cell_type": "code",
260
- "execution_count": 28,
261
- "metadata": {},
262
- "outputs": [
263
- {
264
- "data": {
265
- "text/plain": [
266
- "'2024-09-04 07:32:15'"
267
- ]
268
- },
269
- "execution_count": 28,
270
- "metadata": {},
271
- "output_type": "execute_result"
272
- }
273
- ],
274
- "source": [
275
- "max(t_map.values())"
276
- ]
277
- },
278
- {
279
- "cell_type": "code",
280
- "execution_count": 21,
281
- "metadata": {},
282
- "outputs": [
283
- {
284
- "data": {
285
- "text/plain": [
286
- "request_id 1155886186807766696223563218518399229072982679...\n",
287
- "request_block 35356121\n",
288
- "prompt_request Please take over the role of a Data Scientist ...\n",
289
- "tool prediction-request-rag-claude\n",
290
- "nonce 2c4c8c5c-afcf-4e28-a09a-aa2bae3f5444\n",
291
- "trader_address 0x22335c348afa4eae4cc6d2158c1ac259aaaecdfe\n",
292
- "deliver_block 35356134\n",
293
- "error 0\n",
294
- "error_message None\n",
295
- "prompt_response \\nYou will be evaluating the likelihood of an ...\n",
296
- "mech_address 0x5e1d1eb61e1164d5a50b28c575da73a29595dff7\n",
297
- "p_yes 0.6\n",
298
- "p_no 0.4\n",
299
- "confidence 0.7\n",
300
- "info_utility 0.7\n",
301
- "vote Yes\n",
302
- "win_probability 0.6\n",
303
- "title Will there be an increase in the wasp populati...\n",
304
- "currentAnswer Yes\n",
305
- "Name: 0, dtype: object"
306
- ]
307
- },
308
- "execution_count": 21,
309
- "metadata": {},
310
- "output_type": "execute_result"
311
- }
312
- ],
313
- "source": [
314
- "tools.iloc[0]"
315
- ]
316
- },
317
- {
318
- "cell_type": "code",
319
- "execution_count": 18,
320
- "metadata": {},
321
- "outputs": [
322
- {
323
- "data": {
324
- "text/plain": [
325
- "vote\n",
326
- "Yes 55881\n",
327
- "No 51741\n",
328
- "Name: count, dtype: int64"
329
- ]
330
- },
331
- "execution_count": 18,
332
- "metadata": {},
333
- "output_type": "execute_result"
334
- }
335
- ],
336
- "source": [
337
- "tools.vote.value_counts()"
338
- ]
339
- },
340
- {
341
- "cell_type": "code",
342
- "execution_count": 25,
343
- "metadata": {},
344
- "outputs": [
345
- {
346
- "name": "stdout",
347
- "output_type": "stream",
348
- "text": [
349
- "<class 'pandas.core.frame.DataFrame'>\n",
350
- "RangeIndex: 132150 entries, 0 to 132149\n",
351
- "Data columns (total 22 columns):\n",
352
- " # Column Non-Null Count Dtype \n",
353
- "--- ------ -------------- ----- \n",
354
- " 0 request_id 132150 non-null object \n",
355
- " 1 request_block 132150 non-null int64 \n",
356
- " 2 prompt_request 132150 non-null object \n",
357
- " 3 tool 132150 non-null object \n",
358
- " 4 nonce 132150 non-null object \n",
359
- " 5 trader_address 132150 non-null object \n",
360
- " 6 deliver_block 132150 non-null int64 \n",
361
- " 7 error 132149 non-null float64\n",
362
- " 8 error_message 9702 non-null object \n",
363
- " 9 prompt_response 132060 non-null object \n",
364
- " 10 mech_address 132150 non-null object \n",
365
- " 11 p_yes 122447 non-null float64\n",
366
- " 12 p_no 122447 non-null float64\n",
367
- " 13 confidence 122447 non-null float64\n",
368
- " 14 info_utility 122447 non-null float64\n",
369
- " 15 vote 102396 non-null object \n",
370
- " 16 win_probability 122447 non-null float64\n",
371
- " 17 title 124256 non-null object \n",
372
- " 18 currentAnswer 85763 non-null object \n",
373
- " 19 request_time 132150 non-null object \n",
374
- " 20 request_month_year 132150 non-null object \n",
375
- " 21 request_month_year_week 132150 non-null object \n",
376
- "dtypes: float64(6), int64(2), object(14)\n",
377
- "memory usage: 22.2+ MB\n"
378
- ]
379
- }
380
- ],
381
- "source": [
382
- "tools.info()"
383
- ]
384
- },
385
- {
386
- "cell_type": "code",
387
- "execution_count": 31,
388
- "metadata": {},
389
- "outputs": [],
390
- "source": [
391
- "fpmms = pd.read_parquet('../data/fpmms.parquet')"
392
- ]
393
- },
394
- {
395
- "cell_type": "code",
396
- "execution_count": 6,
397
- "metadata": {},
398
- "outputs": [
399
- {
400
- "data": {
401
- "text/html": [
402
- "<div>\n",
403
- "<style scoped>\n",
404
- " .dataframe tbody tr th:only-of-type {\n",
405
- " vertical-align: middle;\n",
406
- " }\n",
407
- "\n",
408
- " .dataframe tbody tr th {\n",
409
- " vertical-align: top;\n",
410
- " }\n",
411
- "\n",
412
- " .dataframe thead th {\n",
413
- " text-align: right;\n",
414
- " }\n",
415
- "</style>\n",
416
- "<table border=\"1\" class=\"dataframe\">\n",
417
- " <thead>\n",
418
- " <tr style=\"text-align: right;\">\n",
419
- " <th></th>\n",
420
- " <th>currentAnswer</th>\n",
421
- " <th>id</th>\n",
422
- " <th>title</th>\n",
423
- " </tr>\n",
424
- " </thead>\n",
425
- " <tbody>\n",
426
- " <tr>\n",
427
- " <th>0</th>\n",
428
- " <td>No</td>\n",
429
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
430
- " <td>Will the first floating offshore wind research...</td>\n",
431
- " </tr>\n",
432
- " <tr>\n",
433
- " <th>1</th>\n",
434
- " <td>No</td>\n",
435
- " <td>0x0020d13c89140b47e10db54cbd53852b90bc1391</td>\n",
436
- " <td>Will the Francis Scott Key Bridge in Baltimore...</td>\n",
437
- " </tr>\n",
438
- " <tr>\n",
439
- " <th>2</th>\n",
440
- " <td>No</td>\n",
441
- " <td>0x003ae5e007cc38b3f86b0ed7c82f938a1285ac07</td>\n",
442
- " <td>Will FC Saarbrucken reach the final of the Ger...</td>\n",
443
- " </tr>\n",
444
- " <tr>\n",
445
- " <th>3</th>\n",
446
- " <td>Yes</td>\n",
447
- " <td>0x004c8d4c619dc6b9caa940f5ea7ef699ae85359c</td>\n",
448
- " <td>Will the pro-life activists convicted for 'con...</td>\n",
449
- " </tr>\n",
450
- " <tr>\n",
451
- " <th>4</th>\n",
452
- " <td>Yes</td>\n",
453
- " <td>0x005e3f7a90585acbec807425a750fbba1d0c2b5c</td>\n",
454
- " <td>Will Apple announce the release of a new M4 ch...</td>\n",
455
- " </tr>\n",
456
- " </tbody>\n",
457
- "</table>\n",
458
- "</div>"
459
- ],
460
- "text/plain": [
461
- " currentAnswer id \\\n",
462
- "0 No 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 \n",
463
- "1 No 0x0020d13c89140b47e10db54cbd53852b90bc1391 \n",
464
- "2 No 0x003ae5e007cc38b3f86b0ed7c82f938a1285ac07 \n",
465
- "3 Yes 0x004c8d4c619dc6b9caa940f5ea7ef699ae85359c \n",
466
- "4 Yes 0x005e3f7a90585acbec807425a750fbba1d0c2b5c \n",
467
- "\n",
468
- " title \n",
469
- "0 Will the first floating offshore wind research... \n",
470
- "1 Will the Francis Scott Key Bridge in Baltimore... \n",
471
- "2 Will FC Saarbrucken reach the final of the Ger... \n",
472
- "3 Will the pro-life activists convicted for 'con... \n",
473
- "4 Will Apple announce the release of a new M4 ch... "
474
- ]
475
- },
476
- "execution_count": 6,
477
- "metadata": {},
478
- "output_type": "execute_result"
479
- }
480
- ],
481
- "source": [
482
- "fpmms.head()"
483
- ]
484
- },
485
- {
486
- "cell_type": "code",
487
- "execution_count": 7,
488
- "metadata": {},
489
- "outputs": [
490
- {
491
- "data": {
492
- "text/plain": [
493
- "4251"
494
- ]
495
- },
496
- "execution_count": 7,
497
- "metadata": {},
498
- "output_type": "execute_result"
499
- }
500
- ],
501
- "source": [
502
- "len(fpmms)"
503
- ]
504
- },
505
- {
506
- "cell_type": "code",
507
- "execution_count": 18,
508
- "metadata": {},
509
- "outputs": [
510
- {
511
- "name": "stdout",
512
- "output_type": "stream",
513
- "text": [
514
- "<class 'pandas.core.frame.DataFrame'>\n",
515
- "RangeIndex: 18035 entries, 0 to 18034\n",
516
- "Data columns (total 20 columns):\n",
517
- " # Column Non-Null Count Dtype \n",
518
- "--- ------ -------------- ----- \n",
519
- " 0 trader_address 18035 non-null object \n",
520
- " 1 market_creator 18035 non-null object \n",
521
- " 2 trade_id 18035 non-null object \n",
522
- " 3 creation_timestamp 18035 non-null datetime64[ns, UTC]\n",
523
- " 4 title 18035 non-null object \n",
524
- " 5 market_status 18035 non-null object \n",
525
- " 6 collateral_amount 18035 non-null float64 \n",
526
- " 7 outcome_index 18035 non-null object \n",
527
- " 8 trade_fee_amount 18035 non-null float64 \n",
528
- " 9 outcomes_tokens_traded 18035 non-null float64 \n",
529
- " 10 current_answer 18035 non-null int64 \n",
530
- " 11 is_invalid 18035 non-null bool \n",
531
- " 12 winning_trade 18035 non-null bool \n",
532
- " 13 earnings 18035 non-null float64 \n",
533
- " 14 redeemed 18035 non-null bool \n",
534
- " 15 redeemed_amount 18035 non-null float64 \n",
535
- " 16 num_mech_calls 18035 non-null int64 \n",
536
- " 17 mech_fee_amount 18035 non-null float64 \n",
537
- " 18 net_earnings 18035 non-null float64 \n",
538
- " 19 roi 18035 non-null float64 \n",
539
- "dtypes: bool(3), datetime64[ns, UTC](1), float64(8), int64(2), object(6)\n",
540
- "memory usage: 2.4+ MB\n"
541
- ]
542
- }
543
- ],
544
- "source": [
545
- "prof = pd.read_parquet('../data/all_trades_profitability.parquet')\n",
546
- "prof.info()"
547
- ]
548
- },
549
- {
550
- "cell_type": "code",
551
- "execution_count": 20,
552
- "metadata": {},
553
- "outputs": [
554
- {
555
- "data": {
556
- "text/plain": [
557
- "market_creator\n",
558
- "quickstart 16775\n",
559
- "pearl 1260\n",
560
- "Name: count, dtype: int64"
561
- ]
562
- },
563
- "execution_count": 20,
564
- "metadata": {},
565
- "output_type": "execute_result"
566
- }
567
- ],
568
- "source": [
569
- "prof.market_creator.value_counts()"
570
- ]
571
- },
572
- {
573
- "cell_type": "code",
574
- "execution_count": 12,
575
- "metadata": {},
576
- "outputs": [],
577
- "source": [
578
- "trades = pd.read_parquet(\"../data/fpmmTrades.parquet\")"
579
- ]
580
- },
581
- {
582
- "cell_type": "code",
583
- "execution_count": 22,
584
- "metadata": {},
585
- "outputs": [
586
- {
587
- "data": {
588
- "text/html": [
589
- "<div>\n",
590
- "<style scoped>\n",
591
- " .dataframe tbody tr th:only-of-type {\n",
592
- " vertical-align: middle;\n",
593
- " }\n",
594
- "\n",
595
- " .dataframe tbody tr th {\n",
596
- " vertical-align: top;\n",
597
- " }\n",
598
- "\n",
599
- " .dataframe thead th {\n",
600
- " text-align: right;\n",
601
- " }\n",
602
- "</style>\n",
603
- "<table border=\"1\" class=\"dataframe\">\n",
604
- " <thead>\n",
605
- " <tr style=\"text-align: right;\">\n",
606
- " <th></th>\n",
607
- " <th>request_id</th>\n",
608
- " <th>request_block</th>\n",
609
- " <th>prompt_request</th>\n",
610
- " <th>tool</th>\n",
611
- " <th>nonce</th>\n",
612
- " <th>trader_address</th>\n",
613
- " <th>deliver_block</th>\n",
614
- " <th>error</th>\n",
615
- " <th>error_message</th>\n",
616
- " <th>prompt_response</th>\n",
617
- " <th>mech_address</th>\n",
618
- " <th>p_yes</th>\n",
619
- " <th>p_no</th>\n",
620
- " <th>confidence</th>\n",
621
- " <th>info_utility</th>\n",
622
- " <th>vote</th>\n",
623
- " <th>win_probability</th>\n",
624
- " </tr>\n",
625
- " </thead>\n",
626
- " <tbody>\n",
627
- " </tbody>\n",
628
- "</table>\n",
629
- "</div>"
630
- ],
631
- "text/plain": [
632
- "Empty DataFrame\n",
633
- "Columns: [request_id, request_block, prompt_request, tool, nonce, trader_address, deliver_block, error, error_message, prompt_response, mech_address, p_yes, p_no, confidence, info_utility, vote, win_probability]\n",
634
- "Index: []"
635
- ]
636
- },
637
- "execution_count": 22,
638
- "metadata": {},
639
- "output_type": "execute_result"
640
- }
641
- ],
642
- "source": [
643
- "tools.head()"
644
- ]
645
- },
646
- {
647
- "cell_type": "code",
648
- "execution_count": 13,
649
- "metadata": {},
650
- "outputs": [
651
- {
652
- "data": {
653
- "text/html": [
654
- "<div>\n",
655
- "<style scoped>\n",
656
- " .dataframe tbody tr th:only-of-type {\n",
657
- " vertical-align: middle;\n",
658
- " }\n",
659
- "\n",
660
- " .dataframe tbody tr th {\n",
661
- " vertical-align: top;\n",
662
- " }\n",
663
- "\n",
664
- " .dataframe thead th {\n",
665
- " text-align: right;\n",
666
- " }\n",
667
- "</style>\n",
668
- "<table border=\"1\" class=\"dataframe\">\n",
669
- " <thead>\n",
670
- " <tr style=\"text-align: right;\">\n",
671
- " <th></th>\n",
672
- " <th>collateralAmount</th>\n",
673
- " <th>collateralAmountUSD</th>\n",
674
- " <th>collateralToken</th>\n",
675
- " <th>creationTimestamp</th>\n",
676
- " <th>trader_address</th>\n",
677
- " <th>feeAmount</th>\n",
678
- " <th>id</th>\n",
679
- " <th>oldOutcomeTokenMarginalPrice</th>\n",
680
- " <th>outcomeIndex</th>\n",
681
- " <th>outcomeTokenMarginalPrice</th>\n",
682
- " <th>...</th>\n",
683
- " <th>market_creator</th>\n",
684
- " <th>fpmm.answerFinalizedTimestamp</th>\n",
685
- " <th>fpmm.arbitrationOccurred</th>\n",
686
- " <th>fpmm.currentAnswer</th>\n",
687
- " <th>fpmm.id</th>\n",
688
- " <th>fpmm.isPendingArbitration</th>\n",
689
- " <th>fpmm.openingTimestamp</th>\n",
690
- " <th>fpmm.outcomes</th>\n",
691
- " <th>fpmm.title</th>\n",
692
- " <th>fpmm.condition.id</th>\n",
693
- " </tr>\n",
694
- " </thead>\n",
695
- " <tbody>\n",
696
- " <tr>\n",
697
- " <th>0</th>\n",
698
- " <td>450426474650738688</td>\n",
699
- " <td>0.4504269694034145716308073094168006</td>\n",
700
- " <td>0xe91d153e0b41518a2ce8dd3d7944fa863463a97d</td>\n",
701
- " <td>1724553455</td>\n",
702
- " <td>0x022b36c50b85b8ae7addfb8a35d76c59d5814834</td>\n",
703
- " <td>9008529493014773</td>\n",
704
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x02...</td>\n",
705
- " <td>0.592785210609610270634125335572129</td>\n",
706
- " <td>1</td>\n",
707
- " <td>0.6171295391012242250994586583534301</td>\n",
708
- " <td>...</td>\n",
709
- " <td>quickstart</td>\n",
710
- " <td>1725071760</td>\n",
711
- " <td>False</td>\n",
712
- " <td>0x00000000000000000000000000000000000000000000...</td>\n",
713
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
714
- " <td>False</td>\n",
715
- " <td>1724976000</td>\n",
716
- " <td>[Yes, No]</td>\n",
717
- " <td>Will the first floating offshore wind research...</td>\n",
718
- " <td>0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1...</td>\n",
719
- " </tr>\n",
720
- " <tr>\n",
721
- " <th>1</th>\n",
722
- " <td>610163214546941400</td>\n",
723
- " <td>0.6101636232215150135654007337015298</td>\n",
724
- " <td>0xe91d153e0b41518a2ce8dd3d7944fa863463a97d</td>\n",
725
- " <td>1724811940</td>\n",
726
- " <td>0x034c4ad84f7ac6638bf19300d5bbe7d9b981e736</td>\n",
727
- " <td>12203264290938828</td>\n",
728
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x03...</td>\n",
729
- " <td>0.842992636523755061934822129394812</td>\n",
730
- " <td>1</td>\n",
731
- " <td>0.8523396372892128845826889719620915</td>\n",
732
- " <td>...</td>\n",
733
- " <td>quickstart</td>\n",
734
- " <td>1725071760</td>\n",
735
- " <td>False</td>\n",
736
- " <td>0x00000000000000000000000000000000000000000000...</td>\n",
737
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
738
- " <td>False</td>\n",
739
- " <td>1724976000</td>\n",
740
- " <td>[Yes, No]</td>\n",
741
- " <td>Will the first floating offshore wind research...</td>\n",
742
- " <td>0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1...</td>\n",
743
- " </tr>\n",
744
- " <tr>\n",
745
- " <th>2</th>\n",
746
- " <td>789065092332460672</td>\n",
747
- " <td>0.7890644120527324071908793822796086</td>\n",
748
- " <td>0xe91d153e0b41518a2ce8dd3d7944fa863463a97d</td>\n",
749
- " <td>1724815755</td>\n",
750
- " <td>0x09e9d42a029e8b0c2df3871709a762117a681d92</td>\n",
751
- " <td>15781301846649213</td>\n",
752
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x09...</td>\n",
753
- " <td>0.7983775743712442891104598770339028</td>\n",
754
- " <td>1</td>\n",
755
- " <td>0.8152123711444691659642000374025623</td>\n",
756
- " <td>...</td>\n",
757
- " <td>quickstart</td>\n",
758
- " <td>1725071760</td>\n",
759
- " <td>False</td>\n",
760
- " <td>0x00000000000000000000000000000000000000000000...</td>\n",
761
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
762
- " <td>False</td>\n",
763
- " <td>1724976000</td>\n",
764
- " <td>[Yes, No]</td>\n",
765
- " <td>Will the first floating offshore wind research...</td>\n",
766
- " <td>0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1...</td>\n",
767
- " </tr>\n",
768
- " <tr>\n",
769
- " <th>3</th>\n",
770
- " <td>1000000000000000000</td>\n",
771
- " <td>1.000000605383660329048491794939126</td>\n",
772
- " <td>0xe91d153e0b41518a2ce8dd3d7944fa863463a97d</td>\n",
773
- " <td>1724546620</td>\n",
774
- " <td>0x09e9d42a029e8b0c2df3871709a762117a681d92</td>\n",
775
- " <td>20000000000000000</td>\n",
776
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x09...</td>\n",
777
- " <td>0.5110745907733438805447072252622708</td>\n",
778
- " <td>1</td>\n",
779
- " <td>0.5746805204222762335911904727318937</td>\n",
780
- " <td>...</td>\n",
781
- " <td>quickstart</td>\n",
782
- " <td>1725071760</td>\n",
783
- " <td>False</td>\n",
784
- " <td>0x00000000000000000000000000000000000000000000...</td>\n",
785
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
786
- " <td>False</td>\n",
787
- " <td>1724976000</td>\n",
788
- " <td>[Yes, No]</td>\n",
789
- " <td>Will the first floating offshore wind research...</td>\n",
790
- " <td>0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1...</td>\n",
791
- " </tr>\n",
792
- " <tr>\n",
793
- " <th>4</th>\n",
794
- " <td>100000000000000000</td>\n",
795
- " <td>0.1000004271262862419547394646567906</td>\n",
796
- " <td>0xe91d153e0b41518a2ce8dd3d7944fa863463a97d</td>\n",
797
- " <td>1724771260</td>\n",
798
- " <td>0x0d049dcaece0ecb6fc81a460da7bcc2a4785d6e5</td>\n",
799
- " <td>2000000000000000</td>\n",
800
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x0d...</td>\n",
801
- " <td>0.2713968218662319388988681987389408</td>\n",
802
- " <td>0</td>\n",
803
- " <td>0.2804586217805511523845593360379658</td>\n",
804
- " <td>...</td>\n",
805
- " <td>quickstart</td>\n",
806
- " <td>1725071760</td>\n",
807
- " <td>False</td>\n",
808
- " <td>0x00000000000000000000000000000000000000000000...</td>\n",
809
- " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
810
- " <td>False</td>\n",
811
- " <td>1724976000</td>\n",
812
- " <td>[Yes, No]</td>\n",
813
- " <td>Will the first floating offshore wind research...</td>\n",
814
- " <td>0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1...</td>\n",
815
- " </tr>\n",
816
- " </tbody>\n",
817
- "</table>\n",
818
- "<p>5 rows × 24 columns</p>\n",
819
- "</div>"
820
- ],
821
- "text/plain": [
822
- " collateralAmount collateralAmountUSD \\\n",
823
- "0 450426474650738688 0.4504269694034145716308073094168006 \n",
824
- "1 610163214546941400 0.6101636232215150135654007337015298 \n",
825
- "2 789065092332460672 0.7890644120527324071908793822796086 \n",
826
- "3 1000000000000000000 1.000000605383660329048491794939126 \n",
827
- "4 100000000000000000 0.1000004271262862419547394646567906 \n",
828
- "\n",
829
- " collateralToken creationTimestamp \\\n",
830
- "0 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1724553455 \n",
831
- "1 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1724811940 \n",
832
- "2 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1724815755 \n",
833
- "3 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1724546620 \n",
834
- "4 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1724771260 \n",
835
- "\n",
836
- " trader_address feeAmount \\\n",
837
- "0 0x022b36c50b85b8ae7addfb8a35d76c59d5814834 9008529493014773 \n",
838
- "1 0x034c4ad84f7ac6638bf19300d5bbe7d9b981e736 12203264290938828 \n",
839
- "2 0x09e9d42a029e8b0c2df3871709a762117a681d92 15781301846649213 \n",
840
- "3 0x09e9d42a029e8b0c2df3871709a762117a681d92 20000000000000000 \n",
841
- "4 0x0d049dcaece0ecb6fc81a460da7bcc2a4785d6e5 2000000000000000 \n",
842
- "\n",
843
- " id \\\n",
844
- "0 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x02... \n",
845
- "1 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x03... \n",
846
- "2 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x09... \n",
847
- "3 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x09... \n",
848
- "4 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x0d... \n",
849
- "\n",
850
- " oldOutcomeTokenMarginalPrice outcomeIndex \\\n",
851
- "0 0.592785210609610270634125335572129 1 \n",
852
- "1 0.842992636523755061934822129394812 1 \n",
853
- "2 0.7983775743712442891104598770339028 1 \n",
854
- "3 0.5110745907733438805447072252622708 1 \n",
855
- "4 0.2713968218662319388988681987389408 0 \n",
856
- "\n",
857
- " outcomeTokenMarginalPrice ... market_creator \\\n",
858
- "0 0.6171295391012242250994586583534301 ... quickstart \n",
859
- "1 0.8523396372892128845826889719620915 ... quickstart \n",
860
- "2 0.8152123711444691659642000374025623 ... quickstart \n",
861
- "3 0.5746805204222762335911904727318937 ... quickstart \n",
862
- "4 0.2804586217805511523845593360379658 ... quickstart \n",
863
- "\n",
864
- " fpmm.answerFinalizedTimestamp fpmm.arbitrationOccurred \\\n",
865
- "0 1725071760 False \n",
866
- "1 1725071760 False \n",
867
- "2 1725071760 False \n",
868
- "3 1725071760 False \n",
869
- "4 1725071760 False \n",
870
- "\n",
871
- " fpmm.currentAnswer \\\n",
872
- "0 0x00000000000000000000000000000000000000000000... \n",
873
- "1 0x00000000000000000000000000000000000000000000... \n",
874
- "2 0x00000000000000000000000000000000000000000000... \n",
875
- "3 0x00000000000000000000000000000000000000000000... \n",
876
- "4 0x00000000000000000000000000000000000000000000... \n",
877
- "\n",
878
- " fpmm.id fpmm.isPendingArbitration \\\n",
879
- "0 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 False \n",
880
- "1 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 False \n",
881
- "2 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 False \n",
882
- "3 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 False \n",
883
- "4 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 False \n",
884
- "\n",
885
- " fpmm.openingTimestamp fpmm.outcomes \\\n",
886
- "0 1724976000 [Yes, No] \n",
887
- "1 1724976000 [Yes, No] \n",
888
- "2 1724976000 [Yes, No] \n",
889
- "3 1724976000 [Yes, No] \n",
890
- "4 1724976000 [Yes, No] \n",
891
- "\n",
892
- " fpmm.title \\\n",
893
- "0 Will the first floating offshore wind research... \n",
894
- "1 Will the first floating offshore wind research... \n",
895
- "2 Will the first floating offshore wind research... \n",
896
- "3 Will the first floating offshore wind research... \n",
897
- "4 Will the first floating offshore wind research... \n",
898
- "\n",
899
- " fpmm.condition.id \n",
900
- "0 0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1... \n",
901
- "1 0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1... \n",
902
- "2 0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1... \n",
903
- "3 0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1... \n",
904
- "4 0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1... \n",
905
- "\n",
906
- "[5 rows x 24 columns]"
907
- ]
908
- },
909
- "execution_count": 13,
910
- "metadata": {},
911
- "output_type": "execute_result"
912
- }
913
- ],
914
- "source": [
915
- "trades.head()"
916
- ]
917
- },
918
- {
919
- "cell_type": "code",
920
- "execution_count": 14,
921
- "metadata": {},
922
- "outputs": [
923
- {
924
- "name": "stdout",
925
- "output_type": "stream",
926
- "text": [
927
- "<class 'pandas.core.frame.DataFrame'>\n",
928
- "RangeIndex: 23455 entries, 0 to 23454\n",
929
- "Data columns (total 24 columns):\n",
930
- " # Column Non-Null Count Dtype \n",
931
- "--- ------ -------------- ----- \n",
932
- " 0 collateralAmount 23455 non-null object\n",
933
- " 1 collateralAmountUSD 23455 non-null object\n",
934
- " 2 collateralToken 23455 non-null object\n",
935
- " 3 creationTimestamp 23455 non-null object\n",
936
- " 4 trader_address 23455 non-null object\n",
937
- " 5 feeAmount 23455 non-null object\n",
938
- " 6 id 23455 non-null object\n",
939
- " 7 oldOutcomeTokenMarginalPrice 23455 non-null object\n",
940
- " 8 outcomeIndex 23455 non-null object\n",
941
- " 9 outcomeTokenMarginalPrice 23455 non-null object\n",
942
- " 10 outcomeTokensTraded 23455 non-null object\n",
943
- " 11 title 23455 non-null object\n",
944
- " 12 transactionHash 23455 non-null object\n",
945
- " 13 type 23455 non-null object\n",
946
- " 14 market_creator 23455 non-null object\n",
947
- " 15 fpmm.answerFinalizedTimestamp 21489 non-null object\n",
948
- " 16 fpmm.arbitrationOccurred 23455 non-null bool \n",
949
- " 17 fpmm.currentAnswer 21489 non-null object\n",
950
- " 18 fpmm.id 23455 non-null object\n",
951
- " 19 fpmm.isPendingArbitration 23455 non-null bool \n",
952
- " 20 fpmm.openingTimestamp 23455 non-null object\n",
953
- " 21 fpmm.outcomes 23455 non-null object\n",
954
- " 22 fpmm.title 23455 non-null object\n",
955
- " 23 fpmm.condition.id 23455 non-null object\n",
956
- "dtypes: bool(2), object(22)\n",
957
- "memory usage: 4.0+ MB\n"
958
- ]
959
- }
960
- ],
961
- "source": [
962
- "trades.info()"
963
- ]
964
- },
965
- {
966
- "cell_type": "code",
967
- "execution_count": 15,
968
- "metadata": {},
969
- "outputs": [
970
- {
971
- "data": {
972
- "text/plain": [
973
- "market_creator\n",
974
- "quickstart 21852\n",
975
- "pearl 1603\n",
976
- "Name: count, dtype: int64"
977
- ]
978
- },
979
- "execution_count": 15,
980
- "metadata": {},
981
- "output_type": "execute_result"
982
- }
983
- ],
984
- "source": [
985
- "trades.market_creator.value_counts()"
986
- ]
987
- },
988
- {
989
- "cell_type": "code",
990
- "execution_count": 33,
991
- "metadata": {},
992
- "outputs": [],
993
- "source": [
994
- "fpmms_trades = pd.read_parquet('../data/fpmmTrades.parquet')"
995
- ]
996
- },
997
- {
998
- "cell_type": "markdown",
999
- "metadata": {},
1000
- "source": [
1001
- "## Adding market creator info"
1002
- ]
1003
- },
1004
- {
1005
- "cell_type": "code",
1006
- "execution_count": 35,
1007
- "metadata": {},
1008
- "outputs": [
1009
- {
1010
- "name": "stdout",
1011
- "output_type": "stream",
1012
- "text": [
1013
- "ERROR getting the market creator of 0xae7b042059b179dcac3169657fac111d7925f8dd\n",
1014
- "ERROR getting the market creator of 0x347e4ef0ff34cf39d1c7e08bc07c68c41a4836d6\n",
1015
- "ERROR getting the market creator of 0x8e03d3a7a3dfd930f73577ba4204deadf12b33f3\n",
1016
- "ERROR getting the market creator of 0x2a0b461417fa0ae8bbeb28ed265fbe3944772435\n",
1017
- "ERROR getting the market creator of 0x8069ea45a18910fa493a6a736438971b0e59ec9b\n",
1018
- "ERROR getting the market creator of 0xdf91eac2a8573646c7e8e95c740877fe3d38f11f\n",
1019
- "ERROR getting the market creator of 0x87f0fcfe810502555f8d1439793155cbfa2eb583\n",
1020
- "ERROR getting the market creator of 0x7bcf0f480e52da1597d7437d5b4a4644b1e7ec23\n",
1021
- "ERROR getting the market creator of 0xcfef6a50bd9439d1e1a15fcfe99068a57e533d95\n",
1022
- "ERROR getting the market creator of 0xaeb8c31302361d42ec806faf406ef0c30b6eba5f\n",
1023
- "ERROR getting the market creator of 0x9db7e7a0c82a229a7f3bb15046ff0c3a778b7291\n",
1024
- "ERROR getting the market creator of 0x85c31bbeaab5468d97900e69d87a459aba997fa5\n",
1025
- "ERROR getting the market creator of 0x36660fec571bb4d5849a433f9ec64622416f1dbb\n",
1026
- "ERROR getting the market creator of 0x5ebe6dcb1ac4470bb71c89cf1e6b9abc48b637ba\n",
1027
- "ERROR getting the market creator of 0xa0acfecc55465870c9baa7c954a0e81165fb112c\n",
1028
- "ERROR getting the market creator of 0xd6d6951a8fa033f91a2227d75fb1eebc139e2e57\n",
1029
- "ERROR getting the market creator of 0x651d04044b780e68f3f952796fb7c06fb0928ad2\n",
1030
- "ERROR getting the market creator of 0xe271378e094db9d64e34c6c14a7492bcccd11dfb\n",
1031
- "ERROR getting the market creator of 0x37c241945001f6c26c886c8d551cc2e6cf34c214\n",
1032
- "ERROR getting the market creator of 0x20b9e32b17360310c633e5676f28430bd723f4bd\n",
1033
- "ERROR getting the market creator of 0x06d873e7465a6680f5487905d7b5daf7f2c6e299\n",
1034
- "ERROR getting the market creator of 0xd28b5e2f2ce950705354cd2ceaf4eab9d23db52b\n",
1035
- "ERROR getting the market creator of 0x4d70e1ac779094e9790c8b74954d15729371e6bc\n",
1036
- "ERROR getting the market creator of 0x81489c0eab196fb88704f08ef34b8a9ed7137c91\n",
1037
- "ERROR getting the market creator of 0x223c99787f25179d51a9934a426b6d1b252bb4bd\n",
1038
- "ERROR getting the market creator of 0xd61b2c4f70645c830bd5af76426d6b22af63c152\n",
1039
- "ERROR getting the market creator of 0xe66e931f7b065361f56e41d61f599adab3b167c2\n",
1040
- "ERROR getting the market creator of 0x5ccf21332df9af6195a5b1ba78d15562db915a35\n",
1041
- "ERROR getting the market creator of 0xf8e68d9f66d2534df36c23db6770467da1c1ff1b\n",
1042
- "ERROR getting the market creator of 0x2b9274ddf2213d8a6b2930a5b82801165df55017\n",
1043
- "ERROR getting the market creator of 0xf9349c5ea0b5559abd8dfa6cdd4e4d5d913e1e61\n",
1044
- "ERROR getting the market creator of 0xad8aa6f927bb6a38af8121418f1b64d4ed8be99c\n",
1045
- "ERROR getting the market creator of 0x3dcc00904249d796a89943de15c85ac11afc5d66\n",
1046
- "ERROR getting the market creator of 0x10ece1553b5017414388fe78f64720814d7f8799\n",
1047
- "ERROR getting the market creator of 0x0930bcc328a695419d596dae380dec7fb43cd715\n",
1048
- "ERROR getting the market creator of 0x956d8bbc930372482a361dec7e4707b15d8b02f4\n",
1049
- "ERROR getting the market creator of 0x14da1cc12b382142ac3e2422162f122a0a31ec45\n",
1050
- "ERROR getting the market creator of 0x28dd86a2c82ce02970eff7f4ea9ebde97750adc8\n",
1051
- "ERROR getting the market creator of 0xb997d5e2fddf39b8a197715c7b200df612d74360\n",
1052
- "ERROR getting the market creator of 0x2064ceecb78a382f4988d41f881abef89b6e785c\n",
1053
- "ERROR getting the market creator of 0xe715cc8f264ab48f75bb1b5c11d7dbaf949d73c5\n",
1054
- "ERROR getting the market creator of 0x5fc7213135962250147030c5dd30b84a80f2ad1e\n",
1055
- "ERROR getting the market creator of 0x10ccffdc8e801ab4fda98371723cda4e30e6d672\n",
1056
- "ERROR getting the market creator of 0x6e5d93fdcc14db02a58ace636c2dcff8db36039d\n",
1057
- "ERROR getting the market creator of 0xf792f6a308525b72b5d47f12798668c140f5968e\n",
1058
- "ERROR getting the market creator of 0x00897abcbbefe4f558956b7a9d1b7819677e4d90\n",
1059
- "ERROR getting the market creator of 0x29448445959cc5045c03b7f316fa3332cc2b37b7\n",
1060
- "ERROR getting the market creator of 0xdb8c2038cd17645216125f323048dcd4c9845826\n",
1061
- "ERROR getting the market creator of 0x32969cce1791f13dc5d500b9e701ffb931baae03\n",
1062
- "ERROR getting the market creator of 0x84aeb93d348c6da1ea4b0016c207aefc26edaa44\n",
1063
- "ERROR getting the market creator of 0xdda87f7ec43aab7080e9ac23ae1550e5bc89d6cc\n",
1064
- "ERROR getting the market creator of 0xafd80421ce35298d3698ca0f4008477a169c9ea2\n",
1065
- "ERROR getting the market creator of 0xffc47cb1ecd41daae58e39fd4193d6fe9a6f5d2e\n",
1066
- "ERROR getting the market creator of 0x351d430d229740f986ee240612c932c66188dd09\n",
1067
- "ERROR getting the market creator of 0xd72455c8d5398a2b3b822bbc7cc0de638ea35519\n",
1068
- "ERROR getting the market creator of 0x2c83cf4bb92e55e35b6e4af6eca6c0a85fb73650\n",
1069
- "ERROR getting the market creator of 0xf2baf410b7d42d7572fb2f39cf216ffae8d4cafe\n",
1070
- "ERROR getting the market creator of 0xb42a955a0e06b3e6bdf229c9abfd2fdad20688a7\n",
1071
- "ERROR getting the market creator of 0x35021fcc0d15c4e87fc1c7fb527f389829dde3d9\n",
1072
- "ERROR getting the market creator of 0xaa19120a9976c75dc569ab2cfcc087cd224db4e2\n",
1073
- "ERROR getting the market creator of 0x6e79766698f58a25d2548b76601de9535c5080d3\n",
1074
- "ERROR getting the market creator of 0x6915dcb7601802ea4a2dd840c44b6ed4473b5ce2\n",
1075
- "ERROR getting the market creator of 0x6957f7ac4a0a09f237a901749e518a678d1a614a\n",
1076
- "ERROR getting the market creator of 0x785a9d3329955ffd7cd24ca7a89ce2da21ac62da\n",
1077
- "ERROR getting the market creator of 0x1e738f7e82102e2f56fef62df473d3f1f1dc53b1\n",
1078
- "ERROR getting the market creator of 0x8e23b89649f22a6e8084b34a1a5de28d9ddf5a88\n",
1079
- "ERROR getting the market creator of 0x31c6b19cae793ba90ee9c70263af773c27df2774\n",
1080
- "ERROR getting the market creator of 0x3a2d7bf095988f30daf308b5484cd74903d82c22\n",
1081
- "ERROR getting the market creator of 0xde10d01d4315cf64d9feeb79e9a593d78da8a50b\n",
1082
- "ERROR getting the market creator of 0xa57b7f04bb813b5a6ded7cc92c5bd56586d8f7d4\n",
1083
- "ERROR getting the market creator of 0x97609769fddc72ea9f45f62cef1f7a9658dd1efe\n",
1084
- "ERROR getting the market creator of 0x7ddbfbebbec1635315f9217cbf9de8afd272c8de\n",
1085
- "ERROR getting the market creator of 0x37cdc93194dc7f46f3cc377cf4350f56455a4f85\n",
1086
- "ERROR getting the market creator of 0x75c10935141d740b71e1763aa6a3139643754655\n",
1087
- "ERROR getting the market creator of 0x0f98789650877b1928960490a5a19769ac1c84b3\n",
1088
- "ERROR getting the market creator of 0x9f87c202db8b3270406a3084817909a9d4afc6ea\n",
1089
- "ERROR getting the market creator of 0x8cfb5af2b0287b34a423755d0481478f0a8f1356\n",
1090
- "ERROR getting the market creator of 0x09244905029648aca18830291bb62634b04d9a46\n",
1091
- "ERROR getting the market creator of 0x4e9a5580ce24dd06ed8d6b1d75a7ccce7abf7361\n",
1092
- "ERROR getting the market creator of 0x8bbeb8a3e1f6fdc9e95aa0d7e80ebc6dc1468b7a\n",
1093
- "ERROR getting the market creator of 0xcb279a4ebb3f0d78cb15817e942cc7aea01b8545\n",
1094
- "ERROR getting the market creator of 0xb36fa15e34dd50b8199c57305573dc48d1271b50\n",
1095
- "ERROR getting the market creator of 0x2198981fc1d8b3c61e7df9a50cf240708c057dfa\n",
1096
- "ERROR getting the market creator of 0x37bab68f9ae4f9c7ce915d9e1f3404e7cd1794cc\n",
1097
- "ERROR getting the market creator of 0x97f59586921ebdcfc07694ba8376f59871db11f9\n",
1098
- "ERROR getting the market creator of 0xc79bf3f6370e8a8002a3093c379752f395a3c291\n",
1099
- "ERROR getting the market creator of 0x178021f40d4e1ed270f2d2125f9f80d3e78a1836\n",
1100
- "ERROR getting the market creator of 0xcca6ccde20a551caec29d6c1318f4f2ec7e6063c\n"
1101
- ]
1102
- }
1103
- ],
1104
- "source": [
1105
- "tools[\"market_creator\"] = \"\"\n",
1106
- "# traverse the list of traders\n",
1107
- "traders_list = list(tools.trader_address.unique())\n",
1108
- "for trader_address in traders_list:\n",
1109
- " market_creator = \"\"\n",
1110
- " try:\n",
1111
- " trades = fpmms_trades[fpmms_trades[\"trader_address\"] == trader_address]\n",
1112
- " market_creator = trades.iloc[0][\"market_creator\"] # first value is enough\n",
1113
- " except Exception:\n",
1114
- " print(f\"ERROR getting the market creator of {trader_address}\")\n",
1115
- " tools_of_the_trader = tools[tools[\"trader_address\"] == trader_address]\n",
1116
- " # update\n",
1117
- " tools.loc[tools[\"trader_address\"] == trader_address, \"market_creator\"] = market_creator"
1118
- ]
1119
- },
1120
- {
1121
- "cell_type": "code",
1122
- "execution_count": 37,
1123
- "metadata": {},
1124
- "outputs": [
1125
- {
1126
- "data": {
1127
- "text/plain": [
1128
- "market_creator\n",
1129
- "quickstart 121106\n",
1130
- "pearl 12729\n",
1131
- " 5182\n",
1132
- "Name: count, dtype: int64"
1133
- ]
1134
- },
1135
- "execution_count": 37,
1136
- "metadata": {},
1137
- "output_type": "execute_result"
1138
- }
1139
- ],
1140
- "source": [
1141
- "tools.market_creator.value_counts()"
1142
- ]
1143
- },
1144
- {
1145
- "cell_type": "code",
1146
- "execution_count": 38,
1147
- "metadata": {},
1148
- "outputs": [
1149
- {
1150
- "data": {
1151
- "text/plain": [
1152
- "139017"
1153
- ]
1154
- },
1155
- "execution_count": 38,
1156
- "metadata": {},
1157
- "output_type": "execute_result"
1158
- }
1159
- ],
1160
- "source": [
1161
- "len(tools)"
1162
- ]
1163
- },
1164
- {
1165
- "cell_type": "code",
1166
- "execution_count": 39,
1167
- "metadata": {},
1168
- "outputs": [
1169
- {
1170
- "data": {
1171
- "text/plain": [
1172
- "0.03727601660228605"
1173
- ]
1174
- },
1175
- "execution_count": 39,
1176
- "metadata": {},
1177
- "output_type": "execute_result"
1178
- }
1179
- ],
1180
- "source": [
1181
- "5182/139017"
1182
- ]
1183
- },
1184
- {
1185
- "cell_type": "code",
1186
- "execution_count": 40,
1187
- "metadata": {},
1188
- "outputs": [],
1189
- "source": [
1190
- "tools = tools.loc[tools[\"market_creator\"] != \"\"]\n",
1191
- "tools.to_parquet(\"../data/tools.parquet\", index=False)"
1192
- ]
1193
- }
1194
- ],
1195
- "metadata": {
1196
- "kernelspec": {
1197
- "display_name": "hf_dashboards",
1198
- "language": "python",
1199
- "name": "python3"
1200
- },
1201
- "language_info": {
1202
- "codemirror_mode": {
1203
- "name": "ipython",
1204
- "version": 3
1205
- },
1206
- "file_extension": ".py",
1207
- "mimetype": "text/x-python",
1208
- "name": "python",
1209
- "nbconvert_exporter": "python",
1210
- "pygments_lexer": "ipython3",
1211
- "version": "3.12.2"
1212
- }
1213
- },
1214
- "nbformat": 4,
1215
- "nbformat_minor": 2
1216
- }

notebooks/weekly_analysis.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/weighted_accuracy_ranking.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
scripts/__init__.py DELETED
File without changes
scripts/active_traders.py DELETED
@@ -1,91 +0,0 @@
1
- import pandas as pd
2
- import pickle
3
- from web3_utils import DATA_DIR, TMP_DIR
4
- from staking import check_list_addresses
5
-
6
-
7
- def get_trader_type(address: str, service_map: dict) -> str:
8
- # check if it is part of any service id on the map
9
- keys = service_map.keys()
10
- last_key = max(keys)
11
-
12
- for key, value in service_map.items():
13
- if value["safe_address"].lower() == address.lower():
14
- # found a service
15
- return "Olas"
16
-
17
- return "non_Olas"
18
-
19
-
20
- def compute_active_traders_dataset():
21
- """Function to prepare the active traders dataset"""
22
- with open(DATA_DIR / "service_map.pkl", "rb") as f:
23
- service_map = pickle.load(f)
24
- # read tools info
25
- tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
26
- # rename the request_month_year_week
27
- tools_df.rename(
28
- columns={"request_month_year_week": "month_year_week"}, inplace=True
29
- )
30
- tool_traders = tools_df.trader_address.unique()
31
- mapping = check_list_addresses(tool_traders)
32
- # add trader type to tools_df
33
- tools_df["trader_type"] = tools_df.trader_address.apply(lambda x: mapping[x])
34
- tools_df = tools_df[
35
- ["month_year_week", "market_creator", "trader_type", "trader_address"]
36
- ]
37
- tools_df.drop_duplicates(inplace=True)
38
- # read trades info
39
- all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
40
-
41
- # read unknown info
42
- unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
43
- unknown_traders["creation_timestamp"] = pd.to_datetime(
44
- unknown_traders["creation_timestamp"]
45
- )
46
- unknown_traders["creation_timestamp"] = unknown_traders[
47
- "creation_timestamp"
48
- ].dt.tz_convert("UTC")
49
- unknown_traders = unknown_traders.sort_values(
50
- by="creation_timestamp", ascending=True
51
- )
52
- unknown_traders["month_year_week"] = (
53
- unknown_traders["creation_timestamp"]
54
- .dt.to_period("W")
55
- .dt.start_time.dt.strftime("%b-%d-%Y")
56
- )
57
- unknown_traders["trader_type"] = "unknown"
58
- unknown_traders = unknown_traders[
59
- ["month_year_week", "trader_type", "market_creator", "trader_address"]
60
- ]
61
- unknown_traders.drop_duplicates(inplace=True)
62
-
63
- all_trades["creation_timestamp"] = pd.to_datetime(all_trades["creation_timestamp"])
64
- all_trades["creation_timestamp"] = all_trades["creation_timestamp"].dt.tz_convert(
65
- "UTC"
66
- )
67
- all_trades = all_trades.sort_values(by="creation_timestamp", ascending=True)
68
- all_trades["month_year_week"] = (
69
- all_trades["creation_timestamp"]
70
- .dt.to_period("W")
71
- .dt.start_time.dt.strftime("%b-%d-%Y")
72
- )
73
- all_trades["trader_type"] = all_trades["staking"].apply(
74
- lambda x: "non_Olas" if x == "non_Olas" else "Olas"
75
- )
76
- all_trades = all_trades[
77
- ["month_year_week", "market_creator", "trader_type", "trader_address"]
78
- ]
79
- all_trades.drop_duplicates(inplace=True)
80
- filtered_traders_data = pd.concat([all_trades, tools_df], axis=0)
81
- filtered_traders_data.drop_duplicates(inplace=True)
82
- if len(unknown_traders) > 0:
83
- # merge
84
- filtered_traders_data = pd.concat(
85
- [filtered_traders_data, unknown_traders], axis=0
86
- )
87
- filtered_traders_data.to_parquet(TMP_DIR / "active_traders.parquet")
88
-
89
-
90
- if __name__ == "__main__":
91
- compute_active_traders_dataset()

scripts/cleaning_old_info.py DELETED
@@ -1,110 +0,0 @@
1
- import pandas as pd
2
- from utils import DATA_DIR, TMP_DIR, transform_to_datetime
3
-
4
-
5
- def clean_old_data_from_parquet_files(cutoff_date: str):
6
- print("Cleaning oldest data")
7
- # Convert the string to datetime64[ns, UTC]
8
- min_date_utc = pd.to_datetime(cutoff_date, format="%Y-%m-%d", utc=True)
9
-
10
- # clean tools.parquet
11
- try:
12
- tools = pd.read_parquet(TMP_DIR / "tools.parquet")
13
-
14
- # make sure creator_address is in the columns
15
- assert "trader_address" in tools.columns, "trader_address column not found"
16
-
17
- # lowercase and strip creator_address
18
- tools["trader_address"] = tools["trader_address"].str.lower().str.strip()
19
-
20
- tools["request_time"] = pd.to_datetime(tools["request_time"], utc=True)
21
-
22
- print(f"length before filtering {len(tools)}")
23
- tools = tools.loc[tools["request_time"] > min_date_utc]
24
- print(f"length after filtering {len(tools)}")
25
- tools.to_parquet(TMP_DIR / "tools.parquet", index=False)
26
-
27
- except Exception as e:
28
- print(f"Error cleaning tools file {e}")
29
-
30
- # clean all_trades_profitability.parquet
31
- try:
32
- all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
33
-
34
- all_trades["creation_timestamp"] = pd.to_datetime(
35
- all_trades["creation_timestamp"], utc=True
36
- )
37
-
38
- print(f"length before filtering {len(all_trades)}")
39
- all_trades = all_trades.loc[all_trades["creation_timestamp"] > min_date_utc]
40
- print(f"length after filtering {len(all_trades)}")
41
- all_trades.to_parquet(
42
- DATA_DIR / "all_trades_profitability.parquet", index=False
43
- )
44
-
45
- except Exception as e:
46
- print(f"Error cleaning all trades profitability file {e}")
47
-
48
- # clean unknown_traders.parquet
49
- try:
50
- unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
51
-
52
- unknown_traders["creation_timestamp"] = pd.to_datetime(
53
- unknown_traders["creation_timestamp"], utc=True
54
- )
55
-
56
- print(f"length unknown traders before filtering {len(unknown_traders)}")
57
- unknown_traders = unknown_traders.loc[
58
- unknown_traders["creation_timestamp"] > min_date_utc
59
- ]
60
- print(f"length unknown traders after filtering {len(unknown_traders)}")
61
- unknown_traders.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
62
-
63
- except Exception as e:
64
- print(f"Error cleaning unknown_traders file {e}")
65
-
66
- # clean fpmmTrades.parquet
67
- try:
68
- fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
69
- try:
70
- fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
71
- lambda x: transform_to_datetime(x)
72
- )
73
- except Exception as e:
74
- print(f"Transformation not needed")
75
- fpmmTrades["creation_timestamp"] = pd.to_datetime(
76
- fpmmTrades["creationTimestamp"]
77
- )
78
- fpmmTrades["creation_timestamp"] = pd.to_datetime(
79
- fpmmTrades["creation_timestamp"], utc=True
80
- )
81
-
82
- print(f"length before filtering {len(fpmmTrades)}")
83
- fpmmTrades = fpmmTrades.loc[fpmmTrades["creation_timestamp"] > min_date_utc]
84
- print(f"length after filtering {len(fpmmTrades)}")
85
- fpmmTrades.to_parquet(TMP_DIR / "fpmmTrades.parquet", index=False)
86
-
87
- except Exception as e:
88
- print(f"Error cleaning fpmmTrades file {e}")
89
-
90
- # clean invalid trades parquet
91
- try:
92
- invalid_trades = pd.read_parquet(DATA_DIR / "invalid_trades.parquet")
93
-
94
- invalid_trades["creation_timestamp"] = pd.to_datetime(
95
- invalid_trades["creation_timestamp"], utc=True
96
- )
97
-
98
- print(f"length before filtering {len(invalid_trades)}")
99
- invalid_trades = invalid_trades.loc[
100
- invalid_trades["creation_timestamp"] > min_date_utc
101
- ]
102
- print(f"length after filtering {len(invalid_trades)}")
103
- invalid_trades.to_parquet(DATA_DIR / "invalid_trades.parquet", index=False)
104
-
105
- except Exception as e:
106
- print(f"Error cleaning fpmmTrades file {e}")
107
-
108
-
109
- if __name__ == "__main__":
110
- clean_old_data_from_parquet_files("2024-10-25")

scripts/cloud_storage.py DELETED
@@ -1,93 +0,0 @@
1
- from minio import Minio
2
- from minio.error import S3Error
3
- import os
4
- import argparse
5
-
6
- from utils import HIST_DIR
7
-
8
- MINIO_ENDPOINT = "minio.autonolas.tech"
9
- ACCESS_KEY = os.environ.get("CLOUD_ACCESS_KEY", None)
10
- SECRET_KEY = os.environ.get("CLOUD_SECRET_KEY", None)
11
- BUCKET_NAME = "weekly-stats"
12
- FOLDER_NAME = "historical_data"
13
-
14
-
15
- def initialize_client():
16
- # Initialize the MinIO client
17
- client = Minio(
18
- MINIO_ENDPOINT,
19
- access_key=ACCESS_KEY,
20
- secret_key=SECRET_KEY,
21
- secure=True, # Set to False if not using HTTPS
22
- )
23
- return client
24
-
25
-
26
- def upload_file(client, filename: str, file_path: str) -> bool:
27
- """Upload a file to the bucket"""
28
- try:
29
- OBJECT_NAME = FOLDER_NAME + "/" + filename
30
- print(
31
- f"filename={filename}, object_name={OBJECT_NAME} and file_path={file_path}"
32
- )
33
- client.fput_object(
34
- BUCKET_NAME, OBJECT_NAME, file_path, part_size=10 * 1024 * 1024
35
- ) # 10MB parts
36
- print(f"File '{file_path}' uploaded as '{OBJECT_NAME}'.")
37
- return True
38
- except S3Error as err:
39
- print(f"Error uploading file: {err}")
40
- return False
41
-
42
-
43
- def download_file(client, filename: str, file_path: str):
44
- """Download the file back"""
45
- try:
46
- OBJECT_NAME = FOLDER_NAME + "/" + filename
47
- client.fget_object(BUCKET_NAME, OBJECT_NAME, "downloaded_" + file_path)
48
- print(f"File '{OBJECT_NAME}' downloaded as 'downloaded_{file_path}'.")
49
- except S3Error as err:
50
- print(f"Error downloading file: {err}")
51
-
52
-
53
- def load_historical_file(client, filename: str) -> bool:
54
- """Function to load one file into the cloud storage"""
55
- file_path = filename
56
- file_path = HIST_DIR / filename
57
- return upload_file(client, filename, file_path)
58
-
59
-
60
- def upload_historical_file(filename: str):
61
- client = initialize_client()
62
- load_historical_file(client=client, filename=filename)
63
-
64
-
65
- def process_historical_files(client):
66
- """Process all parquet files in historical_data folder"""
67
-
68
- # Walk through all files in the folder
69
- for filename in os.listdir(HIST_DIR):
70
- # Check if file is a parquet file
71
- if filename.endswith(".parquet"):
72
- try:
73
- if load_historical_file(client, filename):
74
- print(f"Successfully processed {filename}")
75
- else:
76
- print("Error loading the files")
77
- except Exception as e:
78
- print(f"Error processing {filename}: {str(e)}")
79
-
80
-
81
- if __name__ == "__main__":
82
- # parser = argparse.ArgumentParser(
83
- # description="Load files to the cloud storage for historical data"
84
- # )
85
- # parser.add_argument("param_1", type=str, help="Name of the file to upload")
86
-
87
- # # Parse the arguments
88
- # args = parser.parse_args()
89
- # filename = args.param_1
90
-
91
- client = initialize_client()
92
- # load_historical_file(client, filename)
93
- process_historical_files(client)

scripts/daily_data.py DELETED
@@ -1,61 +0,0 @@
1
- import logging
2
- from utils import measure_execution_time, DATA_DIR, TMP_DIR
3
- from profitability import (
4
- analyse_all_traders,
5
- label_trades_by_staking,
6
- )
7
- import pandas as pd
8
- from nr_mech_calls import (
9
- create_unknown_traders_df,
10
- compute_daily_mech_calls,
11
- transform_to_datetime,
12
- )
13
- from markets import check_current_week_data
14
- from staking import generate_retention_activity_file
15
-
16
- logging.basicConfig(level=logging.INFO)
17
-
18
-
19
- @measure_execution_time
20
- def prepare_live_metrics(
21
- tools_filename="new_tools.parquet", trades_filename="new_fpmmTrades.parquet"
22
- ):
23
- fpmmTrades = pd.read_parquet(TMP_DIR / trades_filename)
24
- tools = pd.read_parquet(TMP_DIR / tools_filename)
25
-
26
- # TODO if monday data of the week is missing in new_fpmmTrades then take it from the general file
27
- try:
28
- fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
29
- lambda x: transform_to_datetime(x)
30
- )
31
- except Exception as e:
32
- print(f"Transformation not needed")
33
- # check missing data from Monday
34
- fpmmTrades = check_current_week_data(fpmmTrades)
35
-
36
- print("Computing the estimated mech calls dataset")
37
- trader_mech_calls = compute_daily_mech_calls(fpmmTrades=fpmmTrades, tools=tools)
38
- print("Analysing trades...")
39
- all_trades_df = analyse_all_traders(fpmmTrades, trader_mech_calls, daily_info=True)
40
-
41
- # staking label
42
- all_trades_df = label_trades_by_staking(all_trades_df)
43
-
44
- # create the unknown traders dataset
45
- unknown_traders_df, all_trades_df = create_unknown_traders_df(
46
- trades_df=all_trades_df
47
- )
48
- unknown_traders_df.to_parquet(
49
- TMP_DIR / "unknown_daily_traders.parquet", index=False
50
- )
51
-
52
- # save into a separate file
53
- all_trades_df.to_parquet(DATA_DIR / "daily_info.parquet", index=False)
54
-
55
- # prepare the retention info file
56
- generate_retention_activity_file()
57
-
58
-
59
- if __name__ == "__main__":
60
- prepare_live_metrics()
61
- # generate_retention_activity_file()

scripts/get_mech_info.py DELETED
@@ -1,322 +0,0 @@
1
- from string import Template
2
- from typing import Any
3
- from datetime import datetime, timedelta, UTC
4
- from utils import (
5
- SUBGRAPH_API_KEY,
6
- measure_execution_time,
7
- DATA_DIR,
8
- TMP_DIR,
9
- NETWORK_SUBGRAPH_URL,
10
- transform_to_datetime,
11
- )
12
- import requests
13
- import pandas as pd
14
- import numpy as np
15
- from mech_request_utils import (
16
- collect_all_mech_delivers,
17
- collect_all_mech_requests,
18
- clean_mech_delivers,
19
- fix_duplicate_requestIds,
20
- merge_requests_delivers,
21
- get_ipfs_data,
22
- merge_json_files,
23
- )
24
-
25
- SUBGRAPH_HEADERS = {
26
- "Accept": "application/json, multipart/mixed",
27
- "Content-Type": "application/json",
28
- }
29
-
30
- QUERY_BATCH_SIZE = 1000
31
- DATETIME_60_DAYS_AGO = datetime.now(UTC) - timedelta(days=60)
32
- DATETIME_10_DAYS_AGO = datetime.now(UTC) - timedelta(days=10)
33
- DATETIME_10_HOURS_AGO = datetime.now(UTC) - timedelta(hours=10)
34
- BLOCK_NUMBER = Template(
35
- """
36
- {
37
- blocks(
38
- first: 1,
39
- orderBy: timestamp,
40
- orderDirection: asc,
41
- where: {
42
- timestamp_gte: "${timestamp_from}",
43
- timestamp_lte: "${timestamp_to}"
44
- }
45
- ){
46
- id,
47
- number,
48
- }
49
- }
50
- """
51
- )
52
-
53
- LATEST_BLOCK_QUERY = """
54
- {
55
- blocks(
56
- first: 1,
57
- orderBy: timestamp,
58
- orderDirection: desc,
59
- ){
60
- id,
61
- number,
62
- }
63
- }
64
- """
65
-
66
-
67
- def fetch_last_block_number() -> dict:
68
- # print(f"Sending query for the subgraph = {query}")
69
- network_subgraph_url = NETWORK_SUBGRAPH_URL.substitute(
70
- subgraph_api_key=SUBGRAPH_API_KEY
71
- )
72
- query = LATEST_BLOCK_QUERY
73
- response = requests.post(
74
- network_subgraph_url,
75
- headers=SUBGRAPH_HEADERS,
76
- json={"query": query},
77
- timeout=300,
78
- )
79
-
80
- result_json = response.json()
81
- print(f"Response of the query={result_json}")
82
- blocks = result_json.get("data", {}).get("blocks", "")
83
- if len(blocks) == 0:
84
- raise ValueError(f"The query {query} did not return any results")
85
- return blocks[0]
86
-
87
-
88
- def fetch_block_number(timestamp_from: int, timestamp_to: int) -> dict:
89
- """Get a block number by its timestamp margins."""
90
-
91
- query = BLOCK_NUMBER.substitute(
92
- timestamp_from=timestamp_from, timestamp_to=timestamp_to
93
- )
94
- # print(f"Sending query for the subgraph = {query}")
95
- network_subgraph_url = NETWORK_SUBGRAPH_URL.substitute(
96
- subgraph_api_key=SUBGRAPH_API_KEY
97
- )
98
- response = requests.post(
99
- network_subgraph_url,
100
- headers=SUBGRAPH_HEADERS,
101
- json={"query": query},
102
- timeout=300,
103
- )
104
- # print(f"block query: {query}")
105
- result_json = response.json()
106
- print(f"Response of the query={result_json}")
107
- blocks = result_json.get("data", {}).get("blocks", "")
108
- if len(blocks) == 0:
109
- raise ValueError(f"The query {query} did not return any results")
110
- return blocks[0]
111
-
112
-
113
- def update_json_files():
114
- merge_json_files("mech_requests.json", "new_mech_requests.json")
115
- merge_json_files("mech_delivers.json", "new_mech_delivers.json")
116
- merge_json_files("merged_requests.json", "new_merged_requests.json")
117
- merge_json_files("tools_info.json", "new_tools_info.json")
118
-
119
-
120
- def update_all_trades_parquet(new_trades_df: pd.DataFrame) -> pd.DataFrame:
121
- # Read old all_trades parquet file
122
- try:
123
- old_trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
124
- except Exception as e:
125
- print(f"Error reading old trades parquet file {e}")
126
- return None
127
- # merge two dataframes
128
- merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)
129
-
130
- # Check for duplicates
131
- print(f"Initial length before removing duplicates in all_trades= {len(merge_df)}")
132
-
133
- # Remove duplicates
134
- merge_df.drop_duplicates("trade_id", inplace=True)
135
- print(f"Final length after removing duplicates in all_trades = {len(merge_df)}")
136
- return merge_df
137
-
138
-
139
- def update_tools_parquet(new_tools_filename: pd.DataFrame):
140
- try:
141
- old_tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
142
- except Exception as e:
143
- print(f"Error reading old tools parquet file {e}")
144
- return None
145
- try:
146
- new_tools_df = pd.read_parquet(DATA_DIR / new_tools_filename)
147
-
148
- except Exception as e:
149
- print(f"Error reading new trades parquet file {e}")
150
- return None
151
-
152
- # merge two dataframes
153
- merge_df = pd.concat([old_tools_df, new_tools_df], ignore_index=True)
154
-
155
- # Check for duplicates
156
- print(f"Initial length before removing duplicates in tools= {len(merge_df)}")
157
-
158
- # Remove duplicates
159
- merge_df.drop_duplicates(
160
- subset=["request_id", "request_time"], keep="last", inplace=True
161
- )
162
- print(f"Final length after removing duplicates in tools= {len(merge_df)}")
163
-
164
- # save the parquet file
165
- merge_df.to_parquet(TMP_DIR / "tools.parquet", index=False)
166
-
167
-
168
- def get_mech_info_2024() -> dict[str, Any]:
169
- """Query the subgraph to get the 2024 information from mech."""
170
-
171
- date = "2024-01-01"
172
- datetime_jan_2024 = datetime.strptime(date, "%Y-%m-%d")
173
- timestamp_jan_2024 = int(datetime_jan_2024.timestamp())
174
- margin = timedelta(seconds=5)
175
- timestamp_jan_2024_plus_margin = int((datetime_jan_2024 + margin).timestamp())
176
-
177
- jan_block_number = fetch_block_number(
178
- timestamp_jan_2024, timestamp_jan_2024_plus_margin
179
- )
180
- # expecting only one block
181
- jan_block_number = jan_block_number.get("number", "")
182
- if jan_block_number.isdigit():
183
- jan_block_number = int(jan_block_number)
184
-
185
- if jan_block_number == "":
186
- raise ValueError(
187
- "Could not find a valid block number for the first of January 2024"
188
- )
189
- MECH_TO_INFO = {
190
- # this block number is when the creator had its first tx ever, and after this mech's creation
191
- "0xff82123dfb52ab75c417195c5fdb87630145ae81": (
192
- "old_mech_abi.json",
193
- jan_block_number,
194
- ),
195
- # this block number is when this mech was created
196
- "0x77af31de935740567cf4ff1986d04b2c964a786a": (
197
- "new_mech_abi.json",
198
- jan_block_number,
199
- ),
200
- }
201
- return MECH_TO_INFO
202
-
203
-
204
- def get_last_block_number() -> int:
205
- last_block_number = fetch_last_block_number()
206
- # expecting only one block
207
- last_block_number = last_block_number.get("number", "")
208
- if last_block_number.isdigit():
209
- last_block_number = int(last_block_number)
210
-
211
- if last_block_number == "":
212
- raise ValueError("Could not find a valid block number for last month data")
213
- return last_block_number
214
-
215
-
216
- def get_last_60_days_block_number() -> int:
217
- timestamp_60_days_ago = int((DATETIME_60_DAYS_AGO).timestamp())
218
- margin = timedelta(seconds=5)
219
- timestamp_60_days_ago_plus_margin = int((DATETIME_60_DAYS_AGO + margin).timestamp())
220
-
221
- last_month_block_number = fetch_block_number(
222
- timestamp_60_days_ago, timestamp_60_days_ago_plus_margin
223
- )
224
- # expecting only one block
225
- last_month_block_number = last_month_block_number.get("number", "")
226
- if last_month_block_number.isdigit():
227
- last_month_block_number = int(last_month_block_number)
228
-
229
- if last_month_block_number == "":
230
- raise ValueError("Could not find a valid block number for last month data")
231
- return last_month_block_number
232
-
233
-
234
- def get_mech_info_last_60_days() -> dict[str, Any]:
235
- """Query the subgraph to get the last 60 days of information from mech."""
236
- last_month_block_number = get_last_60_days_block_number()
237
-
238
- MECH_TO_INFO = {
239
- # this block number is when the creator had its first tx ever, and after this mech's creation
240
- "0xff82123dfb52ab75c417195c5fdb87630145ae81": (
241
- "old_mech_abi.json",
242
- last_month_block_number,
243
- ),
244
- # this block number is when this mech was created
245
- "0x77af31de935740567cf4ff1986d04b2c964a786a": (
246
- "new_mech_abi.json",
247
- last_month_block_number,
248
- ),
249
- }
250
- print(f"last 60 days block number {last_month_block_number}")
251
- return MECH_TO_INFO
252
-
253
-
254
- @measure_execution_time
255
- def get_mech_events_since_last_run(logger):
256
- """Function to download only the new events since the last execution."""
257
-
258
- # Read the latest date from stored data
259
- try:
260
- all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
261
- # latest_timestamp = max(all_trades.creation_timestamp)
262
- cutoff_date = "2025-01-13"
263
- latest_timestamp = pd.Timestamp(
264
- datetime.strptime(cutoff_date, "%Y-%m-%d")
265
- ).tz_localize("UTC")
266
- print(f"Updating data since {latest_timestamp}")
267
- except Exception:
268
- print("Error while reading the profitability parquet file")
269
- return None
270
-
271
- # Get the block number of the latest date
272
- five_seconds = np.timedelta64(5, "s")
273
- last_run_block_number = fetch_block_number(
274
- int(latest_timestamp.timestamp()),
275
- int((latest_timestamp + five_seconds).timestamp()),
276
- )
277
- # expecting only one block
278
- last_run_block_number = last_run_block_number.get("number", "")
279
- if last_run_block_number.isdigit():
280
- last_run_block_number = int(last_run_block_number)
281
-
282
- if last_run_block_number == "":
283
- raise ValueError("Could not find a valid block number for last collected data")
284
- last_block_number = get_last_block_number()
285
-
286
- # mech requests
287
- requests_dict, duplicatedReqId, nr_errors = collect_all_mech_requests(
288
- from_block=last_run_block_number,
289
- to_block=last_block_number,
290
- filename="new_mech_requests.json",
291
- )
292
- print(f"NUMBER OF MECH REQUEST ERRORS={nr_errors}")
293
- # mech delivers
294
- delivers_dict, duplicatedIds, nr_errors = collect_all_mech_delivers(
295
- from_block=last_run_block_number,
296
- to_block=last_block_number,
297
- filename="new_mech_delivers.json",
298
- )
299
- print(f"NUMBER OF MECH DELIVER ERRORS={nr_errors}")
300
- if delivers_dict is None:
301
- return None
302
- # clean delivers
303
- clean_mech_delivers("new_mech_requests.json", "new_mech_delivers.json")
304
-
305
- # solve duplicated requestIds
306
- block_map = fix_duplicate_requestIds(
307
- "new_mech_requests.json", "new_mech_delivers.json"
308
- )
309
- # merge the two files into one source
310
- not_found = merge_requests_delivers(
311
- "new_mech_requests.json", "new_mech_delivers.json", "new_merged_requests.json"
312
- )
313
-
314
- # Add ipfs contents
315
- get_ipfs_data("new_merged_requests.json", "new_tools_info.json", logger)
316
- return latest_timestamp
317
-
318
-
319
- if __name__ == "__main__":
320
- get_mech_events_since_last_run()
321
- # result = get_mech_info_last_60_days()
322
- # print(result)

scripts/gnosis_timestamps.py DELETED
@@ -1,186 +0,0 @@
1
- from web3 import Web3
2
- import os
3
- import requests
4
- import time
5
- import pickle
6
- from datetime import datetime, timezone
7
- from functools import partial
8
- import pandas as pd
9
- import pytz
10
- from tqdm import tqdm
11
- from utils import DATA_DIR, TMP_DIR, measure_execution_time
12
- from concurrent.futures import ThreadPoolExecutor
13
-
14
- GNOSIS_API_INTERVAL = 0.2 # 5 calls in 1 second
15
- GNOSIS_URL = "https://api.gnosisscan.io/api"
16
- GNOSIS_API_KEY = os.environ.get("GNOSIS_API_KEY", None)
17
- # https://api.gnosisscan.io/api?module=account&action=txlist&address=0x1fe2b09de07475b1027b0c73a5bf52693b31a52e&startblock=36626348&endblock=36626348&page=1&offset=10&sort=asc&apikey=${gnosis_api_key}""
18
-
19
- # Connect to Gnosis Chain RPC
20
- w3 = Web3(Web3.HTTPProvider("https://rpc.gnosischain.com"))
21
-
22
-
23
- def parallelize_timestamp_computation(df: pd.DataFrame, function: callable) -> list:
24
- """Parallelize the timestamp conversion."""
25
- tx_hashes = df["tx_hash"].tolist()
26
- with ThreadPoolExecutor(max_workers=10) as executor:
27
- results = list(tqdm(executor.map(function, tx_hashes), total=len(tx_hashes)))
28
- return results
29
-
30
-
31
- def transform_timestamp_to_datetime(timestamp):
32
- dt = datetime.fromtimestamp(timestamp, timezone.utc)
33
- return dt
34
-
35
-
36
- def get_tx_hash(trader_address, request_block):
37
- """Function to get the transaction hash from the address and block number"""
38
- params = {
39
- "module": "account",
40
- "action": "txlist",
41
- "address": trader_address,
42
- "page": 1,
43
- "offset": 100,
44
- "startblock": request_block,
45
- "endblock": request_block,
46
- "sort": "asc",
47
- "apikey": GNOSIS_API_KEY,
48
- }
49
-
50
- try:
51
- response = requests.get(GNOSIS_URL, params=params)
52
- tx_list = response.json()["result"]
53
- time.sleep(GNOSIS_API_INTERVAL)
54
- if len(tx_list) > 1:
55
- raise ValueError("More than one transaction found")
56
- return tx_list[0]["hash"]
57
- except Exception as e:
58
- return None
59
-
60
-
61
- def add_tx_hash_info(filename: str = "tools.parquet"):
62
- """Function to add the hash info to the saved tools parquet file"""
63
- tools = pd.read_parquet(DATA_DIR / filename)
64
- tools["tx_hash"] = None
65
- total_errors = 0
66
- for i, mech_request in tqdm(
67
- tools.iterrows(), total=len(tools), desc="Adding tx hash"
68
- ):
69
- try:
70
- trader_address = mech_request["trader_address"]
71
- block_number = mech_request["request_block"]
72
- tools.at[i, "tx_hash"] = get_tx_hash(
73
- trader_address=trader_address, request_block=block_number
74
- )
75
- except Exception as e:
76
- print(f"Error with mech request {mech_request}")
77
- total_errors += 1
78
- continue
79
-
80
- print(f"Total number of errors = {total_errors}")
81
- tools.to_parquet(DATA_DIR / filename)
82
-
83
-
84
- def get_transaction_timestamp(tx_hash: str, web3: Web3):
85
-
86
- try:
87
- # Get transaction data
88
- tx = web3.eth.get_transaction(tx_hash)
89
- # Get block data
90
- block = web3.eth.get_block(tx["blockNumber"])
91
- # Get timestamp
92
- timestamp = block["timestamp"]
93
-
94
- # Convert to datetime
95
- dt = datetime.fromtimestamp(timestamp, tz=pytz.UTC)
96
-
97
- # return {
98
- # "timestamp": timestamp,
99
- # "datetime": dt,
100
- # "from_address": tx["from"],
101
- # "to_address": tx["to"],
102
- # "success": True,
103
- # }
104
- return dt.strftime("%Y-%m-%d %H:%M:%S")
105
- except Exception as e:
106
- print(f"Error getting the timestamp from {tx_hash}")
107
- return None
108
-
109
-
110
- @measure_execution_time
111
- def compute_request_time(tools_df: pd.DataFrame) -> pd.DataFrame:
112
- """Function to compute the request timestamp from the tx hash"""
113
- # read the local info
114
- try:
115
- gnosis_info = pickle.load(open(TMP_DIR / "gnosis_info.pkl", "rb"))
116
- except Exception:
117
- print("File not found or not created. Creating a new one")
118
- gnosis_info = {}
119
-
120
- # any previous information?
121
- tools_df["request_time"] = tools_df["tx_hash"].map(gnosis_info)
122
-
123
- # Identify tools with missing request_time and fill them
124
- missing_time_indices = tools_df[tools_df["request_time"].isna()].index
125
- print(f"length of missing_time_indices = {len(missing_time_indices)}")
126
- # traverse all tx hashes and get the timestamp of each tx
127
- partial_mech_request_timestamp = partial(get_transaction_timestamp, web3=w3)
128
- missing_timestamps = parallelize_timestamp_computation(
129
- tools_df.loc[missing_time_indices], partial_mech_request_timestamp
130
- )
131
-
132
- # Update the original DataFrame with the missing timestamps
133
- for i, timestamp in zip(missing_time_indices, missing_timestamps):
134
- tools_df.at[i, "request_time"] = timestamp
135
- # creating other time fields
136
- tools_df["request_month_year"] = pd.to_datetime(
137
- tools_df["request_time"]
138
- ).dt.strftime("%Y-%m")
139
- tools_df["request_month_year_week"] = (
140
- pd.to_datetime(tools_df["request_time"])
141
- .dt.to_period("W")
142
- .dt.start_time.dt.strftime("%b-%d-%Y")
143
- )
144
- # Update t_map with new timestamps
145
- new_timestamps = (
146
- tools_df[["tx_hash", "request_time"]]
147
- .dropna()
148
- .set_index("tx_hash")
149
- .to_dict()["request_time"]
150
- )
151
- gnosis_info.update(new_timestamps)
152
- # saving gnosis info
153
- with open(TMP_DIR / "gnosis_info.pkl", "wb") as f:
154
- pickle.dump(gnosis_info, f)
155
- return tools_df
156
-
157
-
158
- def get_account_details(address):
159
- # gnosis_url = GNOSIS_URL.substitute(gnosis_api_key=GNOSIS_API_KEY, tx_hash=tx_hash)
160
-
161
- params = {
162
- "module": "account",
163
- "action": "txlistinternal",
164
- "address": address,
165
- #'page': 1,
166
- #'offset': 100,
167
- #'startblock': 0,
168
- #'endblock': 9999999999,
169
- #'sort': 'asc',
170
- "apikey": GNOSIS_API_KEY,
171
- }
172
-
173
- try:
174
- response = requests.get(GNOSIS_URL, params=params)
175
- return response.json()
176
- except Exception as e:
177
- return {"error": str(e)}
178
-
179
-
180
- if __name__ == "__main__":
181
- # tx_data = "0x783BFA045BDE2D0BCD65280D97A29E7BD9E4FDC10985848690C9797E767140F4"
182
- new_tools = pd.read_parquet(DATA_DIR / "new_tools.parquet")
183
- new_tools = compute_request_time(new_tools)
184
- new_tools.to_parquet(DATA_DIR / "new_tools.parquet")
185
- # result = get_tx_hash("0x1fe2b09de07475b1027b0c73a5bf52693b31a52e", 36626348)
186
- # print(result)

scripts/manage_space_files.py DELETED
@@ -1,40 +0,0 @@
1
- import os
2
- import shutil
3
-
4
- # Define the file names to move
5
- files_to_move = [
6
- "new_tools.parquet",
7
- "new_fpmmTrades.parquet",
8
- "fpmms.parquet",
9
- "fpmmTrades.parquet",
10
- ]
11
-
12
- # Get the current working directory
13
- current_dir = os.getcwd()
14
-
15
- # Define source and destination paths
16
- source_dir = os.path.join(current_dir, "data")
17
- dest_dir = os.path.join(current_dir, "tmp")
18
-
19
-
20
- def move_files():
21
- # Create tmp directory if it doesn't exist
22
- if not os.path.exists(dest_dir):
23
- os.makedirs(dest_dir)
24
- # Move each file
25
- for file_name in files_to_move:
26
- source_file = os.path.join(source_dir, file_name)
27
- dest_file = os.path.join(dest_dir, file_name)
28
-
29
- try:
30
- if os.path.exists(source_file):
31
- shutil.move(source_file, dest_file)
32
- print(f"Moved {file_name} successfully")
33
- else:
34
- print(f"File not found: {file_name}")
35
- except Exception as e:
36
- print(f"Error moving {file_name}: {str(e)}")
37
-
38
-
39
- if __name__ == "__main__":
40
- move_files()

scripts/markets.py DELETED
@@ -1,464 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # ------------------------------------------------------------------------------
3
- #
4
- # Copyright 2023 Valory AG
5
- #
6
- # Licensed under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License.
8
- # You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS,
14
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- # See the License for the specific language governing permissions and
16
- # limitations under the License.
17
- #
18
- # ------------------------------------------------------------------------------
19
-
20
- import functools
21
- import warnings
22
- from datetime import datetime, timedelta
23
- from typing import Optional, Generator, Callable
24
- import pandas as pd
25
- import requests
26
- from tqdm import tqdm
27
- from typing import List, Dict
28
- from utils import SUBGRAPH_API_KEY, DATA_DIR, TMP_DIR, transform_to_datetime
29
- from web3_utils import (
30
- FPMM_QS_CREATOR,
31
- FPMM_PEARL_CREATOR,
32
- query_omen_xdai_subgraph,
33
- OMEN_SUBGRAPH_URL,
34
- )
35
- from queries import (
36
- FPMMS_QUERY,
37
- ID_FIELD,
38
- DATA_FIELD,
39
- ANSWER_FIELD,
40
- QUERY_FIELD,
41
- TITLE_FIELD,
42
- OUTCOMES_FIELD,
43
- ERROR_FIELD,
44
- QUESTION_FIELD,
45
- FPMMS_FIELD,
46
- )
47
-
48
- ResponseItemType = List[Dict[str, str]]
49
- SubgraphResponseType = Dict[str, ResponseItemType]
50
- BATCH_SIZE = 1000
51
- DEFAULT_TO_TIMESTAMP = 2147483647 # around year 2038
52
- DEFAULT_FROM_TIMESTAMP = 0
53
-
54
- MAX_UINT_HEX = "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
55
- DEFAULT_FILENAME = "fpmms.parquet"
56
- market_creators_map = {"quickstart": FPMM_QS_CREATOR, "pearl": FPMM_PEARL_CREATOR}
57
-
58
-
59
- class RetriesExceeded(Exception):
60
- """Exception to raise when retries are exceeded during data-fetching."""
61
-
62
- def __init__(
63
- self, msg="Maximum retries were exceeded while trying to fetch the data!"
64
- ):
65
- super().__init__(msg)
66
-
67
-
68
- def hacky_retry(func: Callable, n_retries: int = 3) -> Callable:
69
- """Create a hacky retry strategy.
70
- Unfortunately, we cannot use `requests.packages.urllib3.util.retry.Retry`,
71
- because the subgraph does not return the appropriate status codes in case of failure.
72
- Instead, it always returns code 200. Thus, we raise exceptions manually inside `make_request`,
73
- catch those exceptions in the hacky retry decorator and try again.
74
- Finally, if the allowed number of retries is exceeded, we raise a custom `RetriesExceeded` exception.
75
-
76
- :param func: the input request function.
77
- :param n_retries: the maximum allowed number of retries.
78
- :return: The request method with the hacky retry strategy applied.
79
- """
80
-
81
- @functools.wraps(func)
82
- def wrapper_hacky_retry(*args, **kwargs) -> SubgraphResponseType:
83
- """The wrapper for the hacky retry.
84
-
85
- :return: a response dictionary.
86
- """
87
- retried = 0
88
-
89
- while retried <= n_retries:
90
- try:
91
- if retried > 0:
92
- warnings.warn(f"Retrying {retried}/{n_retries}...")
93
-
94
- return func(*args, **kwargs)
95
- except (ValueError, ConnectionError) as e:
96
- warnings.warn(e.args[0])
97
- finally:
98
- retried += 1
99
-
100
- raise RetriesExceeded()
101
-
102
- return wrapper_hacky_retry
103
-
104
-
105
- @hacky_retry
106
- def query_subgraph(url: str, query: str, key: str) -> SubgraphResponseType:
107
- """Query a subgraph.
108
-
109
- Args:
110
- url: the subgraph's URL.
111
- query: the query to be used.
112
- key: the key to use in order to access the required data.
113
-
114
- Returns:
115
- a response dictionary.
116
- """
117
- content = {QUERY_FIELD: query}
118
- headers = {
119
- "Accept": "application/json",
120
- "Content-Type": "application/json",
121
- }
122
- res = requests.post(url, json=content, headers=headers)
123
-
124
- if res.status_code != 200:
125
- raise ConnectionError(
126
- "Something went wrong while trying to communicate with the subgraph "
127
- f"(Error: {res.status_code})!\n{res.text}"
128
- )
129
-
130
- body = res.json()
131
- if ERROR_FIELD in body.keys():
132
- raise ValueError(f"The given query is not correct: {body[ERROR_FIELD]}")
133
-
134
- data = body.get(DATA_FIELD, {}).get(key, None)
135
- if data is None:
136
- raise ValueError(f"Unknown error encountered!\nRaw response: \n{body}")
137
-
138
- return data
139
-
140
-
141
- def transform_fpmmTrades(df: pd.DataFrame) -> pd.DataFrame:
142
- print("Transforming trades dataframe")
143
- # convert creator to address
144
- df["creator"] = df["creator"].apply(lambda x: x["id"])
145
-
146
- # normalize fpmm column
147
- fpmm = pd.json_normalize(df["fpmm"])
148
- fpmm.columns = [f"fpmm.{col}" for col in fpmm.columns]
149
- df = pd.concat([df, fpmm], axis=1)
150
-
151
- # drop fpmm column
152
- df.drop(["fpmm"], axis=1, inplace=True)
153
-
154
- # change creator to creator_address
155
- df.rename(columns={"creator": "trader_address"}, inplace=True)
156
- return df
157
-
158
-
159
- def create_fpmmTrades(
160
- from_timestamp: int = DEFAULT_FROM_TIMESTAMP,
161
- to_timestamp: int = DEFAULT_TO_TIMESTAMP,
162
- ):
163
- """Create fpmmTrades for all trades."""
164
- print("Getting trades from Quickstart markets")
165
- # Quickstart trades
166
- qs_trades_json = query_omen_xdai_subgraph(
167
- trader_category="quickstart",
168
- from_timestamp=from_timestamp,
169
- to_timestamp=to_timestamp,
170
- fpmm_from_timestamp=from_timestamp,
171
- fpmm_to_timestamp=to_timestamp,
172
- )
173
-
174
- print(f"length of the qs_trades_json dataset {len(qs_trades_json)}")
175
-
176
- # convert to dataframe
177
- qs_df = pd.DataFrame(qs_trades_json["data"]["fpmmTrades"])
178
- qs_df["market_creator"] = "quickstart"
179
- qs_df = transform_fpmmTrades(qs_df)
180
-
181
- # Pearl trades
182
- print("Getting trades from Pearl markets")
183
- pearl_trades_json = query_omen_xdai_subgraph(
184
- trader_category="pearl",
185
- from_timestamp=from_timestamp,
186
- to_timestamp=to_timestamp,
187
- fpmm_from_timestamp=from_timestamp,
188
- fpmm_to_timestamp=to_timestamp,
189
- )
190
-
191
- print(f"length of the pearl_trades_json dataset {len(pearl_trades_json)}")
192
-
193
- # convert to dataframe
194
- pearl_df = pd.DataFrame(pearl_trades_json["data"]["fpmmTrades"])
195
- pearl_df["market_creator"] = "pearl"
196
- pearl_df = transform_fpmmTrades(pearl_df)
197
-
198
- return pd.concat([qs_df, pearl_df], ignore_index=True)
199
-
200
-
201
- def fpmms_fetcher(trader_category: str) -> Generator[ResponseItemType, int, None]:
202
- """An indefinite fetcher for the FPMMs."""
203
- omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
204
- print(f"omen_subgraph = {omen_subgraph}")
205
-
206
- if trader_category == "pearl":
207
- creator_id = FPMM_PEARL_CREATOR
208
- else: # quickstart
209
- creator_id = FPMM_QS_CREATOR
210
- while True:
211
- fpmm_id = yield
212
- fpmms_query = FPMMS_QUERY.substitute(
213
- creator=creator_id,
214
- fpmm_id=fpmm_id,
215
- fpmms_field=FPMMS_FIELD,
216
- first=BATCH_SIZE,
217
- id_field=ID_FIELD,
218
- answer_field=ANSWER_FIELD,
219
- question_field=QUESTION_FIELD,
220
- outcomes_field=OUTCOMES_FIELD,
221
- title_field=TITLE_FIELD,
222
- )
223
- print(f"markets query = {fpmms_query}")
224
- yield query_subgraph(omen_subgraph, fpmms_query, FPMMS_FIELD)
225
-
226
-
227
- def fetch_qs_fpmms() -> pd.DataFrame:
228
- """Fetch all the fpmms of the creator."""
229
- latest_id = ""
230
- fpmms = []
231
- trader_category = "quickstart"
232
- print(f"Getting markets for {trader_category}")
233
- fetcher = fpmms_fetcher(trader_category)
234
- for _ in tqdm(fetcher, unit="fpmms", unit_scale=BATCH_SIZE):
235
- batch = fetcher.send(latest_id)
236
- if len(batch) == 0:
237
- break
238
-
239
- latest_id = batch[-1].get(ID_FIELD, "")
240
- if latest_id == "":
241
- raise ValueError(f"Unexpected data format retrieved: {batch}")
242
-
243
- fpmms.extend(batch)
244
-
245
- return pd.DataFrame(fpmms)
246
-
247
-
248
- def fetch_pearl_fpmms() -> pd.DataFrame:
249
- """Fetch all the fpmms of the creator."""
250
- latest_id = ""
251
- fpmms = []
252
- trader_category = "pearl"
253
- print(f"Getting markets for {trader_category}")
254
- fetcher = fpmms_fetcher(trader_category)
255
- for _ in tqdm(fetcher, unit="fpmms", unit_scale=BATCH_SIZE):
256
- batch = fetcher.send(latest_id)
257
- if len(batch) == 0:
258
- break
259
-
260
- latest_id = batch[-1].get(ID_FIELD, "")
261
- if latest_id == "":
262
- raise ValueError(f"Unexpected data format retrieved: {batch}")
263
-
264
- fpmms.extend(batch)
265
-
266
- return pd.DataFrame(fpmms)
267
-
268
-
269
- def get_answer(fpmm: pd.Series) -> str:
270
- """Get an answer from its index, using Series of an FPMM."""
271
- return fpmm[QUESTION_FIELD][OUTCOMES_FIELD][fpmm[ANSWER_FIELD]]
272
-
273
-
274
- def transform_fpmms(fpmms: pd.DataFrame) -> pd.DataFrame:
275
- """Transform an FPMMS dataframe."""
276
- transformed = fpmms.dropna()
277
- transformed = transformed.drop_duplicates([ID_FIELD])
278
- transformed = transformed.loc[transformed[ANSWER_FIELD] != MAX_UINT_HEX]
279
- transformed.loc[:, ANSWER_FIELD] = (
280
- transformed[ANSWER_FIELD].str.slice(-1).astype(int)
281
- )
282
- transformed.loc[:, ANSWER_FIELD] = transformed.apply(get_answer, axis=1)
283
- transformed = transformed.drop(columns=[QUESTION_FIELD])
284
-
285
- return transformed
286
-
287
-
288
- def etl(filename: Optional[str] = None) -> pd.DataFrame:
289
- """Fetch, process, store and return the markets as a Dataframe."""
290
- qs_fpmms = fetch_qs_fpmms()
291
- qs_fpmms = transform_fpmms(qs_fpmms)
292
- qs_fpmms["market_creator"] = "quickstart"
293
- print(f"Results for the market creator quickstart. Len = {len(qs_fpmms)}")
294
-
295
- pearl_fpmms = fetch_pearl_fpmms()
296
- pearl_fpmms = transform_fpmms(pearl_fpmms)
297
- pearl_fpmms["market_creator"] = "pearl"
298
- print(f"Results for the market creator pearl. Len = {len(pearl_fpmms)}")
299
- fpmms = pd.concat([qs_fpmms, pearl_fpmms], ignore_index=True)
300
-
301
- if filename:
302
- fpmms.to_parquet(DATA_DIR / filename, index=False)
303
-
304
- return fpmms
305
-
306
-
307
- def read_global_trades_file() -> pd.DataFrame:
308
- try:
309
- trades_filename = "fpmmTrades.parquet"
310
- fpmms_trades = pd.read_parquet(TMP_DIR / trades_filename)
311
- except FileNotFoundError:
312
- print("Error: fpmmTrades.parquet not found. No market creator added")
313
- return
314
- return fpmms_trades
315
-
316
-
317
- def add_market_creator(tools: pd.DataFrame) -> pd.DataFrame:
318
- # Check if fpmmTrades.parquet is in the same directory
319
- fpmms_trades = read_global_trades_file()
320
- tools["market_creator"] = ""
321
- # traverse the list of traders
322
- tools_no_market_creator = 0
323
- traders_list = list(tools.trader_address.unique())
324
- for trader_address in traders_list:
325
- market_creator = ""
326
- try:
327
- trades = fpmms_trades[fpmms_trades["trader_address"] == trader_address]
328
- market_creator = trades.iloc[0]["market_creator"] # first value is enough
329
- except Exception:
330
- print(f"ERROR getting the market creator of {trader_address}")
331
- tools_no_market_creator += 1
332
- continue
333
- # update
334
- tools.loc[tools["trader_address"] == trader_address, "market_creator"] = (
335
- market_creator
336
- )
337
- # filter those tools where we don't have market creator info
338
- tools = tools.loc[tools["market_creator"] != ""]
339
- print(f"Number of tools with no market creator info = {tools_no_market_creator}")
340
- return tools
341
-
342
-
343
- def fpmmTrades_etl(
344
- trades_filename: str, from_timestamp: int, to_timestamp: int = DEFAULT_TO_TIMESTAMP
345
- ) -> pd.DataFrame:
346
- print("Generating the trades file")
347
- try:
348
- fpmmTrades = create_fpmmTrades(
349
- from_timestamp=from_timestamp, to_timestamp=to_timestamp
350
- )
351
- except FileNotFoundError:
352
- print(f"Error creating {trades_filename} file .")
353
-
354
- # make sure trader_address is in the columns
355
- assert "trader_address" in fpmmTrades.columns, "trader_address column not found"
356
-
357
- # lowercase and strip trader_address
358
- fpmmTrades["trader_address"] = fpmmTrades["trader_address"].str.lower().str.strip()
359
- fpmmTrades.to_parquet(DATA_DIR / trades_filename, index=False)
360
- return fpmmTrades
361
-
362
-
363
- def check_current_week_data(trades_df: pd.DataFrame) -> pd.DataFrame:
364
- """Function to check if all current weeks data is present, if not, then add the missing data from previous file"""
365
- # Get current date
366
- now = datetime.now()
367
-
368
- # Get start of the current week (Monday)
369
- start_of_week = now - timedelta(days=now.weekday())
370
- start_of_week = start_of_week.replace(hour=0, minute=0, second=0, microsecond=0)
371
- print(f"start of the week = {start_of_week}")
372
-
373
- trades_df["creation_timestamp"] = pd.to_datetime(trades_df["creationTimestamp"])
374
- trades_df["creation_date"] = trades_df["creation_timestamp"].dt.date
375
- trades_df["creation_date"] = pd.to_datetime(trades_df["creation_date"])
376
- # Check dataframe
377
- min_date = min(trades_df.creation_date)
378
- if min_date > start_of_week:
379
- # missing data of current week in the trades file
380
- fpmms_trades = read_global_trades_file()
381
- # get missing data
382
- missing_data = fpmms_trades[
383
- (fpmms_trades["creation_date"] >= start_of_week)
384
- & (fpmms_trades["creation_date"] < min_date)
385
- ]
386
- merge_df = pd.concat([trades_df, missing_data], ignore_index=True)
387
- merge_df.drop_duplicates("id", keep="last", inplace=True)
388
- return merge_df
389
- # no update needed
390
- return trades_df
391
-
392
-
393
- def update_fpmmTrades_parquet(trades_filename: str) -> pd.DataFrame:
394
- # Read old trades parquet file
395
- try:
396
- old_trades_df = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
397
- except Exception as e:
398
- print(f"Error reading old trades parquet file {e}")
399
- return None
400
-
401
- try:
402
- new_trades_df = pd.read_parquet(DATA_DIR / trades_filename)
403
- except Exception as e:
404
- print(f"Error reading new trades parquet file {e}")
405
- return None
406
-
407
- # lowercase and strip trader_address
408
- new_trades_df["trader_address"] = (
409
- new_trades_df["trader_address"].str.lower().str.strip()
410
- )
411
- # ensure creationTimestamp compatibility
412
- try:
413
- new_trades_df["creationTimestamp"] = new_trades_df["creationTimestamp"].apply(
414
- lambda x: transform_to_datetime(x)
415
- )
416
-
417
- except Exception as e:
418
- print(f"Transformation not needed")
419
- try:
420
- old_trades_df["creationTimestamp"] = old_trades_df["creationTimestamp"].apply(
421
- lambda x: transform_to_datetime(x)
422
- )
423
- except Exception as e:
424
- print(f"Transformation not needed")
425
-
426
- # merge two dataframes
427
- merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)
428
- # avoid numpy objects
429
- merge_df["fpmm.arbitrationOccurred"] = merge_df["fpmm.arbitrationOccurred"].astype(
430
- bool
431
- )
432
- merge_df["fpmm.isPendingArbitration"] = merge_df[
433
- "fpmm.isPendingArbitration"
434
- ].astype(bool)
435
-
436
- # Check for duplicates
437
- print(f"Initial length before removing duplicates in fpmmTrades= {len(merge_df)}")
438
-
439
- # Remove duplicates
440
- # fpmm.outcomes is a numpy array
441
- merge_df.drop_duplicates("id", keep="last", inplace=True)
442
- print(f"Final length after removing duplicates in fpmmTrades= {len(merge_df)}")
443
-
444
- # save the parquet file
445
- merge_df.to_parquet(TMP_DIR / "fpmmTrades.parquet", index=False)
446
-
447
- return merge_df
448
-
449
-
450
- def update_fpmmTrades(from_date: str):
451
-
452
- from_timestamp = pd.Timestamp(datetime.strptime(from_date, "%Y-%m-%d")).tz_localize(
453
- "UTC"
454
- )
455
- fpmmTrades_etl(
456
- trades_filename="new_fpmmTrades.parquet",
457
- from_timestamp=int(from_timestamp.timestamp()),
458
- )
459
- update_fpmmTrades_parquet("new_fpmmTrades.parquet")
460
-
461
-
462
- if __name__ == "__main__":
463
- cutoff_date = "2025-01-13"
464
- update_fpmmTrades(cutoff_date)
 
scripts/mech_request_utils.py DELETED
@@ -1,603 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # ------------------------------------------------------------------------------
3
- #
4
- # Copyright 2024 Valory AG
5
- #
6
- # Licensed under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License.
8
- # You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS,
14
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- # See the License for the specific language governing permissions and
16
- # limitations under the License.
17
- #
18
- # ------------------------------------------------------------------------------
19
-
20
- """Script for retrieving mech requests and their delivers."""
21
- import json
22
- import time
23
- import pickle
24
- from random import uniform
25
- from typing import Any, Dict, Tuple
26
- import requests
27
- from gql import Client, gql
28
- from gql.transport.requests import RequestsHTTPTransport
29
- from tools import (
30
- GET_CONTENTS_BATCH_SIZE,
31
- IRRELEVANT_TOOLS,
32
- create_session,
33
- request,
34
- )
35
- from tqdm import tqdm
36
- from web3_utils import (
37
- FPMM_QS_CREATOR,
38
- FPMM_PEARL_CREATOR,
39
- IPFS_POLL_INTERVAL,
40
- SUBGRAPH_POLL_INTERVAL,
41
- )
42
- from concurrent.futures import ThreadPoolExecutor, as_completed
43
- from utils import (
44
- DATA_DIR,
45
- JSON_DATA_DIR,
46
- MECH_SUBGRAPH_URL,
47
- SUBGRAPH_API_KEY,
48
- IPFS_ADDRESS,
49
- )
50
-
51
- NUM_WORKERS = 10
52
- BLOCKS_CHUNK_SIZE = 10000
53
- TEXT_ALIGNMENT = 30
54
- MINIMUM_WRITE_FILE_DELAY_SECONDS = 20
55
- MECH_FROM_BLOCK_RANGE = 50000
56
-
57
- last_write_time = 0.0
58
-
59
- REQUESTS_QUERY_FILTER = """
60
- query requests_query($sender_not_in: [Bytes!], $id_gt: Bytes, $blockNumber_gte: BigInt, $blockNumber_lte: BigInt) {
61
- requests(where: {sender_not_in: $sender_not_in, id_gt: $id_gt, blockNumber_gte: $blockNumber_gte, blockNumber_lte: $blockNumber_lte}, orderBy: id, first: 1000) {
62
- blockNumber
63
- blockTimestamp
64
- id
65
- ipfsHash
66
- requestId
67
- sender
68
- transactionHash
69
- }
70
- }
71
- """
72
-
73
- DELIVERS_QUERY_NO_FILTER = """
74
- query delivers_query($id_gt: Bytes, $blockNumber_gte: BigInt, $blockNumber_lte: BigInt) {
75
- delivers(where: {id_gt: $id_gt, blockNumber_gte: $blockNumber_gte, blockNumber_lte: $blockNumber_lte}, orderBy: id, first: 1000) {
76
- blockNumber
77
- blockTimestamp
78
- id
79
- ipfsHash
80
- requestId
81
- sender
82
- transactionHash
83
- }
84
- }
85
-
86
- """
87
- DELIVERS_QUERY = """
88
- query delivers_query($requestId: BigInt, $blockNumber_gte: BigInt, $blockNumber_lte: BigInt) {
89
- delivers(where: {requestId: $requestId, blockNumber_gte: $blockNumber_gte, blockNumber_lte: $blockNumber_lte}, orderBy: blockNumber, first: 1000) {
90
- blockNumber
91
- blockTimestamp
92
- id
93
- ipfsHash
94
- requestId
95
- sender
96
- transactionHash
97
- }
98
- }
99
- """
100
-
101
- MISSING_DELIVERS_QUERY = """
102
- query delivers_query($requestId: BigInt, $blockNumber_gte: BigInt, $blockNumber_lte: BigInt) {
103
- delivers(where: {requestId: $requestId, blockNumber_gte: $blockNumber_gte, blockNumber_lte: $blockNumber_lte}, orderBy: blockNumber, first: 1000) {
104
- blockNumber
105
- blockTimestamp
106
- id
107
- ipfsHash
108
- requestId
109
- sender
110
- transactionHash
111
- }
112
- }
113
- """
114
-
115
-
116
- def collect_all_mech_requests(from_block: int, to_block: int, filename: str) -> Tuple:
117
-
118
- print(f"Fetching all mech requests from {from_block} to {to_block}")
119
- mech_requests = {}
120
- duplicated_reqIds = []
121
- mech_subgraph_url = MECH_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
122
- transport = RequestsHTTPTransport(url=mech_subgraph_url)
123
- client = Client(transport=transport, fetch_schema_from_transport=True)
124
-
125
- id_gt = "0x00"
126
- nr_errors = 0
127
- while True:
128
- variables = {
129
- "sender_not_in": [FPMM_QS_CREATOR, FPMM_PEARL_CREATOR],
130
- "id_gt": id_gt,
131
- "blockNumber_gte": str(from_block), # str
132
- "blockNumber_lte": str(to_block), # str
133
- }
134
- try:
135
- response = fetch_with_retry(client, REQUESTS_QUERY_FILTER, variables)
136
-
137
- items = response.get("requests", [])
138
-
139
- if not items:
140
- break
141
-
142
- for mech_request in items:
143
- if mech_request["id"] not in mech_requests:
144
- mech_requests[mech_request["id"]] = mech_request
145
- else:
146
- duplicated_reqIds.append(mech_request["id"])
147
- except Exception as e:
148
- # counter for errors
149
- nr_errors += 1
150
- print(f"Error while getting the response: {e}")
151
-
152
- id_gt = items[-1]["id"]
153
- time.sleep(SUBGRAPH_POLL_INTERVAL)
154
- print(f"New execution for id_gt = {id_gt}")
155
- if len(duplicated_reqIds) > 0:
156
- print(f"Number of duplicated req Ids = {len(duplicated_reqIds)}")
157
- save_json_file(mech_requests, filename)
158
-
159
- print(f"Number of requests = {len(mech_requests)}")
160
- print(f"Number of duplicated req Ids = {len(duplicated_reqIds)}")
161
- save_json_file(mech_requests, filename)
162
- return mech_requests, duplicated_reqIds, nr_errors
163
-
164
-
165
- def fetch_with_retry(client, query, variables, max_retries=5):
166
- for attempt in range(max_retries):
167
- try:
168
- return client.execute(gql(query), variable_values=variables)
169
- except Exception as e:
170
- if attempt == max_retries - 1:
171
- raise e
172
- wait_time = (2**attempt) + uniform(0, 1) # exponential backoff with jitter
173
- time.sleep(wait_time)
174
-
175
-
176
- def collect_all_mech_delivers(from_block: int, to_block: int, filename: str) -> Tuple:
177
-
178
- print(f"Fetching all mech delivers from {from_block} to {to_block}")
179
-
180
- mech_delivers = {}
181
- duplicated_requestIds = []
182
- mech_subgraph_url = MECH_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
183
- transport = RequestsHTTPTransport(url=mech_subgraph_url)
184
- client = Client(transport=transport, fetch_schema_from_transport=True)
185
- to_block = (
186
- to_block + MECH_FROM_BLOCK_RANGE
187
- ) # there is a delay between deliver and request
188
- id_gt = ""
189
- nr_errors = 0
190
- while True:
191
- variables = {
192
- "id_gt": id_gt,
193
- "blockNumber_gte": str(from_block), # str
194
- "blockNumber_lte": str(to_block), # str
195
- }
196
- try:
197
- response = fetch_with_retry(client, DELIVERS_QUERY_NO_FILTER, variables)
198
- items = response.get("delivers", [])
199
-
200
- if not items:
201
- break
202
-
203
- for mech_deliver in items:
204
- if mech_deliver["requestId"] not in mech_delivers:
205
- mech_delivers[mech_deliver["requestId"]] = [mech_deliver]
206
- else:
207
- duplicated_requestIds.append(mech_deliver["requestId"])
208
- # we will handle the duplicated later
209
- except Exception as e:
210
- # counter for errors
211
- nr_errors += 1
212
- print(f"Error while getting the response: {e}")
213
- # return None, None
214
-
215
- id_gt = items[-1]["id"]
216
- time.sleep(SUBGRAPH_POLL_INTERVAL)
217
- print(f"New execution for id_gt = {id_gt}")
218
- if len(duplicated_requestIds) > 0:
219
- print(f"Number of duplicated request id = {len(duplicated_requestIds)}")
220
- save_json_file(mech_delivers, filename)
221
- print(f"Number of delivers = {len(mech_delivers)}")
222
- print(f"Number of duplicated request id = {len(duplicated_requestIds)}")
223
- save_json_file(mech_delivers, filename)
224
- return mech_delivers, duplicated_requestIds, nr_errors
225
-
226
-
227
- def collect_missing_delivers(request_id: int, block_number: int) -> Dict[str, Any]:
228
- to_block = (
229
- block_number + MECH_FROM_BLOCK_RANGE
230
- ) # there is a delay between deliver and request
231
- print(f"Fetching all missing delivers from {block_number} to {to_block}")
232
- mech_delivers = {}
233
- mech_subgraph_url = MECH_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
234
- transport = RequestsHTTPTransport(url=mech_subgraph_url)
235
- client = Client(transport=transport, fetch_schema_from_transport=True)
236
-
237
- variables = {
238
- "requestId": request_id,
239
- "blockNumber_gte": str(block_number), # str
240
- "blockNumber_lte": str(to_block), # str
241
- }
242
- try:
243
- response = fetch_with_retry(client, MISSING_DELIVERS_QUERY, variables)
244
- items = response.get("delivers", [])
245
- # If the user sends requests with the same values (tool, prompt, nonce) it
246
- # will generate the same requestId. Therefore, multiple items can be retrieved
247
- # at this point. We assume the most likely deliver to this request is the
248
- # one with the closest blockNumber among all delivers with the same requestId.
249
- if items:
250
- return items[0]
251
- except Exception as e:
252
- print(f"Error while getting the response: {e}")
253
- # TODO: count how many mech requests without a deliver we have
254
-
255
- return mech_delivers
256
-
257
-
258
- def populate_requests_ipfs_contents(
259
- session: requests.Session, mech_requests: Dict[str, Any], keys_to_traverse: list
260
- ) -> Tuple[dict, int]:
261
- updated_dict = {}
262
- wrong_response_count = 0
263
- for k in tqdm(
264
- keys_to_traverse,
265
- desc="Fetching IPFS contents for requests",
266
- position=1,
267
- unit="results",
268
- ):
269
- mech_request = mech_requests[k]
270
-
271
- if "ipfsContents" not in mech_request:
272
- ipfs_hash = mech_request["ipfsHash"]
273
- url = f"{IPFS_ADDRESS}{ipfs_hash}/metadata.json"
274
- response = request(session, url)
275
- if response is None:
276
- tqdm.write(f"Skipping {mech_request=}. because response was None")
277
- wrong_response_count += 1
278
- continue
279
- try:
280
- contents = response.json()
281
- if contents["tool"] in IRRELEVANT_TOOLS:
282
- continue
283
- mech_request["ipfsContents"] = contents
284
- except requests.exceptions.JSONDecodeError:
285
- tqdm.write(
286
- f"Skipping {mech_request} because of JSONDecodeError when parsing response"
287
- )
288
- wrong_response_count += 1
289
- continue
290
- updated_dict[k] = mech_request
291
- time.sleep(IPFS_POLL_INTERVAL)
292
-
293
- return updated_dict, wrong_response_count
294
-
295
-
296
- def populate_delivers_ipfs_contents(
297
- session: requests.Session, mech_requests: Dict[str, Any], keys_to_traverse: list
298
- ) -> Tuple[dict, int]:
299
- """Function to complete the delivers content info from ipfs"""
300
- updated_dict = {}
301
- errors = 0
302
- for k in tqdm(
303
- keys_to_traverse,
304
- desc="Fetching IPFS contents for delivers",
305
- position=1,
306
- unit="results",
307
- ):
308
- mech_request = mech_requests[k]
309
- if "deliver" not in mech_request or len(mech_request["deliver"]) == 0:
310
- print(f"Skipping mech request {mech_request} because of no delivers info")
311
- continue
312
-
313
- deliver = mech_request["deliver"]
314
- if "ipfsContents" not in deliver:
315
- ipfs_hash = deliver["ipfsHash"]
316
- request_id = deliver["requestId"]
317
- url = f"{IPFS_ADDRESS}{ipfs_hash}/{request_id}"
318
- response = request(session, url)
319
- if response is None:
320
- tqdm.write(f"Skipping {mech_request=}.")
321
- continue
322
- try:
323
- contents = response.json()
324
- metadata = contents.get("metadata", None)
325
- if metadata and contents["metadata"]["tool"] in IRRELEVANT_TOOLS:
326
- continue
327
- contents.pop("cost_dict", None)
328
- deliver["ipfsContents"] = contents
329
- except requests.exceptions.JSONDecodeError:
330
- tqdm.write(f"Skipping {mech_request} because of JSONDecodeError")
331
- continue
332
- except Exception:
333
- errors += 1
334
- tqdm.write(
335
- f"Skipping {mech_request} because of error parsing the response"
336
- )
337
- continue
338
- updated_dict[k] = mech_request
339
- time.sleep(IPFS_POLL_INTERVAL)
340
-
341
- return updated_dict, errors
342
-
343
-
344
- def write_mech_events_to_file(
345
- mech_requests: Dict[str, Any],
346
- filename: str,
347
- force_write: bool = False,
348
- ) -> None:
349
- global last_write_time # pylint: disable=global-statement
350
- now = time.time()
351
-
352
- if len(mech_requests) == 0:
353
- return
354
-
355
- filename_path = DATA_DIR / filename
356
- if force_write or (now - last_write_time) >= MINIMUM_WRITE_FILE_DELAY_SECONDS:
357
- with open(filename_path, "w", encoding="utf-8") as file:
358
- json.dump(mech_requests, file, indent=2)
359
- last_write_time = now
360
-
361
-
362
- def save_json_file(data: Dict[str, Any], filename: str):
363
- """Function to save the content into a json file"""
364
- filename_path = JSON_DATA_DIR / filename
365
- with open(filename_path, "w", encoding="utf-8") as file:
366
- json.dump(data, file, indent=2)
367
-
368
-
369
- def merge_json_files(old_file: str, new_file: str):
370
- # read old file
371
- with open(JSON_DATA_DIR / old_file, "r") as f:
372
- old_data = json.load(f)
373
-
374
- # read the new file
375
- with open(JSON_DATA_DIR / new_file, "r") as f:
376
- new_data = json.load(f)
377
-
378
- # Merge the two JSON files and remove duplicates
379
- old_data.update(new_data)
380
-
381
- # Save the merged JSON file
382
- print(f"{old_file} updated")
383
- save_json_file(old_data, old_file)
384
-
385
-
386
- def clean_mech_delivers(requests_filename: str, delivers_filename: str) -> None:
387
- """Function to remove from the delivers json file the request Ids that are not in the mech requests"""
388
- # read mech requests
389
- with open(JSON_DATA_DIR / requests_filename, "r") as file:
390
- mech_requests = json.load(file)
391
-
392
- list_reqIds = [mech_requests[k].get("requestId") for k in mech_requests.keys()]
393
-
394
- # remove requestIds from delivers that are not in this list
395
- with open(JSON_DATA_DIR / delivers_filename, "r") as file:
396
- mech_delivers = json.load(file)
397
-
398
- print(f"original size of the file {len(mech_delivers)}")
399
- mech_delivers = {
400
- k: v
401
- for k, v in tqdm(
402
- mech_delivers.items(),
403
- total=len(mech_delivers),
404
- desc="Filtering delivers dictionary",
405
- )
406
- if k in set(list_reqIds)
407
- }
408
-
409
- print(f"final size of the file {len(mech_delivers)}")
410
- save_json_file(mech_delivers, delivers_filename)
411
-
412
-
413
- def get_request_block_numbers(
414
- mech_requests: Dict[str, Any], target_req_id: int
415
- ) -> list:
416
- block_numbers = []
417
-
418
- for entry in mech_requests.values():
419
- if entry["requestId"] == target_req_id:
420
- block_numbers.append(entry["blockNumber"])
421
-
422
- return block_numbers
423
-
424
-
425
- def update_block_request_map(block_request_id_map: dict) -> None:
426
- print("Saving block request id map info")
427
- with open(JSON_DATA_DIR / "block_request_id_map.pickle", "wb") as handle:
428
- pickle.dump(block_request_id_map, handle, protocol=pickle.HIGHEST_PROTOCOL)
429
-
430
-
431
- def fix_duplicate_requestIds(requests_filename: str, delivers_filename: str) -> dict:
432
- print("Fix duplicated request Ids")
433
- with open(JSON_DATA_DIR / delivers_filename, "r") as file:
434
- data_delivers = json.load(file)
435
-
436
- with open(JSON_DATA_DIR / requests_filename, "r") as file:
437
- mech_requests = json.load(file)
438
- list_request_Ids = list(data_delivers.keys())
439
-
440
- list_duplicated_reqIds = []
441
- for req_Id in list_request_Ids:
442
- if len(data_delivers.get(req_Id)) > 1:
443
- list_duplicated_reqIds.append(req_Id)
444
-
445
- print(len(list_duplicated_reqIds))
446
- block_request_id_map = {}
447
-
448
- for req_Id in list_duplicated_reqIds:
449
- # get the list of mech request block numbers for that requestId
450
- block_nrs = get_request_block_numbers(mech_requests, req_Id)
451
- # get the list of mech delivers
452
- mech_delivers_list = data_delivers.get(req_Id) # list of dictionaries
453
- if len(block_nrs) > 1:
454
- print("More than one block number was found")
455
- for block_nr in block_nrs:
456
- key = (block_nr, req_Id)
457
- min_difference_request = min(
458
- mech_delivers_list,
459
- key=lambda x: abs(int(x["blockNumber"]) - int(block_nr)),
460
- )
461
- block_request_id_map[key] = min_difference_request
462
-
463
- update_block_request_map(block_request_id_map)
464
-
465
- return block_request_id_map
466
-
467
-
468
- def merge_requests_delivers(
469
- requests_filename: str, delivers_filename: str, filename: str
470
- ) -> None:
471
- print("Merge request delivers")
472
- """Function to map requests and delivers"""
473
- with open(JSON_DATA_DIR / delivers_filename, "r") as file:
474
- mech_delivers = json.load(file)
475
-
476
- with open(JSON_DATA_DIR / requests_filename, "r") as file:
477
- mech_requests = json.load(file)
478
-
479
- # read the block map for duplicated requestIds
480
- with open(JSON_DATA_DIR / "block_request_id_map.pickle", "rb") as handle:
481
- # key = (block_nr, req_Id) value = delivers dictionary
482
- block_request_id_map = pickle.load(handle)
483
- for _, mech_req in tqdm(
484
- mech_requests.items(),
485
- desc=f"Merging delivers data into the mech requests",
486
- ):
487
- if "deliver" in mech_req:
488
- continue
489
-
490
- block_number_req = mech_req["blockNumber"]
491
- req_Id = mech_req["requestId"]
492
- # check if it is in the duplicated map
493
- key = (block_number_req, req_Id)
494
- if key in block_request_id_map.keys():
495
- deliver_dict = block_request_id_map[key]
496
- elif req_Id in mech_delivers.keys():
497
- deliver_dict = mech_delivers.get(req_Id)[0] # the value is a list
498
- else:
499
- print("No deliver entry found for this request Id")
500
- deliver_dict = collect_missing_delivers(
501
- request_id=req_Id, block_number=int(block_number_req)
502
- )
503
-
504
- # extract the info and append it to the original mech request dictionary
505
- mech_req["deliver"] = deliver_dict
506
- save_json_file(mech_requests, filename)
507
- return
508
-
509
-
510
- def get_ipfs_data(input_filename: str, output_filename: str, logger):
511
- with open(JSON_DATA_DIR / input_filename, "r") as file:
512
- mech_requests = json.load(file)
513
-
514
- total_keys_to_traverse = list(mech_requests.keys())
515
- updated_mech_requests = dict()
516
- session = create_session()
517
- logger.info("UPDATING IPFS CONTENTS OF REQUESTS")
518
- # requests
519
- nr_errors = 0
520
- with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
521
- futures = []
522
- for i in range(0, len(mech_requests), GET_CONTENTS_BATCH_SIZE):
523
- futures.append(
524
- executor.submit(
525
- populate_requests_ipfs_contents,
526
- session,
527
- mech_requests,
528
- total_keys_to_traverse[i : i + GET_CONTENTS_BATCH_SIZE],
529
- )
530
- )
531
-
532
- for future in tqdm(
533
- as_completed(futures),
534
- total=len(futures),
535
- desc=f"Fetching all ipfs contents from requests ",
536
- ):
537
- partial_dict, error_counter = future.result()
538
- nr_errors += error_counter
539
- updated_mech_requests.update(partial_dict)
540
-
541
- save_json_file(updated_mech_requests, output_filename)
542
- logger.info(f"NUMBER OF MECH REQUEST IPFS ERRORS={nr_errors}")
543
-
544
- # delivers
545
- nr_deliver_errors = 0
546
- logger.info("UPDATING IPFS CONTENTS OF DELIVERS")
547
- total_keys_to_traverse = list(updated_mech_requests.keys())
548
- final_tools_content = {}
549
- with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
550
- futures = []
551
- for i in range(0, len(updated_mech_requests), GET_CONTENTS_BATCH_SIZE):
552
- futures.append(
553
- executor.submit(
554
- populate_delivers_ipfs_contents,
555
- session,
556
- updated_mech_requests,
557
- total_keys_to_traverse[i : i + GET_CONTENTS_BATCH_SIZE],
558
- )
559
- )
560
-
561
- for future in tqdm(
562
- as_completed(futures),
563
- total=len(futures),
564
- desc=f"Fetching all ipfs contents from delivers ",
565
- ):
566
- partial_dict, error_counter = future.result()
567
- nr_deliver_errors += error_counter
568
- final_tools_content.update(partial_dict)
569
-
570
- save_json_file(final_tools_content, output_filename)
571
- logger.info(f"NUMBER OF MECH DELIVERS IPFS ERRORS={nr_deliver_errors}")
572
-
573
-
574
- def only_delivers_loop():
575
- with open(DATA_DIR / "tools_info.json", "r") as file:
576
- updated_mech_requests = json.load(file)
577
-
578
- # delivers
579
- session = create_session()
580
- print("UPDATING IPFS CONTENTS OF DELIVERS")
581
- total_keys_to_traverse = list(updated_mech_requests.keys())
582
- final_tools_content = {}
583
- with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
584
- futures = []
585
- for i in range(0, len(updated_mech_requests), GET_CONTENTS_BATCH_SIZE):
586
- futures.append(
587
- executor.submit(
588
- populate_delivers_ipfs_contents,
589
- session,
590
- updated_mech_requests,
591
- total_keys_to_traverse[i : i + GET_CONTENTS_BATCH_SIZE],
592
- )
593
- )
594
-
595
- for future in tqdm(
596
- as_completed(futures),
597
- total=len(futures),
598
- desc=f"Fetching all ipfs contents from delivers ",
599
- ):
600
- partial_dict, _ = future.result()
601
- final_tools_content.update(partial_dict)
602
-
603
- save_json_file(final_tools_content, "tools_info.json")
 
scripts/nr_mech_calls.py DELETED
@@ -1,271 +0,0 @@
1
- import pandas as pd
2
- from utils import DATA_DIR, DEFAULT_MECH_FEE, TMP_DIR, transform_to_datetime
3
- from tqdm import tqdm
4
-
5
- from typing import Dict, Any
6
- from collections import defaultdict
7
- from tools import IRRELEVANT_TOOLS
8
- import re
9
-
10
-
11
- def update_roi(row: pd.Series) -> float:
12
- new_value = row.net_earnings / (
13
- row.collateral_amount
14
- + row.trade_fee_amount
15
- + row.num_mech_calls * DEFAULT_MECH_FEE
16
- )
17
- return new_value
18
-
19
-
20
- def get_mech_statistics(mech_requests: Dict[str, Any]) -> Dict[str, Dict[str, int]]:
21
- """Outputs a table with Mech statistics"""
22
-
23
- mech_statistics: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
24
-
25
- for mech_request in mech_requests.values():
26
- if (
27
- "ipfs_contents" not in mech_request
28
- or "tool" not in mech_request["ipfs_contents"]
29
- or "prompt" not in mech_request["ipfs_contents"]
30
- ):
31
- continue
32
-
33
- if mech_request["ipfs_contents"]["tool"] in IRRELEVANT_TOOLS:
34
- continue
35
-
36
- prompt = mech_request["ipfs_contents"]["prompt"]
37
- prompt = prompt.replace("\n", " ")
38
- prompt = prompt.strip()
39
- prompt = re.sub(r"\s+", " ", prompt)
40
- prompt_match = re.search(r"\"(.*)\"", prompt)
41
- if prompt_match:
42
- question = prompt_match.group(1)
43
- else:
44
- question = prompt
45
-
46
- mech_statistics[question]["count"] += 1
47
- mech_statistics[question]["fees"] += mech_request["fee"]
48
-
49
- return mech_statistics
50
-
51
-
52
- def create_unknown_traders_df(trades_df: pd.DataFrame) -> pd.DataFrame:
53
- """filter trades coming from non-Olas traders that are placing no mech calls"""
54
- no_mech_calls_mask = (trades_df["staking"] == "non_Olas") & (
55
- trades_df["num_mech_calls"] == 0
56
- )
57
- no_mech_calls_df = trades_df.loc[no_mech_calls_mask]
58
- trades_df = trades_df.loc[~no_mech_calls_mask]
59
- return no_mech_calls_df, trades_df
60
-
61
-
62
- def update_trade_nr_mech_calls(non_agents: bool = False):
63
- try:
64
- all_trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
65
- tools = pd.read_parquet(DATA_DIR / "tools.parquet")
66
- except Exception as e:
67
- print(f"Error reading the profitability and tools parquet files")
68
-
69
- traders = list(all_trades_df.trader_address.unique())
70
- if non_agents:
71
- traders = list(
72
- all_trades_df.loc[
73
- all_trades_df["staking"] == "non_agent"
74
- ].trader_address.unique()
75
- )
76
-
77
- print("before updating")
78
- print(
79
- all_trades_df.loc[
80
- all_trades_df["staking"] == "non_agent"
81
- ].num_mech_calls.describe()
82
- )
83
- for trader in tqdm(traders, desc=f"Updating Traders mech calls", unit="traders"):
84
- tools_usage = tools[tools["trader_address"] == trader]
85
- if len(tools_usage) == 0:
86
- tqdm.write(f"trader with no tools usage found {trader}")
87
- all_trades_df.loc[
88
- all_trades_df["trader_address"] == trader, "nr_mech_calls"
89
- ] = 0
90
- # update roi
91
- all_trades_df["roi"] = all_trades_df.apply(lambda x: update_roi(x), axis=1)
92
- print("after updating")
93
- print(
94
- all_trades_df.loc[
95
- all_trades_df["staking"] == "non_agent"
96
- ].num_mech_calls.describe()
97
- )
98
-
99
- # saving
100
- all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
101
- # print("Summarising trades...")
102
- # summary_df = summary_analyse(all_trades_df)
103
- # summary_df.to_parquet(DATA_DIR / "summary_profitability.parquet", index=False)
104
-
105
-
106
- def get_daily_mech_calls_estimation(
107
- daily_trades: pd.DataFrame, daily_tools: pd.DataFrame
108
- ) -> list:
109
- # for each market
110
- daily_markets = daily_trades.title.unique()
111
- trader = daily_trades.iloc[0].trader_address
112
- day = daily_trades.iloc[0].creation_date
113
- estimations = []
114
- for market in daily_markets:
115
- estimation_dict = {}
116
- estimation_dict["trader_address"] = trader
117
- estimation_dict["trading_day"] = day
118
- # tools usage of this market
119
- market_requests = daily_tools.loc[daily_tools["title"] == market]
120
- # trades done on this market
121
- market_trades = daily_trades[daily_trades["title"] == market]
122
- mech_calls_estimation = 0
123
- total_trades = len(market_trades)
124
- total_requests = 0
125
- if len(market_requests) > 0:
126
- total_requests = len(market_requests)
127
- mech_calls_estimation = total_requests / total_trades
128
- estimation_dict["total_trades"] = total_trades
129
- estimation_dict["total_mech_requests"] = total_requests
130
- estimation_dict["market"] = market
131
- estimation_dict["mech_calls_per_trade"] = mech_calls_estimation
132
- estimations.append(estimation_dict)
133
- return estimations
134
-
135
-
136
- def compute_daily_mech_calls(
137
- fpmmTrades: pd.DataFrame, tools: pd.DataFrame
138
- ) -> pd.DataFrame:
139
- """Function to compute the daily mech calls at the trader and market level"""
140
- nr_traders = len(fpmmTrades["trader_address"].unique())
141
- fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
142
- fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
143
- fpmmTrades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)
144
- tools["request_time"] = pd.to_datetime(tools["request_time"])
145
- tools["request_date"] = tools["request_time"].dt.date
146
- tools = tools.sort_values(by="request_time", ascending=True)
147
- all_mech_calls = []
148
- for trader in tqdm(
149
- fpmmTrades["trader_address"].unique(),
150
- total=nr_traders,
151
- desc="creating daily mech calls computation",
152
- ):
153
- # compute the mech calls estimations for each trader
154
- all_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
155
- all_tools = tools[tools["trader_address"] == trader]
156
- trading_days = all_trades.creation_date.unique()
157
- for trading_day in trading_days:
158
- daily_trades = all_trades.loc[all_trades["creation_date"] == trading_day]
159
- daily_tools = all_tools.loc[all_tools["request_date"] == trading_day]
160
- trader_entry = {}
161
- trader_entry["trader_address"] = trader
162
- trader_entry["total_trades"] = len(daily_trades)
163
- trader_entry["trading_day"] = trading_day
164
- trader_entry["total_mech_calls"] = len(daily_tools)
165
- all_mech_calls.append(trader_entry)
166
- return pd.DataFrame.from_dict(all_mech_calls, orient="columns")
167
-
168
-
169
- def compute_mech_call_estimations(
170
- fpmmTrades: pd.DataFrame, tools: pd.DataFrame
171
- ) -> pd.DataFrame:
172
- """Function to compute the estimated mech calls needed per trade at the trader and market level"""
173
- nr_traders = len(fpmmTrades["trader_address"].unique())
174
- fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
175
- fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
176
- tools["request_time"] = pd.to_datetime(tools["request_time"])
177
- tools["request_date"] = tools["request_time"].dt.date
178
- all_estimations = []
179
- for trader in tqdm(
180
- fpmmTrades["trader_address"].unique(),
181
- total=nr_traders,
182
- desc="creating mech calls estimation dataframe",
183
- ):
184
- # compute the mech calls estimations for each trader
185
- all_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
186
- all_tools = tools[tools["trader_address"] == trader]
187
- trading_days = all_trades.creation_date.unique()
188
- for trading_day in trading_days:
189
- daily_trades = all_trades.loc[all_trades["creation_date"] == trading_day]
190
- daily_tools = all_tools.loc[all_tools["request_date"] == trading_day]
191
- daily_estimations = get_daily_mech_calls_estimation(
192
- daily_trades=daily_trades, daily_tools=daily_tools
193
- )
194
- all_estimations.extend(daily_estimations)
195
- return pd.DataFrame.from_dict(all_estimations, orient="columns")
196
-
197
-
198
- def compute_timestamp_mech_calls(
199
- all_trades: pd.DataFrame, all_tools: pd.DataFrame
200
- ) -> list:
201
- """Function to compute the mech calls based on timestamps but without repeating mech calls"""
202
- mech_calls_contents = []
203
- request_timestamps_used = {}
204
- # initialize the dict with all markets
205
- all_markets = all_trades.title.unique()
206
- for market in all_markets:
207
- request_timestamps_used[market] = []
208
-
209
- for i, trade in all_trades.iterrows():
210
- trader = trade["trader_address"]
211
- trade_id = trade["id"]
212
- market = trade["title"]
213
- trade_ts = trade["creation_timestamp"]
214
- market_requests = all_tools.loc[
215
- (all_tools["trader_address"] == trader) & (all_tools["title"] == market)
216
- ]
217
- # traverse market requests
218
- total_mech_calls = 0
219
- for i, mech_request in market_requests.iterrows():
220
- # check timestamp (before the trade)
221
- request_ts = mech_request["request_time"]
222
- if request_ts < trade_ts:
223
- # check the timestamp has not been used in a previous trade
224
- used_timestamps = request_timestamps_used[market]
225
- if request_ts not in used_timestamps:
226
- request_timestamps_used[market].append(request_ts)
227
- total_mech_calls += 1
228
- # create an entry for the dataframe
229
- mech_call_entry = {}
230
- mech_call_entry["trader_address"] = trader
231
- mech_call_entry["market"] = market
232
- mech_call_entry["trade_id"] = trade_id
233
- mech_call_entry["total_mech_calls"] = total_mech_calls
234
- mech_calls_contents.append(mech_call_entry)
235
- return mech_calls_contents
236
-
237
-
238
- def compute_mech_calls_based_on_timestamps(
239
- fpmmTrades: pd.DataFrame, tools: pd.DataFrame
240
- ) -> pd.DataFrame:
241
- """Function to compute the mech calls needed per trade at the trader and market level using timestamps"""
242
- nr_traders = len(fpmmTrades["trader_address"].unique())
243
- fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
244
- fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
245
- fpmmTrades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)
246
- tools["request_time"] = pd.to_datetime(tools["request_time"])
247
- tools["request_date"] = tools["request_time"].dt.date
248
- tools = tools.sort_values(by="request_time", ascending=True)
249
- all_mech_calls = []
250
- for trader in tqdm(
251
- fpmmTrades["trader_address"].unique(),
252
- total=nr_traders,
253
- desc="creating mech calls count based on timestamps",
254
- ):
255
- # compute the mech calls for each trader
256
- all_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
257
- all_tools = tools[tools["trader_address"] == trader]
258
- trader_mech_calls = compute_timestamp_mech_calls(all_trades, all_tools)
259
- all_mech_calls.extend(trader_mech_calls)
260
- return pd.DataFrame.from_dict(all_mech_calls, orient="columns")
261
-
262
-
263
- if __name__ == "__main__":
264
- # update_trade_nr_mech_calls(non_agents=True)
265
- tools = pd.read_parquet(TMP_DIR / "tools.parquet")
266
- fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
267
- fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
268
- lambda x: transform_to_datetime(x)
269
- )
270
- result = compute_mech_calls_based_on_timestamps(fpmmTrades=fpmmTrades, tools=tools)
271
- result.to_parquet(TMP_DIR / "result_df.parquet", index=False)
 
scripts/profitability.py DELETED
@@ -1,530 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # ------------------------------------------------------------------------------
3
- #
4
- # Copyright 2023 Valory AG
5
- #
6
- # Licensed under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License.
8
- # You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS,
14
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- # See the License for the specific language governing permissions and
16
- # limitations under the License.
17
- #
18
- # ------------------------------------------------------------------------------
19
-
20
- import time
21
- import pandas as pd
22
- from typing import Any
23
- from enum import Enum
24
- from tqdm import tqdm
25
- import numpy as np
26
- import os
27
- from web3_utils import query_conditional_tokens_gc_subgraph
28
- from get_mech_info import (
29
- DATETIME_60_DAYS_AGO,
30
- update_tools_parquet,
31
- update_all_trades_parquet,
32
- )
33
- from utils import (
34
- wei_to_unit,
35
- convert_hex_to_int,
36
- JSON_DATA_DIR,
37
- DATA_DIR,
38
- DEFAULT_MECH_FEE,
39
- TMP_DIR,
40
- measure_execution_time,
41
- )
42
- from staking import label_trades_by_staking
43
- from nr_mech_calls import (
44
- create_unknown_traders_df,
45
- transform_to_datetime,
46
- compute_mech_calls_based_on_timestamps,
47
- )
48
-
49
- DUST_THRESHOLD = 10000000000000
50
- INVALID_ANSWER = -1
51
- DEFAULT_60_DAYS_AGO_TIMESTAMP = (DATETIME_60_DAYS_AGO).timestamp()
52
- WXDAI_CONTRACT_ADDRESS = "0xe91D153E0b41518A2Ce8Dd3D7944Fa863463a97d"
53
- DUST_THRESHOLD = 10000000000000
54
-
55
-
56
- class MarketState(Enum):
57
- """Market state"""
58
-
59
- OPEN = 1
60
- PENDING = 2
61
- FINALIZING = 3
62
- ARBITRATING = 4
63
- CLOSED = 5
64
-
65
- def __str__(self) -> str:
66
- """Prints the market status."""
67
- return self.name.capitalize()
68
-
69
-
70
- class MarketAttribute(Enum):
71
- """Attribute"""
72
-
73
- NUM_TRADES = "Num_trades"
74
- WINNER_TRADES = "Winner_trades"
75
- NUM_REDEEMED = "Num_redeemed"
76
- INVESTMENT = "Investment"
77
- FEES = "Fees"
78
- MECH_CALLS = "Mech_calls"
79
- MECH_FEES = "Mech_fees"
80
- EARNINGS = "Earnings"
81
- NET_EARNINGS = "Net_earnings"
82
- REDEMPTIONS = "Redemptions"
83
- ROI = "ROI"
84
-
85
- def __str__(self) -> str:
86
- """Prints the attribute."""
87
- return self.value
88
-
89
- def __repr__(self) -> str:
90
- """Prints the attribute representation."""
91
- return self.name
92
-
93
- @staticmethod
94
- def argparse(s: str) -> "MarketAttribute":
95
- """Performs string conversion to MarketAttribute."""
96
- try:
97
- return MarketAttribute[s.upper()]
98
- except KeyError as e:
99
- raise ValueError(f"Invalid MarketAttribute: {s}") from e
100
-
101
-
102
- ALL_TRADES_STATS_DF_COLS = [
103
- "trader_address",
104
- "market_creator",
105
- "trade_id",
106
- "creation_timestamp",
107
- "title",
108
- "market_status",
109
- "collateral_amount",
110
- "outcome_index",
111
- "trade_fee_amount",
112
- "outcomes_tokens_traded",
113
- "current_answer",
114
- "is_invalid",
115
- "winning_trade",
116
- "earnings",
117
- "redeemed",
118
- "redeemed_amount",
119
- "num_mech_calls",
120
- "mech_fee_amount",
121
- "net_earnings",
122
- "roi",
123
- ]
124
-
125
-
126
- def _is_redeemed(user_json: dict[str, Any], fpmmTrade: dict[str, Any]) -> bool:
127
- """Returns whether the user has redeemed the position."""
128
- user_positions = user_json["data"]["user"]["userPositions"]
129
- condition_id = fpmmTrade["fpmm.condition.id"]
130
- for position in user_positions:
131
- position_condition_ids = position["position"]["conditionIds"]
132
- balance = int(position["balance"])
133
-
134
- if condition_id in position_condition_ids:
135
- if balance == 0:
136
- return True
137
- # return early
138
- return False
139
- return False
140
-
141
-
142
- def prepare_profitalibity_data(
143
- tools_filename: str,
144
- trades_filename: str,
145
- tmp_dir: bool = False,
146
- ) -> pd.DataFrame:
147
- """Prepare data for profitalibity analysis."""
148
-
149
- # Check if tools.parquet is in the same directory
150
- try:
151
- if tmp_dir:
152
- tools = pd.read_parquet(TMP_DIR / tools_filename)
153
- else:
154
- tools = pd.read_parquet(DATA_DIR / tools_filename)
155
-
156
- # make sure trader_address is in the columns
157
- assert "trader_address" in tools.columns, "trader_address column not found"
158
-
159
- # lowercase and strip trader_address
160
- tools["trader_address"] = tools["trader_address"].str.lower().str.strip()
161
-
162
- tools.drop_duplicates(
163
- subset=["request_id", "request_block"], keep="last", inplace=True
164
- )
165
- tools.to_parquet(DATA_DIR / tools_filename)
166
- print(f"{tools_filename} loaded")
167
- except FileNotFoundError:
168
- print(f"{tools_filename} not found.")
169
- return
170
-
171
- # Check if fpmmTrades.parquet is in the same directory
172
- print("Reading the new trades file")
173
- try:
174
- if tmp_dir:
175
- fpmmTrades = pd.read_parquet(TMP_DIR / trades_filename)
176
- else:
177
- fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
178
- except FileNotFoundError:
179
- print(f"Error reading {trades_filename} file.")
180
-
181
- # make sure trader_address is in the columns
182
- assert "trader_address" in fpmmTrades.columns, "trader_address column not found"
183
-
184
- # lowercase and strip trader_address
185
- fpmmTrades["trader_address"] = fpmmTrades["trader_address"].str.lower().str.strip()
186
-
187
- return fpmmTrades
188
-
189
-
190
- def determine_market_status(trade, current_answer):
191
- """Determine the market status of a trade."""
192
- if (current_answer is np.nan or current_answer is None) and time.time() >= int(
193
- trade["fpmm.openingTimestamp"]
194
- ):
195
- return MarketState.PENDING
196
- elif current_answer is np.nan or current_answer is None:
197
- return MarketState.OPEN
198
- elif trade["fpmm.isPendingArbitration"]:
199
- return MarketState.ARBITRATING
200
- elif time.time() < int(trade["fpmm.answerFinalizedTimestamp"]):
201
- return MarketState.FINALIZING
202
- return MarketState.CLOSED
203
-
204
-
205
- def analyse_trader(
206
- trader_address: str,
207
- fpmmTrades: pd.DataFrame,
208
- trader_estimated_mech_calls: pd.DataFrame,
209
- daily_info: bool = False,
210
- ) -> pd.DataFrame:
211
- """Analyse a trader's trades"""
212
- fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
213
- fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
214
- # Filter trades and tools for the given trader
215
- trades = fpmmTrades[fpmmTrades["trader_address"] == trader_address]
216
-
217
- # Prepare the DataFrame
218
- trades_df = pd.DataFrame(columns=ALL_TRADES_STATS_DF_COLS)
219
- if trades.empty:
220
- return trades_df
221
-
222
- # Fetch user's conditional tokens gc graph
223
- try:
224
- user_json = query_conditional_tokens_gc_subgraph(trader_address)
225
- except Exception as e:
226
- print(f"Error fetching user data: {e}")
227
- return trades_df
228
-
229
- # Iterate over the trades
230
- trades_answer_nan = 0
231
- trades_no_closed_market = 0
232
- for i, trade in tqdm(trades.iterrows(), total=len(trades), desc="Analysing trades"):
233
- try:
234
- market_answer = trade["fpmm.currentAnswer"]
235
- trading_day = trade["creation_date"]
236
- trade_id = trade["id"]
237
- if not daily_info and not market_answer:
238
- # print(f"Skipping trade {i} because currentAnswer is NaN")
239
- trades_answer_nan += 1
240
- continue
241
- # Parsing and computing shared values
242
- collateral_amount = wei_to_unit(float(trade["collateralAmount"]))
243
- fee_amount = wei_to_unit(float(trade["feeAmount"]))
244
- outcome_tokens_traded = wei_to_unit(float(trade["outcomeTokensTraded"]))
245
- earnings, winner_trade = (0, False)
246
- redemption = _is_redeemed(user_json, trade)
247
- current_answer = market_answer if market_answer else None
248
- market_creator = trade["market_creator"]
249
-
250
- # Determine market status
251
- market_status = determine_market_status(trade, current_answer)
252
-
253
- # Skip non-closed markets
254
- if not daily_info and market_status != MarketState.CLOSED:
255
- # print(
256
- # f"Skipping trade {i} because market is not closed. Market Status: {market_status}"
257
- # )
258
- trades_no_closed_market += 1
259
- continue
260
- if current_answer is not None:
261
- current_answer = convert_hex_to_int(current_answer)
262
-
263
- # Compute invalidity
264
- is_invalid = current_answer == INVALID_ANSWER
265
-
266
- # Compute earnings and winner trade status
267
- if current_answer is None:
268
- earnings = 0.0
269
- winner_trade = None
270
- elif is_invalid:
271
- earnings = collateral_amount
272
- winner_trade = False
273
- elif int(trade["outcomeIndex"]) == current_answer:
274
- earnings = outcome_tokens_traded
275
- winner_trade = True
276
-
277
- # Compute mech calls using the title, and trade id
278
- if daily_info:
279
- total_mech_calls = trader_estimated_mech_calls.loc[
280
- (trader_estimated_mech_calls["trading_day"] == trading_day),
281
- "total_mech_calls",
282
- ].iloc[0]
283
- else:
284
- total_mech_calls = trader_estimated_mech_calls.loc[
285
- (trader_estimated_mech_calls["market"] == trade["title"])
286
- & (trader_estimated_mech_calls["trade_id"] == trade_id),
287
- "total_mech_calls",
288
- ].iloc[0]
289
-
290
- net_earnings = (
291
- earnings
292
- - fee_amount
293
- - (total_mech_calls * DEFAULT_MECH_FEE)
294
- - collateral_amount
295
- )
296
-
297
- # Assign values to DataFrame
298
- trades_df.loc[i] = {
299
- "trader_address": trader_address,
300
- "market_creator": market_creator,
301
- "trade_id": trade["id"],
302
- "market_status": market_status.name,
303
- "creation_timestamp": trade["creationTimestamp"],
304
- "title": trade["title"],
305
- "collateral_amount": collateral_amount,
306
- "outcome_index": trade["outcomeIndex"],
307
- "trade_fee_amount": fee_amount,
308
- "outcomes_tokens_traded": outcome_tokens_traded,
309
- "current_answer": current_answer,
310
- "is_invalid": is_invalid,
311
- "winning_trade": winner_trade,
312
- "earnings": earnings,
313
- "redeemed": redemption,
314
- "redeemed_amount": earnings if redemption else 0,
315
- "num_mech_calls": total_mech_calls,
316
- "mech_fee_amount": total_mech_calls * DEFAULT_MECH_FEE,
317
- "net_earnings": net_earnings,
318
- "roi": net_earnings
319
- / (
320
- collateral_amount + fee_amount + total_mech_calls * DEFAULT_MECH_FEE
321
- ),
322
- }
323
-
324
- except Exception as e:
325
- print(f"Error processing trade {i}: {e}")
326
- print(trade)
327
- continue
328
-
329
- print(f"Number of trades where currentAnswer is NaN = {trades_answer_nan}")
330
- print(
331
- f"Number of trades where the market is not closed = {trades_no_closed_market}"
332
- )
333
- return trades_df
334
-
335
-
336
- def analyse_all_traders(
337
- trades: pd.DataFrame,
338
- estimated_mech_calls: pd.DataFrame,
339
- daily_info: bool = False,
340
- ) -> pd.DataFrame:
341
- """Analyse all creators."""
342
-
343
- all_traders = []
344
- for trader in tqdm(
345
- trades["trader_address"].unique(),
346
- total=len(trades["trader_address"].unique()),
347
- desc="Analysing creators",
348
- ):
349
- trader_estimated_mech_calls = estimated_mech_calls.loc[
350
- estimated_mech_calls["trader_address"] == trader
351
- ]
352
- all_traders.append(
353
- analyse_trader(trader, trades, trader_estimated_mech_calls, daily_info)
354
- )
355
-
356
- # concat all creators
357
- all_creators_df = pd.concat(all_traders)
358
-
359
- return all_creators_df
360
-
361
-
362
- @measure_execution_time
363
- def run_profitability_analysis(
364
- tools_filename: str,
365
- trades_filename: str,
366
- merge: bool = False,
367
- tmp_dir: bool = False,
368
- ):
369
- """Create all trades analysis."""
370
- print(f"Preparing data with {tools_filename} and {trades_filename}")
371
- fpmmTrades = prepare_profitalibity_data(
372
- tools_filename, trades_filename, tmp_dir=tmp_dir
373
- )
374
-
375
- if merge:
376
- update_tools_parquet(tools_filename)
377
-
378
- tools = pd.read_parquet(TMP_DIR / "tools.parquet")
379
-
380
- try:
381
- fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
382
- lambda x: transform_to_datetime(x)
383
- )
384
- except Exception as e:
385
- print(f"Transformation not needed")
386
-
387
- print("Computing the estimated mech calls dataset")
388
- trade_mech_calls = compute_mech_calls_based_on_timestamps(
389
- fpmmTrades=fpmmTrades, tools=tools
390
- )
391
- trade_mech_calls.to_parquet(TMP_DIR / "trade_mech_calls.parquet")
392
-
393
- print(trade_mech_calls.total_mech_calls.describe())
394
- print("Analysing trades...")
395
- all_trades_df = analyse_all_traders(fpmmTrades, trade_mech_calls)
396
-
397
- # # merge previous files if requested
398
- if merge:
399
- all_trades_df = update_all_trades_parquet(all_trades_df)
400
-
401
- # debugging purposes
402
- all_trades_df.to_parquet(JSON_DATA_DIR / "all_trades_df.parquet", index=False)
403
- # all_trades_df = pd.read_parquet(JSON_DATA_DIR / "all_trades_df.parquet")
404
-
405
- # filter invalid markets. Condition: "is_invalid" is True
406
- invalid_trades = all_trades_df.loc[all_trades_df["is_invalid"] == True]
407
- if len(invalid_trades) == 0:
408
- print("No new invalid trades")
409
- else:
410
- if merge:
411
- try:
412
- print("Merging invalid trades parquet file")
413
- old_invalid_trades = pd.read_parquet(
414
- DATA_DIR / "invalid_trades.parquet"
415
- )
416
- merge_df = pd.concat(
417
- [old_invalid_trades, invalid_trades], ignore_index=True
418
- )
419
- invalid_trades = merge_df.drop_duplicates()
420
- except Exception as e:
421
- print(f"Error updating the invalid trades parquet {e}")
422
- invalid_trades.to_parquet(DATA_DIR / "invalid_trades.parquet", index=False)
423
-
424
- all_trades_df = all_trades_df.loc[all_trades_df["is_invalid"] == False]
425
-
426
- all_trades_df = label_trades_by_staking(trades_df=all_trades_df)
427
-
428
- print("Creating unknown traders dataset")
429
- unknown_traders_df, all_trades_df = create_unknown_traders_df(
430
- trades_df=all_trades_df
431
- )
432
- # merge with previous unknown traders dataset
433
- previous_unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
434
-
435
- unknown_traders_df: pd.DataFrame = pd.concat(
436
- [unknown_traders_df, previous_unknown_traders], ignore_index=True
437
- )
438
- unknown_traders_df.drop_duplicates("trade_id", keep="last", inplace=True)
439
- unknown_traders_df.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
440
-
441
- # save to parquet
442
- all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
443
-
444
- print("Done!")
445
-
446
- return all_trades_df
447
-
448
-
449
- def add_trades_profitability(trades_filename: str):
450
- print("Reading the trades file")
451
- try:
452
- fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
453
- except FileNotFoundError:
454
- print(f"Error reading {trades_filename} file .")
455
-
456
- # make sure trader_address is in the columns
457
- assert "trader_address" in fpmmTrades.columns, "trader_address column not found"
458
-
459
- # lowercase and strip creator_address
460
- fpmmTrades["trader_address"] = fpmmTrades["trader_address"].str.lower().str.strip()
461
-
462
- print("Reading tools parquet file")
463
- tools = pd.read_parquet(TMP_DIR / "tools.parquet")
464
-
465
- try:
466
- fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
467
- lambda x: transform_to_datetime(x)
468
- )
469
- except Exception as e:
470
- print(f"Transformation not needed")
471
-
472
- print("Computing the estimated mech calls dataset")
473
- trade_mech_calls = compute_mech_calls_based_on_timestamps(
474
- fpmmTrades=fpmmTrades, tools=tools
475
- )
476
- print(trade_mech_calls.total_mech_calls.describe())
477
- print("Analysing trades...")
478
- all_trades_df = analyse_all_traders(fpmmTrades, trade_mech_calls)
479
-
480
- # debugging purposes
481
- all_trades_df.to_parquet(JSON_DATA_DIR / "missing_trades_df.parquet", index=False)
482
- # filter invalid markets. Condition: "is_invalid" is True
483
- print("Checking invalid trades")
484
- invalid_trades = all_trades_df.loc[all_trades_df["is_invalid"] == True]
485
- if len(invalid_trades) > 0:
486
- try:
487
- print("Merging invalid trades parquet file")
488
- old_invalid_trades = pd.read_parquet(DATA_DIR / "invalid_trades.parquet")
489
- merge_df = pd.concat(
490
- [old_invalid_trades, invalid_trades], ignore_index=True
491
- )
492
- invalid_trades = merge_df.drop_duplicates("trade_id")
493
- except Exception as e:
494
- print(f"Error updating the invalid trades parquet {e}")
495
- invalid_trades.to_parquet(DATA_DIR / "invalid_trades.parquet", index=False)
496
- all_trades_df = all_trades_df.loc[all_trades_df["is_invalid"] == False]
497
-
498
- print("Adding staking labels")
499
- all_trades_df = label_trades_by_staking(trades_df=all_trades_df)
500
- print("Creating unknown traders dataset")
501
- unknown_traders_df, all_trades_df = create_unknown_traders_df(
502
- trades_df=all_trades_df
503
- )
504
- if len(unknown_traders_df) > 0:
505
- print("Merging unknown traders info")
506
- # merge with previous unknown traders dataset
507
- previous_unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
508
-
509
- unknown_traders_df: pd.DataFrame = pd.concat(
510
- [unknown_traders_df, previous_unknown_traders], ignore_index=True
511
- )
512
- unknown_traders_df.drop_duplicates("trade_id", keep="last", inplace=True)
513
- unknown_traders_df.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
514
-
515
- print("merge with previous all_trades_profitability")
516
- old_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
517
- all_trades_df: pd.DataFrame = pd.concat(
518
- [all_trades_df, old_trades], ignore_index=True
519
- )
520
- all_trades_df.drop_duplicates("trade_id", keep="last", inplace=True)
521
- all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
522
-
523
-
524
- if __name__ == "__main__":
525
- run_profitability_analysis(
526
- tools_filename="tools.parquet",
527
- trades_filename="fpmmTrades.parquet",
528
- merge=False,
529
- tmp_dir=True,
530
- )
scripts/pull_data.py DELETED
@@ -1,173 +0,0 @@
1
- import logging
2
- from datetime import datetime
3
- import pandas as pd
4
- from markets import (
5
- etl as mkt_etl,
6
- DEFAULT_FILENAME as MARKETS_FILENAME,
7
- fpmmTrades_etl,
8
- update_fpmmTrades_parquet,
9
- )
10
- from tools import generate_tools_file
11
- from profitability import run_profitability_analysis, add_trades_profitability
12
- from utils import (
13
- get_question,
14
- current_answer,
15
- RPC,
16
- measure_execution_time,
17
- DATA_DIR,
18
- HIST_DIR,
19
- TMP_DIR,
20
- )
21
- from get_mech_info import (
22
- get_mech_events_since_last_run,
23
- update_json_files,
24
- )
25
- from update_tools_accuracy import compute_tools_accuracy
26
- from cleaning_old_info import clean_old_data_from_parquet_files
27
- from web3_utils import updating_timestamps
28
- from manage_space_files import move_files
29
- from cloud_storage import upload_historical_file
30
- from tools_metrics import compute_tools_based_datasets
31
-
32
-
33
- logging.basicConfig(
34
- level=logging.INFO,
35
- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
36
- datefmt="%Y-%m-%d %H:%M:%S",
37
- )
38
- logger = logging.getLogger(__name__)
39
-
40
-
41
- def add_current_answer(tools_filename: str):
42
- # Get currentAnswer from FPMMS
43
- fpmms = pd.read_parquet(DATA_DIR / MARKETS_FILENAME)
44
- tools = pd.read_parquet(DATA_DIR / tools_filename)
45
-
46
- # Get the question from the tools
47
- logging.info("Getting the question and current answer for the tools")
48
- tools["title"] = tools["prompt_request"].apply(lambda x: get_question(x))
49
- tools["currentAnswer"] = tools["title"].apply(lambda x: current_answer(x, fpmms))
50
-
51
- tools["currentAnswer"] = tools["currentAnswer"].str.replace("yes", "Yes")
52
- tools["currentAnswer"] = tools["currentAnswer"].str.replace("no", "No")
53
- # Save the tools data after the updates on the content
54
- tools.to_parquet(DATA_DIR / tools_filename, index=False)
55
- del fpmms
56
-
57
-
58
- def save_historical_data():
59
- """Function to save a copy of the main trades and tools file
60
- into the historical folder"""
61
- print("Saving historical data copies")
62
- current_datetime = datetime.now()
63
-
64
- timestamp = current_datetime.strftime("%Y%m%d_%H%M%S")
65
-
66
- try:
67
- tools = pd.read_parquet(TMP_DIR / "tools.parquet")
68
- filename = f"tools_{timestamp}.parquet"
69
- tools.to_parquet(HIST_DIR / filename, index=False)
70
- # save into cloud storage
71
- upload_historical_file(filename)
72
- except Exception as e:
73
- print(f"Error saving tools file in the historical folder {e}")
74
-
75
- try:
76
- all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
77
- filename = f"all_trades_profitability_{timestamp}.parquet"
78
- all_trades.to_parquet(HIST_DIR / filename, index=False)
79
- # save into cloud storage
80
- upload_historical_file(filename)
81
-
82
- except Exception as e:
83
- print(
84
- f"Error saving all_trades_profitability file in the historical folder {e}"
85
- )
86
-
87
-
88
- @measure_execution_time
89
- def only_new_weekly_analysis():
90
- """Run weekly analysis for the FPMMS project."""
91
- rpc = RPC
92
- # Run markets ETL
93
- logging.info("Running markets ETL")
94
- mkt_etl(MARKETS_FILENAME)
95
- logging.info("Markets ETL completed")
96
-
97
- # Mech events ETL
98
- logging.info("Generating the mech json files")
99
- # get only new data
100
- latest_timestamp = get_mech_events_since_last_run(logger)
101
- if latest_timestamp == None:
102
- print("Error while getting the mech events")
103
- return
104
- logging.info(f"Finished generating the mech json files from {latest_timestamp}")
105
-
106
- # FpmmTrades ETL
107
- fpmmTrades_etl(
108
- trades_filename="new_fpmmTrades.parquet",
109
- from_timestamp=int(latest_timestamp.timestamp()),
110
- )
111
- # merge with previous file
112
- print("Merging with previous fpmmTrades file")
113
- update_fpmmTrades_parquet(trades_filename="new_fpmmTrades.parquet")
114
-
115
- # Run tools ETL
116
- logging.info("Generate and parse the tools content")
117
- # generate only new file
118
- generate_tools_file("new_tools_info.json", "new_tools.parquet")
119
- logging.info("Tools ETL completed")
120
-
121
- add_current_answer("new_tools.parquet")
122
-
123
- # # Run profitability analysis
124
- logging.info("Running profitability analysis")
125
- run_profitability_analysis(
126
- tools_filename="new_tools.parquet",
127
- trades_filename="new_fpmmTrades.parquet",
128
- merge=True,
129
- )
130
-
131
- logging.info("Profitability analysis completed")
132
-
133
- # merge new json files with old json files
134
- update_json_files()
135
-
136
- save_historical_data()
137
- try:
138
- clean_old_data_from_parquet_files("2024-11-26")
139
- except Exception as e:
140
- print("Error cleaning the oldest information from parquet files")
141
- print(f"reason = {e}")
142
- compute_tools_accuracy()
143
- compute_tools_based_datasets()
144
- # # move to tmp folder the new generated files
145
- move_files()
146
- logging.info("Weekly analysis files generated and saved")
147
-
148
-
149
- def restoring_trades_data(from_date: str, to_date: str):
150
- # Convert the string to datetime64[ns, UTC]
151
- min_date_utc = pd.to_datetime(from_date, format="%Y-%m-%d", utc=True)
152
- max_date_utc = pd.to_datetime(to_date, format="%Y-%m-%d", utc=True)
153
- logging.info("Running markets ETL")
154
- mkt_etl(MARKETS_FILENAME)
155
- logging.info("Markets ETL completed")
156
-
157
- fpmmTrades_etl(
158
- trades_filename="missing_fpmmTrades.parquet",
159
- from_timestamp=int(min_date_utc.timestamp()),
160
- to_timestamp=int(max_date_utc.timestamp()),
161
- )
162
-
163
- # merge with the old file
164
- print("Merging with previous fpmmTrades file")
165
- update_fpmmTrades_parquet(trades_filename="missing_fpmmTrades.parquet")
166
-
167
- # adding tools information
168
- add_trades_profitability(trades_filename="missing_fpmmTrades.parquet")
169
-
170
-
171
- if __name__ == "__main__":
172
- only_new_weekly_analysis()
173
- # restoring_trades_data("2024-12-28", "2025-01-07")
scripts/queries.py DELETED
@@ -1,161 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # ------------------------------------------------------------------------------
3
- #
4
- # Copyright 2024 Valory AG
5
- #
6
- # Licensed under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License.
8
- # You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS,
14
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- # See the License for the specific language governing permissions and
16
- # limitations under the License.
17
- #
18
- # ------------------------------------------------------------------------------
19
-
20
- from string import Template
21
-
22
- FPMMS_FIELD = "fixedProductMarketMakers"
23
- QUERY_FIELD = "query"
24
- ERROR_FIELD = "errors"
25
- DATA_FIELD = "data"
26
- ID_FIELD = "id"
27
- ANSWER_FIELD = "currentAnswer"
28
- QUESTION_FIELD = "question"
29
- OUTCOMES_FIELD = "outcomes"
30
- TITLE_FIELD = "title"
31
- ANSWER_TIMESTAMP_FIELD = "currentAnswerTimestamp"
32
- OPENING_TIMESTAMP_FIELD = "openingTimestamp"
33
- RESOLUTION_TIMESTAMP_FIELD = "resolutionTimestamp"
34
- CREATION_TIMESTAMP_FIELD = "creationTimestamp"
35
- LIQUIDITY_FIELD = "liquidityParameter"
36
- LIQUIDIY_MEASURE_FIELD = "liquidityMeasure"
37
- TOKEN_AMOUNTS_FIELD = "outcomeTokenAmounts"
38
-
39
- FPMMS_QUERY = Template(
40
- """
41
- {
42
- ${fpmms_field}(
43
- where: {
44
- creator: "${creator}",
45
- id_gt: "${fpmm_id}",
46
- isPendingArbitration: false
47
- },
48
- orderBy: ${id_field}
49
- first: ${first}
50
- ){
51
- ${id_field}
52
- ${answer_field}
53
- ${question_field} {
54
- ${outcomes_field}
55
- }
56
- ${title_field}
57
- }
58
- }
59
- """
60
- )
61
-
62
- omen_xdai_trades_query = Template(
63
- """
64
- {
65
- fpmmTrades(
66
- where: {
67
- type: Buy,
68
- fpmm_: {
69
- creator: "${fpmm_creator}"
70
- creationTimestamp_gte: "${fpmm_creationTimestamp_gte}",
71
- creationTimestamp_lt: "${fpmm_creationTimestamp_lte}"
72
- },
73
- creationTimestamp_gte: "${creationTimestamp_gte}",
74
- creationTimestamp_lte: "${creationTimestamp_lte}"
75
- id_gt: "${id_gt}"
76
- }
77
- first: ${first}
78
- orderBy: id
79
- orderDirection: asc
80
- ) {
81
- id
82
- title
83
- collateralToken
84
- outcomeTokenMarginalPrice
85
- oldOutcomeTokenMarginalPrice
86
- type
87
- creator {
88
- id
89
- }
90
- creationTimestamp
91
- collateralAmount
92
- collateralAmountUSD
93
- feeAmount
94
- outcomeIndex
95
- outcomeTokensTraded
96
- transactionHash
97
- fpmm {
98
- id
99
- outcomes
100
- title
101
- answerFinalizedTimestamp
102
- currentAnswer
103
- isPendingArbitration
104
- arbitrationOccurred
105
- openingTimestamp
106
- condition {
107
- id
108
- }
109
- }
110
- }
111
- }
112
- """
113
- )
114
-
115
- conditional_tokens_gc_user_query = Template(
116
- """
117
- {
118
- user(id: "${id}") {
119
- userPositions(
120
- first: ${first}
121
- where: {
122
- id_gt: "${userPositions_id_gt}"
123
- }
124
- orderBy: id
125
- ) {
126
- balance
127
- id
128
- position {
129
- id
130
- conditionIds
131
- }
132
- totalBalance
133
- wrappedBalance
134
- }
135
- }
136
- }
137
- """
138
- )
139
-
140
-
141
- TRADES_QUERY = """
142
- query fpmms_query($fpmm: String, $id_gt: ID) {
143
- fpmmTrades(
144
- where: {fpmm: $fpmm, id_gt: $id_gt, type: Buy}
145
- orderBy: id
146
- orderDirection: asc
147
- first: 1000
148
- ) {
149
- collateralAmount
150
- outcomeIndex
151
- outcomeTokensTraded
152
- id
153
- oldOutcomeTokenMarginalPrice
154
- outcomeTokenMarginalPrice
155
- type
156
- collateralAmountUSD
157
- creationTimestamp
158
- feeAmount
159
- }
160
- }
161
- """
scripts/staking.py DELETED
@@ -1,302 +0,0 @@
1
- import json
2
- import sys
3
- from typing import Any, List
4
- from utils import RPC, DATA_DIR, TMP_DIR, JSON_DATA_DIR
5
- import requests
6
- from tqdm import tqdm
7
- from web3 import Web3
8
- import pandas as pd
9
- import pickle
10
- import os
11
- from concurrent.futures import ThreadPoolExecutor, as_completed
12
-
13
- NUM_WORKERS = 10
14
- DEPRECATED_STAKING_PROGRAMS = {
15
- "quickstart_alpha_everest": "0x5add592ce0a1B5DceCebB5Dcac086Cd9F9e3eA5C",
16
- "quickstart_alpha_alpine": "0x2Ef503950Be67a98746F484DA0bBAdA339DF3326",
17
- "quickstart_alpha_coastal": "0x43fB32f25dce34EB76c78C7A42C8F40F84BCD237",
18
- }
19
- STAKING_PROGRAMS_QS = {
20
- "quickstart_beta_hobbyist": "0x389B46c259631Acd6a69Bde8B6cEe218230bAE8C",
21
- "quickstart_beta_hobbyist_2": "0x238EB6993b90a978ec6AAD7530d6429c949C08DA",
22
- "quickstart_beta_expert": "0x5344B7DD311e5d3DdDd46A4f71481bD7b05AAA3e",
23
- "quickstart_beta_expert_2": "0xb964e44c126410df341ae04B13aB10A985fE3513",
24
- "quickstart_beta_expert_3": "0x80faD33Cadb5F53f9D29F02Db97D682E8b101618",
25
- "quickstart_beta_expert_4": "0xaD9d891134443B443D7F30013c7e14Fe27F2E029",
26
- "quickstart_beta_expert_5": "0xE56dF1E563De1B10715cB313D514af350D207212",
27
- "quickstart_beta_expert_6": "0x2546214aEE7eEa4bEE7689C81231017CA231Dc93",
28
- "quickstart_beta_expert_7": "0xD7A3C8b975f71030135f1a66e9e23164d54fF455",
29
- "quickstart_beta_expert_8": "0x356C108D49C5eebd21c84c04E9162de41933030c",
30
- "quickstart_beta_expert_9": "0x17dBAe44BC5618Cc254055b386A29576b4F87015",
31
- "quickstart_beta_expert_10": "0xB0ef657b8302bd2c74B6E6D9B2b4b39145b19c6f",
32
- "quickstart_beta_expert_11": "0x3112c1613eAC3dBAE3D4E38CeF023eb9E2C91CF7",
33
- "quickstart_beta_expert_12": "0xF4a75F476801B3fBB2e7093aCDcc3576593Cc1fc",
34
- }
35
-
36
- STAKING_PROGRAMS_PEARL = {
37
- "pearl_alpha": "0xEE9F19b5DF06c7E8Bfc7B28745dcf944C504198A",
38
- "pearl_beta": "0xeF44Fb0842DDeF59D37f85D61A1eF492bbA6135d",
39
- "pearl_beta_2": "0x1c2F82413666d2a3fD8bC337b0268e62dDF67434",
40
- "pearl_beta_3": "0xBd59Ff0522aA773cB6074ce83cD1e4a05A457bc1",
41
- "pearl_beta_4": "0x3052451e1eAee78e62E169AfdF6288F8791F2918",
42
- "pearl_beta_5": "0x4Abe376Fda28c2F43b84884E5f822eA775DeA9F4",
43
- }
44
-
45
-
46
- SERVICE_REGISTRY_ADDRESS = "0x9338b5153AE39BB89f50468E608eD9d764B755fD"
47
-
48
-
49
- def _get_contract(address: str) -> Any:
50
- w3 = Web3(Web3.HTTPProvider(RPC))
51
- abi = _get_abi(address)
52
- contract = w3.eth.contract(address=Web3.to_checksum_address(address), abi=abi)
53
- return contract
54
-
55
-
56
- def _get_abi(address: str) -> List:
57
- contract_abi_url = (
58
- "https://gnosis.blockscout.com/api/v2/smart-contracts/{contract_address}"
59
- )
60
- response = requests.get(contract_abi_url.format(contract_address=address)).json()
61
-
62
- if "result" in response:
63
- result = response["result"]
64
- try:
65
- abi = json.loads(result)
66
- except json.JSONDecodeError:
67
- print("Error: Failed to parse 'result' field as JSON")
68
- sys.exit(1)
69
- else:
70
- abi = response.get("abi")
71
-
72
- return abi if abi else []
73
-
74
-
75
- def get_service_safe(service_id: int) -> str:
76
- """Gets the service Safe"""
77
- service_registry = _get_contract(SERVICE_REGISTRY_ADDRESS)
78
- service_safe_address = service_registry.functions.getService(service_id).call()[1]
79
- return service_safe_address
80
-
81
-
82
- def list_contract_functions(contract):
83
- function_names = []
84
- for item in contract.abi:
85
- if item.get("type") == "function":
86
- function_names.append(item.get("name"))
87
- return function_names
88
-
89
-
90
- def get_service_data(service_registry: Any, service_id: int) -> dict:
91
- tmp_map = {}
92
- # Get the list of addresses
93
- # print(f"getting addresses from service id ={service_id}")
94
-
95
- # available_functions = list_contract_functions(service_registry)
96
- # print("Available Contract Functions:")
97
- # for func in available_functions:
98
- # print(f"- {func}")
99
-
100
- data = service_registry.functions.getService(service_id).call()
101
- try:
102
- owner_data = service_registry.functions.ownerOf(service_id).call()
103
- except Exception as e:
104
- tqdm.write(f"Error: no owner data infor from {service_id}")
105
- return None
106
- # print(f"owner data = {owner_data}")
107
- address = data[1]
108
- state = data[-1]
109
- # print(f"address = {address}")
110
- # print(f"state={state}")
111
- # PEARL trade
112
-
113
- if address != "0x0000000000000000000000000000000000000000":
114
- tmp_map[service_id] = {
115
- "safe_address": address,
116
- "state": state,
117
- "owner_address": owner_data,
118
- }
119
- return tmp_map
120
-
121
-
122
- def update_service_map(start: int = 1, end: int = 2000):
123
- if os.path.exists(DATA_DIR / "service_map.pkl"):
124
- with open(DATA_DIR / "service_map.pkl", "rb") as f:
125
- service_map = pickle.load(f)
126
- else:
127
- service_map = {}
128
- print(f"updating service map from service id={start}")
129
- # we do not know which is the last service id right now
130
- service_registry = _get_contract(SERVICE_REGISTRY_ADDRESS)
131
- with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
132
- futures = []
133
- for service_id in range(start, end):
134
- futures.append(
135
- executor.submit(
136
- get_service_data,
137
- service_registry,
138
- service_id,
139
- )
140
- )
141
-
142
- for future in tqdm(
143
- as_completed(futures),
144
- total=len(futures),
145
- desc=f"Fetching all service data from contracts",
146
- ):
147
- partial_dict = future.result()
148
- if partial_dict:
149
- service_map.update(partial_dict)
150
-
151
- print(f"length of service map {len(service_map)}")
152
- with open(DATA_DIR / "service_map.pkl", "wb") as f:
153
- pickle.dump(service_map, f)
154
-
155
-
156
- def check_owner_staking_contract(owner_address: str) -> str:
157
- staking = "non_staking"
158
- owner_address = owner_address.lower()
159
- # check quickstart staking contracts
160
- qs_list = [x.lower() for x in STAKING_PROGRAMS_QS.values()]
161
- if owner_address in qs_list:
162
- return "quickstart"
163
-
164
- # check pearl staking contracts
165
- pearl_list = [x.lower() for x in STAKING_PROGRAMS_PEARL.values()]
166
- if owner_address in pearl_list:
167
- return "pearl"
168
-
169
- # check legacy staking contracts
170
- deprec_list = [x.lower() for x in DEPRECATED_STAKING_PROGRAMS.values()]
171
- if owner_address in deprec_list:
172
- return "quickstart"
173
-
174
- return staking
175
-
176
-
177
- def get_trader_address_staking(trader_address: str, service_map: dict) -> str:
178
- # check if there is any service id linked with that trader address
179
-
180
- found_key = -1
181
- for key, value in service_map.items():
182
- if value["safe_address"].lower() == trader_address.lower():
183
- # found a service
184
- found_key = key
185
- break
186
-
187
- if found_key == -1:
188
- return "non_Olas"
189
- owner = service_map[found_key]["owner_address"]
190
- return check_owner_staking_contract(owner_address=owner)
191
-
192
-
193
- def label_trades_by_staking(trades_df: pd.DataFrame, start: int = None) -> None:
194
- with open(DATA_DIR / "service_map.pkl", "rb") as f:
195
- service_map = pickle.load(f)
196
- # get the last service id
197
- keys = service_map.keys()
198
- if start is None:
199
- last_key = max(keys)
200
- else:
201
- last_key = start
202
- print(f"last service key = {last_key}")
203
- update_service_map(start=last_key)
204
- all_traders = trades_df.trader_address.unique()
205
- trades_df["staking"] = ""
206
- for trader in tqdm(all_traders, desc="Labeling traders by staking", unit="trader"):
207
- # tqdm.write(f"checking trader {trader}")
208
- staking_label = get_trader_address_staking(trader, service_map)
209
- if staking_label:
210
- trades_df.loc[trades_df["trader_address"] == trader, "staking"] = (
211
- staking_label
212
- )
213
- # tqdm.write(f"statking label {staking_label}")
214
- return trades_df
215
-
216
-
217
- def generate_retention_activity_file():
218
- tools = pd.read_parquet(TMP_DIR / "tools.parquet")
219
- tools["request_time"] = pd.to_datetime(tools["request_time"])
220
- tools["request_date"] = tools["request_time"].dt.date
221
- tools = tools.sort_values(by="request_time", ascending=True)
222
- reduced_tools_df = tools[
223
- ["trader_address", "request_time", "market_creator", "request_date"]
224
- ]
225
- print(f"length of reduced tools before labeling = {len(reduced_tools_df)}")
226
- reduced_tools_df = label_trades_by_staking(trades_df=reduced_tools_df)
227
- print(f"labeling of tools activity. {reduced_tools_df.staking.value_counts()}")
228
- print(f"length of reduced tools after labeling = {len(reduced_tools_df)}")
229
- reduced_tools_df = reduced_tools_df.sort_values(by="request_time", ascending=True)
230
- reduced_tools_df["month_year_week"] = (
231
- pd.to_datetime(tools["request_time"])
232
- .dt.to_period("W")
233
- .dt.start_time.dt.strftime("%b-%d-%Y")
234
- )
235
- reduced_tools_df.to_parquet(TMP_DIR / "retention_activity.parquet")
236
- return True
237
-
238
-
239
- def check_list_addresses(address_list: list):
240
- with open(DATA_DIR / "service_map.pkl", "rb") as f:
241
- service_map = pickle.load(f)
242
- # check if it is part of any service id on the map
243
- mapping = {}
244
- print(f"length of service map={len(service_map)}")
245
- keys = service_map.keys()
246
- last_key = max(keys)
247
-
248
- print(f"last service key = {last_key}")
249
- update_service_map(start=last_key)
250
- found_key = -1
251
- trader_types = []
252
- for trader_address in address_list:
253
- for key, value in service_map.items():
254
- if value["safe_address"].lower() == trader_address.lower():
255
- # found a service
256
- found_key = key
257
- mapping[trader_address] = "Olas"
258
- trader_types.append("Olas")
259
- break
260
-
261
- if found_key == -1:
262
- mapping[trader_address] = "non_Olas"
263
- trader_types.append("non_Olas")
264
- return mapping
265
-
266
-
267
- def check_service_map():
268
- with open(DATA_DIR / "service_map.pkl", "rb") as f:
269
- service_map = pickle.load(f)
270
- # check if it is part of any service id on the map
271
- mapping = {}
272
- print(f"length of service map={len(service_map)}")
273
- keys = service_map.keys()
274
- last_key = max(keys)
275
- print(f"last key ={last_key}")
276
- missing_keys = 0
277
- for i in range(1, last_key):
278
- if i not in keys:
279
- missing_keys += 1
280
- print(f"missing key = {i}")
281
- print(f"total missing keys = {missing_keys}")
282
-
283
-
284
- if __name__ == "__main__":
285
- # create_service_map()
286
- trades_df = pd.read_parquet(JSON_DATA_DIR / "all_trades_df.parquet")
287
- trades_df = trades_df.loc[trades_df["is_invalid"] == False]
288
-
289
- trades_df = label_trades_by_staking(trades_df=trades_df)
290
- print(trades_df.staking.value_counts())
291
- # trades_df.to_parquet(TMP_DIR / "result_staking.parquet", index=False)
292
- # generate_retention_activity_file()
293
- # a_list = [
294
- # "0x027592700fafc4db3221bb662d7bdc7f546a2bb5",
295
- # "0x0845f4ad01a2f41da618848c7a9e56b64377965e",
296
- # ]
297
- # check_list_addresses(address_list=a_list)
298
- # update_service_map()
299
- # check_service_map()
300
- # unknown_traders = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
301
- # unknown_traders = label_trades_by_staking(trades_df=unknown_traders)
302
- # unknown_traders.to_parquet(DATA_DIR / "unknown_traders.parquet", index=False)
scripts/tools.py DELETED
@@ -1,320 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # ------------------------------------------------------------------------------
3
- #
4
- # Copyright 2023 Valory AG
5
- #
6
- # Licensed under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License.
8
- # You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS,
14
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- # See the License for the specific language governing permissions and
16
- # limitations under the License.
17
- #
18
- # ------------------------------------------------------------------------------
19
-
20
- import json
21
- from typing import (
22
- Optional,
23
- List,
24
- Dict,
25
- Union,
26
- Any,
27
- )
28
- import pandas as pd
29
- import requests
30
- from datetime import datetime
31
- from gnosis_timestamps import transform_timestamp_to_datetime
32
- from requests.adapters import HTTPAdapter
33
- from tqdm import tqdm
34
- from urllib3 import Retry
35
- from markets import add_market_creator
36
- from concurrent.futures import ThreadPoolExecutor, as_completed
37
- from web3_utils import (
38
- N_IPFS_RETRIES,
39
- )
40
- from utils import (
41
- clean,
42
- BLOCK_FIELD,
43
- limit_text,
44
- DATA_DIR,
45
- JSON_DATA_DIR,
46
- MechEvent,
47
- MechEventName,
48
- MechRequest,
49
- MechResponse,
50
- EVENT_TO_MECH_STRUCT,
51
- REQUEST_ID,
52
- HTTP,
53
- HTTPS,
54
- get_result_values,
55
- get_vote,
56
- get_win_probability,
57
- get_prediction_values,
58
- )
59
-
60
- CONTRACTS_PATH = "contracts"
61
- MECH_TO_INFO = {
62
- # this block number is when the creator had its first tx ever, and after this mech's creation
63
- "0xff82123dfb52ab75c417195c5fdb87630145ae81": ("old_mech_abi.json", 28911547),
64
- # this block number is when this mech was created
65
- "0x77af31de935740567cf4ff1986d04b2c964a786a": ("new_mech_abi.json", 30776879),
66
- }
67
- # optionally set the latest block to stop searching for the delivered events
68
-
69
- EVENT_ARGUMENTS = "args"
70
- DATA = "data"
71
- IPFS_LINKS_SERIES_NAME = "ipfs_links"
72
- BACKOFF_FACTOR = 1
73
- STATUS_FORCELIST = [404, 500, 502, 503, 504]
74
- DEFAULT_FILENAME = "tools.parquet"
75
- ABI_ERROR = "The event signature did not match the provided ABI"
76
- # HTTP_TIMEOUT = 10
77
- # Increasing when ipfs is slow
78
- HTTP_TIMEOUT = 15
79
-
80
- IRRELEVANT_TOOLS = [
81
- "openai-text-davinci-002",
82
- "openai-text-davinci-003",
83
- "openai-gpt-3.5-turbo",
84
- "openai-gpt-4",
85
- "stabilityai-stable-diffusion-v1-5",
86
- "stabilityai-stable-diffusion-xl-beta-v2-2-2",
87
- "stabilityai-stable-diffusion-512-v2-1",
88
- "stabilityai-stable-diffusion-768-v2-1",
89
- "deepmind-optimization-strong",
90
- "deepmind-optimization",
91
- ]
92
- # this is how frequently we will keep a snapshot of the progress so far in terms of blocks' batches
93
- # for example, the value 1 means that for every `BLOCKS_CHUNK_SIZE` blocks that we search,
94
- # we also store the snapshot
95
- SNAPSHOT_RATE = 10
96
- NUM_WORKERS = 10
97
- GET_CONTENTS_BATCH_SIZE = 1000
98
-
99
-
100
- class TimestampedRetry(Retry):
101
- def increment(self, *args, **kwargs):
102
- print(f"Retry attempt at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
103
- return super().increment(*args, **kwargs)
104
-
105
-
106
- def create_session() -> requests.Session:
107
- """Create a session with a retry strategy."""
108
- session = requests.Session()
109
- retry_strategy = TimestampedRetry(
110
- total=N_IPFS_RETRIES,
111
- backoff_factor=BACKOFF_FACTOR,
112
- status_forcelist=STATUS_FORCELIST,
113
- )
114
- adapter = HTTPAdapter(max_retries=retry_strategy)
115
- for protocol in (HTTP, HTTPS):
116
- session.mount(protocol, adapter)
117
-
118
- return session
119
-
120
-
121
- def request(
122
- session: requests.Session, url: str, timeout: int = HTTP_TIMEOUT
123
- ) -> Optional[requests.Response]:
124
- """Perform a request with a session."""
125
- try:
126
- response = session.get(url, timeout=timeout)
127
- response.raise_for_status()
128
- except requests.exceptions.HTTPError as exc:
129
- tqdm.write(f"HTTP error occurred: {exc}.")
130
- except Exception as exc:
131
- tqdm.write(f"Unexpected error occurred: {exc}.")
132
- else:
133
- return response
134
- return None
135
-
136
-
137
- def parse_ipfs_response(
138
- session: requests.Session,
139
- url: str,
140
- event: MechEvent,
141
- event_name: MechEventName,
142
- response: requests.Response,
143
- ) -> Optional[Dict[str, str]]:
144
- """Parse a response from IPFS."""
145
- try:
146
- return response.json()
147
- except requests.exceptions.JSONDecodeError:
148
- # this is a workaround because the `metadata.json` file was introduced and removed multiple times
149
- if event_name == MechEvent.REQUEST and url != event.ipfs_request_link:
150
- url = event.ipfs_request_link
151
- response = request(session, url)
152
- if response is None:
153
- tqdm.write(f"Skipping {event=}.")
154
- return None
155
-
156
- try:
157
- return response.json()
158
- except requests.exceptions.JSONDecodeError:
159
- pass
160
-
161
- tqdm.write(f"Failed to parse response into json for {url=}.")
162
- return None
163
-
164
-
165
- def parse_ipfs_tools_content(
166
- raw_content: Dict[str, str], event: MechEvent, event_name: MechEventName
167
- ) -> Optional[Union[MechRequest, MechResponse]]:
168
- """Parse tools content from IPFS."""
169
- struct = EVENT_TO_MECH_STRUCT.get(event_name)
170
- raw_content[REQUEST_ID] = str(event.requestId)
171
- raw_content[BLOCK_FIELD] = str(event.for_block)
172
- raw_content["sender"] = str(event.sender)
173
-
174
- try:
175
- mech_response = struct(**raw_content)
176
- except (ValueError, TypeError, KeyError):
177
- tqdm.write(f"Could not parse {limit_text(str(raw_content))}")
178
- return None
179
-
180
- if event_name == MechEventName.REQUEST and mech_response.tool in IRRELEVANT_TOOLS:
181
- return None
182
-
183
- return mech_response
184
-
185
-
186
- def parse_json_events(json_events: dict, keys_to_traverse: List[int]) -> pd.DataFrame:
187
- """Function to parse the mech info in a json format"""
188
- all_records = []
189
- for key in keys_to_traverse:
190
- try:
191
- json_input = json_events[key]
192
- output = {}
193
- output["request_id"] = json_input["requestId"]
194
- output["request_block"] = json_input["blockNumber"]
195
- output["request_time"] = transform_timestamp_to_datetime(
196
- int(json_input["blockTimestamp"])
197
- )
198
- output["tx_hash"] = json_input["transactionHash"]
199
- output["prompt_request"] = json_input["ipfsContents"]["prompt"]
200
- output["tool"] = json_input["ipfsContents"]["tool"]
201
- output["nonce"] = json_input["ipfsContents"]["nonce"]
202
- output["trader_address"] = json_input["sender"]
203
- output["deliver_block"] = json_input["deliver"]["blockNumber"]
204
- error_value, error_message, prediction_params = get_result_values(
205
- json_input["deliver"]["ipfsContents"]["result"]
206
- )
207
- error_message_value = json_input.get("error_message", error_message)
208
- output["error"] = error_value
209
- output["error_message"] = error_message_value
210
- output["prompt_response"] = json_input["deliver"]["ipfsContents"]["prompt"]
211
- output["mech_address"] = json_input["deliver"]["sender"]
212
- p_yes_value, p_no_value, confidence_value, info_utility_value = (
213
- get_prediction_values(prediction_params)
214
- )
215
- output["p_yes"] = p_yes_value
216
- output["p_no"] = p_no_value
217
- output["confidence"] = confidence_value
218
- output["info_utility"] = info_utility_value
219
- output["vote"] = get_vote(p_yes_value, p_no_value)
220
- output["win_probability"] = get_win_probability(p_yes_value, p_no_value)
221
- all_records.append(output)
222
- except Exception as e:
223
- print(e)
224
- print(f"Error parsing the key ={key}. Noted as error")
225
- output["error"] = 1
226
- output["error_message"] = "Response parsing error"
227
- output["p_yes"] = None
228
- output["p_no"] = None
229
- output["confidence"] = None
230
- output["info_utility"] = None
231
- output["vote"] = None
232
- output["win_probability"] = None
233
- all_records.append(output)
234
-
235
- return pd.DataFrame.from_dict(all_records, orient="columns")
236
-
237
-
238
- def transform_request(contents: pd.DataFrame) -> pd.DataFrame:
239
- """Transform the requests dataframe."""
240
- return clean(contents)
241
-
242
-
243
- def transform_deliver(contents: pd.DataFrame) -> pd.DataFrame:
244
- """Transform the delivers dataframe."""
245
- unpacked_result = pd.json_normalize(contents.result)
246
- # # drop result column if it exists
247
- if "result" in unpacked_result.columns:
248
- unpacked_result.drop(columns=["result"], inplace=True)
249
-
250
- # drop prompt column if it exists
251
- if "prompt" in unpacked_result.columns:
252
- unpacked_result.drop(columns=["prompt"], inplace=True)
253
-
254
- # rename prompt column to prompt_deliver
255
- unpacked_result.rename(columns={"prompt": "prompt_deliver"}, inplace=True)
256
- contents = pd.concat((contents, unpacked_result), axis=1)
257
-
258
- if "result" in contents.columns:
259
- contents.drop(columns=["result"], inplace=True)
260
-
261
- if "prompt" in contents.columns:
262
- contents.drop(columns=["prompt"], inplace=True)
263
-
264
- return clean(contents)
265
-
266
-
267
- def parse_store_json_events_parallel(json_events: Dict[str, Any], output_filename: str):
268
- total_nr_events = len(json_events)
269
- ids_to_traverse = list(json_events.keys())
270
- print(f"Parsing {total_nr_events} events")
271
- contents = []
272
- with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
273
- futures = []
274
- for i in range(0, total_nr_events, GET_CONTENTS_BATCH_SIZE):
275
- futures.append(
276
- executor.submit(
277
- parse_json_events,
278
- json_events,
279
- ids_to_traverse[i : i + GET_CONTENTS_BATCH_SIZE],
280
- )
281
- )
282
-
283
- for future in tqdm(
284
- as_completed(futures),
285
- total=len(futures),
286
- desc=f"Fetching json contents",
287
- ):
288
- current_mech_contents = future.result()
289
- contents.append(current_mech_contents)
290
-
291
- tools = pd.concat(contents, ignore_index=True)
292
- print(f"Adding market creators info. Length of the tools file = {len(tools)}")
293
- tools = add_market_creator(tools)
294
- print(
295
- f"Length of the tools dataframe after adding market creators info= {len(tools)}"
296
- )
297
- print(tools.info())
298
- try:
299
- if "result" in tools.columns:
300
- tools = tools.drop(columns=["result"])
301
- tools.to_parquet(DATA_DIR / output_filename, index=False)
302
- except Exception as e:
303
- print(f"Failed to write tools data: {e}")
304
-
305
- return tools
306
-
307
-
308
- def generate_tools_file(input_filename: str, output_filename: str):
309
- """Function to parse the json mech events and generate the parquet tools file"""
310
- try:
311
- with open(JSON_DATA_DIR / input_filename, "r") as file:
312
- file_contents = json.load(file)
313
- parse_store_json_events_parallel(file_contents, output_filename)
314
- except Exception as e:
315
- print(f"An Exception happened while parsing the json events {e}")
316
-
317
-
318
- if __name__ == "__main__":
319
-
320
- generate_tools_file()
scripts/tools_metrics.py DELETED
@@ -1,95 +0,0 @@
1
- import pandas as pd
2
- from typing import List
3
- from utils import TMP_DIR, INC_TOOLS, DATA_DIR
4
-
5
-
6
- def get_error_data_by_market(
7
- tools_df: pd.DataFrame, inc_tools: List[str]
8
- ) -> pd.DataFrame:
9
- """Gets the error data for the given tools and calculates the error percentage."""
10
- tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
11
- error = (
12
- tools_inc.groupby(
13
- ["tool", "request_month_year_week", "market_creator", "error"], sort=False
14
- )
15
- .size()
16
- .unstack()
17
- .fillna(0)
18
- .reset_index()
19
- )
20
- error["error_perc"] = (error[1] / (error[0] + error[1])) * 100
21
- error["total_requests"] = error[0] + error[1]
22
- return error
23
-
24
-
25
- def get_tool_winning_rate_by_market(
26
- tools_df: pd.DataFrame, inc_tools: List[str]
27
- ) -> pd.DataFrame:
28
- """Gets the tool winning rate data for the given tools by market and calculates the winning percentage."""
29
- tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
30
- tools_non_error = tools_inc[tools_inc["error"] != 1]
31
- tools_non_error.loc[:, "currentAnswer"] = tools_non_error["currentAnswer"].replace(
32
- {"no": "No", "yes": "Yes"}
33
- )
34
- tools_non_error = tools_non_error[
35
- tools_non_error["currentAnswer"].isin(["Yes", "No"])
36
- ]
37
- tools_non_error = tools_non_error[tools_non_error["vote"].isin(["Yes", "No"])]
38
- tools_non_error["win"] = (
39
- tools_non_error["currentAnswer"] == tools_non_error["vote"]
40
- ).astype(int)
41
- tools_non_error.columns = tools_non_error.columns.astype(str)
42
- wins = (
43
- tools_non_error.groupby(
44
- ["tool", "request_month_year_week", "market_creator", "win"], sort=False
45
- )
46
- .size()
47
- .unstack()
48
- .fillna(0)
49
- )
50
- wins["win_perc"] = (wins[1] / (wins[0] + wins[1])) * 100
51
- wins.reset_index(inplace=True)
52
- wins["total_request"] = wins[0] + wins[1]
53
- wins.columns = wins.columns.astype(str)
54
- # Convert request_month_year_week to string and explicitly set type for Altair
55
- # wins["request_month_year_week"] = wins["request_month_year_week"].astype(str)
56
- return wins
57
-
58
-
59
- def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
60
- tools["request_time"] = pd.to_datetime(tools["request_time"])
61
- tools = tools.sort_values(by="request_time", ascending=True)
62
-
63
- tools["request_month_year_week"] = (
64
- pd.to_datetime(tools["request_time"])
65
- .dt.to_period("W")
66
- .dt.start_time.dt.strftime("%b-%d-%Y")
67
- )
68
- # preparing the tools graph
69
- # adding the total
70
- tools_all = tools.copy(deep=True)
71
- tools_all["market_creator"] = "all"
72
- # merging both dataframes
73
- tools = pd.concat([tools, tools_all], ignore_index=True)
74
- tools = tools.sort_values(by="request_time", ascending=True)
75
- return tools
76
-
77
-
78
- def compute_tools_based_datasets():
79
- try:
80
- tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
81
- tools_df = prepare_tools(tools_df)
82
- except Exception as e:
83
- print(f"Error reading old tools parquet file {e}")
84
- return None
85
- # error by markets
86
- error_by_markets = get_error_data_by_market(tools_df=tools_df, inc_tools=INC_TOOLS)
87
- error_by_markets.to_parquet(DATA_DIR / "error_by_markets.parquet", index=False)
88
- try:
89
- tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
90
- tools_df = prepare_tools(tools_df)
91
- except Exception as e:
92
- print(f"Error reading old tools parquet file {e}")
93
- return None
94
- winning_df = get_tool_winning_rate_by_market(tools_df, inc_tools=INC_TOOLS)
95
- winning_df.to_parquet(DATA_DIR / "winning_df.parquet", index=False)
scripts/update_tools_accuracy.py DELETED
@@ -1,120 +0,0 @@
1
- import os
2
- import pandas as pd
3
- import ipfshttpclient
4
- from utils import INC_TOOLS
5
- from typing import List
6
- from utils import TMP_DIR, DATA_DIR
7
-
8
- ACCURACY_FILENAME = "tools_accuracy.csv"
9
- OLD_IPFS_SERVER = "/dns/registry.autonolas.tech/tcp/443/https"
10
- IPFS_SERVER = "/dns/registry.gcp.autonolas.tech/tcp/443/https"
11
-
12
-
13
- def update_tools_accuracy(
14
- tools_acc: pd.DataFrame, tools_df: pd.DataFrame, inc_tools: List[str]
15
- ) -> pd.DataFrame:
16
- """To compute/update the latest accuracy information for the different mech tools"""
17
-
18
- # computation of the accuracy information
19
- tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
20
- # filtering errors
21
- tools_non_error = tools_inc[tools_inc["error"] != 1]
22
- tools_non_error.loc[:, "currentAnswer"] = tools_non_error["currentAnswer"].replace(
23
- {"no": "No", "yes": "Yes"}
24
- )
25
- tools_non_error = tools_non_error[
26
- tools_non_error["currentAnswer"].isin(["Yes", "No"])
27
- ]
28
- tools_non_error = tools_non_error[tools_non_error["vote"].isin(["Yes", "No"])]
29
- tools_non_error["win"] = (
30
- tools_non_error["currentAnswer"] == tools_non_error["vote"]
31
- ).astype(int)
32
- tools_non_error.columns = tools_non_error.columns.astype(str)
33
-
34
- wins = tools_non_error.groupby(["tool", "win"]).size().unstack().fillna(0)
35
- wins["tool_accuracy"] = (wins[1] / (wins[0] + wins[1])) * 100
36
- wins.reset_index(inplace=True)
37
- wins["total_requests"] = wins[0] + wins[1]
38
- wins.columns = wins.columns.astype(str)
39
- wins = wins[["tool", "tool_accuracy", "total_requests"]]
40
-
41
- no_timeline_info = False
42
- try:
43
- timeline = tools_non_error.groupby(["tool"])["request_time"].agg(["min", "max"])
44
- print("timeline dataset")
45
- print(timeline.head())
46
- acc_info = wins.merge(timeline, how="left", on="tool")
47
- except:
48
- print("NO REQUEST TIME INFORMATION AVAILABLE")
49
- no_timeline_info = True
50
- acc_info = wins
51
-
52
- if tools_acc is None:
53
- print("Creating accuracy file for the first time")
54
- return acc_info
55
-
56
- # update the old information
57
- print("Updating accuracy information")
58
- tools_to_update = list(acc_info["tool"].values)
59
- print("tools to update")
60
- print(tools_to_update)
61
- existing_tools = list(tools_acc["tool"].values)
62
- # dt.strftime("%Y-%m-%d %H:%M:%S")
63
- acc_info["min"] = acc_info["min"].dt.strftime("%Y-%m-%d %H:%M:%S")
64
- acc_info["max"] = acc_info["max"].dt.strftime("%Y-%m-%d %H:%M:%S")
65
- for tool in tools_to_update:
66
- new_accuracy = acc_info[acc_info["tool"] == tool]["tool_accuracy"].values[0]
67
- new_volume = acc_info[acc_info["tool"] == tool]["total_requests"].values[0]
68
- if no_timeline_info:
69
- new_min_timeline = None
70
- new_max_timeline = None
71
- else:
72
- new_min_timeline = acc_info[acc_info["tool"] == tool]["min"].values[0]
73
- new_max_timeline = acc_info[acc_info["tool"] == tool]["max"].values[0]
74
- if tool in existing_tools:
75
-
76
- tools_acc.loc[tools_acc["tool"] == tool, "tool_accuracy"] = new_accuracy
77
- tools_acc.loc[tools_acc["tool"] == tool, "total_requests"] = new_volume
78
- tools_acc.loc[tools_acc["tool"] == tool, "min"] = new_min_timeline
79
- tools_acc.loc[tools_acc["tool"] == tool, "max"] = new_max_timeline
80
- else:
81
- # new tool to add to the file
82
- # tool,tool_accuracy,total_requests,min,max
83
- new_row = {
84
- "tool": tool,
85
- "tool_accuracy": new_accuracy,
86
- "total_requests": new_volume,
87
- "min": new_min_timeline,
88
- "max": new_max_timeline,
89
- }
90
- tools_acc = pd.concat([tools_acc, pd.DataFrame(new_row)], ignore_index=True)
91
-
92
- print(tools_acc)
93
- return tools_acc
94
-
95
-
96
- def compute_tools_accuracy():
97
- print("Computing accuracy of tools")
98
- print("Reading tools parquet file")
99
- tools = pd.read_parquet(TMP_DIR / "tools.parquet")
100
- # Computing tools accuracy information
101
- print("Computing tool accuracy information")
102
- # Check if the file exists
103
- acc_data = None
104
- if os.path.exists(DATA_DIR / ACCURACY_FILENAME):
105
- acc_data = pd.read_csv(DATA_DIR / ACCURACY_FILENAME)
106
- acc_data = update_tools_accuracy(acc_data, tools, INC_TOOLS)
107
-
108
- # save acc_data into a CSV file
109
- print("Saving into a csv file")
110
- acc_data.to_csv(DATA_DIR / ACCURACY_FILENAME, index=False)
111
- print(acc_data.head())
112
-
113
- # save the data into IPFS
114
- client = ipfshttpclient.connect(IPFS_SERVER)
115
- result = client.add(DATA_DIR / ACCURACY_FILENAME)
116
- print(f"HASH of the tools accuracy file: {result['Hash']}")
117
-
118
-
119
- if __name__ == "__main__":
120
- compute_tools_accuracy()
scripts/utils.py DELETED
@@ -1,431 +0,0 @@
1
- import json
2
- import os
3
- import time
4
- from typing import List, Any, Optional, Union, Tuple
5
- import numpy as np
6
- import pandas as pd
7
- import gc
8
- import re
9
- from dataclasses import dataclass
10
- from datetime import datetime, timezone
11
- from pathlib import Path
12
- from enum import Enum
13
- from string import Template
14
- from json.decoder import JSONDecodeError
15
-
16
- DEFAULT_MECH_FEE = 0.01
17
- REDUCE_FACTOR = 0.25
18
- SLEEP = 0.5
19
- REQUEST_ID_FIELD = "request_id"
20
- SCRIPTS_DIR = Path(__file__).parent
21
- ROOT_DIR = SCRIPTS_DIR.parent
22
- DATA_DIR = ROOT_DIR / "data"
23
- JSON_DATA_DIR = ROOT_DIR / "json_data"
24
- HIST_DIR = ROOT_DIR / "historical_data"
25
- TMP_DIR = ROOT_DIR / "tmp"
26
- BLOCK_FIELD = "block"
27
- CID_PREFIX = "f01701220"
28
- REQUEST_ID = "requestId"
29
- REQUEST_SENDER = "sender"
30
- PROMPT_FIELD = "prompt"
31
- HTTP = "http://"
32
- HTTPS = HTTP[:4] + "s" + HTTP[4:]
33
- FORMAT_UPDATE_BLOCK_NUMBER = 30411638
34
- INVALID_ANSWER_HEX = (
35
- "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
36
- )
37
- OLD_IPFS_ADDRESS = "https://gateway.autonolas.tech/ipfs/"
38
- IPFS_ADDRESS = "https://gateway.gcp.autonolas.tech/ipfs/"
39
-
40
- INC_TOOLS = [
41
- "prediction-online",
42
- "prediction-offline",
43
- "claude-prediction-online",
44
- "claude-prediction-offline",
45
- "prediction-offline-sme",
46
- "prediction-online-sme",
47
- "prediction-request-rag",
48
- "prediction-request-reasoning",
49
- "prediction-url-cot-claude",
50
- "prediction-request-rag-claude",
51
- "prediction-request-reasoning-claude",
52
- "superforcaster",
53
- ]
54
- SUBGRAPH_URL = Template(
55
- """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/7s9rGBffUTL8kDZuxvvpuc46v44iuDarbrADBFw5uVp2"""
56
- )
57
- OMEN_SUBGRAPH_URL = Template(
58
- """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"""
59
- )
60
- NETWORK_SUBGRAPH_URL = Template(
61
- """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/FxV6YUix58SpYmLBwc9gEHkwjfkqwe1X5FJQjn8nKPyA"""
62
- )
63
- # THEGRAPH_ENDPOINT = (
64
- # "https://api.studio.thegraph.com/query/78829/mech-predict/version/latest"
65
- # )
66
- MECH_SUBGRAPH_URL = Template(
67
- """https://gateway.thegraph.com/api/${subgraph_api_key}/subgraphs/id/4YGoX3iXUni1NBhWJS5xyKcntrAzssfytJK7PQxxQk5g"""
68
- )
69
-
70
- SUBGRAPH_API_KEY = os.environ.get("SUBGRAPH_API_KEY", None)
71
- RPC = os.environ.get("RPC", None)
72
-
73
-
74
- class MechEventName(Enum):
75
- """The mech's event names."""
76
-
77
- REQUEST = "Request"
78
- DELIVER = "Deliver"
79
-
80
-
81
- @dataclass
82
- class MechEvent:
83
- """A mech's on-chain event representation."""
84
-
85
- for_block: int
86
- requestId: int
87
- data: bytes
88
- sender: str
89
-
90
- def _ipfs_link(self) -> Optional[str]:
91
- """Get the ipfs link for the data."""
92
- return f"{IPFS_ADDRESS}{CID_PREFIX}{self.data.hex()}"
93
-
94
- @property
95
- def ipfs_request_link(self) -> Optional[str]:
96
- """Get the IPFS link for the request."""
97
- return f"{self._ipfs_link()}/metadata.json"
98
-
99
- @property
100
- def ipfs_deliver_link(self) -> Optional[str]:
101
- """Get the IPFS link for the deliver."""
102
- if self.requestId is None:
103
- return None
104
- return f"{self._ipfs_link()}/{self.requestId}"
105
-
106
- def ipfs_link(self, event_name: MechEventName) -> Optional[str]:
107
- """Get the ipfs link based on the event."""
108
- if event_name == MechEventName.REQUEST:
109
- if self.for_block < FORMAT_UPDATE_BLOCK_NUMBER:
110
- return self._ipfs_link()
111
- return self.ipfs_request_link
112
- if event_name == MechEventName.DELIVER:
113
- return self.ipfs_deliver_link
114
- return None
115
-
116
-
117
- @dataclass(init=False)
118
- class MechRequest:
119
- """A structure for a request to a mech."""
120
-
121
- request_id: Optional[int]
122
- request_block: Optional[int]
123
- prompt_request: Optional[str]
124
- tool: Optional[str]
125
- nonce: Optional[str]
126
- trader_address: Optional[str]
127
-
128
- def __init__(self, **kwargs: Any) -> None:
129
- """Initialize the request ignoring extra keys."""
130
- self.request_id = int(kwargs.pop(REQUEST_ID, 0))
131
- self.request_block = int(kwargs.pop(BLOCK_FIELD, 0))
132
- self.prompt_request = kwargs.pop(PROMPT_FIELD, None)
133
- self.tool = kwargs.pop("tool", None)
134
- self.nonce = kwargs.pop("nonce", None)
135
- self.trader_address = kwargs.pop("sender", None)
136
-
137
-
138
- @dataclass(init=False)
139
- class PredictionResponse:
140
- """A response of a prediction."""
141
-
142
- p_yes: float
143
- p_no: float
144
- confidence: float
145
- info_utility: float
146
- vote: Optional[str]
147
- win_probability: Optional[float]
148
-
149
- def __init__(self, **kwargs: Any) -> None:
150
- """Initialize the mech's prediction ignoring extra keys."""
151
- try:
152
- self.p_yes = float(kwargs.pop("p_yes"))
153
- self.p_no = float(kwargs.pop("p_no"))
154
- self.confidence = float(kwargs.pop("confidence"))
155
- self.info_utility = float(kwargs.pop("info_utility"))
156
- self.win_probability = 0
157
-
158
- # Validate probabilities
159
- probabilities = {
160
- "p_yes": self.p_yes,
161
- "p_no": self.p_no,
162
- "confidence": self.confidence,
163
- "info_utility": self.info_utility,
164
- }
165
-
166
- for name, prob in probabilities.items():
167
- if not 0 <= prob <= 1:
168
- raise ValueError(f"{name} probability is out of bounds: {prob}")
169
-
170
- if self.p_yes + self.p_no != 1:
171
- raise ValueError(
172
- f"Sum of p_yes and p_no is not 1: {self.p_yes} + {self.p_no}"
173
- )
174
-
175
- self.vote = self.get_vote()
176
- self.win_probability = self.get_win_probability()
177
-
178
- except KeyError as e:
179
- raise KeyError(f"Missing key in PredictionResponse: {e}")
180
- except ValueError as e:
181
- raise ValueError(f"Invalid value in PredictionResponse: {e}")
182
-
183
- def get_vote(self) -> Optional[str]:
184
- """Return the vote."""
185
- if self.p_no == self.p_yes:
186
- return None
187
- if self.p_no > self.p_yes:
188
- return "No"
189
- return "Yes"
190
-
191
- def get_win_probability(self) -> Optional[float]:
192
- """Return the probability estimation for winning with vote."""
193
- return max(self.p_no, self.p_yes)
194
-
195
-
196
- @dataclass(init=False)
197
- class MechResponse:
198
- """A structure for the response of a mech."""
199
-
200
- request_id: int
201
- deliver_block: Optional[int]
202
- result: Optional[PredictionResponse]
203
- error: Optional[str]
204
- error_message: Optional[str]
205
- prompt_response: Optional[str]
206
- mech_address: Optional[str]
207
-
208
- def __init__(self, **kwargs: Any) -> None:
209
- """Initialize the mech's response ignoring extra keys."""
210
- self.error = kwargs.get("error", None)
211
- self.request_id = int(kwargs.get(REQUEST_ID, 0))
212
- self.deliver_block = int(kwargs.get(BLOCK_FIELD, 0))
213
- self.result = kwargs.get("result", None)
214
- self.prompt_response = kwargs.get(PROMPT_FIELD, None)
215
- self.mech_address = kwargs.get("sender", None)
216
-
217
- if self.result != "Invalid response":
218
- self.error_message = kwargs.get("error_message", None)
219
-
220
- try:
221
- if isinstance(self.result, str):
222
- kwargs = json.loads(self.result)
223
- self.result = PredictionResponse(**kwargs)
224
- self.error = 0
225
-
226
- except JSONDecodeError:
227
- self.error_message = "Response parsing error"
228
- self.error = 1
229
-
230
- except Exception as e:
231
- self.error_message = str(e)
232
- self.error = 1
233
-
234
- else:
235
- self.error_message = "Invalid response from tool"
236
- self.error = 1
237
- self.result = None
238
-
239
-
240
- EVENT_TO_MECH_STRUCT = {
241
- MechEventName.REQUEST: MechRequest,
242
- MechEventName.DELIVER: MechResponse,
243
- }
244
-
245
-
246
- def transform_to_datetime(x):
247
- return datetime.fromtimestamp(int(x), tz=timezone.utc)
248
-
249
-
250
- def measure_execution_time(func):
251
- def wrapper(*args, **kwargs):
252
- start_time = time.time()
253
- result = func(*args, **kwargs)
254
- end_time = time.time()
255
- execution_time = end_time - start_time
256
- print(f"Execution time: {execution_time:.6f} seconds")
257
- return result
258
-
259
- return wrapper
260
-
261
-
262
- def limit_text(text: str, limit: int = 200) -> str:
263
- """Limit the given text"""
264
- if len(text) > limit:
265
- return f"{text[:limit]}..."
266
- return text
267
-
268
-
269
- def check_for_dicts(df: pd.DataFrame) -> List[str]:
270
- """Check for columns that contain dictionaries."""
271
- dict_columns = []
272
- for column in df.columns:
273
- if df[column].apply(lambda x: isinstance(x, dict)).any():
274
- dict_columns.append(column)
275
- return dict_columns
276
-
277
-
278
- def drop_dict_rows(df: pd.DataFrame, dict_columns: List[str]) -> pd.DataFrame:
279
- """Drop rows that contain dictionaries."""
280
- for column in dict_columns:
281
- df = df[~df[column].apply(lambda x: isinstance(x, dict))]
282
- return df
283
-
284
-
285
- def clean(df: pd.DataFrame) -> pd.DataFrame:
286
- """Clean the dataframe."""
287
- dict_columns = check_for_dicts(df)
288
- df = drop_dict_rows(df, dict_columns)
289
- cleaned = df.drop_duplicates()
290
- cleaned[REQUEST_ID_FIELD] = cleaned[REQUEST_ID_FIELD].astype("str")
291
- return cleaned
292
-
293
-
294
- def gen_event_filename(event_name: MechEventName) -> str:
295
- """Generate the filename of an event."""
296
- return f"{event_name.value.lower()}s.parquet"
297
-
298
-
299
- def read_n_last_lines(filename: str, n: int = 1) -> str:
300
- """Return the `n` last lines' content of a file."""
301
- num_newlines = 0
302
- with open(filename, "rb") as f:
303
- try:
304
- f.seek(-2, os.SEEK_END)
305
- while num_newlines < n:
306
- f.seek(-2, os.SEEK_CUR)
307
- if f.read(1) == b"\n":
308
- num_newlines += 1
309
- except OSError:
310
- f.seek(0)
311
- last_line = f.readline().decode()
312
- return last_line
313
-
314
-
315
- def get_question(text: str) -> str:
316
- """Get the question from a text."""
317
- # Regex to find text within double quotes
318
- pattern = r'"([^"]*)"'
319
-
320
- # Find all occurrences
321
- questions = re.findall(pattern, text)
322
-
323
- # Assuming you want the first question if there are multiple
324
- question = questions[0] if questions else None
325
-
326
- return question
327
-
328
-
329
- def current_answer(text: str, fpmms: pd.DataFrame) -> Optional[str]:
330
- """Get the current answer for a question."""
331
- row = fpmms[fpmms["title"] == text]
332
- if row.shape[0] == 0:
333
- return None
334
- return row["currentAnswer"].values[0]
335
-
336
-
337
- def convert_hex_to_int(x: Union[str, float]) -> Union[int, float]:
338
- """Convert hex to int"""
339
- if isinstance(x, float):
340
- return np.nan
341
- if isinstance(x, str):
342
- if x == INVALID_ANSWER_HEX:
343
- return -1
344
- return int(x, 16)
345
-
346
-
347
- def wei_to_unit(wei: int) -> float:
348
- """Converts wei to currency unit."""
349
- return wei / 10**18
350
-
351
-
352
- def get_vote(p_yes, p_no) -> Optional[str]:
353
- """Return the vote."""
354
- if p_no == p_yes:
355
- return None
356
- if p_no > p_yes:
357
- return "No"
358
- return "Yes"
359
-
360
-
361
- def get_win_probability(p_yes, p_no) -> Optional[float]:
362
- """Return the probability estimation for winning with vote."""
363
- return max(p_no, p_yes)
364
-
365
-
366
- def get_result_values(result: str) -> Tuple:
367
- if result == "Invalid response":
368
- return 1, "Invalid response from tool", None
369
- error_message = None
370
- params = None
371
- try:
372
- if isinstance(result, str):
373
- params = json.loads(result)
374
- error_value = 0
375
-
376
- except JSONDecodeError:
377
- error_message = "Response parsing error"
378
- error_value = 1
379
-
380
- except Exception as e:
381
- error_message = str(e)
382
- error_value = 1
383
- return error_value, error_message, params
384
-
385
-
386
- def get_prediction_values(params: dict) -> Tuple:
387
- p_yes = float(params.pop("p_yes"))
388
- p_no = float(params.pop("p_no"))
389
- confidence = float(params.pop("confidence"))
390
- info_utility = float(params.pop("info_utility"))
391
- return p_yes, p_no, confidence, info_utility
392
-
393
-
394
- def to_content(q: str) -> dict[str, Any]:
395
- """Wrap the given query string into a GraphQL request payload under the `query` key."""
396
- finalized_query = {
397
- "query": q,
398
- "variables": None,
399
- "extensions": {"headers": None},
400
- }
401
- return finalized_query
402
-
403
-
404
- def read_parquet_files(tools_filename: str, trades_filename: str):
405
- # Check if tools.parquet is in the same directory
406
- try:
407
- tools = pd.read_parquet(DATA_DIR / tools_filename)
408
-
409
- # make sure trader_address is in the columns
410
- assert "trader_address" in tools.columns, "trader_address column not found"
411
-
412
- # lowercase and strip trader_address
413
- tools["trader_address"] = tools["trader_address"].str.lower().str.strip()
414
-
415
- # drop duplicates
416
- tools.drop_duplicates(inplace=True)
417
-
418
- print(f"{tools_filename} loaded")
419
- except FileNotFoundError:
420
- print(f"{tools_filename} not found. Please run tools.py first.")
421
- return
422
- try:
423
- fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
424
- fpmmTrades["trader_address"] = (
425
- fpmmTrades["trader_address"].str.lower().str.strip()
426
- )
427
- except FileNotFoundError:
428
- print(f"{trades_filename} not found.")
429
- return
430
-
431
- return tools, fpmmTrades
 
 
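The prediction helpers removed above (`get_result_values`, `get_prediction_values`, `get_vote`, `get_win_probability`) all operate on the JSON payload a tool delivers. A minimal usage sketch, assuming those helpers are in scope and using an illustrative payload with the four standard fields:

```python
import json

# Illustrative deliver payload (values are made up; p_yes + p_no must sum to 1).
raw_result = json.dumps(
    {"p_yes": 0.75, "p_no": 0.25, "confidence": 0.8, "info_utility": 0.5}
)

# Parse the raw string and get the error flag, error message and parsed params.
error_value, error_message, params = get_result_values(raw_result)
assert error_value == 0 and error_message is None

# Extract the four prediction fields and derive the vote and win probability.
p_yes, p_no, confidence, info_utility = get_prediction_values(params)
print(get_vote(p_yes, p_no))             # -> "Yes"
print(get_win_probability(p_yes, p_no))  # -> 0.75
```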
scripts/web3_utils.py DELETED
@@ -1,276 +0,0 @@
1
- import sys
2
- import pickle
3
- import gc
4
- import time
5
- import requests
6
- from functools import partial
7
- from string import Template
8
- from datetime import datetime
9
- from concurrent.futures import ThreadPoolExecutor
10
- from collections import defaultdict
11
- from tqdm import tqdm
12
- from web3 import Web3
13
- from typing import Any, Optional
14
- from web3.types import BlockParams
15
- from utils import (
16
- JSON_DATA_DIR,
17
- DATA_DIR,
18
- SUBGRAPH_API_KEY,
19
- to_content,
20
- SUBGRAPH_URL,
21
- HIST_DIR,
22
- TMP_DIR,
23
- )
24
- from queries import conditional_tokens_gc_user_query, omen_xdai_trades_query
25
- import pandas as pd
26
-
27
- REDUCE_FACTOR = 0.25
28
- SLEEP = 0.5
29
- QUERY_BATCH_SIZE = 1000
30
- FPMM_QS_CREATOR = "0x89c5cc945dd550bcffb72fe42bff002429f46fec"
31
- FPMM_PEARL_CREATOR = "0xFfc8029154ECD55ABED15BD428bA596E7D23f557"
32
- LATEST_BLOCK: Optional[int] = None
33
- LATEST_BLOCK_NAME: BlockParams = "latest"
34
- BLOCK_DATA_NUMBER = "number"
35
- BLOCKS_CHUNK_SIZE = 10_000
36
- N_IPFS_RETRIES = 4
37
- N_RPC_RETRIES = 100
38
- RPC_POLL_INTERVAL = 0.05
39
- SUBGRAPH_POLL_INTERVAL = 0.05
40
- IPFS_POLL_INTERVAL = 0.2 # 5 calls per second
41
- OMEN_SUBGRAPH_URL = Template(
42
- """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"""
43
- )
44
-
45
- headers = {
46
- "Accept": "application/json, multipart/mixed",
47
- "Content-Type": "application/json",
48
- }
49
-
50
-
51
- def parse_args() -> str:
52
- """Parse the arguments and return the RPC."""
53
- if len(sys.argv) != 2:
54
- raise ValueError("Expected the RPC as a positional argument.")
55
- return sys.argv[1]
56
-
57
-
58
- def read_abi(abi_path: str) -> str:
59
- """Read and return the contents of the given contract ABI file."""
60
- with open(abi_path) as abi_file:
61
- return abi_file.read()
62
-
63
-
64
- def update_block_request_map(block_request_id_map: dict) -> None:
65
- print("Saving block request id map info")
66
- with open(JSON_DATA_DIR / "block_request_id_map.pickle", "wb") as handle:
67
- pickle.dump(block_request_id_map, handle, protocol=pickle.HIGHEST_PROTOCOL)
68
-
69
-
70
- def reduce_window(contract_instance, event, from_block, batch_size, latest_block):
71
- """Dynamically reduce the batch size window."""
72
- keep_fraction = 1 - REDUCE_FACTOR
73
- events_filter = contract_instance.events[event].build_filter()
74
- events_filter.fromBlock = from_block
75
- batch_size = int(batch_size * keep_fraction)
76
- events_filter.toBlock = min(from_block + batch_size, latest_block)
77
- tqdm.write(f"RPC timed out! Resizing batch size to {batch_size}.")
78
- time.sleep(SLEEP)
79
- return events_filter, batch_size
80
-
81
-
82
- def block_number_to_timestamp(block_number: int, web3: Web3) -> str:
83
- """Convert a block number to a timestamp."""
84
- block = web3.eth.get_block(block_number)
85
- timestamp = datetime.utcfromtimestamp(block["timestamp"])
86
- try:
87
- timestamp_str = timestamp.strftime("%Y-%m-%d %H:%M:%S")
88
- timestamp = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
89
- except Exception as e:
90
- timestamp = datetime.utcfromtimestamp(block["timestamp"])
91
- return timestamp.strftime("%Y-%m-%d %H:%M:%S")
92
-
93
-
94
- def parallelize_timestamp_conversion(df: pd.DataFrame, function: callable) -> list:
95
- """Parallelize the timestamp conversion."""
96
- block_numbers = df["request_block"].tolist()
97
- with ThreadPoolExecutor(max_workers=10) as executor:
98
- results = list(
99
- tqdm(executor.map(function, block_numbers), total=len(block_numbers))
100
- )
101
- return results
102
-
103
-
104
- def updating_timestamps(rpc: str, tools_filename: str):
105
- web3 = Web3(Web3.HTTPProvider(rpc))
106
-
107
- tools = pd.read_parquet(TMP_DIR / tools_filename)
108
-
109
- # Convert block number to timestamp
110
- print("Converting block number to timestamp")
111
- t_map = pickle.load(open(TMP_DIR / "t_map.pkl", "rb"))
112
- tools["request_time"] = tools["request_block"].map(t_map)
113
-
114
- no_data = tools["request_time"].isna().sum()
115
- print(f"Total rows with no request time info = {no_data}")
116
-
117
- # Identify tools with missing request_time and fill them
118
- missing_time_indices = tools[tools["request_time"].isna()].index
119
- if not missing_time_indices.empty:
120
- partial_block_number_to_timestamp = partial(
121
- block_number_to_timestamp, web3=web3
122
- )
123
- missing_timestamps = parallelize_timestamp_conversion(
124
- tools.loc[missing_time_indices], partial_block_number_to_timestamp
125
- )
126
-
127
- # Update the original DataFrame with the missing timestamps
128
- for i, timestamp in zip(missing_time_indices, missing_timestamps):
129
- tools.at[i, "request_time"] = timestamp
130
-
131
- tools["request_month_year"] = pd.to_datetime(tools["request_time"]).dt.strftime(
132
- "%Y-%m"
133
- )
134
- tools["request_month_year_week"] = (
135
- pd.to_datetime(tools["request_time"])
136
- .dt.to_period("W")
137
- .dt.start_time.dt.strftime("%b-%d-%Y")
138
- )
139
-
140
- # Save the tools data after the updates on the content
141
- print(f"Updating file {tools_filename} with timestamps")
142
- tools.to_parquet(TMP_DIR / tools_filename, index=False)
143
-
144
- # Update t_map with new timestamps
145
- new_timestamps = (
146
- tools[["request_block", "request_time"]]
147
- .dropna()
148
- .set_index("request_block")
149
- .to_dict()["request_time"]
150
- )
151
- t_map.update(new_timestamps)
152
-
153
- # filtering old timestamps
154
- cutoff_date = datetime(2024, 9, 9)
155
- filtered_map = {
156
- k: v
157
- for k, v in t_map.items()
158
- if datetime.strptime(v, "%Y-%m-%d %H:%M:%S") < cutoff_date
159
- }
160
-
161
- with open(DATA_DIR / "t_map.pkl", "wb") as f:
162
- pickle.dump(filtered_map, f)
163
-
164
- # clean and release all memory
165
- del tools
166
- del t_map
167
- gc.collect()
168
-
169
-
170
- def query_conditional_tokens_gc_subgraph(creator: str) -> dict[str, Any]:
171
- """Query the subgraph."""
172
-
173
- subgraph = SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
174
- all_results: dict[str, Any] = {"data": {"user": {"userPositions": []}}}
175
- userPositions_id_gt = ""
176
- while True:
177
- query = conditional_tokens_gc_user_query.substitute(
178
- id=creator.lower(),
179
- first=QUERY_BATCH_SIZE,
180
- userPositions_id_gt=userPositions_id_gt,
181
- )
182
- content_json = {"query": query}
183
- # print("sending query to subgraph")
184
- res = requests.post(subgraph, headers=headers, json=content_json)
185
- result_json = res.json()
186
- # print(f"result = {result_json}")
187
- user_data = result_json.get("data", {}).get("user", {})
188
-
189
- if not user_data:
190
- break
191
-
192
- user_positions = user_data.get("userPositions", [])
193
-
194
- if user_positions:
195
- all_results["data"]["user"]["userPositions"].extend(user_positions)
196
- userPositions_id_gt = user_positions[len(user_positions) - 1]["id"]
197
- else:
198
- break
199
-
200
- if len(all_results["data"]["user"]["userPositions"]) == 0:
201
- return {"data": {"user": None}}
202
-
203
- return all_results
204
-
205
-
206
- def query_omen_xdai_subgraph(
207
- trader_category: str,
208
- from_timestamp: float,
209
- to_timestamp: float,
210
- fpmm_from_timestamp: float,
211
- fpmm_to_timestamp: float,
212
- ) -> dict[str, Any]:
213
- """Query the subgraph."""
214
-
215
- omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
216
- print(f"omen_subgraph = {omen_subgraph}")
217
- grouped_results = defaultdict(list)
218
- id_gt = ""
219
- if trader_category == "quickstart":
220
- creator_id = FPMM_QS_CREATOR.lower()
221
- else: # pearl
222
- creator_id = FPMM_PEARL_CREATOR.lower()
223
-
224
- while True:
225
- query = omen_xdai_trades_query.substitute(
226
- fpmm_creator=creator_id,
227
- creationTimestamp_gte=int(from_timestamp),
228
- creationTimestamp_lte=int(to_timestamp),
229
- fpmm_creationTimestamp_gte=int(fpmm_from_timestamp),
230
- fpmm_creationTimestamp_lte=int(fpmm_to_timestamp),
231
- first=QUERY_BATCH_SIZE,
232
- id_gt=id_gt,
233
- )
234
- print(f"omen query={query}")
235
- content_json = to_content(query)
236
-
237
- res = requests.post(omen_subgraph, headers=headers, json=content_json)
238
- result_json = res.json()
239
- # print(f"result = {result_json}")
240
- user_trades = result_json.get("data", {}).get("fpmmTrades", [])
241
-
242
- if not user_trades:
243
- break
244
-
245
- for trade in user_trades:
246
- fpmm_id = trade.get("fpmm", {}).get("id")
247
- grouped_results[fpmm_id].append(trade)
248
-
249
- id_gt = user_trades[len(user_trades) - 1]["id"]
250
-
251
- all_results = {
252
- "data": {
253
- "fpmmTrades": [
254
- trade
255
- for trades_list in grouped_results.values()
256
- for trade in trades_list
257
- ]
258
- }
259
- }
260
-
261
- return all_results
262
-
263
-
264
- # def get_earliest_block(event_name: MechEventName) -> int:
265
- # """Get the earliest block number to use when filtering for events."""
266
- # filename = gen_event_filename(event_name)
267
- # if not os.path.exists(DATA_DIR / filename):
268
- # return 0
269
-
270
- # df = pd.read_parquet(DATA_DIR / filename)
271
- # block_field = f"{event_name.value.lower()}_{BLOCK_FIELD}"
272
- # earliest_block = int(df[block_field].max())
273
- # # clean and release all memory
274
- # del df
275
- # gc.collect()
276
- # return earliest_block
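Both subgraph helpers in scripts/web3_utils.py above (`query_conditional_tokens_gc_subgraph` and `query_omen_xdai_subgraph`) page through results with the same cursor loop: post a query with an `id_gt` filter, extend the accumulator, advance the cursor to the last id returned, and stop on an empty batch. A generic sketch of that pattern, with a placeholder endpoint and query template rather than the real gateway URLs and query strings used above:

```python
import requests
from string import Template

# Placeholder endpoint and query, used only to illustrate the id_gt cursor loop.
ENDPOINT = "https://example.com/subgraph"
PAGE_QUERY = Template(
    """{ items(first: $first, where: {id_gt: "$id_gt"}) { id } }"""
)
HEADERS = {"Accept": "application/json", "Content-Type": "application/json"}


def fetch_all(batch_size: int = 1000) -> list:
    """Page through a subgraph collection until an empty batch is returned."""
    results = []
    id_gt = ""
    while True:
        query = PAGE_QUERY.substitute(first=batch_size, id_gt=id_gt)
        res = requests.post(
            ENDPOINT, headers=HEADERS, json={"query": query}, timeout=30
        )
        items = res.json().get("data", {}).get("items", [])
        if not items:
            break
        results.extend(items)
        id_gt = items[-1]["id"]  # advance the cursor past the last id seen
    return results
```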