DamonDemon committed on
Commit
6f5b41f
·
1 Parent(s): bd93788

refine data

Browse files
Files changed (3) hide show
  1. app.py +31 -495
  2. assets/gtbench_results.csv +0 -23
  3. assets/uc_result.csv +6 -0
app.py CHANGED
@@ -1,501 +1,37 @@
 
1
  import gradio as gr
2
  import pandas as pd
3
- from src.display.about import (
4
- CITATION_BUTTON_LABEL,
5
- CITATION_BUTTON_TEXT,
6
- EVALUATION_QUEUE_TEXT,
7
- INTRODUCTION_TEXT,
8
- LLM_BENCHMARKS_TEXT,
9
- FAQ_TEXT,
10
- TITLE,
11
- )
12
- from src.display.css_html_js import custom_css
13
- from src.display.utils import (
14
- BENCHMARK_COLS,
15
- COLS,
16
- EVAL_COLS,
17
- EVAL_TYPES,
18
- NUMERIC_INTERVALS,
19
- TYPES,
20
- AutoEvalColumn,
21
- ModelType,
22
- fields,
23
- WeightType,
24
- Precision
25
- )
26
- from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
27
- from PIL import Image
28
- # from src.populate import get_evaluation_queue_df, get_leaderboard_df
29
- # from src.submission.submit import add_new_eval
30
- # from src.tools.collections import update_collections
31
- # from src.tools.plots import (
32
- # create_metric_plot_obj,
33
- # create_plot_df,
34
- # create_scores_df,
35
- # )
36
- from dummydatagen import dummy_data_for_plot, create_metric_plot_obj_1, dummydf
37
- import copy
38
-
39
-
40
- def restart_space():
41
- API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
42
-
43
-
44
- def add_average_col(df):
45
-
46
- always_here_cols = [
47
- "Model", "Agent", "Opponent Model", "Opponent Agent"
48
- ]
49
- desired_col = [i for i in list(df.columns) if i not in always_here_cols]
50
- newdf = df[desired_col].mean(axis=1).round(3)
51
- return newdf
52
-
53
-
54
- gtbench_raw_data = dummydf()
55
- gtbench_raw_data["Average"] = add_average_col(gtbench_raw_data)
56
-
57
- column_to_move = "Average"
58
- # Move the column to the desired index
59
- gtbench_raw_data.insert(
60
- 4, column_to_move, gtbench_raw_data.pop(column_to_move))
61
-
62
- models = list(set(gtbench_raw_data['Model']))
63
-
64
- opponent_models = list(set(gtbench_raw_data['Opponent Model']))
65
-
66
-
67
- agents = list(set(gtbench_raw_data['Agent']))
68
-
69
-
70
- opponent_agents = list(set(gtbench_raw_data['Opponent Agent']))
71
-
72
- # Searching and filtering
73
-
74
-
75
- def update_table(
76
- hidden_df: pd.DataFrame,
77
- columns: list,
78
- model1: list,
79
- model2: list,
80
- agent1: list,
81
- agent2: list
82
- ):
83
-
84
- filtered_df = select_columns(hidden_df, columns)
85
-
86
- filtered_df = filter_model1(filtered_df, model1)
87
- filtered_df = filter_model2(filtered_df, model2)
88
- filtered_df = filter_agent1(filtered_df, agent1)
89
- filtered_df = filter_agent2(filtered_df, agent2)
90
-
91
- return filtered_df
92
-
93
- # triggered only once at startup => read query parameter if it exists
94
-
95
-
96
- def load_query(request: gr.Request):
97
- query = request.query_params.get("query") or ""
98
- return query, query # return one for the "search_bar", one for a hidden component that triggers a reload only if value has changed
99
-
100
-
101
- def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
102
- return df[(df[AutoEvalColumn.dummy.name].str.contains(query, case=False))]
103
-
104
-
105
- def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
106
- always_here_cols = [
107
- "Model", "Agent", "Opponent Model", "Opponent Agent"
108
- ]
109
- # We use COLS to maintain sorting
110
- all_columns = games
111
-
112
- if len(columns) == 0:
113
- filtered_df = df[
114
- always_here_cols +
115
- [c for c in all_columns if c in df.columns]
116
- ]
117
- filtered_df["Average"] = add_average_col(filtered_df)
118
- column_to_move = "Average"
119
- current_index = filtered_df.columns.get_loc(column_to_move)
120
-
121
- # Move the column to the desired index
122
- filtered_df.insert(4, column_to_move, filtered_df.pop(column_to_move))
123
- return filtered_df
124
-
125
- filtered_df = df[
126
- always_here_cols +
127
- [c for c in all_columns if c in df.columns and c in columns]
128
- ]
129
- if "Average" in columns:
130
- filtered_df["Average"] = add_average_col(filtered_df)
131
- # Get the current index of the column
132
- column_to_move = "Average"
133
- current_index = filtered_df.columns.get_loc(column_to_move)
134
-
135
- # Move the column to the desired index
136
- filtered_df.insert(4, column_to_move, filtered_df.pop(column_to_move))
137
- else:
138
- if "Average" in filtered_df.columns:
139
- # Remove the column
140
- filtered_df = filtered_df.drop(columns=["Average"])
141
-
142
- return filtered_df
143
-
144
-
145
- def filter_model1(
146
- df: pd.DataFrame, model_query: list
147
- ) -> pd.DataFrame:
148
- # Show all models
149
- if len(model_query) == 0:
150
- return df
151
- filtered_df = df
152
-
153
- filtered_df = filtered_df[filtered_df["Model"].isin(
154
- model_query)]
155
- return filtered_df
156
-
157
 
158
- def filter_model2(
159
- df: pd.DataFrame, model_query: list
160
- ) -> pd.DataFrame:
161
- # Show all models
162
- if len(model_query) == 0:
163
- return df
164
- filtered_df = df
165
-
166
- filtered_df = filtered_df[filtered_df["Opponent Model"].isin(
167
- model_query)]
168
- return filtered_df
169
-
170
-
171
- def filter_agent1(
172
- df: pd.DataFrame, agent_query: list
173
- ) -> pd.DataFrame:
174
- # Show all models
175
- if len(agent_query) == 0:
176
- return df
177
- filtered_df = df
178
-
179
- filtered_df = filtered_df[filtered_df["Agent"].isin(
180
- agent_query)]
181
  return filtered_df
182
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
- def filter_agent2(
185
- df: pd.DataFrame, agent_query: list
186
- ) -> pd.DataFrame:
187
- # Show all models
188
- if len(agent_query) == 0:
189
- return df
190
- filtered_df = df
191
-
192
- filtered_df = filtered_df[filtered_df["Opponent Agent"].isin(
193
- agent_query)]
194
- return filtered_df
195
-
196
-
197
- # leaderboard_df = filter_models(leaderboard_df, [t.to_str(" : ") for t in ModelType], list(NUMERIC_INTERVALS.keys()), [i.value.name for i in Precision], False, False)
198
-
199
-
200
- class LLM_Model:
201
- def __init__(self, t_value, model_value, average_value, arc_value, hellaSwag_value, mmlu_value) -> None:
202
- self.t = t_value
203
- self.model = model_value
204
- self.average = average_value
205
- self.arc = arc_value
206
- self.hellaSwag = hellaSwag_value
207
- self.mmlu = mmlu_value
208
-
209
-
210
- games = ["Breakthrough", "Connect Four", "Blind Auction", "Kuhn Poker",
211
- "Liar's Dice", "Negotiation", "Nim", "Pig", "Iterated Prisoner's Dilemma", "Tic-Tac-Toe"]
212
-
213
- # models = ["gpt-35-turbo-1106", "gpt-4", "Llama-2-70b-chat-hf", "CodeLlama-34b-Instruct-hf",
214
- # "CodeLlama-70b-Instruct-hf", "Mistral-7B-Instruct-v01", "Mistral-7B-OpenOrca"]
215
-
216
- # agents = ["Prompt Agent", "CoT Agent", "SC-CoT Agent",
217
- # "ToT Agent", "MCTS", "Random", "TitforTat"]
218
-
219
- demo = gr.Blocks(css=custom_css)
220
-
221
-
222
- def load_image(image_path):
223
- image = Image.open(image_path)
224
- return image
225
-
226
-
227
- with demo:
228
- with gr.Row():
229
- gr.Image("./assets/logo.png", height="200px", width="200px", scale=0.1,
230
- show_download_button=False, container=False)
231
- gr.HTML(TITLE, elem_id="title")
232
-
233
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
234
- with gr.Tabs(elem_classes="tab-buttons") as tabs:
235
- with gr.TabItem("🏅 UnlearnCanvas Benchmark", elem_id="llm-benchmark-tab-table", id=0):
236
- with gr.Row():
237
- with gr.Column():
238
- with gr.Row():
239
-
240
- shown_columns = gr.CheckboxGroup(
241
- choices=[
242
- 'Average'
243
- ]+games,
244
- label="Select columns to show",
245
- elem_id="column-select",
246
- interactive=True,
247
- )
248
- with gr.Column(min_width=320):
249
- # with gr.Box(elem_id="box-filter"):
250
- model1_column = gr.CheckboxGroup(
251
- label="Model",
252
- choices=models,
253
- interactive=True,
254
- elem_id="filter-columns-type",
255
- )
256
-
257
- agent1_column = gr.CheckboxGroup(
258
- label="Agents",
259
- choices=agents,
260
- interactive=True,
261
- elem_id="filter-columns-precision",
262
- )
263
-
264
- model2_column = gr.CheckboxGroup(
265
- label="Opponent Model",
266
- choices=opponent_models,
267
- interactive=True,
268
- elem_id="filter-columns-type",
269
- )
270
- agent2_column = gr.CheckboxGroup(
271
- label="Opponent Agents",
272
- choices=opponent_agents,
273
- interactive=True,
274
- elem_id="filter-columns-precision",
275
- )
276
- # filter_columns_size = gr.CheckboxGroup(
277
- # label="Model sizes (in billions of parameters)",
278
- # choices=[f'NUMERIC_INTERVALS{i}' for i in range(0, 5)],
279
- # value=[f'NUMERIC_INTERVALS{i}' for i in range(0, 5)],
280
- # interactive=True,
281
- # elem_id="filter-columns-size",
282
- # )
283
-
284
- leaderboard_table = gr.components.Dataframe(
285
- value=gtbench_raw_data,
286
- elem_id="leaderboard-table",
287
- interactive=False,
288
- visible=True,
289
- # column_widths=["2%", "33%"]
290
- )
291
-
292
- game_bench_df_for_search = gr.components.Dataframe(
293
- value=gtbench_raw_data,
294
- elem_id="leaderboard-table",
295
- interactive=False,
296
- visible=False,
297
- # column_widths=["2%", "33%"]
298
- )
299
-
300
- # Dummy leaderboard for handling the case when the user uses backspace key
301
- # hidden_leaderboard_table_for_search = gr.components.Dataframe(
302
- # value=[],
303
- # headers=COLS,
304
- # datatype=TYPES,
305
- # visible=False,
306
- # )
307
- # search_bar.submit(
308
- # update_table,
309
- # [
310
- # # hidden_leaderboard_table_for_search,
311
- # # shown_columns,
312
- # # filter_columns_type,
313
- # # filter_columns_precision,
314
- # # filter_columns_size,
315
- # # deleted_models_visibility,
316
- # # flagged_models_visibility,
317
- # # search_bar,
318
- # ],
319
- # leaderboard_table,
320
- # )
321
-
322
- # # Define a hidden component that will trigger a reload only if a query parameter has be set
323
- # hidden_search_bar = gr.Textbox(value="", visible=False)
324
- # hidden_search_bar.change(
325
- # update_table,
326
- # [
327
- # hidden_leaderboard_table_for_search,
328
- # shown_columns,
329
- # filter_columns_type,
330
- # filter_columns_precision,
331
- # filter_columns_size,
332
- # deleted_models_visibility,
333
- # flagged_models_visibility,
334
- # search_bar,
335
- # ],
336
- # leaderboard_table,
337
- # )
338
- # # Check query parameter once at startup and update search bar + hidden component
339
- # demo.load(load_query, inputs=[], outputs=[search_bar, hidden_search_bar])
340
-
341
- for selector in [shown_columns, model1_column, model2_column, agent1_column, agent2_column]:
342
- selector.change(
343
- update_table,
344
- [
345
- game_bench_df_for_search,
346
- shown_columns,
347
- model1_column,
348
- model2_column,
349
- agent1_column,
350
- agent2_column
351
- # filter_columns_precision,
352
- # None, # filter_columns_size,
353
- # None, # deleted_models_visibility,
354
- # None, # flagged_models_visibility,
355
- # None, # search_bar,
356
- ],
357
- leaderboard_table,
358
- queue=True,
359
- )
360
-
361
- # with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=4):
362
- # with gr.Row():
363
- # with gr.Column():
364
- # chart = create_metric_plot_obj_1(
365
- # dummy_data_for_plot(
366
- # ["Metric1", "Metric2", 'Metric3']),
367
- # ["Metric1", "Metric2", "Metric3"],
368
- # title="Average of Top Scores and Human Baseline Over Time (from last update)",
369
- # )
370
- # gr.Plot(value=chart, min_width=500)
371
-
372
- with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
373
- gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
374
- gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
375
-
376
- '''
377
- with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
378
- with gr.Column():
379
- with gr.Row():
380
- gr.Markdown(EVALUATION_QUEUE_TEXT,
381
- elem_classes="markdown-text")
382
-
383
- with gr.Column():
384
- with gr.Accordion(
385
- f"✅ Finished Evaluations ({9})",
386
- open=False,
387
- ):
388
- with gr.Row():
389
- finished_eval_table = gr.components.Dataframe(
390
- value=None,
391
- headers=EVAL_COLS,
392
- datatype=EVAL_TYPES,
393
- row_count=5,
394
- )
395
- with gr.Accordion(
396
- f"🔄 Running Evaluation Queue ({5})",
397
- open=False,
398
- ):
399
- with gr.Row():
400
- running_eval_table = gr.components.Dataframe(
401
- value=None,
402
- headers=EVAL_COLS,
403
- datatype=EVAL_TYPES,
404
- row_count=5,
405
- )
406
-
407
- with gr.Accordion(
408
- f"⏳ Pending Evaluation Queue ({7})",
409
- open=False,
410
- ):
411
- with gr.Row():
412
- pending_eval_table = gr.components.Dataframe(
413
- value=None,
414
- headers=EVAL_COLS,
415
- datatype=EVAL_TYPES,
416
- row_count=5,
417
- )
418
- with gr.Row():
419
- gr.Markdown("# ✉️✨ Submit your Agent here!",
420
- elem_classes="markdown-text")
421
-
422
- with gr.Row():
423
- with gr.Column():
424
- model_name_textbox = gr.Textbox(label="Agent name")
425
- # revision_name_textbox = gr.Textbox(
426
- # label="Revision commit", placeholder="main")
427
- # private = gr.Checkbox(
428
- # False, label="Private", visible=not IS_PUBLIC)
429
- model_type = gr.Dropdown(
430
- choices=[t.to_str(" : ")
431
- for t in ModelType if t != ModelType.Unknown],
432
- label="Agent type",
433
- multiselect=False,
434
- value=ModelType.FT.to_str(" : "),
435
- interactive=True,
436
- )
437
-
438
- # with gr.Column():
439
- # precision = gr.Dropdown(
440
- # choices=[i.value.name for i in Precision if i !=
441
- # Precision.Unknown],
442
- # label="Precision",
443
- # multiselect=False,
444
- # value="float16",
445
- # interactive=True,
446
- # )
447
- # weight_type = gr.Dropdown(
448
- # choices=[i.value.name for i in WeightType],
449
- # label="Weights type",
450
- # multiselect=False,
451
- # value="Original",
452
- # interactive=True,
453
- # )
454
- # base_model_name_textbox = gr.Textbox(
455
- # label="Base model (for delta or adapter weights)")
456
-
457
- submit_button = gr.Button("Submit Eval")
458
- submission_result = gr.Markdown()
459
- # submit_button.click(
460
- # add_new_eval,
461
- # [
462
- # model_name_textbox,
463
- # base_model_name_textbox,
464
- # revision_name_textbox,
465
- # precision,
466
- # private,
467
- # weight_type,
468
- # model_type,
469
- # ],
470
- # submission_result,
471
- # )
472
-
473
- '''
474
- with gr.Row():
475
- with gr.Accordion("📙 Citation", open=False):
476
- citation_button = gr.Textbox(
477
- value=CITATION_BUTTON_TEXT,
478
- label=CITATION_BUTTON_LABEL,
479
- lines=20,
480
- elem_id="citation-button",
481
- show_copy_button=True,
482
- )
483
-
484
- # scheduler = BackgroundScheduler()
485
- # scheduler.add_job(restart_space, "interval", seconds=1800)
486
- # scheduler.start()
487
- demo.launch()
488
- # Both launches the space and its CI
489
- # configure_space_ci(
490
- # demo.queue(default_concurrency_limit=40),
491
- # trusted_authors=[], # add manually trusted authors
492
- # private="True", # ephemeral spaces will have same visibility as the main space. Otherwise, set to `True` or `False` explicitly.
493
- # variables={}, # We overwrite HF_HOME as tmp CI spaces will have no cache
494
- # secrets=["HF_TOKEN", "H4_TOKEN"], # which secret do I want to copy from the main space? Can be a `List[str]`."HF_TOKEN", "H4_TOKEN"
495
- # hardware=None, # "cpu-basic" by default. Otherwise set to "auto" to have same hardware as the main space or any valid string value.
496
- # storage=None, # no storage by default. Otherwise set to "auto" to have same storage as the main space or any valid string value.
497
- # ).launch()
498
-
499
-
500
- # notes: opponent model , opponent agent
501
- # column is games
 
1
+
2
  import gradio as gr
3
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
+ # Load the uc_result.csv file
6
+ uc_result_df = pd.read_csv('uc_result.csv')
7
+
8
+ # Convert percentage columns to float for sorting
9
+ percentage_columns = [col for col in uc_result_df.columns if uc_result_df[col].dtype == 'object' and '%' in uc_result_df[col].iloc[0]]
10
+ for col in percentage_columns:
11
+ uc_result_df[col] = uc_result_df[col].str.rstrip('%').astype('float') / 100
12
+
13
+ # Define a function to filter and sort the dataframe
14
+ def filter_and_sort(method=None, sort_by=None, ascending=True):
15
+ filtered_df = uc_result_df
16
+ if method:
17
+ filtered_df = filtered_df[filtered_df['Method'].str.contains(method)]
18
+ if sort_by:
19
+ filtered_df = filtered_df.sort_values(by=sort_by, ascending=ascending)
 
 
 
 
 
 
 
 
20
  return filtered_df
21
 
22
+ # Create Gradio interface components
23
+ method_input = gr.inputs.Textbox(label="Filter by Method", placeholder="Enter method name...")
24
+ sort_by_dropdown = gr.inputs.Dropdown(label="Sort by", choices=uc_result_df.columns.tolist(), default=None)
25
+ ascending_checkbox = gr.inputs.Checkbox(label="Ascending Order", value=True)
26
+
27
+ # Create a Gradio interface to display the data
28
+ iface = gr.Interface(
29
+ fn=filter_and_sort,
30
+ inputs=[method_input, sort_by_dropdown, ascending_checkbox],
31
+ outputs=gr.outputs.DataFrame(type="pandas"),
32
+ title="Enhanced UC Results Display",
33
+ description="This interface allows filtering and sorting of the results from uc_result.csv"
34
+ )
35
 
36
+ if __name__ == "__main__":
37
+ iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
assets/gtbench_results.csv DELETED
@@ -1,23 +0,0 @@
1
- Model,Agent,Opponent Model,Opponent Agent,Tic-Tac-Toe,Connect Four,Breakthrough,Liar's Dice,Blind Auction,Negotiation,Kuhn Poker,Nim,Pig,Iterated Prisoner's Dilemma,
2
- GPT-3.5-turbo,Prompt,GPT-3.5-turbo-1106,prompt agent,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
3
- GPT-4,Prompt,GPT-3.5-turbo-1106,prompt agent,-0.111,0.080,0.320,0.800,0.040,-0.281,0.400,0.080,-0.040,0.004,0.129
4
- GPT-4,CoT,GPT-3.5-turbo-1106,prompt agent,-0.022,-0.080,0.560,0.240,0.069,0.135,0.440,0.040,0.040,-0.160,0.126
5
- GPT-3.5-turbo,CoT,GPT-3.5-turbo-1106,prompt agent,0.277,-0.320,-0.120,0.440,0.115,-0.207,0.120,-0.040,-0.160,0.126,0.023
6
- GPT-3.5-turbo,SC-CoT,GPT-3.5-turbo-1106,prompt agent,0.409,-0.040,-0.160,0.520,-0.120,-0.315,-0.080,0.000,-0.080,-0.155,-0.002
7
- GPT-3.5-turbo,ToT,GPT-3.5-turbo-1106,prompt agent,-0.045,0.240,0.160,0.000,-0.120,0.183,0.000,0.120,-0.400,-0.191,-0.005
8
- Codellama-34b-instruct,Prompt,GPT-3.5-turbo-1106,prompt agent,0.333,-0.100,-0.800,-0.400,-0.250,0.216,-0.160,0.360,0.120,0.600,-0.008
9
- Llama-2-70b-chat,SC-CoT,GPT-3.5-turbo-1106,prompt agent,-0.469,-0.160,-0.680,0.160,-0.040,0.052,0.120,0.040,0.040,0.296,-0.064
10
- Codellama-34b-instruct,CoT,GPT-3.5-turbo-1106,prompt agent,0.316,-0.360,-0.760,-0.320,-0.268,0.085,0.000,0.480,-0.080,0.032,-0.088
11
- Llama-2-70b-chat,CoT,GPT-3.5-turbo-1106,prompt agent,-0.500,0.080,-0.800,0.265,-0.086,0.128,-0.200,0.061,-0.160,0.324,-0.089
12
- Mistral-7b-Orca,CoT,GPT-3.5-turbo-1106,prompt agent,-0.077,-0.120,-0.320,-0.560,0.133,0.078,0.000,0.360,-0.680,0.055,-0.113
13
- Codellama-34b-instruct,SC-CoT,GPT-3.5-turbo-1106,prompt agent,0.122,-0.600,-0.560,-0.280,-0.348,0.095,0.000,0.160,0.120,0.008,-0.128
14
- Mistral-7b-Orca,SC-CoT,GPT-3.5-turbo-1106,prompt agent,-0.200,-0.080,-0.400,-0.640,0.082,0.364,-0.040,0.440,-0.840,0.013,-0.130
15
- Codellama-34b-instruct,ToT,GPT-3.5-turbo-1106,prompt agent,-0.021,-0.160,-0.600,-0.520,-0.304,0.098,0.000,-0.040,-0.160,0.237,-0.147
16
- Llama-2-70b-chat,Prompt,GPT-3.5-turbo-1106,prompt agent,-0.366,-1.000,-0.440,-0.160,-0.075,-0.033,-0.040,0.800,-0.020,-0.712,-0.205
17
- Mistral-7b-Orca,ToT,GPT-3.5-turbo-1106,prompt agent,-0.179,-0.800,-0.320,-0.440,-0.047,0.299,-0.200,-0.080,-0.840,0.162,-0.245
18
- Mistral-7b-Orca,Prompt,GPT-3.5-turbo-1106,prompt agent,-0.429,-0.840,-0.680,-0.680,-0.069,-0.114,-0.040,-0.080,0.000,-0.182,-0.311
19
- GPT-4,Prompt,GPT-4,prompt agent,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
20
- Codellama-34b-instruct,Prompt,GPT-4,prompt agent,-0.064,0.720,-0.600,-0.640,-0.148,0.000,0.080,0.160,0.040,0.342,-0.011
21
- Codellama-34b-instruct,CoT,GPT-4,prompt agent,0.022,0.560,-1.000,-0.800,0.449,-0.078,0.080,0.200,-0.080,0.224,-0.042
22
- Llama-2-70b-chat,Prompt,GPT-4,prompt agent,-0.938,0.960,-0.920,-0.720,-0.250,0.000,-0.040,0.360,0.200,0.333,-0.101
23
- Llama-2-70b-chat,CoT,GPT-4,prompt agent,-0.286,0.200,-0.880,-0.917,-0.417,0.201,0.000,-0.026,-0.360,0.173,-0.231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
assets/uc_result.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Method,S-UA,S-IRA,S-CRA,O-UA,O-IRA,O-CRA,FID,Time (s),Memory (GB),Storage (GB)
2
+ ESD,98.58%,80.97%,93.96%,92.15%,55.78%,44.23%,65.55,6163,17.8,4.3
3
+ FMN,88.48%,56.77%,46.60%,45.64%,90.63%,73.46%,131.37,350,17.9,4.2
4
+ UCE,98.40%,60.22%,47.71%,94.31%,39.35%,34.67%,182.01,434,5.1,1.7
5
+ CA,60.82%,96.01%,92.70%,46.67%,90.11%,81.97%,54.21,734,10.1,4.2
6
+ SalUn,86.26%,90.39%,95.08%,86.91%,96.35%,99.59%,61.05,667,30.8,4.0