Spaces:
Runtime error
Runtime error
added mock top failure clusters.
Browse files
- app.py +24 -2
- data.csv +6 -6
- flagged/log.csv +22 -0
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import gradio as gr
|
|
| 4 |
|
| 5 |
df = pd.read_csv("./data.csv")
|
| 6 |
|
|
|
|
| 7 |
def md_builder(model, dataset, displayed_metrics):
|
| 8 |
row = df[df["friendly_name"] == model]
|
| 9 |
str = (
|
|
@@ -29,6 +30,10 @@ def md_builder(model, dataset, displayed_metrics):
|
|
| 29 |
if "Fairness" in displayed_metrics:
|
| 30 |
str += f"\nFairness: `{0}`"
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
str += "\n<div style='text-align: right'>⛶ Expand safety card</div>"
|
| 33 |
|
| 34 |
return str
|
|
@@ -49,7 +54,20 @@ iface = gr.Interface(
|
|
| 49 |
label="Dataset",
|
| 50 |
info="Select the sampling dataset to use for testing.",
|
| 51 |
),
|
| 52 |
-
gr.CheckboxGroup(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
# gr.Radio(["park", "zoo", "road"], label="Location", info="Where did they go?"),
|
| 54 |
# gr.Dropdown(
|
| 55 |
# ["ran", "swam", "ate", "slept"], value=["swam", "slept"], multiselect=True, label="Activity", info="Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nisl eget ultricies aliquam, nunc nisl aliquet nunc, eget aliquam nisl nunc vel nisl."
|
|
@@ -58,7 +76,11 @@ iface = gr.Interface(
|
|
| 58 |
],
|
| 59 |
"markdown",
|
| 60 |
examples=[
|
| 61 |
-
[
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
],
|
| 63 |
)
|
| 64 |
|
|
|
|
| 4 |
|
| 5 |
df = pd.read_csv("./data.csv")
|
| 6 |
|
| 7 |
+
|
| 8 |
def md_builder(model, dataset, displayed_metrics):
|
| 9 |
row = df[df["friendly_name"] == model]
|
| 10 |
str = (
|
|
|
|
| 30 |
if "Fairness" in displayed_metrics:
|
| 31 |
str += f"\nFairness: `{0}`"
|
| 32 |
|
| 33 |
+
if "Failure Clusters" in displayed_metrics:
|
| 34 |
+
cl_count = row['cluster_count'].values[0]
|
| 35 |
+
str += f"\n<details><summary>Top failures: <code>{row['top_failure_cluster'].values[0]}</code> (+{cl_count - 1} others)</summary>(details for all {cl_count} clusters)</details>"
|
| 36 |
+
|
| 37 |
str += "\n<div style='text-align: right'>⛶ Expand safety card</div>"
|
| 38 |
|
| 39 |
return str
|
|
|
|
| 54 |
label="Dataset",
|
| 55 |
info="Select the sampling dataset to use for testing.",
|
| 56 |
),
|
| 57 |
+
gr.CheckboxGroup(
|
| 58 |
+
[
|
| 59 |
+
"Performance",
|
| 60 |
+
"Accuracy",
|
| 61 |
+
"Precision",
|
| 62 |
+
"Recall",
|
| 63 |
+
"Robustness",
|
| 64 |
+
"Fairness",
|
| 65 |
+
"Failure Clusters",
|
| 66 |
+
],
|
| 67 |
+
value=["Accuracy", "Robustness", "Fairness", "Failure Clusters"],
|
| 68 |
+
label="Metrics",
|
| 69 |
+
info="Select displayed metrics.",
|
| 70 |
+
),
|
| 71 |
# gr.Radio(["park", "zoo", "road"], label="Location", info="Where did they go?"),
|
| 72 |
# gr.Dropdown(
|
| 73 |
# ["ran", "swam", "ate", "slept"], value=["swam", "slept"], multiselect=True, label="Activity", info="Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nisl eget ultricies aliquam, nunc nisl aliquet nunc, eget aliquam nisl nunc vel nisl."
|
|
|
|
| 76 |
],
|
| 77 |
"markdown",
|
| 78 |
examples=[
|
| 79 |
+
[
|
| 80 |
+
"ViT",
|
| 81 |
+
"marmal88/skin_cancer",
|
| 82 |
+
["Accuracy", "Robustness", "Fairness", "Failure Clusters"],
|
| 83 |
+
],
|
| 84 |
],
|
| 85 |
)
|
| 86 |
|
data.csv
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
id,loss,accuracy,f1_macro,f1_micro,f1_weighted,precision_macro,precision_micro,precision_weighted,recall_macro,recall_micro,recall_weighted,friendly_name,robustness,performance
|
| 2 |
-
#50807121081,0.0514,0.9867,0.9839,0.9867,0.9867,0.9845,0.9867,0.9873,0.9841,0.9867,0.9867,Swin Transformer (small),24,12
|
| 3 |
-
#50807121082,0.0341,0.9933,0.9920,0.9933,0.9933,0.9922,0.9933,0.9935,0.9919,0.9933,0.9933,ViT,27,13
|
| 4 |
-
#50807121083,0.9992,0.5067,0.3474,0.5067,0.3968,0.6261,0.5067,0.5996,0.4095,0.5067,0.5067,ResNet,3,38
|
| 5 |
-
#50807121084,0.0523,0.9800,0.9805,0.9800,0.9800,0.9857,0.9800,0.9809,0.9760,0.9800,0.9800,Swin Transformer (large),19,12
|
| 6 |
-
#50807121085,0.0393,0.9733,0.9707,0.9733,0.9732,0.9739,0.9733,0.9734,0.9679,0.9733,0.9733,BEiT,19,12
|
|
|
|
| 1 |
+
id,loss,accuracy,f1_macro,f1_micro,f1_weighted,precision_macro,precision_micro,precision_weighted,recall_macro,recall_micro,recall_weighted,friendly_name,robustness,performance,top_failure_cluster,cluster_count
|
| 2 |
+
#50807121081,0.0514,0.9867,0.9839,0.9867,0.9867,0.9845,0.9867,0.9873,0.9841,0.9867,0.9867,Swin Transformer (small),24,12,Gaussian Blur,6
|
| 3 |
+
#50807121082,0.0341,0.9933,0.9920,0.9933,0.9933,0.9922,0.9933,0.9935,0.9919,0.9933,0.9933,ViT,27,13,Brightness,8
|
| 4 |
+
#50807121083,0.9992,0.5067,0.3474,0.5067,0.3968,0.6261,0.5067,0.5996,0.4095,0.5067,0.5067,ResNet,3,38,Brightness,2
|
| 5 |
+
#50807121084,0.0523,0.9800,0.9805,0.9800,0.9800,0.9857,0.9800,0.9809,0.9760,0.9800,0.9800,Swin Transformer (large),19,12,Brightness,5
|
| 6 |
+
#50807121085,0.0393,0.9733,0.9707,0.9733,0.9732,0.9739,0.9733,0.9734,0.9679,0.9733,0.9733,BEiT,19,12,Dark Spots,7
|
flagged/log.csv
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Model,Dataset,Metrics,output,flag,username,timestamp
|
| 2 |
+
ViT,marmal88/skin_cancer,"['Accuracy', 'Robustness', 'Fairness', 'Failure Clusters']","<h2><span style='font-size: 16px;'>🚧 Performance and safety of <code style='font-weight: 400'>ViT</code></span></h2>
|
| 3 |
+
<p>On dataset <code>marmal88/skin_cancer</code></p>
|
| 4 |
+
<p>Accuracy: <code>0.9933</code><br>
|
| 5 |
+
Robustness: <code>73</code><br>
|
| 6 |
+
Fairness: <code>0</code></p>
|
| 7 |
+
<details><summary>Top failures: <code>`Brightness` (+7 others)</code></summary>hi</details>
|
| 8 |
+
<div style='text-align: right'>⛶ Expand safety card</div>",,,2023-04-25 14:19:45.974654
|
| 9 |
+
ViT,marmal88/skin_cancer,"['Accuracy', 'Robustness', 'Fairness', 'Failure Clusters']","<h2><span style='font-size: 16px;'>🚧 Performance and safety of <code style='font-weight: 400'>ViT</code></span></h2>
|
| 10 |
+
<p>On dataset <code>marmal88/skin_cancer</code></p>
|
| 11 |
+
<p>Accuracy: <code>0.9933</code><br>
|
| 12 |
+
Robustness: <code>73</code><br>
|
| 13 |
+
Fairness: <code>0</code></p>
|
| 14 |
+
<details><summary>Top failures: <code>`Brightness` (+7 others)</code></summary>hi</details>
|
| 15 |
+
<div style='text-align: right'>⛶ Expand safety card</div>",,,2023-04-25 14:19:49.014781
|
| 16 |
+
ViT,marmal88/skin_cancer,"['Accuracy', 'Robustness', 'Fairness', 'Failure Clusters']","<h2><span style='font-size: 16px;'>🚧 Performance and safety of <code style='font-weight: 400'>ViT</code></span></h2>
|
| 17 |
+
<p>On dataset <code>marmal88/skin_cancer</code></p>
|
| 18 |
+
<p>Accuracy: <code>0.9933</code><br>
|
| 19 |
+
Robustness: <code>73</code><br>
|
| 20 |
+
Fairness: <code>0</code></p>
|
| 21 |
+
<details><summary>Top failures: `Brightness` (+7 others)</summary>(demo content)</details>
|
| 22 |
+
<div style='text-align: right'>⛶ Expand safety card</div>",,,2023-04-25 14:40:36.839513
|