John Graham Reynolds committed
Commit · bcbab79
1 Parent(s): 86e4acf
change output to text, try adding example
app.py CHANGED
@@ -5,7 +5,7 @@ import evaluate
 import gradio as gr
 import pandas as pd
 
-title = "
+title = "'Combine' multiple metrics with this 🤗 Evaluate 🪲 Fix!"
 
 description = """<p style='text-align: center'>
 As I introduce myself to the entirety of the 🤗 ecosystem, I've put together this Space to show off a temporary fix for a current 🪲 in the 🤗 Evaluate library. \n
@@ -13,7 +13,8 @@ As I introduce myself to the entirety of the 🤗 ecosystem, I've put together t
 Check out the original, longstanding issue [here](https://github.com/huggingface/evaluate/issues/234). This details how it is currently impossible to \
 `evaluate.combine()` multiple metrics related to multilabel text classification. Particularly, one cannot `combine` the `f1`, `precision`, and `recall` scores for \
 evaluation. I encountered this issue specifically while training [RoBERTa-base-DReiFT](https://huggingface.co/MarioBarbeque/RoBERTa-base-DReiFT) for multilabel \
-text classification of 805 labeled medical conditions based on drug reviews.
+text classification of 805 labeled medical conditions based on drug reviews. The [following workaround](https://github.com/johngrahamreynolds/FixedMetricsForHF) was
+configured. \n
 
 This Space shows how one can instantiate these custom `evaluate.Metric`s, each with their own unique methodology for averaging across labels, before `combine`-ing them into a
 HF `evaluate.CombinedEvaluations` object. From here, we can easily compute each of the metrics simultaneously using `compute`.</p>
@@ -80,17 +81,23 @@ space = gr.Interface(
             datatype=["number", "number"],
             row_count=5,
             col_count=(2, "fixed"),
+            label_name="Table of Predicted vs Actual Class Labels"
         ),
         gr.Dataframe(
             headers=["Metric", "Averaging Type"],
             datatype=["str", "str"],
-            row_count=3,
+            row_count=(3, "fixed"),
             col_count=(2, "fixed"),
+            label_name="Table of Metrics and Averaging Method across Labels "
         )
     ],
-    outputs="
+    outputs="text",
     title=title,
     description=description,
     article=article,
+    examples=[
+        [[[1,1],[1,0],[2,0],[1,2],[2,2]], [["f1", "weighted"], ["precision", "micro"], ["recall", "weighted"]]],
+        # [[["precision", "micro"], ["recall", "weighted"], ["f1", "macro"]]],
+    ],
     cache_examples=False
 ).launch()
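For context on what the combined-metrics pattern described in the Space's description looks like in practice, here is a minimal sketch. The `fixed_metrics` module and the `FixedF1`, `FixedPrecision`, and `FixedRecall` class names (and their `average=` constructor argument) are assumptions standing in for whatever the linked FixedMetricsForHF workaround actually defines; only `evaluate.combine` and `compute` are the 🤗 Evaluate calls named in the description, and the toy labels mirror the example table wired into the interface above.

```python
import evaluate

# Hypothetical subclasses of evaluate.Metric from the linked workaround repo:
# each instance bakes in its own averaging strategy, so `combine` never has to
# route a shared `average` kwarg to every metric (the failure behind issue #234).
from fixed_metrics import FixedF1, FixedPrecision, FixedRecall  # assumed names

f1 = FixedF1(average="weighted")
precision = FixedPrecision(average="micro")
recall = FixedRecall(average="weighted")

# Combine the pre-configured metrics into a single CombinedEvaluations object ...
clf_metrics = evaluate.combine([f1, precision, recall])

# ... and compute all three scores at once on a toy batch of predicted vs. actual
# class labels, similar to the example DataFrame rows above.
predicted = [1, 1, 2, 1, 2]
actual = [1, 0, 0, 2, 2]
print(clf_metrics.compute(predictions=predicted, references=actual))
```

Because each pre-configured metric already carries its own averaging mode, no extra keyword arguments need to be passed at `compute` time, which is the point of the workaround.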