Spaces:
Sleeping
Sleeping
File size: 3,638 Bytes
3c77d98 7ba3a06 3c77d98 4165ebd 3c77d98 7ba3a06 3c77d98 d2324b7 3c77d98 7ba3a06 3c77d98 7ba3a06 3c77d98 d2324b7 7ba3a06 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import data_prep
import model_predict
import gradio as gr
# Maps the model name offered in the UI dropdown to the model identifier that
# process_url hands to model_predict.predict_text (presumably Hugging Face Hub
# repo ids -- verify against model_predict).
# NOTE(review): the BERT-Base identifier ends in "_Final" (capital F) while the
# others end in "_final"; confirm this matches the published name.
model_dict = {
    "BERT-Base": "research-dump/bert-base-uncased_deletion_multiclass_complete_Final",
    "BERT-Large": "research-dump/bert-large-uncased_deletion_multiclass_complete_final",
    "RoBERTa-Base": "research-dump/roberta-base_deletion_multiclass_complete_final",
    "RoBERTa-Large": "research-dump/roberta-large_deletion_multiclass_complete_final",
}
def process_url(url: str, model_key: str):
    """Classify the deletion discussion found at ``url`` with the chosen model.

    Args:
        url: Address of the deletion discussion page; passed unchanged to
            ``data_prep.process_data``.
        model_key: Display name of the model to use; must be a key of
            ``model_dict``.

    Returns:
        A 4-tuple ``(processed_text, top_label, top_score, all_scores)``:
        the value returned by ``data_prep.process_data``, the label with the
        highest score, that score, and a copy of the complete label-to-score
        mapping returned by ``model_predict.predict_text``.

    Raises:
        KeyError: If ``model_key`` is not present in ``model_dict``.
        ValueError: If the prediction step returns an empty mapping.
    """
    model_name = model_dict[model_key]
    processed_text = data_prep.process_data(url)
    final_scores = model_predict.predict_text(processed_text, model_name)

    # Pick the label whose score is largest; ties resolve to the first label
    # in the mapping's iteration order.
    top_label = max(final_scores, key=final_scores.get)
    top_score = final_scores[top_label]

    # Return a shallow copy so the caller never shares the predictor's dict.
    all_scores = dict(final_scores)
    return processed_text, top_label, top_score, all_scores
# Heading and description handed to gr.Interface below.
title = 'Wikipedia Deletion Discussion Classifier Demo'

# The description is written as Markdown. Blank lines separate the paragraphs
# from the bullet list; without them the closing paragraph would be parsed as
# a continuation of the last bullet. The closing paragraph appears once only.
desc = """ This demo is about classifying deletion discussions from Wikipedia about Wikipedia articles. Wikipedia community engages in discussions related to an article’s quality, and map potential issues to existing templates, or Wikipedia policies, which cover diverse areas, from low notability of sources to content implausibility or vandalism.

To this end, we design a multiclass classifier to predict the outcome of a deletion discussion, without the need for human intervention. The classifier is trained on a dataset of deletion discussions from Wikipedia, and it predicts one of the following labels: delete, keep, merge, no consensus, speedy keep, speedy delete, redirect, or withdrawn. Each of these labels corresponds to a specific outcome of the deletion discussion as described below.

- **keep**: The article should be kept as it is.
- **delete**: The article should be deleted.
- **merge**: The article should be merged with another article. Articles that are short and unlikely to be expanded could be merged into larger articles or lists.
- **redirect**: The article should be redirected to another existing article that is a better target for the content.
- **withdraw**: The nominator withdraws their nomination for deletion, often due to improvements made to the article during the discussion.
- **no consensus**: When there is no clear agreement on the deletion discussion.
- **speedy keep**: The article should be kept and there are reasons to bypass deletion discussions to keep the article immediately.
- **speedy delete**: The article should be deleted and there are reasons to bypass deletion discussions to delete the article immediately.

The input to the classifier is a URL of a Wikipedia deletion discussion page, and the output is the predicted label of the discussion, along with the probability of the predicted label, and the probabilities of all the labels.
"""
# Input widgets: a free-text URL field and a model selector whose choices are
# the keys of model_dict, pre-selecting the first one.
url_input = gr.Textbox(label="URL")
model_name_input = gr.Dropdown(
    label="Model Name",
    choices=list(model_dict),
    value=next(iter(model_dict)),
)

# Output widgets, in the same order as the tuple returned by process_url.
outputs = [
    gr.Textbox(label="Processed Text"),
    gr.Textbox(label="Label with Highest Probability"),
    gr.Textbox(label="Probability"),
    gr.JSON(label="All Labels and Probabilities"),
]

# Wire everything into a single Interface and start serving it.
demo = gr.Interface(
    fn=process_url,
    inputs=[url_input, model_name_input],
    outputs=outputs,
    title=title,
    description=desc,
)
demo.launch()