Spaces:

Taranosaurus
/

Classifier

Sleeping

Taranosaurus committed on Dec 28, 2023

Commit

54d4ac0

1 Parent(s): 1619049

Added hypothesis_template and removed superfluous tokenizer

The hypothesis_template can be used to make the labelling more malleable and give better classifications from the input text

Files changed (1) hide show

app.py +8 -7

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from transformers import pipeline, AutoTokenizer
 import gradio as gr
 import torch
@@ -9,17 +9,17 @@ else:
 summary_checkpoint = "facebook/bart-large-cnn" #"google/pegasus-large"
 oracle_checkpoint  = "facebook/bart-large-mnli"
-tokenizer = AutoTokenizer.from_pretrained(summary_checkpoint, device=device)
-summary = pipeline(task="summarization", model=summary_checkpoint, tokenizer=tokenizer, device=device)
 oracle = pipeline(task="zero-shot-classification", model=oracle_checkpoint, device=device)
 labels = ["merge","revert","fix","feature","update","refactor","test","security","documentation","style"]
 selected_labels = ["feature","update","refactor","test","security","documentation","style"]
-def do_the_thing(input, labels):
-    #print(labels)
     summarisation = summary(input, truncation=True)[0]['summary_text']
-    zsc_results = oracle(sequences=[input, summarisation], candidate_labels=labels, multi_label=False, batch_size=2)
     classifications_input = {}
     for i in range(len(labels)):
         classifications_input.update({zsc_results[0]['labels'][i]: zsc_results[0]['scores'][i]})
@@ -37,6 +37,7 @@ with gr.Blocks() as frontend:
     with gr.Row():
         with gr.Column():
             input_labels = gr.Dropdown(label="Classification Labels", choices=labels, multiselect=True, value=selected_labels, interactive=True, allow_custom_value=True, info="Labels to classify the original text and summary")
         with gr.Column():
             output_summary_text = gr.TextArea(label="Summary of Notes")
     with gr.Row():
@@ -44,6 +45,6 @@ with gr.Blocks() as frontend:
             output_original_labels = gr.Label(label="Original Text Classification")
         with gr.Column():
             output_summary_labels = gr.Label(label="Summary Text Classification")
-    btn_submit.click(fn=do_the_thing, inputs=[input_value, input_labels], outputs=[output_summary_text, output_original_labels, output_summary_labels])
 frontend.launch()

+from transformers import pipeline
 import gradio as gr
 import torch
 summary_checkpoint = "facebook/bart-large-cnn" #"google/pegasus-large"
 oracle_checkpoint  = "facebook/bart-large-mnli"
+summary = pipeline(task="summarization", model=summary_checkpoint, device=device)
 oracle = pipeline(task="zero-shot-classification", model=oracle_checkpoint, device=device)
 labels = ["merge","revert","fix","feature","update","refactor","test","security","documentation","style"]
 selected_labels = ["feature","update","refactor","test","security","documentation","style"]
+def do_the_thing(input, hypothesis, labels):
+    if hypothesis == None or hypothesis == "" or '{}' not in hypothesis:
+        hypothesis= "This example is {}."
     summarisation = summary(input, truncation=True)[0]['summary_text']
+    zsc_results = oracle(sequences=[input, summarisation], candidate_labels=labels, multi_label=False, batch_size=2, hypothesis_template=hypothesis)
     classifications_input = {}
     for i in range(len(labels)):
         classifications_input.update({zsc_results[0]['labels'][i]: zsc_results[0]['scores'][i]})
     with gr.Row():
         with gr.Column():
             input_labels = gr.Dropdown(label="Classification Labels", choices=labels, multiselect=True, value=selected_labels, interactive=True, allow_custom_value=True, info="Labels to classify the original text and summary")
+            input_hypothesis = gr.Textbox(label="Hypothesis Template", info="This must include the {} format syntax. Blank and invalid inputs get defaulted to the palceholder text.", value="This git commit relates to {} changes.", placeholder="This example is {}.")
         with gr.Column():
             output_summary_text = gr.TextArea(label="Summary of Notes")
     with gr.Row():
             output_original_labels = gr.Label(label="Original Text Classification")
         with gr.Column():
             output_summary_labels = gr.Label(label="Summary Text Classification")
+    btn_submit.click(fn=do_the_thing, inputs=[input_value, input_hypothesis, input_labels], outputs=[output_summary_text, output_original_labels, output_summary_labels])
 frontend.launch()