Spaces:

adlozano1
/

gibberish_detector

Runtime error

File size: 4,177 Bytes

9a34627
ead2dcb
 
 
 
9a34627
 
 
 
 
 
 
 
 
ead2dcb
 
 
 
 
 
 
9a34627
ead2dcb
9a34627
ead2dcb
9a34627
 
 
ead2dcb
 
 
 
 
 
 
 
 
 
 
9a34627
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ead2dcb
 
9a34627
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ead2dcb
9a34627
 
 
 
 
 
 
ead2dcb
9a34627
 
ead2dcb
 
9a34627
 
 
 
 
 
 
 
 
 
ead2dcb
9a34627
 
 
 
ead2dcb
9a34627

from cProfile import label
from fastapi import File
import gradio as gr
from gib_detect_module import detect
import csv
import torch
import tensorflow as tf


from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Hub identifier shared by the classifier weights and the matching tokenizer.
_GIBBERISH_MODEL_ID = "madhurjindal/autonlp-Gibberish-Detector-492513457"

# Both objects are created once at import time and reused by every request.
# NOTE(review): use_auth_token=True presumably needs a stored Hugging Face
# token to be available at startup -- confirm one is configured where this runs.
DLmodel = AutoModelForSequenceClassification.from_pretrained(
    _GIBBERISH_MODEL_ID,
    use_auth_token=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    _GIBBERISH_MODEL_ID,
    use_auth_token=True,
)



def greet(name):
    """Return the greeting ``Hello <name>!!`` for the given string *name*."""
    pieces = ("Hello ", name, "!!")
    return "".join(pieces)

def detect_gibberish(line, f):
    """Run the ``detect``-based check on a text line or on a CSV file.

    Args:
        line: Text to classify. When truthy it takes precedence over ``f``.
        f: File object handed to ``annotate_csv`` (which reads ``f.name``);
            only consulted when ``line`` is empty.

    Returns:
        A 3-tuple ``(verdict, annotated_csv_path, None)``. Exactly one of
        the first two entries is set when an input was supplied; all three
        are ``None`` when neither ``line`` nor ``f`` was given.
    """
    if line:
        verdict = "Valid!!!!" if detect(line) else "Bollocks Giberrish"
        return verdict, None, None
    if f:
        return None, annotate_csv(f), None
    # Nothing to process: still hand back one value per output slot instead
    # of falling off the end and returning a bare None.
    return None, None, None

 


def annotate_csv(f):
    """Append the ``detect`` verdict to every row of a CSV file.

    Reads the CSV at ``f.name``, calls ``detect`` on the first column of each
    row, appends the stringified result as a new last column and writes the
    rows to ``out.csv`` in the current working directory.

    Args:
        f: Object whose ``name`` attribute is the path of the CSV to read.

    Returns:
        The output path, always ``"out.csv"``.
    """
    # newline='' leaves newline handling to the csv module, as its
    # documentation recommends, so quoted fields with embedded line breaks
    # are parsed correctly.
    with open(f.name, newline='') as csvfile, \
            open('out.csv', 'w', newline='') as csvout:
        creader = csv.reader(csvfile, delimiter=',', quotechar='"')
        cwriter = csv.writer(csvout, delimiter=',',
                             quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for row in creader:
            # Blank lines are parsed as empty rows; pass them through
            # unannotated instead of raising IndexError on row[0].
            if row:
                row.append(str(detect(row[0])))
            cwriter.writerow(row)
    return "out.csv"


def annotate_csv_deep(f):
    """Append the deep-learning model's label and confidence to each CSV row.

    Reads the CSV at ``f.name``, classifies the first column of every row
    with ``DLmodel`` and appends two columns: the highest-probability label
    and that probability formatted as a whole-number percentage. The result
    is written to ``out.csv`` in the current working directory.

    Args:
        f: Object whose ``name`` attribute is the path of the CSV to read.

    Returns:
        The output path, always ``"out.csv"``.
    """
    labels = DLmodel.config.id2label
    with open(f.name, newline='') as csvfile, \
            open('out.csv', 'w', newline='') as csvout:
        creader = csv.reader(csvfile, delimiter=',', quotechar='"')
        cwriter = csv.writer(csvout, delimiter=',',
                             quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for row in creader:
            if not row:
                # Blank line in the input: nothing to classify.
                cwriter.writerow(row)
                continue
            # Classify only the first column, like annotate_csv does.
            # Passing the whole row would be treated as an unpadded batch of
            # texts, which breaks as soon as a row has more than one column.
            inputs = tokenizer(row[0], return_tensors="pt", truncation=True)
            # Inference only: no autograd graph is needed.
            with torch.no_grad():
                outputs = DLmodel(**inputs)
            probs = outputs.logits.softmax(dim=-1).cpu().flatten().tolist()
            idx = probs.index(max(probs))

            row.append(labels[idx])
            row.append("{:.0%}".format(probs[idx]))
            cwriter.writerow(row)
    return "out.csv"


def detect_gibberish_deep(line, f):
    """Classify a text line or annotate a CSV file with the deep model.

    Args:
        line: Text to classify. When truthy it takes precedence over ``f``.
        f: File object forwarded to ``annotate_csv_deep``; only consulted
            when ``line`` is empty.

    Returns:
        A 3-tuple ``(readable_scores, annotated_csv_path, label_scores)``:

        * for text input: a multi-line ``"<label> : <percent>"`` string,
          ``None``, and a ``{label: probability}`` dict;
        * for file input: ``None``, the annotated CSV path, ``None``;
        * when neither input is given: ``(None, None, None)``.
    """
    if line:
        inputs = tokenizer(line, return_tensors="pt", truncation=True)
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            outputs = DLmodel(**inputs)
        probs = outputs.logits.softmax(dim=-1).cpu().flatten().tolist()
        labels = DLmodel.config.id2label
        output = dict(zip(labels.values(), probs))
        readable_output = "".join(
            "{} : {:.0%}\n".format(name, prob) for name, prob in output.items()
        )
        return readable_output, None, output
    if f:
        return None, annotate_csv_deep(f), None
    # Nothing to process: still hand back one value per output slot instead
    # of falling off the end and returning a bare None.
    return None, None, None



def detect_gibberish_abstract(model, line, f):
    """Dispatch a request to the detector named by *model*.

    ``"Deep Learning Model"`` selects ``detect_gibberish_deep``; every other
    value falls back to ``detect_gibberish``. ``line`` and ``f`` are passed
    through unchanged and the handler's result is returned as-is.
    """
    wants_deep = model == "Deep Learning Model"
    handler = detect_gibberish_deep if wants_deep else detect_gibberish
    return handler(line, f)


# ---- Input components -------------------------------------------------------
# Free-text input.
inputLine = gr.inputs.Textbox(
    lines=1,
    placeholder="Input text here, if both text and file have values, only the text input will be processed.",
    default="",
    label="Text",
    optional=False,
)
# Optional single-file upload.
inputFile = gr.inputs.File(
    file_count="single",
    type="file",
    label="File to Annotate",
    optional=True,
)

# Detector selector; detect_gibberish_abstract compares against the first entry.
choices = ["Deep Learning Model", "Markov Chain"]
inputModel = gr.inputs.Dropdown(choices)

# ---- Output components ------------------------------------------------------
outputLine = gr.outputs.Textbox(type="auto", label=None)
outputFile = gr.outputs.File(label="Annotated CSV")
# NOTE: this rebinds the module-level name `label` imported from cProfile.
label = gr.outputs.Label(num_top_classes=4)


# Example rows for the demo, each ordered as [model, text, file].
_EXAMPLE_MODELS = ("Deep Learning Model", "Markov Chain")
_EXAMPLE_WORDS = ("quetzalcoatl", "aasdf", "Covfefe")
_EXAMPLE_FILES = ("demo_bad.txt", "demo_mixed.txt")

# Text-driven rows come first (paired with a placeholder CSV), followed by
# file-driven rows with an empty text field; models vary slowest in each group.
examples = [
    [model_name, word, "demo_blank.csv"]
    for model_name in _EXAMPLE_MODELS
    for word in _EXAMPLE_WORDS
] + [
    [model_name, "", file_name]
    for model_name in _EXAMPLE_MODELS
    for file_name in _EXAMPLE_FILES
]
#iface = gr.Interface(fn=[detect_gibberish], inputs=["text",inputFile], outputs=["text",outputFile],examples=examples, allow_flagging='never')

#iface.launch()


# Build and start the demo UI.
# `fn` is passed as the callable itself: wrapping it in a one-element list is
# only accepted by old Gradio releases, while a bare callable works everywhere.
# The input order matches detect_gibberish_abstract(model, line, f); the three
# outputs match the 3-tuples returned by the detector functions.
iface = gr.Interface(
    fn=detect_gibberish_abstract,
    inputs=[inputModel, inputLine, inputFile],
    outputs=["text", outputFile, label],
    examples=examples,
    allow_flagging='never',
)
iface.launch()