File size: 4,177 Bytes
9a34627
ead2dcb
 
 
 
9a34627
 
 
 
 
 
 
 
 
ead2dcb
 
 
 
 
 
 
9a34627
ead2dcb
9a34627
ead2dcb
9a34627
 
 
ead2dcb
 
 
 
 
 
 
 
 
 
 
9a34627
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ead2dcb
 
9a34627
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ead2dcb
9a34627
 
 
 
 
 
 
ead2dcb
9a34627
 
ead2dcb
 
9a34627
 
 
 
 
 
 
 
 
 
ead2dcb
9a34627
 
 
 
ead2dcb
9a34627
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from cProfile import label
from fastapi import File
import gradio as gr
from gib_detect_module import detect
import csv
import torch
import tensorflow as tf


from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Hugging Face Hub repository id of the gibberish-detection checkpoint; the
# classifier and its tokenizer are both loaded from this same id.
_GIBBERISH_MODEL_ID = "madhurjindal/autonlp-Gibberish-Detector-492513457"

# Both objects are created once, when this module is first executed.
# use_auth_token=True asks transformers to authenticate the download with the
# locally stored Hugging Face token.
DLmodel = AutoModelForSequenceClassification.from_pretrained(
    _GIBBERISH_MODEL_ID,
    use_auth_token=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    _GIBBERISH_MODEL_ID,
    use_auth_token=True,
)



def greet(name):
    """Return a greeting of the form ``"Hello <name>!!"`` for ``name``."""
    greeting = "Hello " + name
    return greeting + "!!"

def detect_gibberish(line, f):
    """Classify a line of text, or annotate a CSV, with the Markov-chain detector.

    Args:
        line: Text to classify. When non-empty it takes precedence over ``f``.
        f: Uploaded file object (anything exposing a ``name`` path attribute)
            whose rows should be annotated. Only used when ``line`` is empty.

    Returns:
        A 3-tuple ``(message, annotated_csv_path, label_scores)``, one value
        per output of the Gradio interface. ``label_scores`` is always
        ``None`` for this detector. All three entries are ``None`` when
        neither ``line`` nor ``f`` is provided.
    """
    if line:
        verdict = "Valid!!!!" if detect(line) else "Bollocks Giberrish"
        return verdict, None, None
    if f:
        return None, annotate_csv(f), None
    # Neither input supplied: still return one value per output slot rather
    # than a bare None, which a caller expecting three values cannot unpack.
    return None, None, None

 


def annotate_csv(f):
    """Append the Markov-chain detector's verdict to every row of a CSV file.

    Reads the file at ``f.name``, runs ``detect`` on the first column of each
    row, appends the result (converted with ``str``) as a new last column and
    writes all rows to ``out.csv`` in the current working directory.

    Args:
        f: Object whose ``name`` attribute is the path of the CSV to read.

    Returns:
        The path of the annotated file, ``"out.csv"``.
    """
    output_path = "out.csv"
    # newline='' on both files lets the csv module handle line endings itself,
    # as the csv documentation recommends (needed for quoted multi-line fields).
    with open(f.name, newline='') as csvfile:
        creader = csv.reader(csvfile, delimiter=',', quotechar='"')
        with open(output_path, 'w', newline='') as csvout:
            cwriter = csv.writer(csvout, delimiter=',',
                                 quotechar='"', quoting=csv.QUOTE_MINIMAL)
            for row in creader:
                # csv.reader yields [] for blank lines; pass those through
                # unannotated instead of failing on row[0].
                if row:
                    row.append(str(detect(row[0])))
                cwriter.writerow(row)
    return output_path


def annotate_csv_deep(f):
    """Append the deep-learning model's top label and confidence to each CSV row.

    Reads the file at ``f.name``, classifies the first column of every row
    with ``DLmodel`` and appends two columns: the highest-probability label
    and that probability formatted as a whole-number percentage. All rows are
    written to ``out.csv`` in the current working directory.

    Args:
        f: Object whose ``name`` attribute is the path of the CSV to read.

    Returns:
        The path of the annotated file, ``"out.csv"``.
    """
    output_path = "out.csv"
    labels = DLmodel.config.id2label
    # newline='' on both files lets the csv module handle line endings itself.
    with open(f.name, newline='') as csvfile:
        creader = csv.reader(csvfile, delimiter=',', quotechar='"')
        with open(output_path, 'w', newline='') as csvout:
            cwriter = csv.writer(csvout, delimiter=',',
                                 quotechar='"', quoting=csv.QUOTE_MINIMAL)
            for row in creader:
                if not row:
                    # Blank line in the input: nothing to classify.
                    cwriter.writerow(row)
                    continue
                # Classify the first column only, as annotate_csv does. A list
                # passed to the tokenizer is treated as a batch of texts, so
                # handing it the whole row would mix every column into the
                # flattened probabilities below.
                inputs = tokenizer(row[0], return_tensors="pt")
                # Inference only: skip building the autograd graph.
                with torch.no_grad():
                    outputs = DLmodel(**inputs)
                probs = outputs.logits.softmax(dim=-1).flatten().tolist()
                idx = probs.index(max(probs))

                row.append(labels[idx])
                row.append("{:.0%}".format(probs[idx]))
                cwriter.writerow(row)
    return output_path


def detect_gibberish_deep(line, f):
    """Classify a line of text, or annotate a CSV, with the deep-learning model.

    Args:
        line: Text to classify. When non-empty it takes precedence over ``f``.
        f: Uploaded file object (anything exposing a ``name`` path attribute)
            whose rows should be annotated. Only used when ``line`` is empty.

    Returns:
        A 3-tuple ``(readable_scores, annotated_csv_path, label_scores)``, one
        value per output of the Gradio interface. For text input,
        ``label_scores`` maps each model label to its softmax probability and
        ``readable_scores`` lists the same data as ``"<label> : <percent>"``
        lines. For file input only ``annotated_csv_path`` is set. All three
        entries are ``None`` when neither ``line`` nor ``f`` is provided.
    """
    if line:
        inputs = tokenizer(line, return_tensors="pt")
        labels = DLmodel.config.id2label
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = DLmodel(**inputs)
        probs = outputs.logits.softmax(dim=-1).flatten().tolist()
        output = dict(zip(labels.values(), probs))
        readable_output = "".join(
            "{} : {:.0%}\n".format(name, prob) for name, prob in output.items()
        )
        return readable_output, None, output
    if f:
        return None, annotate_csv_deep(f), None
    # Neither input supplied: still return one value per output slot rather
    # than a bare None, which a caller expecting three values cannot unpack.
    return None, None, None



def detect_gibberish_abstract(model, line, f):
    """Dispatch to the detector selected by ``model``.

    ``"Deep Learning Model"`` selects ``detect_gibberish_deep``; any other
    value falls back to the Markov-chain ``detect_gibberish``. ``line`` and
    ``f`` are forwarded unchanged and the chosen detector's result is returned.
    """
    handler = (
        detect_gibberish_deep
        if model == "Deep Learning Model"
        else detect_gibberish
    )
    return handler(line, f)


# --- Input components -------------------------------------------------------
# Single-line text box for the text to classify.
inputLine = gr.inputs.Textbox(
    lines=1,
    placeholder="Input text here, if both text and file have values, only the text input will be processed.",
    default="",
    label="Text",
    optional=False,
)
# Optional single-file upload for batch annotation.
inputFile = gr.inputs.File(
    file_count="single",
    type="file",
    label="File to Annotate",
    optional=True,
)


# Detector names offered in the dropdown; detect_gibberish_abstract compares
# the selected value against "Deep Learning Model".
choices = ["Deep Learning Model", "Markov Chain"]
inputModel = gr.inputs.Dropdown(choices)


# --- Output components ------------------------------------------------------
# NOTE: outputLine is not referenced by the gr.Interface call in this file,
# which uses the "text" shortcut for its first output instead.
outputLine = gr.outputs.Textbox(type="auto", label=None)
outputFile = gr.outputs.File(label="Annotated CSV")
# NOTE: this rebinds the name `label` imported from cProfile at the top of
# the file.
label = gr.outputs.Label(num_top_classes=4)


# Example rows for the demo, each ordered as [model, text, file]:
# first every model paired with each sample word (and a placeholder CSV),
# then every model paired with each sample file (and no text).
examples = [
    [model_name, sample_text, "demo_blank.csv"]
    for model_name in ("Deep Learning Model", "Markov Chain")
    for sample_text in ("quetzalcoatl", "aasdf", "Covfefe")
] + [
    [model_name, "", sample_file]
    for model_name in ("Deep Learning Model", "Markov Chain")
    for sample_file in ("demo_bad.txt", "demo_mixed.txt")
]
#iface = gr.Interface(fn=[detect_gibberish], inputs=["text",inputFile], outputs=["text",outputFile],examples=examples, allow_flagging='never')

#iface.launch()


# Build the demo UI: model selector, text box and file upload feed
# detect_gibberish_abstract, whose three return values fill the text,
# annotated-file and label outputs. The app is launched immediately.
iface = gr.Interface(
    fn=[detect_gibberish_abstract],
    inputs=[inputModel, inputLine, inputFile],
    outputs=["text", outputFile, label],
    examples=examples,
    allow_flagging='never',
)
iface.launch()