eloi-goncalves committed on
Commit 9fd0422 · 1 Parent(s): 4922309

Update app.py

Files changed (1)
  1. app.py +20 -204
app.py CHANGED
@@ -1,210 +1,26 @@
- from transformers import AutoModelForQuestionAnswering, AutoModelForSeq2SeqLM, AutoTokenizer, PegasusForConditionalGeneration, PegasusTokenizer, pipeline
- import gradio as grad
- import ast

- # mdl_name = "deepset/roberta-base-squad2"
- # my_pipeline = pipeline('question-answering', model=mdl_name, tokenizer=mdl_name)

- # model_translate_name = 'danhsf/m2m100_418M-finetuned-kde4-en-to-pt_BR'
- # model_translate = AutoModelForSeq2SeqLM.from_pretrained(model_translate_name)
- # model_translate_token = AutoTokenizer.from_pretrained(model_translate_name)
- # translate_pipeline = pipeline('translation', model=model_translate_name)

- def answer_question(question,context):
-     text= "{"+"'question': '"+question+"','context': '"+context+"'}"
-     di=ast.literal_eval(text)
-     response = my_pipeline(di)
-     print('response', response)
-     return response
- #grad.Interface(answer_question, inputs=["text","text"], outputs="text").launch()


- def translate(text):
-     inputs = model_translate_token(text, return_tensor='pt')
-     translate_output = model_translate.generate(**inputs)
-     response = model_translate_token(translate_output[0], skip_special_tokens=True)
-     #response = translate_pipeline(text)
-     return response
- # grad.Interface(translate, inputs=['text',], outputs='text').launch()

-
- # mdl_name = "google/pegasus-xsum"
- # pegasus_tkn = PegasusTokenizer.from_pretrained(mdl_name)
- # mdl = PegasusForConditionalGeneration.from_pretrained(mdl_name)
-
- def summarize(text):
-     tokens = pegasus_tkn(text, truncation=True, padding="longest", return_tensors="pt")
-     txt_summary = mdl.generate(**tokens, num_return_sequences=5, max_length=200, temperature=1.5,num_beams=10)
-     response = pegasus_tkn.batch_decode(txt_summary, skip_special_tokens=True)
-     return response
-
- # txt=grad.Textbox(lines=10, label="English", placeholder="English Text here")
- # out=grad.Textbox(lines=10, label="Summary")
- # grad.Interface(summarize, inputs=txt, outputs=out).launch()
-
- # ZeroShotClassification using pipeline
- # from transformers import pipeline
- # import gradio as grad
- # zero_shot_classifier = pipeline("zero-shot-classification")
- def classify(text,labels):
-     classifer_labels = labels.split(",")
-     #["software", "politics", "love", "movies", "emergency", "advertisment","sports"]
-     response = zero_shot_classifier(text,classifer_labels)
-     return response
- # txt=grad.Textbox(lines=1, label="English", placeholder="text to be classified")
- # labels=grad.Textbox(lines=1, label="Labels", placeholder="comma separated labels")
- # out=grad.Textbox(lines=1, label="Classification")
- # grad.Interface(classify, inputs=[txt,labels], outputs=out).launch()
-
- # Text classification using BartForSequenceClassification
- # from transformers import BartForSequenceClassification, BartTokenizer
- # import gradio as grad
- # bart_tkn = BartTokenizer.from_pretrained('facebook/bart-large-mnli')
- # mdl = BartForSequenceClassification.from_pretrained('facebook/bart-large-mnli')
- def classify(text,label):
-     tkn_ids = bart_tkn.encode(text, label, return_tensors='pt')
-     tkn_lgts = mdl(tkn_ids)[0]
-     entail_contra_tkn_lgts = tkn_lgts[:,[0,2]]
-     probab = entail_contra_tkn_lgts.softmax(dim=1)
-     response = probab[:,1].item() * 100
-     return response
- # txt=grad.Textbox(lines=1, label="English", placeholder="text to be classified")
- # labels=grad.Textbox(lines=1, label="Label", placeholder="Input a Label")
- # out=grad.Textbox(lines=1, label="Probablity of label being true is")
- # grad.Interface(classify, inputs=[txt,labels], outputs=out).launch()
-
- # GPT2
- # from transformers import GPT2LMHeadModel,GPT2Tokenizer
- # import gradio as grad
- # mdl = GPT2LMHeadModel.from_pretrained('gpt2')
- # gpt2_tkn=GPT2Tokenizer.from_pretrained('gpt2')
- def generate(starting_text):
-     tkn_ids = gpt2_tkn.encode(starting_text, return_tensors = 'pt')
-     gpt2_tensors = mdl.generate(tkn_ids,max_length=100,no_repeat_ngram_size=True,num_beams=3,do_sample=True)
-     response=""
-     #response = gpt2_tensors
-     for i, x in enumerate(gpt2_tensors):
-         response=response+f"{i}: {gpt2_tkn.decode(x, skip_special_tokens=True)}"
-     return response
- # txt=grad.Textbox(lines=1, label="English", placeholder="English Text here")
- # out=grad.Textbox(lines=1, label="Generated Text")
- # grad.Interface(generate, inputs=txt, outputs=out).launch()
-
- #DistlGPT2
- # from transformers import pipeline, set_seed
- # import gradio as grad
- # gpt2_pipe = pipeline('text-generation', model='distilgpt2')
- # set_seed(42)
- def generateDistlGPT2(starting_text):
-     response= gpt2_pipe(starting_text, max_length=20, num_return_sequences=5)
-     return response
- # txt=grad.Textbox(lines=1, label="English", placeholder="English Text here")
- # out=grad.Textbox(lines=1, label="Generated Text")
- # grad.Interface(generateDistlGPT2, inputs=txt, outputs=out).launch()
-
- #Text Generation
- #Question Generation
- # from transformers import AutoModelWithLMHead, AutoTokenizer
- # import gradio as grad
- # text2text_tkn = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
- # mdl = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
- def text2text(context,answer):
-     input_text = "answer: %s context: %s </s>" % (answer, context)
-     features = text2text_tkn ([input_text], return_tensors='pt')
-     output = mdl.generate(input_ids=features['input_ids'],
-                           attention_mask=features['attention_mask'],
-                           max_length=64)
-     response=text2text_tkn.decode(output[0])
-     return response
- # context=grad.Textbox(lines=10, label="English", placeholder="Context")
- # ans=grad.Textbox(lines=1, label="Answer")
- # out=grad.Textbox(lines=1, label="Genereated Question")
- # grad.Interface(text2text, inputs=[context,ans], outputs=out).launch()
-
- #T5 summaryzer
- # from transformers import AutoTokenizer, AutoModelWithLMHead
- # import gradio as grad
- # text2text_tkn = AutoTokenizer.from_pretrained("deep-learning-analytics/wikihow-t5-small")
- # mdl = AutoModelWithLMHead.from_pretrained("deep-learning-analytics/wikihow-t5-small")
- def text2text_summary(para):
-     initial_txt = para.strip().replace("\n","")
-     tkn_text = text2text_tkn.encode(initial_txt, return_tensors="pt")
-     tkn_ids = mdl.generate(
-         tkn_text,
-         max_length=250,
-         num_beams=5,
-         repetition_penalty=2.5,
-         early_stopping=True
-     )
-     response = text2text_tkn.decode(tkn_ids[0], skip_special_tokens=True)
-     return response
- # para=grad.Textbox(lines=10, label="Paragraph", placeholder="Copy paragraph")
- # out=grad.Textbox(lines=1, label="Summary")
- # grad.Interface(text2text_summary, inputs=para, outputs=out).launch()
-
- # T5 Translate
- # from transformers import T5ForConditionalGeneration, T5Tokenizer
- # import gradio as grad
- # text2text_tkn= T5Tokenizer.from_pretrained("t5-small")
- # mdl = T5ForConditionalGeneration.from_pretrained("t5-small")
- def text2text_translation(text):
-     inp = "translate English to Portuguese: "+text
-     enc = text2text_tkn(inp, return_tensors="pt", max_length=512, truncation=True)
-     tokens = mdl.generate(**enc, max_length=100, num_return_sequences=1, early_stopping=True)
-     response=text2text_tkn.decode(tokens[0], skip_special_tokens=True)
-     return response
- # para=grad.Textbox(lines=1, label="English Text", placeholder="Text in English")
- # out=grad.Textbox(lines=1, label="Portuguese Translation")
- # grad.Interface(text2text_translation, inputs=para, outputs=out).launch()
-
-
-
- # ChatBot
- from transformers import AutoModelForCausalLM, AutoTokenizer,BlenderbotForConditionalGeneration
- import torch
- chat_tkn = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
- mdl = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
- #chat_tkn = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
- #mdl = BlenderbotForConditionalGeneration.from_pretrained("facebook/blenderbot-400M-distill")
-
-
- def converse(user_input, chat_history=[]):
-     user_input_ids = chat_tkn(user_input + chat_tkn.eos_token, return_tensors='pt').input_ids
-     # keep history in the tensor
-     bot_input_ids = torch.cat([torch.LongTensor(chat_history), user_input_ids], dim=-1)
-     # get response
-     chat_history = mdl.generate(bot_input_ids, max_length=1000, pad_token_id=chat_tkn.eos_token_id).tolist()
-     print (chat_history)
-     response = chat_tkn.decode(chat_history[0]).split("<|endoftext|>")
-     print("starting to print response")
-     print(response)
-     # html for display
-     html = "<div class='mychat'>"
-     for x, mesg in enumerate(response):
-         if x%2!=0 :
-             mesg="Alicia:"+mesg
-             clazz="alicia"
-         else :
-             clazz="user"
-         print("value of x")
-         print(x)
-         print("message")
-         print (mesg)
-         html += "<div class='mesg {}'> {}</div>".format(clazz, mesg)
-     html += "</div>"
-     print(html)
-     return html, chat_history
- import gradio as grad
- css = """
- .mychat {display:flex;flex-direction:column}
- .mesg {padding:5px;margin-bottom:5px;border-radius:5px;width:75%}
- .mesg.user {background-color:lightblue;color:white}
- .mesg.alicia {background-color:orange;color:white,align-self:self-end}
- .footer {display:none !important}
- """
- text=grad.inputs.Textbox(placeholder="Lets chat")
- grad.Interface(fn=converse,
-                theme="default",
-                inputs=[text, "state"],
-                outputs=["html", "state"],
-                css=css).launch()
 
+ import gradio as gr
+ from transformers import pipeline
+ import numpy as np

+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

+ def transcribe(stream, new_chunk):
+     sr, y = new_chunk
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))

+     if stream is not None:
+         stream = np.concatenate([stream, y])
+     else:
+         stream = y
+     return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]


+ demo = gr.Interface(
+     transcribe,
+     ["state", gr.Audio(source="microphone", streaming=True)],
+     ["state", "text"],
+     live=True,
+ )

+ demo.launch()
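
The new app.py follows Gradio's standard streaming-transcription pattern: each microphone chunk arrives as a (sample_rate, samples) tuple, is converted to float32 and peak-normalized, concatenated onto the buffer held in the "state" component, and the whole buffer is re-transcribed on every update. Below is a minimal local sketch of the same loop fed a synthetic 440 Hz tone instead of microphone input; the tone, the loop, and the zero-peak guard are illustrative additions, not part of the commit.

# Illustrative sketch only: replays the accumulate-then-transcribe loop
# from the new app.py with synthetic chunks. Requires downloading the
# openai/whisper-base.en weights; the transcript of a pure tone is
# meaningless, the point is the stream-state handling.
import numpy as np
from transformers import pipeline

transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

sr = 16000  # Whisper models expect 16 kHz mono audio
t = np.linspace(0, 0.5, sr // 2, endpoint=False)
chunk = (0.1 * np.sin(2 * np.pi * 440 * t)).astype(np.float32)  # half-second tone

stream = None
for _ in range(3):  # three "microphone" updates
    y = chunk.copy()
    peak = np.max(np.abs(y))
    if peak > 0:  # guard added here; app.py divides unconditionally
        y /= peak
    stream = y if stream is None else np.concatenate([stream, y])
    text = transcriber({"sampling_rate": sr, "raw": stream})["text"]
    print(f"{stream.size / sr:.1f}s buffered -> {text!r}")

Because the full buffer is transcribed from scratch on every chunk, per-update cost grows with recording length; that trade-off is inherent to this simple streaming pattern.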