Spaces:
Running
Running
File size: 2,241 Bytes
4a3c603 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# coding=utf-8
# author: xusong <[email protected]>
# time: 2022/9/05 14:12
"""
TODO: 还要能判断是否需要回复。
"""
import torch
import gradio as gr
from info import article
from kplug import modeling_kplug_s2s_patch
from transformers import BertTokenizer, BartForConditionalGeneration
# The generation model and its tokenizer are loaded from the same pretrained
# checkpoint id; the model is loaded first, then the tokenizer.
_CHECKPOINT = "eson/kplug-base-jddc"
model = BartForConditionalGeneration.from_pretrained(_CHECKPOINT)
tokenizer = BertTokenizer.from_pretrained(_CHECKPOINT)
def predict(input, history=None):
    """Generate the bot's reply to ``input`` and return the updated dialogue.

    Concatenation scheme: every earlier turn is joined into a single string
    and used as the model input, without marking speaker roles. This is
    simple and crude; the author's rationale is that an encoder-decoder
    architecture does not confuse its input with its output (a GPT-style
    architecture would need the roles to be distinguished).

    Args:
        input: The user's new message. The name shadows the builtin but is
            kept so existing callers are unaffected.
        history: Flat list of earlier turns (user message, bot reply, user
            message, ...). ``None`` or an empty list starts a new dialogue.

    Returns:
        A ``(pairs, history)`` tuple. ``pairs`` is a list of
        ``(user_message, bot_reply)`` tuples built from consecutive turns;
        ``history`` is the flat turn list including the new message and the
        generated reply.
    """
    # Use None instead of a mutable default list, and tolerate a caller that
    # passes an uninitialised (None) state.
    if history is None:
        history = []

    # Append the new user message. If the history contains a wrong response,
    # that error may propagate into later turns.
    history = history + [input]

    # Only the last 500 characters of the joined dialogue are encoded.
    context = "".join(history)[-500:]
    bot_input_ids = tokenizer.encode(context, return_tensors="pt")

    # Generate a response.
    # NOTE(review): BertTokenizer usually defines no EOS token, so
    # eos_token_id may resolve to None here -- confirm this is intended.
    generated = model.generate(
        bot_input_ids,
        max_length=1000,
        pad_token_id=tokenizer.eos_token_id,
    ).tolist()

    # Decode the first generated sequence and remove all whitespace
    # (presumably the decoder separates tokens with spaces).
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    reply = "".join(decoded.split())
    history = history + [reply]

    # Pair consecutive turns as (user, bot) tuples; a trailing unpaired turn
    # is left out.
    pairs = list(zip(history[::2], history[1::2]))
    return pairs, history
# Sample customer queries, grouped by topic.
jddc_examples = [
    # price protection (refund of the difference after a price drop)
    "昨天刚买的怎么就降了几十块,应该补给我差价吧",
    # stock availability
    "请问这个猕猴桃是有货的吗?",
    # delivery time
    "我下的这个单怎么还没到",
    # courier / shipping carrier
    "发什么快递",
    "能发邮政吗",
]
# Text box for the user's message, pre-filled with a sample query.
_query_box = gr.Textbox(label="输入文本", value="发什么快递")

# Gradio interface wiring `predict` to a text input plus state, and to a
# chatbot output plus state.
# Original author's note: gr.State() raised an error (presumably when used in
# place of the "state" shortcut); the earlier shorthand form was
# inputs=["text", "state"].
jddc_iface = gr.Interface(
    fn=predict,
    inputs=[_query_box, "state"],
    outputs=["chatbot", "state"],
    examples=jddc_examples,
    title="电商客服-生成式对话(Response Generation)",
    article=article,
)
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    jddc_iface.launch()
|