# GIGAChat-farsi / app.py
# GIGAParviz - "Upload app.py" (commit 4fd7fd0, verified)
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, GenerationConfig
import re
import time
# Checkpoint identifier shared by the tokenizer and the seq2seq model.
_CHECKPOINT = "GIGAParviz/T5_fa_law_chatbot"

# Both objects are created once at import time and reused by every request.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
def clear_text(text):
    """Return *text* reduced to the characters the chatbot prompt keeps.

    For a ``str`` input, every run of characters that falls outside the
    ranges ``آ-ی`` and ``۰-۹`` (this includes whitespace, Latin letters,
    ASCII digits and punctuation) is replaced by a single space.

    Any non-``str`` input is only converted with ``str()``; it is not
    filtered.
    """
    # Guard clause: non-string values are stringified and returned as-is.
    if not isinstance(text, str):
        return str(text)
    return re.sub("[^آ-ی۰-۹]+", " ", text)
def generate_response(message, chat_history):
    """Generate an answer for *message* and stream it into the chat history.

    The message is cleaned with ``clear_text``, wrapped in the fixed
    instruction prompt, tokenized, and passed to the module-level ``model``
    using sampling-based generation on the CPU. The decoded answer is then
    yielded as growing prefixes (10 more characters per step, with a short
    pause between steps) so the UI shows it appearing progressively.

    Args:
        message: Text submitted by the user.
        chat_history: Existing sequence of ``(user, bot)`` pairs. ``None``
            or an empty value is treated as an empty history.

    Yields:
        A new list made of the previous history plus one
        ``(message, partial_answer)`` pair; the last yielded list carries
        the complete answer.
    """
    # A missing history must not break the list concatenation below.
    history = list(chat_history or [])

    question = clear_text(message)
    start = "Answer The Question in farsi: "
    end = "Answer: "
    prompt = [start + question + end]

    generation_config = GenerationConfig(
        max_new_tokens=128,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.8,
        repetition_penalty=1.2,
    )

    input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to("cpu")
    model.to("cpu")
    # max_new_tokens is already part of generation_config, so it is not
    # repeated as a keyword argument here.
    outputs = model.generate(input_ids, generation_config=generation_config)
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)

    chunk_size = 10
    delay = 0.1

    # An empty answer still has to produce one update for the chatbot.
    if not result:
        yield history + [(message, result)]
        return

    for stop in range(chunk_size, len(result) + chunk_size, chunk_size):
        yield history + [(message, result[:stop])]
        # Pause only between partial frames; the final frame already holds
        # the full answer, so no extra sleep or duplicate yield is needed.
        if stop < len(result):
            time.sleep(delay)
# Build the chat UI: a header, the chatbot panel, a single-line input box
# whose submit event streams updates from generate_response into the
# chatbot, and a button that clears both the input and the conversation.
with gr.Blocks() as demo:
    # The stray " \" before </p> was removed: "\<" is an invalid escape
    # sequence and the backslash leaked into the rendered header.
    gr.Markdown(
        "<h1 style='text-align: center;'>💬 Legal Chatbot</h1>"
        "<p style='text-align: center;'>made by A.M.Parviz</p>"
    )
    chatbot = gr.Chatbot(label="جواب")
    msg = gr.Textbox(label="ورودی", placeholder="سوال حقوقی خودتون رو بپرسید", lines=1)
    msg.submit(generate_response, [msg, chatbot], chatbot)
    clear = gr.ClearButton([msg, chatbot])

# generate_response is a generator; enabling the queue explicitly keeps
# streaming working on Gradio versions where it is not on by default.
demo.queue()
demo.launch()