import gradio as gr
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
# Load the fine-tuned chatbot model and the matching base tokenizer
model = GPT2LMHeadModel.from_pretrained('raihanp/medical-chatbot')
tokenizer = GPT2Tokenizer.from_pretrained('openai-community/gpt2-medium')

# GPT-2 has no pad token by default; reuse the EOS token for padding
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Inference only: put the model in eval mode
model.eval()
def chat_with_bot(prompt):
    # Wrap the user input in the instruction format used during fine-tuning
    formatted = f"[INST] {prompt} \n[/INST]"
    inputs = tokenizer(formatted, return_tensors="pt", padding=True)
    # Generate a response (up to 50 new tokens)
    with torch.no_grad():
        outputs = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=50,
            num_return_sequences=1,
            pad_token_id=tokenizer.pad_token_id,
        )

    # Decode only the newly generated tokens, skipping the echoed prompt
    # (more robust than slicing the decoded string by character count)
    new_tokens = outputs[0][inputs['input_ids'].shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return response.strip()
# Expose the chatbot through a simple text-in/text-out Gradio interface
iface = gr.Interface(fn=chat_with_bot, inputs="text", outputs="text")
iface.launch()
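
# Once the app is running, the same endpoint can also be queried
# programmatically. A minimal sketch using gradio_client, to be run from a
# separate script; the Space ID below is an assumption, substitute the
# actual Space name:
#
#   from gradio_client import Client
#
#   client = Client("raihanp/medical-chatbot")  # hypothetical Space ID
#   # gr.Interface registers its function under the default endpoint "/predict"
#   print(client.predict("What are common symptoms of the flu?", api_name="/predict"))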