import torch
import gradio as gr
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the model and tokenizer from the Hugging Face repository
model_repo_id = "Ajay12345678980/QA_bot"  # Replace with your model repository ID

# Pick a device so the app also runs on CPU-only machines
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize the model and tokenizer, moving the model to the chosen device
model = GPT2LMHeadModel.from_pretrained(model_repo_id).to(device)
tokenizer = GPT2Tokenizer.from_pretrained(model_repo_id)

# Marker that terminates the answer in the decoded output. The original value
# was lost (likely an angle-bracket token stripped in rendering); restore it to
# whatever end token your model was trained with. An empty marker is treated
# as "not present" below.
ANSWER_END_MARKER = ""

# Define the prediction function
def generate_answer(question):
    input_ids = tokenizer.encode(question, return_tensors="pt").to(device)

    # Create the attention mask and pad token id
    attention_mask = torch.ones_like(input_ids).to(device)
    pad_token_id = tokenizer.eos_token_id

    output = model.generate(
        input_ids,
        max_new_tokens=100,
        num_return_sequences=1,
        attention_mask=attention_mask,
        pad_token_id=pad_token_id,
    )

    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)

    start_index = decoded_output.find("Answer")
    if start_index == -1:
        # Fall back to the raw output if the "Answer" marker is absent
        return decoded_output.strip()

    end_index = decoded_output.find(ANSWER_END_MARKER, start_index) if ANSWER_END_MARKER else -1
    if end_index != -1:
        # Extract the text between "Answer" and the end marker
        return decoded_output[start_index + len("Answer"):end_index].strip()

    # If the end marker is not found, return the text following "Answer"
    return decoded_output[start_index + len("Answer"):].strip()

# Gradio interface setup
interface = gr.Interface(
    fn=generate_answer,
    inputs="text",
    outputs="text",
    title="GPT-2 Text Generation",
    description="Enter some text and see what the model generates!",
)

# Launch the Gradio app
if __name__ == "__main__":
    interface.launch()
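
# A quick way to sanity-check the model without spinning up the Gradio UI is
# to call generate_answer() directly. This is a minimal sketch: it assumes
# this script is saved as app.py, and the sample question is illustrative
# only (not from the model's training data).
#
#   from app import generate_answer
#   print(generate_answer("What is overfitting in machine learning?"))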