import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Define the BLOOM model name
model_name = "CreitinGameplays/bloom-3b-conversational"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


def generate_text(prompt):
    """Generates text using the BLOOM model from Hugging Face Transformers."""
    # Encode the prompt into tokens
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    # Generate text, limiting the total length (prompt + completion) to 256 tokens
    output = model.generate(
        input_ids=input_ids,
        max_length=256,
        num_beams=1,
        num_return_sequences=1,  # Generate only 1 sequence
        do_sample=True,  # Enable sampling so top_k/top_p/temperature take effect
        top_k=50,  # Sample only from the 50 most likely tokens at each step
        top_p=0.15,  # Nucleus sampling: keep the smallest token set whose cumulative probability reaches 0.15
        temperature=0.1,  # Low temperature sharpens the distribution toward the most likely tokens (1.0 is the neutral default)
        repetition_penalty=1.165,  # Penalize already-generated tokens to reduce repetition
    )

    # Decode the generated token sequence back to text
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text


# Define the Gradio interface
interface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(
            label="Text Prompt",
            value="<|system|> You are a helpful AI assistant <|prompter|> What is an AI? <|assistant|>",
        ),
    ],
    outputs="text",
    description="Interact with BLOOM (Loaded with Hugging Face Transformers)",
)

# Launch the Gradio interface
interface.launch()
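
# Usage note (a sketch; the filename app.py below is illustrative, not prescribed by the source):
#
#   pip install gradio torch transformers
#   python app.py
#
# Gradio then serves a local web UI, by default at http://127.0.0.1:7860, where the
# prompt textbox is pre-filled with the <|system|>/<|prompter|>/<|assistant|> template
# that this conversational BLOOM fine-tune expects.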