import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face access token (replace with your own key, or export HF_TOKEN before running)
HF_TOKEN = os.environ.get("HF_TOKEN", "your_hf_api_token_here")

# Load the available models
models = {
    "GPT-MUX (Code)": {
        "tokenizer": AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b", token=HF_TOKEN),
        "model": AutoModelForCausalLM.from_pretrained(
            "EleutherAI/gpt-neox-20b",
            torch_dtype=torch.float16,
            device_map="auto",
            token=HF_TOKEN,
        ),
    },
    "Mistral": {
        "tokenizer": AutoTokenizer.from_pretrained("mistral_model_endpoint", token=HF_TOKEN),
        "model": AutoModelForCausalLM.from_pretrained("mistral_model_endpoint", token=HF_TOKEN),
    },
    "CodeLlama": {
        "tokenizer": AutoTokenizer.from_pretrained("codellama_model_endpoint", token=HF_TOKEN),
        "model": AutoModelForCausalLM.from_pretrained("codellama_model_endpoint", token=HF_TOKEN),
    },
    # More models can be added here in the future
}


# Generate a response with the selected model
def generate_response(model_choice, input_data, max_tokens, temperature, top_p):
    selected_model = models[model_choice]
    tokenizer = selected_model["tokenizer"]
    model = selected_model["model"]
    inputs = tokenizer(input_data, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response


# Generate a response from an uploaded text file
def generate_response_from_file(file, model_choice, max_tokens, temperature, top_p):
    if file is None:
        return "Please upload a file."
    # gr.File may return a temp-file object or a plain path, depending on the Gradio version
    file_path = file.name if hasattr(file, "name") else file
    with open(file_path, "r", encoding="utf-8") as f:  # the uploaded file is assumed to be plain text
        content = f.read()
    return generate_response(model_choice, content, max_tokens, temperature, top_p)


# Gradio interface with multiple model options
with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.Tab("Text"):
            # Model selector
            model_selector_text = gr.Dropdown(list(models.keys()), label="Select the model")
            # Input data
            input_box = gr.Textbox(label="Enter the text")
            # Parameter sliders
            max_tokens_slider_text = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
            temperature_slider_text = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
            top_p_slider_text = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
            # Generated text output
            output_text = gr.Textbox(label="Generated result")
            # Button that triggers generation
            generate_btn_text = gr.Button("Generate response")
            generate_btn_text.click(
                generate_response,
                inputs=[model_selector_text, input_box, max_tokens_slider_text, temperature_slider_text, top_p_slider_text],
                outputs=output_text,
            )

        with gr.Tab("File upload"):
            # Model selector
            model_selector_file = gr.Dropdown(list(models.keys()), label="Select the model")
            # File upload
            file_input = gr.File(label="Upload a text file")
            # Parameter sliders
            max_tokens_slider_file = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
            temperature_slider_file = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
            top_p_slider_file = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
            # Generated text output
            output_file = gr.Textbox(label="Generated result")
            # Button that triggers generation
            generate_btn_file = gr.Button("Generate response")
            generate_btn_file.click(
                generate_response_from_file,
                inputs=[file_input, model_selector_file, max_tokens_slider_file, temperature_slider_file, top_p_slider_file],
                outputs=output_file,
            )

# Launch the interface
if __name__ == "__main__":
    demo.launch()
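# Note on assumptions in the script above: "mistral_model_endpoint" and "codellama_model_endpoint"
# are placeholders and must be replaced with real Hugging Face model ids before the script will run,
# and loading "EleutherAI/gpt-neox-20b" with device_map="auto" additionally requires the `accelerate`
# package plus roughly 40 GB of GPU memory for the float16 weights.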