M4sterStudy committed
Commit: bc867cd
Parent: 14c1ab3

Update app.py

Files changed (1): app.py (+7, -12)
app.py CHANGED
@@ -2,7 +2,6 @@ import os
 from huggingface_hub import login
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
-import torch
 
 # Authenticate using the token stored as a secret
 hf_token = os.getenv("HF_API_TOKEN")
@@ -11,21 +10,17 @@ login(hf_token)
 # Load the model and tokenizer
 model_name = "DeepESP/gpt2-spanish"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")
+model = AutoModelForCausalLM.from_pretrained(model_name)
 
 def chat_with_gpt2_spanish(input_text):
-    # Check whether a GPU is available
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    print(f"Using device: {device}")
-
-    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512).to(device)
+    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
     outputs = model.generate(
         **inputs,
-        max_length=100,
-        num_beams=1,
-        temperature=0.7,
-        top_p=0.9,
-        no_repeat_ngram_size=2,
+        max_length=30,           # limit the response length
+        num_beams=1,             # single beam for speed
+        temperature=0.7,         # adjust temperature for less repetitive responses
+        top_p=0.9,               # top-p (nucleus sampling) for variety
+        no_repeat_ngram_size=2,  # avoid repeating n-grams
         early_stopping=True
     )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
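
For reference, a minimal sketch of how app.py might read after this commit (not part of the diff): the return statement and the Gradio wiring are assumptions, since the hunk ends at the tokenizer.decode call.

import os
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr

# Authenticate using the token stored as a secret
hf_token = os.getenv("HF_API_TOKEN")
login(hf_token)

# Load the model and tokenizer; after this commit the model stays on CPU (no .to("cuda"))
model_name = "DeepESP/gpt2-spanish"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def chat_with_gpt2_spanish(input_text):
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
    outputs = model.generate(
        **inputs,
        max_length=30,           # limit the response length
        num_beams=1,             # single beam for speed
        temperature=0.7,         # note: temperature/top_p only take effect with do_sample=True
        top_p=0.9,
        no_repeat_ngram_size=2,  # avoid repeating n-grams
        early_stopping=True
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response  # assumption: the function returns the decoded text

# Assumption: a simple text-to-text Gradio interface around the function
demo = gr.Interface(fn=chat_with_gpt2_spanish, inputs="text", outputs="text")
demo.launch()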