Spaces:
Paused
Paused
M4sterStudy
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,6 @@ import os
|
|
2 |
from huggingface_hub import login
|
3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
import gradio as gr
|
5 |
-
import torch
|
6 |
|
7 |
# Autenticar usando el token almacenado como secreto
|
8 |
hf_token = os.getenv("HF_API_TOKEN")
|
@@ -11,21 +10,17 @@ login(hf_token)
|
|
11 |
# Cargar el modelo y el tokenizador
|
12 |
model_name = "DeepESP/gpt2-spanish"
|
13 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
14 |
-
model = AutoModelForCausalLM.from_pretrained(model_name)
|
15 |
|
16 |
def chat_with_gpt2_spanish(input_text):
|
17 |
-
|
18 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
19 |
-
print(f"Using device: {device}")
|
20 |
-
|
21 |
-
inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512).to(device)
|
22 |
outputs = model.generate(
|
23 |
**inputs,
|
24 |
-
max_length=
|
25 |
-
num_beams=1,
|
26 |
-
temperature=0.7,
|
27 |
-
top_p=0.9,
|
28 |
-
no_repeat_ngram_size=2,
|
29 |
early_stopping=True
|
30 |
)
|
31 |
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
2 |
from huggingface_hub import login
|
3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
import gradio as gr
|
|
|
5 |
|
6 |
# Autenticar usando el token almacenado como secreto
|
7 |
hf_token = os.getenv("HF_API_TOKEN")
|
|
|
10 |
# Cargar el modelo y el tokenizador
|
11 |
model_name = "DeepESP/gpt2-spanish"
|
12 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
13 |
+
model = AutoModelForCausalLM.from_pretrained(model_name)
|
14 |
|
15 |
def chat_with_gpt2_spanish(input_text):
|
16 |
+
inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
|
|
|
|
|
|
|
|
|
17 |
outputs = model.generate(
|
18 |
**inputs,
|
19 |
+
max_length=30, # Limitar la longitud de la respuesta
|
20 |
+
num_beams=1, # Usar solo un haz para velocidad
|
21 |
+
temperature=0.7, # Ajustar la temperatura para respuestas menos repetitivas
|
22 |
+
top_p=0.9, # Usar top-p (nucleus sampling) para variedad
|
23 |
+
no_repeat_ngram_size=2, # Evitar la repetición de n-gramas
|
24 |
early_stopping=True
|
25 |
)
|
26 |
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|