Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -7,6 +7,7 @@ import uvicorn
 from dotenv import load_dotenv
 from difflib import SequenceMatcher
 import re
+import spaces  # Import the spaces library
 
 # Load environment variables
 load_dotenv()
@@ -36,12 +37,17 @@ model_configs = [
 class ModelManager:
     def __init__(self):
         self.models = []
+        self.loaded = False  # To check whether the models are already loaded
 
     def load_model(self, model_config):
         print(f"Loading model: {model_config['name']}...")
         return {"model": Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename']), "name": model_config['name']}
 
     def load_all_models(self):
+        if self.loaded:  # If the models are already loaded, do not reload them
+            print("Models are already loaded. No need to load them again.")
+            return self.models
+
         print("Starting to load models...")
         with ThreadPoolExecutor(max_workers=len(model_configs)) as executor:
             futures = [executor.submit(self.load_model, config) for config in model_configs]
@@ -53,11 +59,16 @@ class ModelManager:
                 print(f"Model loaded successfully: {model['name']}")
             except Exception as e:
                 print(f"Error loading model: {e}")
+
+        self.models = models
+        self.loaded = True  # Mark as loaded
         print("All models have been loaded.")
-        return models
+        return self.models
 
-# Instantiate ModelManager
+# Instantiate ModelManager
 model_manager = ModelManager()
+
+# Load models when the application starts, only the first time
 global_data['models'] = model_manager.load_all_models()
 
 # Global model for the chat request
@@ -68,6 +79,7 @@ class ChatRequest(BaseModel):
     temperature: float = 0.7
 
 # Function to generate chat responses
+@spaces.GPU(duration=0)  # Decorator to use the GPU, with duration 0
 def generate_chat_response(request, model_data):
     try:
         user_input = normalize_input(request.message)
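The substance of the commit is making load_all_models idempotent: the new loaded flag caches the result so the ThreadPoolExecutor fan-out runs only on the first call. Below is a minimal, runnable sketch of that load-once pattern; the stubbed load_model stands in for the real Llama.from_pretrained call, and the model_configs entries are illustrative, since neither appears in full in this diff.

from concurrent.futures import ThreadPoolExecutor, as_completed

# Illustrative stand-ins for the real repo_id/filename entries (not shown in this hunk)
model_configs = [
    {"name": "model-a", "repo_id": "org/model-a", "filename": "a.gguf"},
    {"name": "model-b", "repo_id": "org/model-b", "filename": "b.gguf"},
]

class ModelManager:
    def __init__(self):
        self.models = []
        self.loaded = False  # guard flag: load the models only once

    def load_model(self, config):
        # Stub standing in for Llama.from_pretrained(repo_id=..., filename=...)
        return {"model": f"<llama:{config['repo_id']}>", "name": config["name"]}

    def load_all_models(self):
        if self.loaded:  # later calls return the cached list immediately
            return self.models
        models = []
        with ThreadPoolExecutor(max_workers=len(model_configs)) as executor:
            futures = [executor.submit(self.load_model, c) for c in model_configs]
            for future in as_completed(futures):
                try:
                    models.append(future.result())
                except Exception as e:  # one failed load does not abort the rest
                    print(f"Error loading model: {e}")
        self.models = models
        self.loaded = True
        return self.models

manager = ModelManager()
print(len(manager.load_all_models()))  # 2 -- loads both stubs
print(len(manager.load_all_models()))  # 2 -- second call hits the cache

One caveat: the flag check is not thread-safe, so if two requests could race to trigger the first load, a threading.Lock around the check would be needed.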
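spaces.GPU is the ZeroGPU decorator from Hugging Face's spaces package: it requests a GPU allocation around each call to the wrapped function, with duration giving the requested GPU time in seconds (this commit passes duration=0). The following sketch shows how the decorated handler could sit inside the FastAPI app implied by the uvicorn import and the ChatRequest model; the /chat route, the response shape, and port 7860 are assumptions, not shown in the diff.

import spaces
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class ChatRequest(BaseModel):
    message: str                # request.message is read in the diff
    temperature: float = 0.7    # field shown in the diff

@spaces.GPU(duration=0)  # ZeroGPU allocation around each call, as in the commit
def generate_chat_response(request: ChatRequest, model_data: dict) -> str:
    # Placeholder for the real llama-cpp inference in app.py
    return f"echo ({request.temperature}): {request.message}"

@app.post("/chat")  # hypothetical route; the diff does not show the endpoints
def chat(request: ChatRequest):
    return {"response": generate_chat_response(request, {})}

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)  # 7860 is the usual Spaces port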