Uhhy committed
Commit a81956f
Parent: 14c6d65

Update app.py

Files changed (1)
  1. app.py +14 -6
app.py CHANGED
@@ -5,6 +5,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 from tqdm import tqdm
 import uvicorn
 from dotenv import load_dotenv
+from difflib import SequenceMatcher
 import re
 import spaces
 
@@ -43,28 +44,30 @@ class ModelManager:
         self.models = []
         self.loaded = False
 
-    @spaces.GPU(duration=0)
     def load_model(self, model_config):
         print(f"Cargando modelo: {model_config['name']}...")
         return {"model": Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename']), "name": model_config['name']}
 
-    @spaces.GPU(duration=0)
     def load_all_models(self):
         if self.loaded:
+            print("Modelos ya están cargados. No es necesario volver a cargarlos.")
             return self.models
 
+        print("Iniciando carga de modelos...")
         with ThreadPoolExecutor() as executor:
            futures = [executor.submit(self.load_model, config) for config in model_configs]
            models = []
-            for future in as_completed(futures):
+            for future in tqdm(as_completed(futures), total=len(model_configs), desc="Cargando modelos", unit="modelo"):
                try:
                    model = future.result()
                    models.append(model)
+                    print(f"Modelo cargado exitosamente: {model['name']}")
                except Exception as e:
-                    pass
+                    print(f"Error al cargar el modelo: {e}")
 
         self.models = models
         self.loaded = True
+        print("Todos los modelos han sido cargados.")
         return self.models
 
 model_manager = ModelManager()
@@ -115,6 +118,7 @@ def remove_repetitive_responses(responses):
     return unique_responses
 
 def select_best_response(responses):
+    print("Filtrando respuestas...")
     responses = remove_repetitive_responses(responses)
     responses = [remove_duplicates(response['response']) for response in responses]
     unique_responses = list(dict.fromkeys(responses))
@@ -126,6 +130,8 @@ async def generate_chat(request: ChatRequest):
     if not request.message.strip():
         raise HTTPException(status_code=400, detail="The message cannot be empty.")
 
+    print(f"Procesando solicitud: {request.message}")
+
     responses = []
     num_models = len(global_data['models'])
 
@@ -136,12 +142,14 @@ async def generate_chat(request: ChatRequest):
             response = future.result()
             responses.append(response)
         except Exception as exc:
-            pass
+            print(f"Error en la generación de respuesta: {exc}")
 
     if not responses:
         raise HTTPException(status_code=500, detail="Error: No se generaron respuestas.")
 
     best_response = select_best_response(responses)
+
+    print(f"Mejor respuesta seleccionada: {best_response}")
 
     return {
         "best_response": best_response,
@@ -149,4 +157,4 @@ async def generate_chat(request: ChatRequest):
     }
 
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=7860)
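
The substantive change in load_all_models is wrapping as_completed() in tqdm, so the progress bar advances as each worker finishes rather than in submission order. A minimal standalone sketch of that pattern, with a hypothetical load_one worker standing in for the app's Llama.from_pretrained call:

from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import time

def load_one(name):
    # Stand-in for the app's Llama.from_pretrained(...) call.
    time.sleep(0.1)
    return {"name": name}

configs = ["model-a", "model-b", "model-c"]
models = []
with ThreadPoolExecutor() as executor:
    futures = [executor.submit(load_one, c) for c in configs]
    # total= is needed because as_completed() is a generator and tqdm
    # cannot infer its length on its own.
    for future in tqdm(as_completed(futures), total=len(futures), desc="Cargando modelos", unit="modelo"):
        try:
            models.append(future.result())
        except Exception as e:
            # As in the commit: log the failure and keep loading the rest.
            print(f"Error al cargar el modelo: {e}")

Passing total=len(futures) (the commit uses the equivalent len(model_configs)) keeps the bar accurate; without it tqdm can only show a running count.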
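
The commit also imports difflib.SequenceMatcher, but this diff does not show its call site. A plausible use, an assumption rather than the app's confirmed code, is dropping near-duplicate responses by similarity ratio, for example inside remove_repetitive_responses:

from difflib import SequenceMatcher

def is_near_duplicate(a: str, b: str, threshold: float = 0.9) -> bool:
    # ratio() returns a similarity score in [0, 1]; 1.0 means identical strings.
    return SequenceMatcher(None, a, b).ratio() >= threshold

def filter_similar(responses, threshold=0.9):
    # Hypothetical helper: keep a response only if it is not too similar
    # to any response already kept.
    kept = []
    for r in responses:
        if not any(is_near_duplicate(r, k, threshold) for k in kept):
            kept.append(r)
    return kept

print(filter_similar(["hola mundo", "hola mundo!", "adiós"]))
# -> ['hola mundo', 'adiós']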
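
For reference, the port change in the last hunk (8000 to 7860) matches the default port a Hugging Face Space expects an app to listen on.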