Hjgugugjhuhjggg committed on
Commit
e3c0a4b
·
verified ·
1 Parent(s): 3294219

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -2
app.py CHANGED
@@ -20,7 +20,7 @@ from sklearn.metrics.pairwise import cosine_similarity
20
  from sklearn.feature_extraction.text import TfidfVectorizer
21
  import nltk
22
  from nltk.corpus import stopwords
23
- from huggingface_hub import download_url
24
 
25
  nltk.download('stopwords')
26
 
@@ -250,7 +250,7 @@ async def load_models():
250
  model_path = os.path.join("models", model["filename"])
251
  if not os.path.exists(model_path):
252
  url = f"https://huggingface.co/{model['repo_id']}/resolve/main/{model['filename']}"
253
- download_url(url, model_path)
254
  global_data['models'][model['name']] = Llama(model_path)
255
 
256
  async def generate_model_response(model, inputs):
@@ -297,6 +297,44 @@ iface = gr.Interface(
297
  def run_gradio():
298
  iface.launch(server_port=7862, prevent_thread_lock=True)
299
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  if __name__ == "__main__":
301
  asyncio.run(load_models())
302
  Thread(target=run_uvicorn).start()
 
20
  from sklearn.feature_extraction.text import TfidfVectorizer
21
  import nltk
22
  from nltk.corpus import stopwords
23
+ import wget
24
 
25
  nltk.download('stopwords')
26
 
 
250
  model_path = os.path.join("models", model["filename"])
251
  if not os.path.exists(model_path):
252
  url = f"https://huggingface.co/{model['repo_id']}/resolve/main/{model['filename']}"
253
+ wget.download(url, model_path)
254
  global_data['models'][model['name']] = Llama(model_path)
255
 
256
  async def generate_model_response(model, inputs):
 
297
  def run_gradio():
298
  iface.launch(server_port=7862, prevent_thread_lock=True)
299
 
300
def release_resources():
    """Best-effort release of GPU and Python-heap memory.

    Frees PyTorch's cached CUDA allocations (when a GPU is present) and
    triggers a full garbage-collection pass.  Any failure is reported but
    never propagated, so callers can invoke this opportunistically.
    """
    try:
        # Only touch the CUDA allocator when a GPU is actually available;
        # empty_cache() on a CPU-only torch build can raise.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
    except Exception as e:
        # Deliberate best-effort cleanup: never let a failed release crash the app.
        print(f"Failed to release resources: {e}")
307
def resource_manager():
    """Background watchdog that keeps RAM/CPU/GPU usage under fixed caps.

    Polls system usage via psutil in an endless loop and, when a limit is
    exceeded, calls release_resources() or lowers this process's priority.
    Intended to run on a daemon thread so it never blocks the main program.
    """
    import time  # local import: only this monitor loop needs it

    # NOTE(review): 1 % / 1 MB thresholds look like placeholders — they will
    # trigger on every poll.  Kept as-is to preserve the original tuning;
    # confirm the intended values.
    MAX_RAM_PERCENT = 1
    MAX_CPU_PERCENT = 1
    MAX_GPU_PERCENT = 1
    MAX_RAM_MB = 1

    POLL_INTERVAL_S = 5  # sleep between polls so the loop doesn't peg a core

    while True:
        try:
            virtual_mem = psutil.virtual_memory()
            current_ram_percent = virtual_mem.percent
            current_ram_mb = virtual_mem.used / (1024 * 1024)

            if current_ram_percent > MAX_RAM_PERCENT or current_ram_mb > MAX_RAM_MB:
                release_resources()

            current_cpu_percent = psutil.cpu_percent()
            if current_cpu_percent > MAX_CPU_PERCENT:
                # Bug fix: nice() with no argument only READS the niceness and
                # discards it.  Pass a value to actually lower the priority
                # (on Windows a non-Unix value raises and is caught below).
                psutil.Process(os.getpid()).nice(10)

            if torch.cuda.is_available():
                gpu = torch.cuda.current_device()
                # Bug fix: torch.cuda.memory_percent() does not exist in the
                # PyTorch API.  Derive a usage percentage from reserved vs.
                # total device memory instead.
                total = torch.cuda.get_device_properties(gpu).total_memory
                gpu_mem = torch.cuda.memory_reserved(gpu) / total * 100

                if gpu_mem > MAX_GPU_PERCENT:
                    release_resources()

        except Exception as e:
            print(f"Error in resource manager: {e}")

        # Bug fix: the original loop had no sleep and busy-spun at 100 % CPU.
        time.sleep(POLL_INTERVAL_S)

# Bug fix: calling resource_manager() inline blocked forever at import time,
# so the `if __name__ == "__main__":` block below could never run.  Start the
# monitor on a daemon thread instead (Thread is already imported by this file).
Thread(target=resource_manager, daemon=True).start()
338
  if __name__ == "__main__":
339
  asyncio.run(load_models())
340
  Thread(target=run_uvicorn).start()