Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -20,7 +20,7 @@ from sklearn.metrics.pairwise import cosine_similarity
|
|
20 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
21 |
import nltk
|
22 |
from nltk.corpus import stopwords
|
23 |
-
|
24 |
|
25 |
nltk.download('stopwords')
|
26 |
|
@@ -250,7 +250,7 @@ async def load_models():
|
|
250 |
model_path = os.path.join("models", model["filename"])
|
251 |
if not os.path.exists(model_path):
|
252 |
url = f"https://huggingface.co/{model['repo_id']}/resolve/main/{model['filename']}"
|
253 |
-
|
254 |
global_data['models'][model['name']] = Llama(model_path)
|
255 |
|
256 |
async def generate_model_response(model, inputs):
|
@@ -297,6 +297,44 @@ iface = gr.Interface(
|
|
297 |
def run_gradio():
|
298 |
iface.launch(server_port=7862, prevent_thread_lock=True)
|
299 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
300 |
if __name__ == "__main__":
|
301 |
asyncio.run(load_models())
|
302 |
Thread(target=run_uvicorn).start()
|
|
|
20 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
21 |
import nltk
|
22 |
from nltk.corpus import stopwords
|
23 |
+
import wget
|
24 |
|
25 |
nltk.download('stopwords')
|
26 |
|
|
|
250 |
model_path = os.path.join("models", model["filename"])
|
251 |
if not os.path.exists(model_path):
|
252 |
url = f"https://huggingface.co/{model['repo_id']}/resolve/main/{model['filename']}"
|
253 |
+
wget.download(url, model_path)
|
254 |
global_data['models'][model['name']] = Llama(model_path)
|
255 |
|
256 |
async def generate_model_response(model, inputs):
|
|
|
297 |
def run_gradio():
|
298 |
iface.launch(server_port=7862, prevent_thread_lock=True)
|
299 |
|
300 |
+
def release_resources():
    """Best-effort release of GPU cache and Python heap memory.

    Frees cached CUDA blocks (only when a GPU is actually present, so
    CPU-only Spaces don't touch an absent CUDA runtime) and then runs a
    full garbage-collection pass.

    Never raises: this is called from a background watchdog, where a
    crash would be worse than a missed cleanup, so any failure is
    printed and swallowed.
    """
    try:
        # Guard the CUDA call: on CPU-only hosts empty_cache() is at best
        # a no-op and at worst trips over an uninitialized runtime.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
    except Exception as e:
        # Deliberate best-effort handling: report and continue.
        print(f"Failed to release resources: {e}")
|
306 |
+
|
307 |
+
def resource_manager():
    """Watchdog loop: monitor RAM / CPU / GPU usage and free memory.

    Runs forever — intended to execute on a background (daemon) thread,
    never inline.  When RAM or GPU usage crosses its threshold it calls
    release_resources(); when CPU usage crosses its threshold it lowers
    this process's scheduling priority.

    NOTE(review): the 1% / 1 MB thresholds look like debug placeholders
    (they trigger on essentially any load) — confirm intended limits.
    """
    import time  # local import so this change is self-contained

    MAX_RAM_PERCENT = 1
    MAX_CPU_PERCENT = 1
    MAX_GPU_PERCENT = 1
    MAX_RAM_MB = 1

    while True:
        try:
            virtual_mem = psutil.virtual_memory()
            current_ram_percent = virtual_mem.percent
            current_ram_mb = virtual_mem.used / (1024 * 1024)

            if current_ram_percent > MAX_RAM_PERCENT or current_ram_mb > MAX_RAM_MB:
                release_resources()

            current_cpu_percent = psutil.cpu_percent()
            if current_cpu_percent > MAX_CPU_PERCENT:
                # BUG FIX: psutil's Process.nice() with no argument only
                # *reads* the niceness — the original line was a no-op.
                # Actually lower our priority instead (POSIX only).
                if hasattr(os, "nice"):
                    try:
                        os.nice(1)
                    except OSError:
                        pass  # already at minimum priority / not permitted

            if torch.cuda.is_available():
                gpu = torch.cuda.current_device()
                # BUG FIX: torch.cuda.memory_percent() does not exist in
                # the PyTorch API (AttributeError on any CUDA host).
                # Compute the allocated fraction from device properties.
                total = torch.cuda.get_device_properties(gpu).total_memory
                gpu_mem = 100.0 * torch.cuda.memory_allocated(gpu) / total

                if gpu_mem > MAX_GPU_PERCENT:
                    release_resources()

        except Exception as e:
            print(f"Error in resource manager: {e}")

        # BUG FIX: the original loop had no sleep — a 100%-CPU busy loop
        # that itself kept CPU usage permanently above the threshold.
        time.sleep(1)
|
335 |
+
|
336 |
+
# BUG FIX: calling resource_manager() inline blocked forever here (it is
# an infinite loop), so the `if __name__ == "__main__":` section below was
# never reached.  Run the watchdog on a daemon thread instead so it dies
# with the process.
Thread(target=resource_manager, daemon=True).start()
|
337 |
+
|
338 |
if __name__ == "__main__":
|
339 |
asyncio.run(load_models())
|
340 |
Thread(target=run_uvicorn).start()
|