File size: 1,046 Bytes
113dbd0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Watchdog: poll progress.txt until it reads "done"; if the file stops being updated, kill the server processes.
import os
from datetime import datetime
from time import sleep

from loguru import logger

# Watchdog loop.
#
# Every POLL_INTERVAL_SECONDS the loop reads progress.txt and checks how long
# ago it was last modified. If it has not been touched for more than
# STALL_LIMIT_SECONDS the gunicorn/uvicorn processes are signalled and then
# force-killed. The loop exits once the file content is "done".
# NOTE(review): presumably an external supervisor brings the server back up
# after the kill -- nothing in this script restarts it; confirm.
STALL_LIMIT_SECONDS = 60 * 7
POLL_INTERVAL_SECONDS = 60 * 5

while True:
    try:
        with open("progress.txt", "r") as f:
            progress = f.read()
        # The handle is closed before the (slow) kill commands run.
        last_mod_time = datetime.fromtimestamp(os.path.getmtime("progress.txt"))
        # total_seconds() is the full elapsed time. The previous `.seconds`
        # is only the seconds *component* of the timedelta: it wraps every
        # 24 hours and is ~86399 for a slightly negative delta (mtime a
        # moment in the future), which triggered a spurious restart.
        idle_seconds = (datetime.now() - last_mod_time).total_seconds()
        if idle_seconds > STALL_LIMIT_SECONDS:
            # no progress for 7 minutes, restart/kill with -9
            logger.info("restarting server to fix cuda issues (device side asserts)")
            # `bash kill ...` made bash look for a *script file* called
            # "kill", so the SIGHUP was never delivered. pkill sends the
            # signal by process name and is a no-op when nothing matches.
            os.system("pkill -HUP gunicorn")
            os.system("pkill -HUP uvicorn")
            os.system("kill -9 `pgrep gunicorn`")
            os.system("kill -9 `pgrep uvicorn`")
            os.system("killall -9 uvicorn")
            os.system("ps | grep uvicorn | awk '{print $1}' | xargs kill -9")

        # Tolerate surrounding whitespace (e.g. a trailing newline from
        # `echo done > progress.txt`); an exact "done" still matches.
        if progress.strip() == "done":
            break
    except Exception as e:
        # Best-effort watchdog: a missing or unreadable progress file must
        # not stop the polling, so report the problem and try again later.
        logger.warning("could not check progress.txt: {}", e)
    sleep(POLL_INTERVAL_SECONDS)