File size: 7,846 Bytes
1583ce9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import paramiko
import schedule
import time
import os
import sys
from flask import Flask, jsonify, render_template_string
from threading import Thread
import logging

# Flask application object; the routes below are registered on it.
app = Flask(__name__)

# Most recent check result for each VPS, keyed by the integer config index.
# Written by check_and_run_script() and read by the Flask routes.
vps_status = {}

# Configure logging: INFO and above, with every record written to both
# stdout and stderr.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(stream) for stream in (sys.stdout, sys.stderr)],
    level=logging.INFO,
)
logger = logging.getLogger()

def get_vps_configs():
    """Collect the VPS definitions from numbered environment variables.

    Reads ``HOSTNAME_1``, ``HOSTNAME_2``, ... in order and stops at the first
    index whose hostname is unset or empty. For each index the matching
    ``USERNAME_n``, ``PASSWORD_n`` and ``SCRIPT_PATH_n`` variables are read as
    well (``None`` when unset).

    Returns:
        A list of dicts with the keys ``index``, ``hostname``, ``username``,
        ``password`` and ``script_path``, ordered by index. Each entry is
        logged (without the password) as it is collected.
    """
    env = os.environ
    collected = []
    slot = 1
    host = env.get(f'HOSTNAME_{slot}')
    while host:
        entry = dict(
            index=slot,
            hostname=host,
            username=env.get(f'USERNAME_{slot}'),
            password=env.get(f'PASSWORD_{slot}'),
            script_path=env.get(f'SCRIPT_PATH_{slot}'),
        )
        collected.append(entry)

        logger.info(f"Config {slot}: {entry['hostname']}, {entry['username']}, {entry['script_path']}")

        # Advance to the next numbered slot; the loop ends when it is missing.
        slot += 1
        host = env.get(f'HOSTNAME_{slot}')
    return collected

# Seconds to wait for the TCP connection to a VPS before giving up, so that a
# single unreachable host cannot block the whole check cycle indefinitely.
_SSH_CONNECT_TIMEOUT = 30


def _run_remote_command(client, command):
    """Run *command* on the connected SSH *client* and return its stripped stdout."""
    _stdin, stdout, _stderr = client.exec_command(command)
    return stdout.read().decode('utf-8').strip()


def _record_status(config, status, runtime, pid):
    """Store the outcome of the latest check for *config* in ``vps_status``."""
    vps_status[config['index']] = {
        'hostname': config['hostname'],
        'status': status,
        'last_check': time.strftime('%Y-%m-%d %H:%M:%S'),
        'username': config['username'],
        'runtime': runtime,
        'pid': pid,
    }


def check_and_run_script(config):
    """Check one VPS over SSH, restart its script if needed, and record the result.

    Connects to ``config['hostname']`` on port 22 with the configured username
    and password, looks for a process matching the script's file name with
    ``pgrep -f`` and, when none is found, starts the script with
    ``nohup /bin/sh``. The outcome is written to ``vps_status[config['index']]``.

    Args:
        config: dict with the keys ``index``, ``hostname``, ``username``,
            ``password`` and ``script_path``.

    Recorded ``status`` values:
        ``"Active"``         - running; same PID as the previous check, or no
                               previous check exists to compare against.
        ``"Restarted"``      - running under a different PID than last time, or
                               started by this call.
        ``"Restart Failed"`` - not running and the restart returned no PID.
        ``"Error: ..."``     - any exception raised during the check.

    Never raises: every exception is logged and recorded so that one failing
    host does not stop the checks of the others.
    """
    logger.info(f"Checking VPS {config['index']}: {config['hostname']}")
    client = None
    try:
        script_path = config['script_path']
        script_name = os.path.basename(script_path) if script_path else ""
        if not script_name:
            # Fail with a readable message instead of an opaque TypeError
            # (or an empty pgrep pattern) further down.
            raise ValueError("no script path configured for this VPS")

        client = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy accepts unknown host keys without
        # verification; consider loading known host keys instead.
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

        client.connect(
            hostname=config['hostname'],
            username=config['username'],
            password=config['password'],
            port=22,
            timeout=_SSH_CONNECT_TIMEOUT,
        )

        # PID stored by the previous check, if there was one.
        last_pid = vps_status.get(config['index'], {}).get('pid')

        # pgrep prints one PID per line. Keep only the first so the value can
        # be compared with the stored PID and passed to `ps -p` as-is.
        # script_path/script_name are interpolated unquoted (so `~` and
        # variables still expand remotely); they must come from trusted config.
        pgrep_output = _run_remote_command(client, f"pgrep -f {script_name}")
        current_pid = pgrep_output.split()[0] if pgrep_output else ""

        if current_pid:
            pid = current_pid
            if last_pid is None or current_pid == last_pid:
                # Same PID as before, or nothing to compare against yet:
                # the process is simply running.
                status = "Active"
                logger.info(f"Script is running. PID: {pid}")
            else:
                # The PID changed since the last check, so the process went
                # away and was started again in between.
                status = "Restarted"
                logger.info(f"Script has been restarted. New PID: {pid}")
        else:
            logger.info(f"Script not running on {config['hostname']}. Executing restart script.")
            # `echo $!` reports the PID of the background job just started.
            restart_command = f"nohup /bin/sh {script_path} > /dev/null 2>&1 & echo $!"
            new_pid = _run_remote_command(client, restart_command)

            if new_pid.isdigit():
                pid = new_pid
                status = "Restarted"
                logger.info(f"Script restarted. New PID: {pid}")
            else:
                pid = "N/A"
                status = "Restart Failed"
                logger.error("Failed to restart script")

        # Elapsed run time of the process. `etime=` suppresses the header
        # line, so an empty result means the process no longer exists.
        if pid != "N/A":
            output = _run_remote_command(client, f"ps -p {pid} -o etime=")
            runtime = output.splitlines()[-1].strip() if output else "Unknown"
        else:
            runtime = "N/A"

        _record_status(config, status, runtime, pid)

    except Exception as e:
        # Boundary handler: the periodic check must survive any single failure.
        logger.error(f"Error occurred while checking VPS {config['index']} - {config['hostname']}: {str(e)}")
        _record_status(config, f"Error: {str(e)}", "N/A", "N/A")
    finally:
        if client:
            client.close()

# Layout of the summary table: (header text, key in the status dict, width).
# A key of None marks the column that shows the VPS index itself. The Status
# column fits "Restart Failed" and the PID column fits 7-digit PIDs.
_STATUS_COLUMNS = (
    ("Index", None, 7),
    ("Hostname", "hostname", 21),
    ("Status", "status", 14),
    ("Last Check", "last_check", 23),
    ("Username", "username", 8),
    ("Runtime", "runtime", 17),
    ("PID", "pid", 7),
)


def _format_status_table(statuses):
    """Render ``{index: status_dict}`` as an ASCII table, one bordered row per VPS."""
    border = "+" + "+".join("-" * (width + 2) for _, _, width in _STATUS_COLUMNS) + "+"

    def render_row(cells):
        # Pad/truncate every cell to its column width; None becomes empty
        # text so an unset username (or similar) cannot raise a TypeError.
        padded = (
            ("" if cell is None else str(cell))[:width].ljust(width)
            for cell, (_, _, width) in zip(cells, _STATUS_COLUMNS)
        )
        return "| " + " | ".join(padded) + " |"

    lines = [border, render_row([header for header, _, _ in _STATUS_COLUMNS]), border]
    for index in sorted(statuses):
        entry = statuses[index]
        lines.append(render_row(
            [index if key is None else entry.get(key) for _, key, _ in _STATUS_COLUMNS]
        ))
        lines.append(border)
    return "\n".join(lines) + "\n"


def check_all_vps():
    """Check every configured VPS, then log a summary table of all statuses.

    Reads the configurations with ``get_vps_configs()``, runs
    ``check_and_run_script()`` on each, and logs the contents of
    ``vps_status`` as an ASCII table at INFO level.
    """
    logger.info("Starting VPS check")
    for config in get_vps_configs():
        check_and_run_script(config)

    logger.info("\n" + _format_status_table(vps_status))

@app.route('/')
def index():
    """Render the overview page: an HTML table with one row per VPS.

    Rows are ordered by VPS index and the index cell links to the JSON detail
    view at ``/status/<index>``. Data comes from the module-level
    ``vps_status`` dict.
    """
    sorted_indexes = sorted(vps_status.keys())
    html = '''
    <h1>VPS Status Overview</h1>
    <table border="1">
        <tr>
            <th>Index</th>
            <th>Hostname</th>
            <th>Status</th>
            <th>Last Check</th>
            <th>Username</th>
            <th>Runtime</th>
            <th>PID</th>
        </tr>
        {% for index in sorted_indexes %}
        <tr>
            <td><a href="/status/{{ index }}">{{ index }}</a></td>
            <td>{{ vps_status[index].hostname }}</td>
            <td>{{ vps_status[index].status }}</td>
            <td>{{ vps_status[index].last_check }}</td>
            <td>{{ vps_status[index].username }}</td>
            <td>{{ vps_status[index].runtime }}</td>
            <td>{{ vps_status[index].pid }}</td>
        </tr>
        {% endfor %}
    </table>
    '''
    return render_template_string(html, vps_status=vps_status, sorted_indexes=sorted_indexes)

@app.route('/status/<int:index>')
def vps_status_detail(index):
    """Return the recorded status of one VPS as JSON.

    Responds with the entry stored in ``vps_status`` under *index*, or with a
    JSON error body and HTTP 404 when no such entry exists.
    """
    if index not in vps_status:
        return jsonify({"error": "VPS not found"}), 404
    return jsonify(vps_status[index])

@app.route('/health')
def health_check():
    """Liveness endpoint: report ``healthy`` plus the uptime in seconds.

    ``start_time`` is only assigned once ``main()`` has run. If the app is
    served without it, ``uptime`` is reported as ``null`` instead of the
    request failing with a ``NameError``.
    """
    started = globals().get('start_time')
    uptime = time.time() - started if started is not None else None
    return jsonify({"status": "healthy", "uptime": uptime}), 200

def run_flask(host='0.0.0.0', port=8080):
    """Run the Flask server; blocks until the server stops.

    Args:
        host: interface to bind to. The default listens on all interfaces.
        port: TCP port to listen on.
    """
    app.run(host=host, port=port)

def main():
    """Start the web server and run the VPS checks forever.

    Records the start time in the module-level ``start_time``, starts Flask in
    a background thread, runs one check immediately, then schedules a check
    every 15 minutes and loops, polling the scheduler once a minute and
    logging a heartbeat every fifth iteration. Does not return.
    """
    global start_time
    start_time = time.time()

    logger.info("===== VPS monitoring script is starting =====")

    # Daemon thread: if the monitor loop ever exits, the process should exit
    # with it rather than keep serving stale status from an orphaned server.
    flask_thread = Thread(target=run_flask, daemon=True)
    flask_thread.start()
    logger.info("Flask server started in background")

    vps_configs = get_vps_configs()
    logger.info(f"Found {len(vps_configs)} VPS configurations")

    logger.info("Running initial VPS check")
    try:
        check_all_vps()
    except Exception:
        # Top-level boundary: a failed first check must not stop the monitor.
        logger.exception("Initial VPS check failed")

    schedule.every(15).minutes.do(check_all_vps)
    logger.info("Scheduled VPS check every 15 minutes")

    logger.info("===== VPS monitoring script is running =====")

    heartbeat_count = 0
    while True:
        try:
            schedule.run_pending()
        except Exception:
            # Keep the loop alive; the failure is logged with its traceback.
            logger.exception("Scheduled VPS check raised an exception")
        time.sleep(60)
        heartbeat_count += 1
        if heartbeat_count % 5 == 0:  # heartbeat every fifth iteration
            # Use wall-clock time: iterations last longer than 60 seconds
            # whenever a check runs, so the counter would under-report.
            uptime_minutes = int((time.time() - start_time) // 60)
            logger.info(f"Heartbeat: Script is still running. Uptime: {uptime_minutes} minutes")

# Start the monitor only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()