kingtest committed on
Commit
1583ce9
·
verified ·
1 Parent(s): e1f903b

Create vps_monitor.py

Browse files
Files changed (1) hide show
  1. vps_monitor.py +232 -0
vps_monitor.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import paramiko
2
+ import schedule
3
+ import time
4
+ import os
5
+ import sys
6
+ from flask import Flask, jsonify, render_template_string
7
+ from threading import Thread
8
+ import logging
9
+
10
# Flask application that serves the status pages.
app = Flask(__name__)

# Latest known state of each VPS, keyed by the integer config index.
vps_status = {}

# Configure logging: every record is written to both stdout and stderr.
_log_handlers = [
    logging.StreamHandler(sys.stdout),
    logging.StreamHandler(sys.stderr),
]
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=_log_handlers,
)
logger = logging.getLogger()
24
+
25
def get_vps_configs():
    """Collect VPS connection settings from numbered environment variables.

    Reads ``HOSTNAME_1``, ``HOSTNAME_2``, ... in order and stops at the first
    index whose ``HOSTNAME_<n>`` is unset or empty. For each index found, the
    matching ``USERNAME_<n>``, ``PASSWORD_<n>`` and ``SCRIPT_PATH_<n>`` values
    are read as well (each is ``None`` when unset).

    Returns:
        list[dict]: One dict per VPS with the keys ``index``, ``hostname``,
        ``username``, ``password`` and ``script_path``.
    """
    env = os.environ
    found = []
    position = 1
    host = env.get(f'HOSTNAME_{position}')
    while host:
        entry = {
            'index': position,
            'hostname': host,
            'username': env.get(f'USERNAME_{position}'),
            'password': env.get(f'PASSWORD_{position}'),
            'script_path': env.get(f'SCRIPT_PATH_{position}'),
        }
        found.append(entry)

        # The password is deliberately left out of the log line.
        logger.info(f"Config {position}: {entry['hostname']}, {entry['username']}, {entry['script_path']}")

        position += 1
        host = env.get(f'HOSTNAME_{position}')
    return found
46
+
47
def check_and_run_script(config):
    """Check one VPS over SSH and restart its script if it is not running.

    Connects to ``config['hostname']`` on port 22 with password
    authentication, looks for a process matching the script's file name with
    ``pgrep -f`` and, when none is found, launches the script with
    ``nohup /bin/sh``. The outcome is stored in the module-level
    ``vps_status`` dict under ``config['index']``.

    Args:
        config (dict): One entry as produced by ``get_vps_configs`` with the
            keys ``index``, ``hostname``, ``username``, ``password`` and
            ``script_path``.

    Returns:
        None. Any exception is caught, logged, and recorded in ``vps_status``
        as an ``"Error: ..."`` status instead of being raised.
    """
    logger.info(f"Checking VPS {config['index']}: {config['hostname']}")
    client = None
    try:
        script_path = config['script_path']
        script_name = os.path.basename(script_path) if script_path else ""
        if not script_name:
            # Fail with a clear message instead of a TypeError from
            # os.path.basename(None) or a pgrep pattern that matches everything.
            raise ValueError(f"SCRIPT_PATH_{config['index']} is not set or has no file name")

        client = paramiko.SSHClient()
        # NOTE(security): AutoAddPolicy accepts unknown host keys without
        # verification; kept as-is to preserve existing behaviour.
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

        client.connect(
            hostname=config['hostname'],
            username=config['username'],
            password=config['password'],
            port=22,
            # Bound the TCP connect so one unreachable host cannot stall the
            # whole check cycle.
            timeout=30,
        )

        # PID recorded by the previous check, if any.
        last_pid = vps_status.get(config['index'], {}).get('pid', None)

        # pgrep prints one PID per line; several processes may match.
        check_command = f"pgrep -f {script_name}"
        _, stdout, _ = client.exec_command(check_command)
        running_pids = stdout.read().decode('utf-8').split()

        if running_pids:
            if last_pid in running_pids:
                # The process seen last time is still alive.
                status = "Active"
                pid = last_pid
                logger.info(f"Script is running. PID: {pid}")
            else:
                # The PID differs from the recorded one (or nothing was
                # recorded yet, e.g. on the very first check), so the process
                # is reported as restarted.
                status = "Restarted"
                pid = running_pids[0]
                logger.info(f"Script has been restarted. New PID: {pid}")
        else:
            logger.info(f"Script not running on {config['hostname']}. Executing restart script.")
            # `echo $!` prints the PID of the background job just started.
            restart_command = f"nohup /bin/sh {script_path} > /dev/null 2>&1 & echo $!"
            _, stdout, _ = client.exec_command(restart_command)
            new_pid = stdout.read().decode('utf-8').strip()

            if new_pid.isdigit():
                pid = new_pid
                status = "Restarted"
                logger.info(f"Script restarted. New PID: {pid}")
            else:
                pid = "N/A"
                status = "Restart Failed"
                logger.error("Failed to restart script")

        # Elapsed running time of the process.
        if pid != "N/A":
            # `etime=` suppresses the header line, so a process that has
            # already exited yields empty output rather than "ELAPSED".
            runtime_command = f"ps -p {pid} -o etime="
            _, stdout, _ = client.exec_command(runtime_command)
            output = stdout.read().decode('utf-8').strip()
            runtime = output.splitlines()[-1].strip() if output else "Unknown"
        else:
            runtime = "N/A"

        vps_status[config['index']] = {
            'hostname': config['hostname'],
            'status': status,
            'last_check': time.strftime('%Y-%m-%d %H:%M:%S'),
            'username': config['username'],
            'runtime': runtime,
            'pid': pid,
        }

    except Exception as e:
        # Top-level boundary for a single host: record the failure so the
        # remaining hosts are still checked.
        logger.error(f"Error occurred while checking VPS {config['index']} - {config['hostname']}: {str(e)}")
        vps_status[config['index']] = {
            'hostname': config['hostname'],
            'status': f"Error: {str(e)}",
            'last_check': time.strftime('%Y-%m-%d %H:%M:%S'),
            'username': config['username'],
            'runtime': "N/A",
            'pid': "N/A",
        }
    finally:
        if client:
            client.close()
130
+
131
def check_all_vps():
    """Check every configured VPS and log a summary table.

    Runs ``check_and_run_script`` for each entry returned by
    ``get_vps_configs`` and then logs an ASCII table built from the
    module-level ``vps_status`` dict, one row per index in ascending order.

    Returns:
        None.
    """
    logger.info("Starting VPS check")
    for config in get_vps_configs():
        check_and_run_script(config)

    # Column titles and widths. The Status column is 14 wide so the statuses
    # "Restarted" and "Restart Failed" are not cut off.
    headers = ("Index", "Hostname", "Status", "Last Check", "Username", "Runtime", "PID")
    widths = (7, 21, 14, 23, 8, 17, 5)

    border = "+" + "+".join("-" * (width + 2) for width in widths) + "+"
    row_format = "| " + " | ".join(f"{{:<{width}}}" for width in widths) + " |"

    def render_row(cells):
        # str() guards against non-string values (e.g. a username of None
        # when USERNAME_<n> is unset); slicing keeps the columns aligned.
        return row_format.format(
            *(str(cell)[:width] for cell, width in zip(cells, widths))
        )

    # Table header.
    lines = [border, render_row(headers), border]

    # One row per VPS, each followed by a separator line.
    for index in sorted(vps_status):
        status = vps_status[index]
        lines.append(render_row((
            index,
            status['hostname'],
            status['status'],
            status['last_check'],
            status['username'],
            status['runtime'],
            status['pid'],
        )))
        lines.append(border)

    table = "\n".join(lines) + "\n"
    logger.info("\n" + table)
157
+
158
@app.route('/')
def index():
    """Render an HTML overview table of all known VPS statuses.

    Rows are taken from the module-level ``vps_status`` dict in ascending
    index order; each index links to ``/status/<index>``.

    Returns:
        str: The rendered HTML page.
    """
    sorted_indexes = sorted(vps_status.keys())
    # The hostname cell previously carried a stray closing </a> tag with no
    # matching opening tag; it has been removed so the markup is well-formed.
    html = '''
    <h1>VPS Status Overview</h1>
    <table border="1">
        <tr>
            <th>Index</th>
            <th>Hostname</th>
            <th>Status</th>
            <th>Last Check</th>
            <th>Username</th>
            <th>Runtime</th>
            <th>PID</th>
        </tr>
        {% for index in sorted_indexes %}
        <tr>
            <td><a href="/status/{{ index }}">{{ index }}</a></td>
            <td>{{ vps_status[index].hostname }}</td>
            <td>{{ vps_status[index].status }}</td>
            <td>{{ vps_status[index].last_check }}</td>
            <td>{{ vps_status[index].username }}</td>
            <td>{{ vps_status[index].runtime }}</td>
            <td>{{ vps_status[index].pid }}</td>
        </tr>
        {% endfor %}
    </table>
    '''
    return render_template_string(html, vps_status=vps_status, sorted_indexes=sorted_indexes)
187
+
188
@app.route('/status/<int:index>')
def vps_status_detail(index):
    """Return the stored status of a single VPS as JSON.

    Args:
        index (int): VPS index taken from the URL.

    Returns:
        The JSON-encoded ``vps_status[index]`` entry, or a JSON error body
        with HTTP status 404 when no entry exists for ``index``.
    """
    # Guard clause: unknown index.
    if index not in vps_status:
        return jsonify({"error": "VPS not found"}), 404

    return jsonify(vps_status[index])
194
+
195
@app.route('/health')
def health_check():
    """Report liveness and uptime as JSON with HTTP status 200.

    The uptime is the number of seconds since the module-level
    ``start_time`` global, which is assigned in ``main()``.
    """
    uptime_seconds = time.time() - start_time
    payload = {"status": "healthy", "uptime": uptime_seconds}
    return jsonify(payload), 200
198
+
199
def run_flask():
    """Run the Flask app, listening on all interfaces on port 8080."""
    bind_address = '0.0.0.0'
    listen_port = 8080
    app.run(host=bind_address, port=listen_port)
201
+
202
def main():
    """Start the web server, run an initial check, then check every 15 minutes.

    Sets the module-level ``start_time``, starts Flask in a background
    thread, runs ``check_all_vps`` once immediately, schedules it every
    15 minutes, and then loops forever, polling the scheduler once a minute
    and logging a heartbeat every fifth iteration.
    """
    global start_time
    start_time = time.time()

    logger.info("===== VPS monitoring script is starting =====")

    # Daemon thread: the web server must not keep the process alive once the
    # monitoring loop has stopped (e.g. on Ctrl-C).
    flask_thread = Thread(target=run_flask, daemon=True)
    flask_thread.start()
    logger.info("Flask server started in background")

    vps_configs = get_vps_configs()
    logger.info(f"Found {len(vps_configs)} VPS configurations")

    logger.info("Running initial VPS check")
    check_all_vps()

    schedule.every(15).minutes.do(check_all_vps)
    logger.info("Scheduled VPS check every 15 minutes")

    logger.info("===== VPS monitoring script is running =====")

    heartbeat_count = 0
    while True:
        try:
            schedule.run_pending()
        except Exception:
            # A failing job must not end the monitoring loop; log the
            # traceback and keep polling.
            logger.exception("Scheduled VPS check raised an exception")
        time.sleep(60)
        heartbeat_count += 1
        if heartbeat_count % 5 == 0:  # heartbeat every fifth iteration (about 5 minutes)
            # Use wall-clock time: an iteration lasts longer than 60 seconds
            # whenever a check runs, so the counter would under-report.
            uptime_minutes = int((time.time() - start_time) // 60)
            logger.info(f"Heartbeat: Script is still running. Uptime: {uptime_minutes} minutes")


if __name__ == "__main__":
    main()