"""Keep scripts alive on remote VPS hosts and publish their status.

Every 15 minutes each configured host is contacted over SSH; if the configured
script is not running it is started again.  The latest result for every host is
kept in ``vps_status`` and served by a small Flask application (HTML overview,
per-host JSON and a health endpoint).

Hosts are configured through numbered environment variables:
``HOSTNAME_n``, ``USERNAME_n``, ``PASSWORD_n`` and ``SCRIPT_PATH_n``
(n = 1, 2, ... with no gaps).
"""
import logging
import os
import shlex
import sys
import time
from threading import Thread

import paramiko
import schedule
from flask import Flask, jsonify, render_template_string

app = Flask(__name__)

# Latest check result per host, keyed by the integer config index.
vps_status = {}

# Set at import time so /health works under any entry point; main() resets it.
start_time = time.time()

SSH_PORT = 22
# Upper bound for the TCP connect so one unreachable host cannot stall the
# whole check cycle.
SSH_TIMEOUT_SECONDS = 30

# (title, width) for every column of the console status table.
_TABLE_COLUMNS = (
    ("Index", 7),
    ("Hostname", 21),
    ("Status", 14),
    ("Last Check", 23),
    ("Username", 8),
    ("Runtime", 17),
    ("PID", 7),
)

# Logging setup.  Records are written to both stdout and stderr, so each
# message appears once on each stream.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.StreamHandler(sys.stderr)
    ]
)
logger = logging.getLogger()


def get_vps_configs():
    """Read the numbered host configurations from the environment.

    Reading stops at the first index for which ``HOSTNAME_<index>`` is unset
    or empty.

    Returns:
        A list of dicts with the keys ``index``, ``hostname``, ``username``,
        ``password`` and ``script_path``.  Values other than ``index`` and
        ``hostname`` are ``None`` when the matching variable is not set.
    """
    configs = []
    index = 1
    while True:
        hostname = os.environ.get(f'HOSTNAME_{index}')
        if not hostname:
            break

        config = {
            'index': index,
            'hostname': hostname,
            'username': os.environ.get(f'USERNAME_{index}'),
            'password': os.environ.get(f'PASSWORD_{index}'),
            'script_path': os.environ.get(f'SCRIPT_PATH_{index}')
        }
        configs.append(config)

        # The password is deliberately left out of the log line.
        logger.info(
            "Config %s: %s, %s, %s",
            index, config['hostname'], config['username'], config['script_path'],
        )
        index += 1
    return configs


def _run_remote(client, command):
    """Run *command* on the connected host and return its stripped stdout."""
    _stdin, stdout, _stderr = client.exec_command(command)
    return stdout.read().decode('utf-8', errors='replace').strip()


def _record_status(config, status, runtime, pid):
    """Store the outcome of one check in ``vps_status``."""
    vps_status[config['index']] = {
        'hostname': config['hostname'],
        'status': status,
        'last_check': time.strftime('%Y-%m-%d %H:%M:%S'),
        'username': config['username'],
        'runtime': runtime,
        'pid': pid
    }


def check_and_run_script(config):
    """Check one host and restart its script when it is not running.

    The result is written to ``vps_status[config['index']]``; nothing is
    returned.  Any exception (connection failure, bad configuration, ...) is
    logged and recorded as an ``"Error: ..."`` status instead of propagating,
    so one failing host does not stop the remaining checks.

    Args:
        config: One entry produced by ``get_vps_configs()``.
    """
    logger.info("Checking VPS %s: %s", config['index'], config['hostname'])

    client = None
    try:
        script_path = config['script_path']
        script_name = os.path.basename(script_path) if script_path else ''
        if not script_name:
            raise ValueError(
                f"SCRIPT_PATH_{config['index']} is missing or does not name a file"
            )

        client = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy accepts unknown host keys without
        # verification; kept as in the original, but it offers no protection
        # against a man-in-the-middle.
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        client.connect(
            hostname=config['hostname'],
            username=config['username'],
            password=config['password'],
            port=SSH_PORT,
            timeout=SSH_TIMEOUT_SECONDS,
        )

        # PID recorded by the previous check, if any ("N/A" after a failure).
        last_pid = vps_status.get(config['index'], {}).get('pid')
        has_previous_pid = isinstance(last_pid, str) and last_pid.isdigit()

        # pgrep prints one PID per line and may report several processes.
        # NOTE(review): depending on the remote login shell, `pgrep -f` may
        # also match the shell that runs this very command - confirm on the
        # target hosts.
        running_pids = _run_remote(
            client, f"pgrep -f {shlex.quote(script_name)}"
        ).split()

        if running_pids:
            if has_previous_pid and last_pid in running_pids:
                # Same process as last time: it has been running throughout.
                status = "Active"
                pid = last_pid
                logger.info("Script is running. PID: %s", pid)
            elif has_previous_pid:
                # The previously seen process is gone and another one exists.
                status = "Restarted"
                pid = running_pids[0]
                logger.info("Script has been restarted. New PID: %s", pid)
            else:
                # First observation (or first after an error): there is no
                # earlier PID to compare with, so this is not a restart.
                status = "Active"
                pid = running_pids[0]
                logger.info("Script is running. PID: %s", pid)
        else:
            logger.info(
                "Script not running on %s. Executing restart script.",
                config['hostname'],
            )
            restart_command = (
                f"nohup /bin/sh {shlex.quote(script_path)} > /dev/null 2>&1 & echo $!"
            )
            new_pid = _run_remote(client, restart_command)

            if new_pid.isdigit():
                pid = new_pid
                status = "Restarted"
                logger.info("Script restarted. New PID: %s", pid)
            else:
                pid = "N/A"
                status = "Restart Failed"
                logger.error("Failed to restart script")

        # Elapsed run time of the process; "etime=" suppresses the header so
        # an already-exited process yields empty output instead of "ELAPSED".
        if pid != "N/A":
            output = _run_remote(client, f"ps -p {pid} -o etime=")
            runtime = output.splitlines()[-1].strip() if output else "Unknown"
        else:
            runtime = "N/A"

        _record_status(config, status, runtime, pid)

    except Exception as e:
        logger.error(
            "Error occurred while checking VPS %s - %s: %s",
            config['index'], config['hostname'], e,
        )
        _record_status(config, f"Error: {str(e)}", "N/A", "N/A")
    finally:
        if client:
            client.close()


def _format_status_table(statuses):
    """Render *statuses* (index -> status dict) as a fixed-width text table."""
    separator = "+" + "+".join("-" * (width + 2) for _, width in _TABLE_COLUMNS) + "+\n"
    header = "| " + " | ".join(
        title.ljust(width) for title, width in _TABLE_COLUMNS
    ) + " |\n"

    lines = [separator, header, separator]
    for index in sorted(statuses):
        entry = statuses[index]
        values = (
            index,
            entry['hostname'],
            entry['status'],
            entry['last_check'],
            entry['username'],
            entry['runtime'],
            entry['pid'],
        )
        # str() keeps a missing (None) value from breaking slicing/padding.
        cells = (
            str("" if value is None else value)[:width].ljust(width)
            for value, (_, width) in zip(values, _TABLE_COLUMNS)
        )
        lines.append("| " + " | ".join(cells) + " |\n")
    lines.append(separator)
    return "".join(lines)


def check_all_vps():
    """Check every configured host and log a summary table of all statuses."""
    logger.info("Starting VPS check")
    for config in get_vps_configs():
        check_and_run_script(config)

    logger.info("\n" + _format_status_table(vps_status))


@app.route('/')
def index():
    """Serve an HTML table with the latest status of every host."""
    sorted_indexes = sorted(vps_status)
    html = '''
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>VPS Status Overview</title>
</head>
<body>
    <h1>VPS Status Overview</h1>
    <table border="1">
        <tr>
            <th>Index</th>
            <th>Hostname</th>
            <th>Status</th>
            <th>Last Check</th>
            <th>Username</th>
            <th>Runtime</th>
            <th>PID</th>
        </tr>
        {% for index in sorted_indexes %}
        <tr>
            <td>{{ index }}</td>
            <td>{{ vps_status[index].hostname }}</td>
            <td>{{ vps_status[index].status }}</td>
            <td>{{ vps_status[index].last_check }}</td>
            <td>{{ vps_status[index].username }}</td>
            <td>{{ vps_status[index].runtime }}</td>
            <td>{{ vps_status[index].pid }}</td>
        </tr>
        {% endfor %}
    </table>
</body>
</html>
'''
    return render_template_string(html, vps_status=vps_status, sorted_indexes=sorted_indexes)


@app.route('/status/<int:index>')
def vps_status_detail(index):
    """Return the stored status of host *index* as JSON, or 404 if unknown."""
    if index in vps_status:
        return jsonify(vps_status[index])
    return jsonify({"error": "VPS not found"}), 404


@app.route('/health')
def health_check():
    """Report liveness and the number of seconds since start-up."""
    return jsonify({"status": "healthy", "uptime": time.time() - start_time}), 200


def run_flask():
    """Run the Flask server on all interfaces, port 8080 (blocking)."""
    app.run(host='0.0.0.0', port=8080)


def main():
    """Start the web server thread, run an initial check, then loop forever."""
    global start_time
    start_time = time.time()

    logger.info("===== VPS monitoring script is starting =====")

    flask_thread = Thread(target=run_flask)
    flask_thread.start()
    logger.info("Flask server started in background")

    vps_configs = get_vps_configs()
    logger.info(f"Found {len(vps_configs)} VPS configurations")

    logger.info("Running initial VPS check")
    check_all_vps()

    schedule.every(15).minutes.do(check_all_vps)
    logger.info("Scheduled VPS check every 15 minutes")

    logger.info("===== VPS monitoring script is running =====")

    heartbeat_count = 0
    while True:
        schedule.run_pending()
        time.sleep(60)
        heartbeat_count += 1
        if heartbeat_count % 5 == 0:
            # Heartbeat roughly every 5 minutes.  Uptime comes from the clock:
            # a loop iteration lasts longer than 60 s whenever a check runs.
            uptime_minutes = int((time.time() - start_time) // 60)
            logger.info(f"Heartbeat: Script is still running. Uptime: {uptime_minutes} minutes")


if __name__ == "__main__":
    main()