"""Train, persist, reload and evaluate a random-forest regressor on UCI dataset 320.

Pipeline, in order:

1. Download dataset id=320 from the UCI ML repository via ``ucimlrepo``.
2. One-hot encode the features and split them 80/20 into train/test sets.
3. Fit a ``RandomForestRegressor`` on the ``G3`` target column.
4. Save the fitted model with ``joblib``, load it back, and predict on the
   test split with the reloaded copy.
5. Print the mean squared error and show a scatter plot of true vs. predicted
   values.
"""
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from ucimlrepo import fetch_ucirepo

# Fetch the dataset (requires network access).
student_performance = fetch_ucirepo(id=320)

# Separate features and targets.
X = student_performance.data.features
y = student_performance.data.targets

# Peek at the first few rows of both frames.
print(X.head())
print(y.head())

# One-hot encode categorical columns; drop_first avoids a redundant dummy
# column per category.
X = pd.get_dummies(X, drop_first=True)

# Hold out 20% of the rows for testing; only the 'G3' target column is modelled.
X_train, X_test, y_train, y_test = train_test_split(
    X, y['G3'], test_size=0.2, random_state=42
)

# Build and fit the model (fixed seed for reproducibility).
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save the model. joblib.dump does not create missing directories, so make
# sure the checkpoint folder exists first.
model_path = "C:/Users/baby7/Desktop/推理/model_checkpoints/random_forest_model.pkl"
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, model_path)
print(f"模型已保存到 {model_path}")

# Load the model back. NOTE: joblib.load unpickles the file, so it must only
# ever be pointed at trusted files (here: the one written just above).
loaded_model = joblib.load(model_path)
print("模型已加载")

# Predict on the held-out test split with the reloaded model.
y_pred = loaded_model.predict(X_test)
print("预测结果:", y_pred)

# Evaluate model performance.
mse = mean_squared_error(y_test, y_pred)
print(f'均方误差: {mse:.2f}')

# The plot labels below are Chinese; matplotlib's default font has no CJK
# glyphs and would draw empty boxes. Prefer a CJK-capable font when one is
# installed, keeping the existing defaults as fallback.
plt.rcParams["font.sans-serif"] = [
    "SimHei",
    "Microsoft YaHei",
    "PingFang SC",
    "Noto Sans CJK SC",
    *plt.rcParams["font.sans-serif"],
]
# Some CJK fonts lack the Unicode minus sign; fall back to ASCII '-'.
plt.rcParams["axes.unicode_minus"] = False

plt.scatter(y_test, y_pred)
plt.xlabel('真实值')
plt.ylabel('预测值')
plt.title('真实值与预测值对比')
# Reference line y = x; assumes the G3 values lie in [0, 20] - TODO confirm.
plt.plot([0, 20], [0, 20], color='red', linestyle='--')
plt.show()