Upload 65 files
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +30 -0
- 11522105.pdf +3 -0
- data_load.py +48 -0
- data_processing.py +265 -0
- resnet1d.py +297 -0
- resnet1d_multitask.py +155 -0
- results/loss_curves_modelA_20241207_145904.png +3 -0
- results/loss_curves_modelB_20241207_151513.png +3 -0
- results/loss_curves_modelC_20241207_153102.png +3 -0
- results/metrics_modelA_20241207_145904.txt +12 -0
- results/metrics_modelB_20241207_151513.txt +12 -0
- results/metrics_modelC_20241207_153102.txt +12 -0
- results/modelC_scatter_CEC_20241207_153102.png +0 -0
- results/modelC_scatter_CaCO3_20241207_153102.png +0 -0
- results/modelC_scatter_K_20241207_153102.png +0 -0
- results/modelC_scatter_N_20241207_153102.png +0 -0
- results/modelC_scatter_OC_20241207_153102.png +0 -0
- results/modelC_scatter_P_20241207_153102.png +0 -0
- results/modelC_scatter_pH.in.CaCl2_20241207_153102.png +3 -0
- results/modelC_scatter_pH.in.H2O_20241207_153102.png +0 -0
- results/training_metrics_modelA_20241207_145904.png +3 -0
- results/training_metrics_modelB_20241207_151513.png +3 -0
- results/training_metrics_modelC_20241207_153102.png +3 -0
- results1/loss_curves_modelC_20241207_155007_Abs-SG0.png +3 -0
- results1/loss_curves_modelC_20241207_160632_Abs-SG0-SNV.png +3 -0
- results1/loss_curves_modelC_20241207_162218_Abs-SG1.png +3 -0
- results1/loss_curves_modelC_20241207_163811_Abs-SG1-SNV.png +3 -0
- results1/loss_curves_modelC_20241207_165344_Abs-SG2.png +3 -0
- results1/loss_curves_modelC_20241207_170911_Abs-SG2-SNV.png +3 -0
- results1/metrics_modelC_20241210_142058_Abs-SG0.txt +12 -0
- results1/metrics_modelC_20241210_143517_Abs-SG0-SNV.txt +12 -0
- results1/metrics_modelC_20241210_144926_Abs-SG1.txt +12 -0
- results1/metrics_modelC_20241210_150332_Abs-SG1-SNV.txt +12 -0
- results1/metrics_modelC_20241210_151845_Abs-SG2.txt +12 -0
- results1/metrics_modelC_20241210_153333_Abs-SG2-SNV.txt +12 -0
- results1/training_metrics_modelC_20241207_155007_Abs-SG0.png +3 -0
- results1/training_metrics_modelC_20241207_160632_Abs-SG0-SNV.png +3 -0
- results1/training_metrics_modelC_20241207_162218_Abs-SG1.png +3 -0
- results1/training_metrics_modelC_20241207_163811_Abs-SG1-SNV.png +3 -0
- results1/training_metrics_modelC_20241207_165344_Abs-SG2.png +3 -0
- results1/training_metrics_modelC_20241207_170911_Abs-SG2-SNV.png +3 -0
- results2/loss_curves_modelC_20241213_202047_5.png +3 -0
- results2/loss_curves_modelC_20241213_203438_10.png +3 -0
- results2/loss_curves_modelC_20241213_204103_15.png +3 -0
- results2/metrics_modelC_20241213_202047_5.txt +4 -0
- results2/metrics_modelC_20241213_203438_10.txt +4 -0
- results2/metrics_modelC_20241213_204103_15.txt +4 -0
- results2/training_metrics_modelC_20241213_202047_5.png +3 -0
- results2/training_metrics_modelC_20241213_203438_10.png +3 -0
- results2/training_metrics_modelC_20241213_204103_15.png +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,33 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
11522105.pdf filter=lfs diff=lfs merge=lfs -text
|
37 |
+
results/loss_curves_modelA_20241207_145904.png filter=lfs diff=lfs merge=lfs -text
|
38 |
+
results/loss_curves_modelB_20241207_151513.png filter=lfs diff=lfs merge=lfs -text
|
39 |
+
results/loss_curves_modelC_20241207_153102.png filter=lfs diff=lfs merge=lfs -text
|
40 |
+
results/modelC_scatter_pH.in.CaCl2_20241207_153102.png filter=lfs diff=lfs merge=lfs -text
|
41 |
+
results/training_metrics_modelA_20241207_145904.png filter=lfs diff=lfs merge=lfs -text
|
42 |
+
results/training_metrics_modelB_20241207_151513.png filter=lfs diff=lfs merge=lfs -text
|
43 |
+
results/training_metrics_modelC_20241207_153102.png filter=lfs diff=lfs merge=lfs -text
|
44 |
+
results1/loss_curves_modelC_20241207_155007_Abs-SG0.png filter=lfs diff=lfs merge=lfs -text
|
45 |
+
results1/loss_curves_modelC_20241207_160632_Abs-SG0-SNV.png filter=lfs diff=lfs merge=lfs -text
|
46 |
+
results1/loss_curves_modelC_20241207_162218_Abs-SG1.png filter=lfs diff=lfs merge=lfs -text
|
47 |
+
results1/loss_curves_modelC_20241207_163811_Abs-SG1-SNV.png filter=lfs diff=lfs merge=lfs -text
|
48 |
+
results1/loss_curves_modelC_20241207_165344_Abs-SG2.png filter=lfs diff=lfs merge=lfs -text
|
49 |
+
results1/loss_curves_modelC_20241207_170911_Abs-SG2-SNV.png filter=lfs diff=lfs merge=lfs -text
|
50 |
+
results1/training_metrics_modelC_20241207_155007_Abs-SG0.png filter=lfs diff=lfs merge=lfs -text
|
51 |
+
results1/training_metrics_modelC_20241207_160632_Abs-SG0-SNV.png filter=lfs diff=lfs merge=lfs -text
|
52 |
+
results1/training_metrics_modelC_20241207_162218_Abs-SG1.png filter=lfs diff=lfs merge=lfs -text
|
53 |
+
results1/training_metrics_modelC_20241207_163811_Abs-SG1-SNV.png filter=lfs diff=lfs merge=lfs -text
|
54 |
+
results1/training_metrics_modelC_20241207_165344_Abs-SG2.png filter=lfs diff=lfs merge=lfs -text
|
55 |
+
results1/training_metrics_modelC_20241207_170911_Abs-SG2-SNV.png filter=lfs diff=lfs merge=lfs -text
|
56 |
+
results2/loss_curves_modelC_20241213_202047_5.png filter=lfs diff=lfs merge=lfs -text
|
57 |
+
results2/loss_curves_modelC_20241213_203438_10.png filter=lfs diff=lfs merge=lfs -text
|
58 |
+
results2/loss_curves_modelC_20241213_204103_15.png filter=lfs diff=lfs merge=lfs -text
|
59 |
+
results2/training_metrics_modelC_20241213_202047_5.png filter=lfs diff=lfs merge=lfs -text
|
60 |
+
results2/training_metrics_modelC_20241213_203438_10.png filter=lfs diff=lfs merge=lfs -text
|
61 |
+
results2/training_metrics_modelC_20241213_204103_15.png filter=lfs diff=lfs merge=lfs -text
|
62 |
+
results3/loss_curves_modelC_20241213_211327_15.png filter=lfs diff=lfs merge=lfs -text
|
63 |
+
results3/training_metrics_modelC_20241213_211327_15.png filter=lfs diff=lfs merge=lfs -text
|
64 |
+
shap_summary_plot.png filter=lfs diff=lfs merge=lfs -text
|
65 |
+
shap_top10_wavelengths.png filter=lfs diff=lfs merge=lfs -text
|
11522105.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d350619292928420963d55ea260c286edc0ccad267f2bbefa9b97cee4ee3661
|
3 |
+
size 2284006
|
data_load.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
from sklearn.model_selection import train_test_split
|
4 |
+
|
5 |
+
def load_soil_data(file_path, target_columns, n_wavelengths=4200):
    """Load LUCAS soil spectra from a CSV file and split into train/test sets.

    Parameters
    ----------
    file_path : str or file-like
        Path to (or buffer of) the CSV file.  The first ``n_wavelengths``
        columns must hold the spectra, with headers like ``spc.<wavelength>``.
    target_columns : list of str
        Names of the target soil-property columns (8 indicators in this project).
    n_wavelengths : int, optional
        Number of leading spectral columns (default 4200, the LUCAS 2009 layout).

    Returns
    -------
    X_train, X_test : np.ndarray
        Spectra reshaped to (n_samples, 1, n_wavelengths) for 1-D conv nets.
    y_train, y_test : np.ndarray
        Target values, shape (n_samples, len(target_columns)).
    wavelengths : pandas Float64 index
        Wavelength values parsed from the spectral column headers.
    """
    data = pd.read_csv(file_path)

    # Wavelengths are encoded in the spectral column headers ("spc.<nm>").
    wavelengths = data.columns[:n_wavelengths].str.replace('spc.', '').astype(float)

    X = data.iloc[:, :n_wavelengths].values  # spectral features
    y = data[target_columns].values          # regression targets

    # Ensure the features are float32 (saves memory, matches torch defaults).
    X = X.astype('float32')

    # 80/20 train/test split with a fixed seed for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # 1-D convolution expects (batch_size, channels, sequence_length).
    X_train = X_train.reshape(-1, 1, n_wavelengths)
    X_test = X_test.reshape(-1, 1, n_wavelengths)

    return X_train, X_test, y_train, y_test, wavelengths
37 |
+
|
38 |
+
if __name__ == "__main__":
    # Example usage: load the LUCAS 2009 absorbance dataset and report shapes.
    file_path = 'LUCAS.2009_abs.csv'
    # The eight soil property targets predicted by the multi-task model.
    target_columns = ['pH.in.CaCl2', 'pH.in.H2O', 'OC', 'CaCO3', 'N', 'P', 'K', 'CEC']
    X_train, X_test, y_train, y_test, wavelengths = load_soil_data(file_path, target_columns)

    print("X_train shape:", X_train.shape)
    print("X_test shape:", X_test.shape)
    print("y_train shape:", y_train.shape)
    print("y_test shape:", y_test.shape)
    print("wavelengths shape:", wavelengths.shape)
|
data_processing.py
ADDED
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
from scipy.signal import savgol_filter
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
from data_load import load_soil_data
|
5 |
+
|
6 |
+
def apply_sg_filter(spectra, window_length=15, polyorder=2, deriv=0):
    """Apply a Savitzky-Golay filter for spectral smoothing or differentiation.

    Parameters
    ----------
    spectra : array-like, shape (n_samples, n_wavelengths)
        Input spectra, one row per sample.
    window_length : int, optional
        Filter window length; must be odd (default 15).
    polyorder : int, optional
        Order of the fitted polynomial (default 2).
    deriv : int, optional
        Derivative order: 0 = smoothing, 1 = first derivative, 2 = second.

    Returns
    -------
    np.ndarray
        Filtered spectra with the same shape as the input.
    """
    # savgol_filter can operate along an axis, so filter every sample in one
    # vectorized C-level call instead of a Python loop per spectrum.
    return savgol_filter(np.asarray(spectra), window_length, polyorder,
                         deriv=deriv, axis=-1)
|
19 |
+
|
20 |
+
|
21 |
+
def apply_snv(spectra):
    """Apply the Standard Normal Variate (SNV) transform to each spectrum.

    Each sample is centered by its own mean and scaled by its own standard
    deviation, removing multiplicative scatter effects.

    Parameters
    ----------
    spectra : array-like, shape (n_samples, n_wavelengths)

    Returns
    -------
    np.ndarray
        SNV-transformed spectra (float), same shape as the input.

    Notes
    -----
    The original per-row loop wrote into ``np.zeros_like(spectra)``; for an
    integer-dtype input that silently truncated the standardized values to
    ints.  Broadcasting the row statistics fixes that and avoids the loop.
    """
    spectra = np.asarray(spectra)
    # Per-sample statistics; keepdims lets broadcasting do the row-wise work.
    mean = spectra.mean(axis=1, keepdims=True)
    std = spectra.std(axis=1, keepdims=True)
    return (spectra - mean) / std
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
def process_spectra(spectra, method='Abs-SG0'):
    """Preprocess spectra according to a named method.

    Each method combines a Savitzky-Golay step (smoothing or a derivative)
    with an optional SNV normalization:

    - 'Abs-SG0'      : SG smoothing
    - 'Abs-SG0-SNV'  : SG smoothing + SNV
    - 'Abs-SG1'      : SG first derivative
    - 'Abs-SG1-SNV'  : SG first derivative + SNV
    - 'Abs-SG2'      : SG second derivative
    - 'Abs-SG2-SNV'  : SG second derivative + SNV

    Parameters
    ----------
    spectra : np.ndarray, shape (n_samples, n_wavelengths)
    method : str
        One of the method names above.

    Returns
    -------
    np.ndarray
        Processed spectra.

    Raises
    ------
    ValueError
        If ``method`` is not a supported name.
    """
    # Map each method name to (derivative order, apply SNV afterwards).
    recipes = {
        'Abs-SG0': (0, False),
        'Abs-SG0-SNV': (0, True),
        'Abs-SG1': (1, False),
        'Abs-SG1-SNV': (1, True),
        'Abs-SG2': (2, False),
        'Abs-SG2-SNV': (2, True),
    }
    if method not in recipes:
        raise ValueError(f"Unsupported method: {method}")
    deriv, use_snv = recipes[method]
    filtered = apply_sg_filter(spectra, deriv=deriv)
    return apply_snv(filtered) if use_snv else filtered
|
78 |
+
|
79 |
+
|
80 |
+
|
81 |
+
|
82 |
+
def remove_wavelength_bands(spectra, wavelengths):
    """Drop the 400-499.5 nm and 2450-2499.5 nm bands from the spectra.

    Parameters
    ----------
    spectra : np.ndarray, shape (n_samples, n_wavelengths)
    wavelengths : np.ndarray
        Wavelength value for each spectral column.

    Returns
    -------
    tuple of np.ndarray
        (filtered spectra, matching wavelengths) with both bands removed.
    """
    # Boolean masks for the two excluded bands, then keep everything else.
    low_band = (wavelengths >= 400) & (wavelengths <= 499.5)
    high_band = (wavelengths >= 2450) & (wavelengths <= 2499.5)
    keep = ~(low_band | high_band)

    return spectra[:, keep], wavelengths[keep]
|
102 |
+
|
103 |
+
|
104 |
+
|
105 |
+
|
106 |
+
def downsample_spectra(spectra, wavelengths, bin_size):
    """Downsample spectra by averaging within fixed-width wavelength bins.

    Parameters
    ----------
    spectra : np.ndarray, shape (n_samples, n_wavelengths)
    wavelengths : np.ndarray
        Wavelength value of each column (assumed ascending).
    bin_size : float
        Bin width in nm (e.g. 5, 10 or 15).

    Returns
    -------
    tuple of np.ndarray
        (binned spectra, bin-center wavelengths).  A bin that contains no
        wavelengths is left at zero in both outputs.
    """
    # Bin edges covering the full wavelength range.
    edges = np.arange(wavelengths[0], wavelengths[-1] + bin_size, bin_size)
    n_bins = len(edges) - 1

    binned = np.zeros((spectra.shape[0], n_bins))
    centers = np.zeros(n_bins)

    # Average every column whose wavelength falls in [lo, hi).
    for idx, (lo, hi) in enumerate(zip(edges[:-1], edges[1:])):
        in_bin = (wavelengths >= lo) & (wavelengths < hi)
        if np.any(in_bin):
            binned[:, idx] = spectra[:, in_bin].mean(axis=1)
            centers[idx] = (lo + hi) / 2.0

    return binned, centers
|
133 |
+
|
134 |
+
|
135 |
+
|
136 |
+
|
137 |
+
|
138 |
+
def preprocess_with_downsampling(spectra, wavelengths, bin_size=5):
    """Full pipeline: remove the excluded bands, then downsample.

    Parameters
    ----------
    spectra : np.ndarray, shape (n_samples, n_wavelengths)
    wavelengths : np.ndarray
        Wavelength value of each column.
    bin_size : float, optional
        Downsampling bin width in nm (default 5; 10 and 15 also used).

    Returns
    -------
    tuple of np.ndarray
        (processed spectra, corresponding wavelengths).
    """
    # Band removal first, then bin-averaging on what remains.
    trimmed, trimmed_wl = remove_wavelength_bands(spectra, wavelengths)
    return downsample_spectra(trimmed, trimmed_wl, bin_size)
|
158 |
+
|
159 |
+
|
160 |
+
|
161 |
+
|
162 |
+
def plot_processed_spectra_with_range(original_spectra, wavelengths=None):
    """Plot each preprocessing method's mean spectrum with its min/max range.

    One subplot per method (2x3 grid), each showing the average curve on top
    of the shaded min-to-max envelope across samples.

    Parameters
    ----------
    original_spectra : np.ndarray, shape (n_samples, n_wavelengths)
        Raw spectra to preprocess and visualize.
    wavelengths : np.ndarray, optional
        X-axis values; column indices are used when omitted.
    """
    methods = ['Abs-SG0', 'Abs-SG0-SNV', 'Abs-SG1',
               'Abs-SG1-SNV', 'Abs-SG2', 'Abs-SG2-SNV']

    if wavelengths is None:
        wavelengths = np.arange(original_spectra.shape[1])

    fig, axes = plt.subplots(2, 3, figsize=(18, 10))  # 2-row, 3-column grid
    panels = axes.ravel()

    for idx, method in enumerate(methods):
        ax = panels[idx]
        transformed = process_spectra(original_spectra, method)
        avg = np.mean(transformed, axis=0)    # mean spectrum
        lower = np.min(transformed, axis=0)   # envelope bottom
        upper = np.max(transformed, axis=0)   # envelope top

        # Shaded range behind the average curve.
        ax.fill_between(wavelengths, lower, upper, color='skyblue', alpha=0.3, label='Range')
        ax.plot(wavelengths, avg, color='steelblue', label='Average Curve')

        # Panel labels run (a), (b), (c), ...
        ax.set_title(f'({chr(97 + idx)}) {method}', loc='center', fontsize=12)
        ax.set_xlabel('Wavelength/nm', fontsize=10)
        ax.set_ylabel('Absorbance', fontsize=10)
        ax.legend()
        ax.grid(True)

    plt.tight_layout(h_pad=2.5, w_pad=3.0)
    plt.show()
|
200 |
+
|
201 |
+
|
202 |
+
|
203 |
+
|
204 |
+
|
205 |
+
|
206 |
+
|
207 |
+
# 示例调用
|
208 |
+
# Example driver: load data, visualize preprocessing, and run the pipeline.
if __name__ == '__main__':
    # 1. Load the dataset (spectra plus the 8 soil-property targets).
    file_path = 'LUCAS.2009_abs.csv'
    target_columns = ['pH.in.CaCl2', 'pH.in.H2O', 'OC', 'CaCO3', 'N', 'P', 'K', 'CEC']
    X_train, X_test, y_train, y_test ,wavelengths= load_soil_data(file_path, target_columns)

    # 2. Flatten the (n, 1, L) conv-net input back to 2-D (n, L) for preprocessing.
    X_train_2d = X_train.reshape(X_train.shape[0], -1)

    # 4. Show the spectral preprocessing results on the raw data.
    print("\n=== 光谱预处理结果 ===")
    plot_processed_spectra_with_range(X_train_2d, wavelengths)

    # 5. Remove the excluded bands and downsample at several resolutions.
    print("\n=== 波段移除和降采样结果 ===")
    bin_sizes = [5, 10, 15]  # candidate downsampling bin widths (nm)

    # One figure holding all three downsampling results side by side.
    plt.figure(figsize=(15, 5))

    for i, bin_size in enumerate(bin_sizes):
        # Run band removal + bin averaging at this resolution.
        processed_spectra, processed_wavelengths = preprocess_with_downsampling(
            X_train_2d, wavelengths, bin_size)

        # Report resulting shapes.
        print(f"\n使用 {bin_size}nm 降采样:")
        print(f"处理后的光谱形状: {processed_spectra.shape}")
        print(f"波长数量: {len(processed_wavelengths)}")

        # Plot mean spectrum with a +/- 1 std envelope.
        plt.subplot(1, 3, i+1)
        mean_curve = np.mean(processed_spectra, axis=0)
        std_curve = np.std(processed_spectra, axis=0)

        plt.plot(processed_wavelengths, mean_curve, 'b-', label=f'Mean ({bin_size}nm)')
        plt.fill_between(processed_wavelengths,
                         mean_curve - std_curve,
                         mean_curve + std_curve,
                         color='skyblue', alpha=0.2, label='Standard Deviation Range')
        plt.title(f'Downsampling {bin_size}nm\n(Wavelengths: {len(processed_wavelengths)})')
        plt.xlabel('Wavelength (nm)')
        plt.ylabel('Absorbance')
        plt.legend()
        plt.grid(True)

    plt.tight_layout()
    plt.show()

    # 6. Demonstrate the complete preprocessing pipeline end to end.
    print("\n=== 完整预处理流程示例 ===")
    # Spectral preprocessing first (SG smoothing + SNV) ...
    processed_spectra = process_spectra(X_train_2d, method='Abs-SG0-SNV')
    # ... then band removal and 10 nm downsampling.
    final_spectra, final_wavelengths = preprocess_with_downsampling(
        processed_spectra, wavelengths, bin_size=10)
    print(f"最终处理后的数据形状: {final_spectra.shape}")
    print(f"最终波长数量: {len(final_wavelengths)}")
|
resnet1d.py
ADDED
@@ -0,0 +1,297 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
resnet for 1-d signal data, pytorch version
|
3 |
+
|
4 |
+
Shenda Hong, Oct 2019
|
5 |
+
"""
|
6 |
+
|
7 |
+
import numpy as np
|
8 |
+
from collections import Counter
|
9 |
+
from tqdm import tqdm
|
10 |
+
from matplotlib import pyplot as plt
|
11 |
+
from sklearn.metrics import classification_report
|
12 |
+
|
13 |
+
import torch
|
14 |
+
import torch.nn as nn
|
15 |
+
import torch.optim as optim
|
16 |
+
import torch.nn.functional as F
|
17 |
+
from torch.utils.data import Dataset, DataLoader
|
18 |
+
|
19 |
+
class MyDataset(Dataset):
    """Wrap paired arrays of signals and labels as a torch ``Dataset``.

    Signals are returned as float tensors, labels as long tensors
    (integer classification-style targets, matching the original code).
    """

    def __init__(self, data, label):
        self.data = data
        self.label = label

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        sample = torch.tensor(self.data[index], dtype=torch.float)
        target = torch.tensor(self.label[index], dtype=torch.long)
        return (sample, target)
|
29 |
+
|
30 |
+
class MyConv1dPadSame(nn.Module):
    """``nn.Conv1d`` with TensorFlow-style SAME padding.

    The input is zero-padded so the output length equals
    ``ceil(input_length / stride)`` regardless of kernel size.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, groups=1):
        super(MyConv1dPadSame, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.groups = groups
        self.conv = torch.nn.Conv1d(
            in_channels=self.in_channels,
            out_channels=self.out_channels,
            kernel_size=self.kernel_size,
            stride=self.stride,
            groups=self.groups)

    def forward(self, x):
        # SAME padding: total pad makes out_dim == ceil(in_dim / stride);
        # when the total is odd the extra zero goes on the right.
        length = x.shape[-1]
        target_len = (length + self.stride - 1) // self.stride
        total_pad = max(0, (target_len - 1) * self.stride + self.kernel_size - length)
        left = total_pad // 2
        right = total_pad - left
        padded = F.pad(x, (left, right), "constant", 0)
        return self.conv(padded)
|
63 |
+
|
64 |
+
class MyMaxPool1dPadSame(nn.Module):
    """``nn.MaxPool1d`` with SAME-style zero padding.

    NOTE: the padding amount is computed with ``self.stride = 1``, but
    ``nn.MaxPool1d`` defaults its stride to ``kernel_size``, so this module
    actually downsamples the sequence to ``ceil(length / kernel_size)`` —
    which is how it is used to shrink the residual identity path.
    """

    def __init__(self, kernel_size):
        super(MyMaxPool1dPadSame, self).__init__()
        self.kernel_size = kernel_size
        self.stride = 1
        self.max_pool = torch.nn.MaxPool1d(kernel_size=self.kernel_size)

    def forward(self, x):
        # Pad as for stride-1 SAME: total pad = kernel_size - 1,
        # split left/right (extra zero on the right).
        length = x.shape[-1]
        target_len = (length + self.stride - 1) // self.stride
        total_pad = max(0, (target_len - 1) * self.stride + self.kernel_size - length)
        left = total_pad // 2
        right = total_pad - left
        padded = F.pad(x, (left, right), "constant", 0)
        return self.max_pool(padded)
|
89 |
+
|
90 |
+
class BasicBlock(nn.Module):
    """
    ResNet Basic Block.

    Two convolutions in pre-activation order (BN -> ReLU -> Dropout -> Conv)
    plus an identity shortcut.  When ``downsample`` is true the first conv
    strides to shrink the sequence and the identity is max-pooled to match;
    when ``out_channels`` exceeds ``in_channels`` the identity is zero-padded
    along the channel dimension before the residual add.
    """
    def __init__(self, in_channels, out_channels, kernel_size, stride, groups, downsample, use_bn, use_do, is_first_block=False):
        super(BasicBlock, self).__init__()

        self.in_channels = in_channels
        self.kernel_size = kernel_size
        self.out_channels = out_channels
        self.stride = stride
        self.groups = groups
        self.downsample = downsample
        # Effective stride: only downsampling blocks actually stride;
        # all others keep the sequence length (stride 1).
        if self.downsample:
            self.stride = stride
        else:
            self.stride = 1
        self.is_first_block = is_first_block
        self.use_bn = use_bn
        self.use_do = use_do

        # the first conv (pre-activation: BN/ReLU/Dropout come before it,
        # except in the very first block of the network)
        self.bn1 = nn.BatchNorm1d(in_channels)
        self.relu1 = nn.ReLU()
        self.do1 = nn.Dropout(p=0.5)
        self.conv1 = MyConv1dPadSame(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=self.stride,
            groups=self.groups)

        # the second conv (always stride 1)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.relu2 = nn.ReLU()
        self.do2 = nn.Dropout(p=0.5)
        self.conv2 = MyConv1dPadSame(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=1,
            groups=self.groups)

        # Pool used to shrink the identity path when downsampling; note
        # MyMaxPool1dPadSame downsamples by a factor of its kernel_size.
        self.max_pool = MyMaxPool1dPadSame(kernel_size=self.stride)

    def forward(self, x):
        # Keep the raw input for the residual shortcut.
        identity = x

        # the first conv (the very first block skips its pre-activation,
        # since the stem conv already applied BN/ReLU)
        out = x
        if not self.is_first_block:
            if self.use_bn:
                out = self.bn1(out)
            out = self.relu1(out)
            if self.use_do:
                out = self.do1(out)
        out = self.conv1(out)

        # the second conv
        if self.use_bn:
            out = self.bn2(out)
        out = self.relu2(out)
        if self.use_do:
            out = self.do2(out)
        out = self.conv2(out)

        # if downsample, also downsample identity
        if self.downsample:
            identity = self.max_pool(identity)

        # if expand channel, also pad zeros to identity
        # (transpose so the channel axis is last, pad it, transpose back)
        if self.out_channels != self.in_channels:
            identity = identity.transpose(-1,-2)
            ch1 = (self.out_channels-self.in_channels)//2
            ch2 = self.out_channels-self.in_channels-ch1
            identity = F.pad(identity, (ch1, ch2), "constant", 0)
            identity = identity.transpose(-1,-2)

        # shortcut
        out += identity

        return out
|
173 |
+
|
174 |
+
class ResNet1D(nn.Module):
    """
    1-D ResNet for signal data.

    Input:
        X: (n_samples, n_channel, n_length)
    Output:
        out: (n_samples, n_classes) logits from the final dense layer

    Parameters:
        in_channels: dim of input, the same as n_channel
        base_filters: number of filters in the first several Conv layers; it
            doubles every ``increasefilter_gap`` blocks
        kernel_size: width of kernel
        stride: stride of kernel moving (applied only in downsampling blocks)
        groups: grouped-convolution count; set larger than 1 for ResNeXt-style blocks
        n_block: number of BasicBlocks
        n_classes: number of output units of the final dense layer
        downsample_gap: downsample every this many blocks (2 for base model)
        increasefilter_gap: double the filters every this many blocks (4 for base model)
        use_bn / use_do: toggle batch-norm / dropout inside the blocks
        verbose: print tensor shapes during forward for debugging
    """

    def __init__(self, in_channels, base_filters, kernel_size, stride, groups, n_block, n_classes, downsample_gap=2, increasefilter_gap=4, use_bn=True, use_do=True, verbose=False):
        super(ResNet1D, self).__init__()

        self.verbose = verbose
        self.n_block = n_block
        self.kernel_size = kernel_size
        self.stride = stride
        self.groups = groups
        self.use_bn = use_bn
        self.use_do = use_do

        self.downsample_gap = downsample_gap # 2 for base model
        self.increasefilter_gap = increasefilter_gap # 4 for base model

        # first block (stem): plain conv + BN + ReLU, no striding
        self.first_block_conv = MyConv1dPadSame(in_channels=in_channels, out_channels=base_filters, kernel_size=self.kernel_size, stride=1)
        self.first_block_bn = nn.BatchNorm1d(base_filters)
        self.first_block_relu = nn.ReLU()
        out_channels = base_filters

        # residual blocks
        self.basicblock_list = nn.ModuleList()
        for i_block in range(self.n_block):
            # is_first_block
            if i_block == 0:
                is_first_block = True
            else:
                is_first_block = False
            # downsample at every self.downsample_gap blocks
            if i_block % self.downsample_gap == 1:
                downsample = True
            else:
                downsample = False
            # in_channels and out_channels
            if is_first_block:
                in_channels = base_filters
                out_channels = in_channels
            else:
                # increase filters at every self.increasefilter_gap blocks
                in_channels = int(base_filters*2**((i_block-1)//self.increasefilter_gap))
                if (i_block % self.increasefilter_gap == 0) and (i_block != 0):
                    out_channels = in_channels * 2
                else:
                    out_channels = in_channels

            tmp_block = BasicBlock(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=self.kernel_size,
                stride = self.stride,
                groups = self.groups,
                downsample=downsample,
                use_bn = self.use_bn,
                use_do = self.use_do,
                is_first_block=is_first_block)
            self.basicblock_list.append(tmp_block)

        # final prediction: BN + ReLU, global average pool, dense
        self.final_bn = nn.BatchNorm1d(out_channels)
        self.final_relu = nn.ReLU(inplace=True)
        # NOTE(review): self.do is constructed but the dropout call below is
        # commented out, so it is currently unused.
        self.do = nn.Dropout(p=0.3)
        self.dense = nn.Linear(out_channels, n_classes)
        # self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Run the stem, residual blocks, pooling and dense head."""
        out = x

        # first conv
        if self.verbose:
            print('input shape', out.shape)
        out = self.first_block_conv(out)
        if self.verbose:
            print('after first conv', out.shape)
        if self.use_bn:
            out = self.first_block_bn(out)
        out = self.first_block_relu(out)

        # residual blocks, every block has two conv
        for i_block in range(self.n_block):
            net = self.basicblock_list[i_block]
            if self.verbose:
                print('i_block: {0}, in_channels: {1}, out_channels: {2}, downsample: {3}'.format(i_block, net.in_channels, net.out_channels, net.downsample))
            out = net(out)
            if self.verbose:
                print(out.shape)

        # final prediction: global average pooling over the time axis
        if self.use_bn:
            out = self.final_bn(out)
        out = self.final_relu(out)
        out = out.mean(-1)
        if self.verbose:
            print('final pooling', out.shape)
        # out = self.do(out)
        out = self.dense(out)
        if self.verbose:
            print('dense', out.shape)
        # out = self.softmax(out)
        if self.verbose:
            print('softmax', out.shape)

        return out
|
resnet1d_multitask.py
ADDED
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import torchvision.models as models
|
4 |
+
import resnet1d
|
5 |
+
|
6 |
+
__all__ = ['ResNet1D_MultiTask', 'get_model']
|
7 |
+
|
8 |
+
class ResNet1D_MultiTask(resnet1d.ResNet1D):
    """ResNet1D backbone with a multi-task regression head.

    Replaces the single dense layer of ``ResNet1D`` with a small MLP that
    predicts all 8 soil-property values at once.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Feature width coming out of the backbone's global average pooling.
        feat_dim = self.dense.in_features

        # Drop the original single-task prediction layer.
        delattr(self, 'dense')

        # Multi-task head: feat -> feat/2 -> feat/4 -> 8 outputs.
        self.prediction_head = nn.Sequential(
            nn.Linear(feat_dim, feat_dim//2),
            nn.BatchNorm1d(feat_dim//2),
            nn.ReLU(),
            nn.Dropout(p=0.3),

            nn.Linear(feat_dim//2, feat_dim//4),
            nn.BatchNorm1d(feat_dim//4),
            nn.ReLU(),
            nn.Dropout(p=0.3),

            nn.Linear(feat_dim//4, 8)
        )

    def forward(self, x):
        """Run the backbone, pool over time, and predict the 8 targets."""
        features = x

        # Stem convolution
        features = self.first_block_conv(features)
        if self.use_bn:
            features = self.first_block_bn(features)
        features = self.first_block_relu(features)

        # Residual blocks
        for block_idx in range(self.n_block):
            features = self.basicblock_list[block_idx](features)

        # Feature aggregation
        if self.use_bn:
            features = self.final_bn(features)
        features = self.final_relu(features)
        features = features.mean(-1)  # global average pooling

        # 8-way multi-task prediction
        return self.prediction_head(features)
|
58 |
+
|
59 |
+
|
60 |
+
def get_model(model_type):
    """Build a ResNet1D_MultiTask variant for soil-property prediction.

    Args:
        model_type: 'A' (ResNet18-like, 8 blocks), 'B' (ResNet34-like,
            16 blocks) or 'C' (ResNet50-like, 24 blocks).

    Returns:
        A freshly constructed ResNet1D_MultiTask.

    Raises:
        ValueError: if model_type is not 'A', 'B' or 'C'.
    """
    # Depth (number of residual blocks) per variant. All other
    # hyper-parameters are shared: small base_filters keeps memory
    # usage down, and the head predicts 8 soil properties.
    n_blocks = {'A': 8, 'B': 16, 'C': 24}
    if model_type not in n_blocks:
        raise ValueError("Invalid model type. Choose 'A' for ResNet18, 'B' for ResNet34, or 'C' for ResNet50")

    return ResNet1D_MultiTask(
        in_channels=1,
        base_filters=32,   # reduced to lower memory footprint
        kernel_size=3,
        stride=2,
        groups=1,
        n_block=n_blocks[model_type],
        n_classes=8,
    )
|
93 |
+
|
94 |
+
def print_model_info():
    """Print key information about each model variant (simplified).

    Builds models A/B/C on CPU and prints depth, channel/kernel config,
    parameter counts, and every top-level child module holding more than
    5% of the total parameters.

    NOTE: an earlier version imported ``torchsummary.summary`` behind a
    try/except gate but never called it, refusing to run when the package
    was missing; that dead dependency check has been removed.
    """
    device = torch.device("cpu")

    model_types = ['A', 'B', 'C']
    model_names = {
        'A': 'ResNet18',
        'B': 'ResNet34',
        'C': 'ResNet50'
    }

    # Per-variant configuration info; mirrors the settings used by
    # get_model() (shown here only for display purposes).
    model_configs = {
        'A': {'n_block': 8, 'base_filters': 32, 'kernel_size': 3},
        'B': {'n_block': 16, 'base_filters': 32, 'kernel_size': 3},
        'C': {'n_block': 24, 'base_filters': 32, 'kernel_size': 3}
    }

    print("\n" + "="*50)
    print(f"{'LUCAS土壤光谱分析模型架构':^48}")
    print("="*50)
    print(f"{'输入: (batch_size=15228, channels=1, length=130)':^48}")
    print(f"{'输出: 8个土壤属性预测值':^48}")
    print("-"*50)

    for model_type in model_types:
        model = get_model(model_type).to(device)
        config = model_configs[model_type]
        total_params = sum(p.numel() for p in model.parameters())
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

        print(f"\n[Model {model_type}: {model_names[model_type]}]")
        print(f"网络深度: {config['n_block']} blocks")
        print(f"基础通道数: {config['base_filters']}")
        print(f"卷积核大小: {config['kernel_size']}")
        print(f"总参数量: {total_params:,}")
        print(f"可训练参数: {trainable_params:,}")

        # Only report top-level layers that hold >5% of all parameters.
        main_layers = {}
        for name, module in model.named_children():
            params = sum(p.numel() for p in module.parameters())
            if params > 0 and params / total_params > 0.05:
                main_layers[name] = params

        if main_layers:
            print("\n主要层结构:")
            for name, params in main_layers.items():
                print(f"  {name:15}: {params:,} ({params/total_params*100:.1f}%)")
        print("-"*50)
153 |
+
|
154 |
+
# Script entry point: print an architecture summary for every variant.
if __name__ == '__main__':
    print_model_info()
|
results/loss_curves_modelA_20241207_145904.png
ADDED
![]() |
Git LFS Details
|
results/loss_curves_modelB_20241207_151513.png
ADDED
![]() |
Git LFS Details
|
results/loss_curves_modelC_20241207_153102.png
ADDED
![]() |
Git LFS Details
|
results/metrics_modelA_20241207_145904.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Results for Model A generated at 2024-12-07 14:59:04
|
2 |
+
--------------------------------------------------
|
3 |
+
Indicator 1 (pH.in.CaCl2) - RMSE: 1.2717, R2: -0.2851
|
4 |
+
Indicator 2 (pH.in.H2O) - RMSE: 1.2939, R2: -0.5375
|
5 |
+
Indicator 3 (OC) - RMSE: 9.3670, R2: 0.0148
|
6 |
+
Indicator 4 (CaCO3) - RMSE: 11.4724, R2: -0.1613
|
7 |
+
Indicator 5 (N) - RMSE: 1.8129, R2: 0.1423
|
8 |
+
Indicator 6 (P) - RMSE: 6.2296, R2: -0.1063
|
9 |
+
Indicator 7 (K) - RMSE: 15.0763, R2: -0.2626
|
10 |
+
Indicator 8 (CEC) - RMSE: 3.6354, R2: -0.0210
|
11 |
+
|
12 |
+
Average Test Loss: 30.3030
|
results/metrics_modelB_20241207_151513.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Results for Model B generated at 2024-12-07 15:15:13
|
2 |
+
--------------------------------------------------
|
3 |
+
Indicator 1 (pH.in.CaCl2) - RMSE: 1.2917, R2: -0.3678
|
4 |
+
Indicator 2 (pH.in.H2O) - RMSE: 1.2833, R2: -0.4877
|
5 |
+
Indicator 3 (OC) - RMSE: 8.2344, R2: 0.4116
|
6 |
+
Indicator 4 (CaCO3) - RMSE: 10.9131, R2: 0.0491
|
7 |
+
Indicator 5 (N) - RMSE: 1.6586, R2: 0.3991
|
8 |
+
Indicator 6 (P) - RMSE: 6.1583, R2: -0.0565
|
9 |
+
Indicator 7 (K) - RMSE: 14.5700, R2: -0.1014
|
10 |
+
Indicator 8 (CEC) - RMSE: 3.5120, R2: 0.1108
|
11 |
+
|
12 |
+
Average Test Loss: 27.7304
|
results/metrics_modelC_20241207_153102.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Results for Model C generated at 2024-12-07 15:31:02
|
2 |
+
--------------------------------------------------
|
3 |
+
Indicator 1 (pH.in.CaCl2) - RMSE: 1.1903, R2: 0.0136
|
4 |
+
Indicator 2 (pH.in.H2O) - RMSE: 1.1664, R2: -0.0155
|
5 |
+
Indicator 3 (OC) - RMSE: 8.0283, R2: 0.4683
|
6 |
+
Indicator 4 (CaCO3) - RMSE: 10.7531, R2: 0.1037
|
7 |
+
Indicator 5 (N) - RMSE: 1.6682, R2: 0.3850
|
8 |
+
Indicator 6 (P) - RMSE: 6.1934, R2: -0.0808
|
9 |
+
Indicator 7 (K) - RMSE: 14.2200, R2: 0.0007
|
10 |
+
Indicator 8 (CEC) - RMSE: 3.4098, R2: 0.2098
|
11 |
+
|
12 |
+
Average Test Loss: 26.7518
|
results/modelC_scatter_CEC_20241207_153102.png
ADDED
![]() |
results/modelC_scatter_CaCO3_20241207_153102.png
ADDED
![]() |
results/modelC_scatter_K_20241207_153102.png
ADDED
![]() |
results/modelC_scatter_N_20241207_153102.png
ADDED
![]() |
results/modelC_scatter_OC_20241207_153102.png
ADDED
![]() |
results/modelC_scatter_P_20241207_153102.png
ADDED
![]() |
results/modelC_scatter_pH.in.CaCl2_20241207_153102.png
ADDED
![]() |
Git LFS Details
|
results/modelC_scatter_pH.in.H2O_20241207_153102.png
ADDED
![]() |
results/training_metrics_modelA_20241207_145904.png
ADDED
![]() |
Git LFS Details
|
results/training_metrics_modelB_20241207_151513.png
ADDED
![]() |
Git LFS Details
|
results/training_metrics_modelC_20241207_153102.png
ADDED
![]() |
Git LFS Details
|
results1/loss_curves_modelC_20241207_155007_Abs-SG0.png
ADDED
![]() |
Git LFS Details
|
results1/loss_curves_modelC_20241207_160632_Abs-SG0-SNV.png
ADDED
![]() |
Git LFS Details
|
results1/loss_curves_modelC_20241207_162218_Abs-SG1.png
ADDED
![]() |
Git LFS Details
|
results1/loss_curves_modelC_20241207_163811_Abs-SG1-SNV.png
ADDED
![]() |
Git LFS Details
|
results1/loss_curves_modelC_20241207_165344_Abs-SG2.png
ADDED
![]() |
Git LFS Details
|
results1/loss_curves_modelC_20241207_170911_Abs-SG2-SNV.png
ADDED
![]() |
Git LFS Details
|
results1/metrics_modelC_20241210_142058_Abs-SG0.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Results for Model C generated at 2024-12-10 14:20:58
|
2 |
+
--------------------------------------------------
|
3 |
+
Indicator 1 (pH.in.CaCl2) - RMSE: 1.1329, R2: 0.1905
|
4 |
+
Indicator 2 (pH.in.H2O) - RMSE: 1.1135, R2: 0.1565
|
5 |
+
Indicator 3 (OC) - RMSE: 8.1022, R2: 0.4485
|
6 |
+
Indicator 4 (CaCO3) - RMSE: 10.7757, R2: 0.0961
|
7 |
+
Indicator 5 (N) - RMSE: 1.6102, R2: 0.4662
|
8 |
+
Indicator 6 (P) - RMSE: 6.1459, R2: -0.0480
|
9 |
+
Indicator 7 (K) - RMSE: 13.9761, R2: 0.0675
|
10 |
+
Indicator 8 (CEC) - RMSE: 3.2445, R2: 0.3522
|
11 |
+
|
12 |
+
Average Test Loss: 27.7911
|
results1/metrics_modelC_20241210_143517_Abs-SG0-SNV.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Results for Model C generated at 2024-12-10 14:35:17
|
2 |
+
--------------------------------------------------
|
3 |
+
Indicator 1 (pH.in.CaCl2) - RMSE: 1.0763, R2: 0.3406
|
4 |
+
Indicator 2 (pH.in.H2O) - RMSE: 1.0515, R2: 0.3294
|
5 |
+
Indicator 3 (OC) - RMSE: 6.0624, R2: 0.8271
|
6 |
+
Indicator 4 (CaCO3) - RMSE: 8.5423, R2: 0.6430
|
7 |
+
Indicator 5 (N) - RMSE: 1.4266, R2: 0.6711
|
8 |
+
Indicator 6 (P) - RMSE: 6.1328, R2: -0.0392
|
9 |
+
Indicator 7 (K) - RMSE: 13.8842, R2: 0.0918
|
10 |
+
Indicator 8 (CEC) - RMSE: 3.3248, R2: 0.2857
|
11 |
+
|
12 |
+
Average Test Loss: 21.7375
|
results1/metrics_modelC_20241210_144926_Abs-SG1.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Results for Model C generated at 2024-12-10 14:49:26
|
2 |
+
--------------------------------------------------
|
3 |
+
Indicator 1 (pH.in.CaCl2) - RMSE: 1.1943, R2: 0.0002
|
4 |
+
Indicator 2 (pH.in.H2O) - RMSE: 1.1764, R2: -0.0507
|
5 |
+
Indicator 3 (OC) - RMSE: 9.0927, R2: 0.1252
|
6 |
+
Indicator 4 (CaCO3) - RMSE: 11.3249, R2: -0.1028
|
7 |
+
Indicator 5 (N) - RMSE: 1.8572, R2: 0.0553
|
8 |
+
Indicator 6 (P) - RMSE: 6.0900, R2: -0.0105
|
9 |
+
Indicator 7 (K) - RMSE: 14.0982, R2: 0.0345
|
10 |
+
Indicator 8 (CEC) - RMSE: 3.6072, R2: 0.0103
|
11 |
+
|
12 |
+
Average Test Loss: 30.6785
|
results1/metrics_modelC_20241210_150332_Abs-SG1-SNV.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Results for Model C generated at 2024-12-10 15:03:32
|
2 |
+
--------------------------------------------------
|
3 |
+
Indicator 1 (pH.in.CaCl2) - RMSE: 0.9366, R2: 0.6220
|
4 |
+
Indicator 2 (pH.in.H2O) - RMSE: 0.9237, R2: 0.6007
|
5 |
+
Indicator 3 (OC) - RMSE: 6.9911, R2: 0.6943
|
6 |
+
Indicator 4 (CaCO3) - RMSE: 7.3689, R2: 0.8023
|
7 |
+
Indicator 5 (N) - RMSE: 1.6105, R2: 0.4658
|
8 |
+
Indicator 6 (P) - RMSE: 5.9795, R2: 0.0609
|
9 |
+
Indicator 7 (K) - RMSE: 13.8154, R2: 0.1097
|
10 |
+
Indicator 8 (CEC) - RMSE: 3.5810, R2: 0.0387
|
11 |
+
|
12 |
+
Average Test Loss: 21.2949
|
results1/metrics_modelC_20241210_151845_Abs-SG2.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Results for Model C generated at 2024-12-10 15:18:45
|
2 |
+
--------------------------------------------------
|
3 |
+
Indicator 1 (pH.in.CaCl2) - RMSE: 1.5963, R2: -2.1907
|
4 |
+
Indicator 2 (pH.in.H2O) - RMSE: 1.5703, R2: -2.3361
|
5 |
+
Indicator 3 (OC) - RMSE: 9.5270, R2: -0.0543
|
6 |
+
Indicator 4 (CaCO3) - RMSE: 11.4900, R2: -0.1685
|
7 |
+
Indicator 5 (N) - RMSE: 1.9782, R2: -0.2159
|
8 |
+
Indicator 6 (P) - RMSE: 6.6043, R2: -0.3975
|
9 |
+
Indicator 7 (K) - RMSE: 15.9916, R2: -0.5983
|
10 |
+
Indicator 8 (CEC) - RMSE: 4.1587, R2: -0.7483
|
11 |
+
|
12 |
+
Average Test Loss: 35.7320
|
results1/metrics_modelC_20241210_153333_Abs-SG2-SNV.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Results for Model C generated at 2024-12-10 15:33:33
|
2 |
+
--------------------------------------------------
|
3 |
+
Indicator 1 (pH.in.CaCl2) - RMSE: 0.8667, R2: 0.7227
|
4 |
+
Indicator 2 (pH.in.H2O) - RMSE: 0.8609, R2: 0.6986
|
5 |
+
Indicator 3 (OC) - RMSE: 5.8075, R2: 0.8544
|
6 |
+
Indicator 4 (CaCO3) - RMSE: 6.3981, R2: 0.8877
|
7 |
+
Indicator 5 (N) - RMSE: 1.4128, R2: 0.6836
|
8 |
+
Indicator 6 (P) - RMSE: 5.8376, R2: 0.1469
|
9 |
+
Indicator 7 (K) - RMSE: 12.9567, R2: 0.3112
|
10 |
+
Indicator 8 (CEC) - RMSE: 3.2131, R2: 0.3770
|
11 |
+
|
12 |
+
Average Test Loss: 17.8127
|
results1/training_metrics_modelC_20241207_155007_Abs-SG0.png
ADDED
![]() |
Git LFS Details
|
results1/training_metrics_modelC_20241207_160632_Abs-SG0-SNV.png
ADDED
![]() |
Git LFS Details
|
results1/training_metrics_modelC_20241207_162218_Abs-SG1.png
ADDED
![]() |
Git LFS Details
|
results1/training_metrics_modelC_20241207_163811_Abs-SG1-SNV.png
ADDED
![]() |
Git LFS Details
|
results1/training_metrics_modelC_20241207_165344_Abs-SG2.png
ADDED
![]() |
Git LFS Details
|
results1/training_metrics_modelC_20241207_170911_Abs-SG2-SNV.png
ADDED
![]() |
Git LFS Details
|
results2/loss_curves_modelC_20241213_202047_5.png
ADDED
![]() |
Git LFS Details
|
results2/loss_curves_modelC_20241213_203438_10.png
ADDED
![]() |
Git LFS Details
|
results2/loss_curves_modelC_20241213_204103_15.png
ADDED
![]() |
Git LFS Details
|
results2/metrics_modelC_20241213_202047_5.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Results for Model C generated at 2024-12-13 20:20:47
|
2 |
+
--------------------------------------------------
|
3 |
+
|
4 |
+
Average Test Loss: 17.4861
|
results2/metrics_modelC_20241213_203438_10.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Results for Model C generated at 2024-12-13 20:34:38
|
2 |
+
--------------------------------------------------
|
3 |
+
|
4 |
+
Average Test Loss: 17.7109
|
results2/metrics_modelC_20241213_204103_15.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Results for Model C generated at 2024-12-13 20:41:03
|
2 |
+
--------------------------------------------------
|
3 |
+
|
4 |
+
Average Test Loss: 17.6297
|
results2/training_metrics_modelC_20241213_202047_5.png
ADDED
![]() |
Git LFS Details
|
results2/training_metrics_modelC_20241213_203438_10.png
ADDED
![]() |
Git LFS Details
|
results2/training_metrics_modelC_20241213_204103_15.png
ADDED
![]() |
Git LFS Details
|