Build a simple TCN model and compare its prediction performance by changing the training data ratio, adjusting training parameters, and so on.

import torch.nn as nn
from torch.nn.utils import weight_norm


class Chomp1d(nn.Module):
    def __init__(self, chomp_size):
        super(Chomp1d, self).__init__()
        self.chomp_size = chomp_size
    def forward(self, x):
        """
        A cropping module: removes the extra padding added by the causal convolution
        so that the output keeps the original sequence length.
        """
        return x[:, :, :-self.chomp_size].contiguous()
class TemporalBlock(nn.Module):
    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
        """
        Equivalent to a residual block.
        :param n_inputs: int, number of input channels
        :param n_outputs: int, number of output channels
        :param kernel_size: int, convolution kernel size
        :param stride: int, stride, usually 1
        :param dilation: int, dilation factor
        :param padding: int, padding size
        :param dropout: float, dropout rate
        """
        super(TemporalBlock, self).__init__()
        self.conv1 = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        # After conv1 the output size is (Batch, n_outputs, seq_len + padding)
        self.chomp1 = Chomp1d(padding)  # crop the extra padding so the output keeps seq_len time steps
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)
        self.conv2 = weight_norm(nn.Conv1d(n_outputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        self.chomp2 = Chomp1d(padding)  # crop the extra padding so the output keeps seq_len time steps
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)
        self.net = nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1,
                                 self.conv2, self.chomp2, self.relu2, self.dropout2)
        # 1x1 convolution to match channel dimensions for the residual connection
        self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
        self.relu = nn.ReLU()
        # self.init_weights()
    def init_weights(self):
        """
        Parameter initialization.
        :return:
        """
        self.conv1.weight.data.normal_(0, 0.01)
        self.conv2.weight.data.normal_(0, 0.01)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 0.01)
    def forward(self, x):
        """
        :param x: size of (Batch, input_channel, seq_len)
        :return:
        """
        out = self.net(x)
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)
class TemporalConvNet(nn.Module):
    def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
        """
        TCN. The structure given in the paper handles the case where each time step is a
        single number (a plain sequence) very well. When each time step is a vector, the
        vector can still be treated as that step's input channels; when each time step is
        a matrix or a higher-dimensional image, the architecture is not a good fit.
        :param num_inputs: int, number of input channels
        :param num_channels: list, hidden channels per layer, e.g. [25, 25, 25, 25] means
                             4 hidden layers with 25 channels each
        :param kernel_size: int, convolution kernel size
        :param dropout: float, dropout rate
        """
        super(TemporalConvNet, self).__init__()
        layers = []
        num_levels = len(num_channels)
        for i in range(num_levels):
            dilation_size = 2 ** i   # dilation factor: 1, 2, 4, 8, ...
            in_channels = num_inputs if i == 0 else num_channels[i-1]  # input channels of this layer
            out_channels = num_channels[i]  # output channels of this layer
            layers += [TemporalBlock(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size,
                                     padding=(kernel_size-1) * dilation_size, dropout=dropout)]
        self.network = nn.Sequential(*layers)
    def forward(self, x):
        """
        Unlike an RNN, whose input size is usually (Batch, seq_len, channels) or
        (seq_len, Batch, channels), here seq_len is placed after channels, so all time
        steps form the last dimension of the Conv1d input and the convolution can slide
        across time steps. A clever design.

        :param x: size of (Batch, input_channel, seq_len)
        :return: size of (Batch, output_channel, seq_len)
        """
        return self.network(x)
class TCN(nn.Module):
    def __init__(self, input_size, output_size, num_channels, kernel_size, dropout):
        super(TCN, self).__init__()
        self.tcn = TemporalConvNet(input_size, num_channels, kernel_size=kernel_size, dropout=dropout)
        self.linear = nn.Linear(num_channels[-1], output_size)

    def forward(self, x, option):
        # x: (batch_size, seq_len, input_size)
        x = x.permute(0, 2, 1)
        # x: (batch_size, input_size, seq_len)
        if option == 1:
            y = self.tcn(x)  # (N, C_out, L_out = L_in)
            y = self.linear(y[:, :, -1])  # use only the last time step for the prediction head
        else:
            raise ValueError("invalid value for option")
        return y
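As a quick sanity check on the tensor shapes, here is a minimal sketch; the batch size, sequence length, and feature count below are arbitrary placeholders, not values from the experiments.

```python
import torch

# Arbitrary placeholder shapes: 8 sequences, 9 time steps, 4 features per step.
batch_size, seq_len, n_features = 8, 9, 4
model = TCN(input_size=n_features, output_size=1,
            num_channels=[16] * 2, kernel_size=2, dropout=0.0)

x = torch.randn(batch_size, seq_len, n_features)  # (batch_size, seq_len, input_size)
y = model(x, option=1)                            # TCN stack, last time step, linear head
print(y.shape)                                    # torch.Size([8, 1])
```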

# Checking model performance

The TCN configuration is as follows:

model_params = {
    # input_size: number of input channels (C_in)
    'input_size':   input_size,
    # single-step prediction: forecast one time step into the future
    'output_size':  label_length,
    'num_channels': [16] * 2,
    'kernel_size':  2,
    'dropout': .0
}
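For reference, kernel_size = 2 with num_channels = [16] * 2 gives two TemporalBlocks with dilations 1 and 2, so the last output can see at most 7 time steps of history. The helper below is not part of the original code; it is just a sketch of that calculation.

```python
def tcn_receptive_field(kernel_size, num_levels):
    """Receptive field of the TemporalConvNet above: level i stacks two causal
    convolutions with dilation 2**i, and each convolution widens the receptive
    field by (kernel_size - 1) * dilation."""
    return 1 + sum(2 * (kernel_size - 1) * 2 ** i for i in range(num_levels))

print(tcn_receptive_field(kernel_size=2, num_levels=2))  # 7
```

With a seq_length shorter than 7, part of that receptive field would only ever cover the causal padding.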

The results are as follows:

| Layers | Activity data 1 | Activity data 2 | Chromatography data |
| --- | --- | --- | --- |
| 1 | iloc1_epoch100_seq7_layer1_predict | iloc3_epoch100_seq7_layer1_predict | iloc5_epoch100_seq7_layer1_predict |

😂 The results are remarkably strong, much like the CNN-LSTM model, so I left it at that and did not change the configuration further.

# Adjusting training parameters

The main change here is the seq_length.

| seq_length | Activity data 1 | Activity data 2 | Chromatography data |
| --- | --- | --- | --- |
| 7 | iloc1_epoch100_seq7_layer1_predict | iloc3_epoch100_seq7_layer1_predict | iloc5_epoch100_seq7_layer1_predict |
| 9 | iloc1_epoch100_seq9_layer1_predict | iloc3_epoch100_seq9_layer1_predict | iloc5_epoch100_seq9_layer1_predict |
| 11 | iloc1_epoch100_seq11_layer1_predict | iloc3_epoch100_seq11_layer1_predict | iloc5_epoch100_seq11_layer1_predict |
| 15 | iloc1_epoch100_seq15_layer1_predict | iloc3_epoch100_seq15_layer1_predict | iloc5_epoch100_seq15_layer1_predict |

Increasing seq_length has little effect on the activity data and only a slight effect on the chromatography data. As a compromise, seq_length is set to 9 for the following discussion of the training data.
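The data pipeline itself is not shown in this section. Purely to illustrate what changing seq_length means, here is a hypothetical sliding-window sketch; the function name, the sine series, and the label_length default are assumptions, not the original code.

```python
import numpy as np

def make_windows(series, seq_length, label_length=1):
    """Slice a 1-D series into overlapping windows of length seq_length, each
    paired with the next label_length value(s) as the prediction target."""
    xs, ys = [], []
    for i in range(len(series) - seq_length - label_length + 1):
        xs.append(series[i:i + seq_length])
        ys.append(series[i + seq_length:i + seq_length + label_length])
    x = np.asarray(xs, dtype=np.float32)[..., None]  # (samples, seq_length, 1)
    y = np.asarray(ys, dtype=np.float32)             # (samples, label_length)
    return x, y

x, y = make_windows(np.sin(np.linspace(0, 20, 200)), seq_length=9)
print(x.shape, y.shape)  # (191, 9, 1) (191, 1)
```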

# Varying the training data ratio

Only the training data ratio is changed, using 5%, 10%, 15%, and 20% of the data for training; the data split follows the "FNN: varying the training data ratio" section. This section only shows how the different training ratios affect this model.
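As a rough sketch of such a split (assuming a chronological split where the first fraction of the windows is used for training; the helper name and the placeholder arrays are assumptions):

```python
import numpy as np

def split_by_ratio(x, y, train_ratio):
    """Use the first train_ratio of the samples for training and keep the rest
    for testing, preserving temporal order."""
    n_train = int(len(x) * train_ratio)
    return (x[:n_train], y[:n_train]), (x[n_train:], y[n_train:])

# Placeholder windows with the same shapes as the sliding-window sketch above.
x = np.zeros((191, 9, 1), dtype=np.float32)
y = np.zeros((191, 1), dtype=np.float32)
for ratio in (0.05, 0.10, 0.15, 0.20):
    (x_train, y_train), (x_test, y_test) = split_by_ratio(x, y, ratio)
    print(f"{ratio:.0%}: {len(x_train)} training / {len(x_test)} test samples")
```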

The prediction plots obtained with the different training ratios are shown below:

| Ratio (%) | Activity data 1 | Activity data 2 | Chromatography data |
| --- | --- | --- | --- |
| 5 | iloc1_epoch100_seq9_layer1_predict | iloc3_epoch100_seq9_layer1_predict | iloc5_epoch100_seq9_layer1_predict |
| 10 | iloc1_epoch100_seq9_layer1_predict | iloc3_epoch100_seq9_layer1_predict | iloc5_epoch100_seq9_layer1_predict |
| 15 | iloc1_epoch100_seq9_layer1_predict | iloc3_epoch100_seq9_layer1_predict | iloc5_epoch100_seq9_layer1_predict |
| 20 | iloc1_epoch100_seq9_layer1_predict | iloc3_epoch100_seq9_layer1_predict | iloc5_epoch100_seq9_layer1_predict |

As the training data ratio grows, the model fits the data trend so well that even some small peaks are fitted closely, which is not the behavior we want here, so the training ratio is fixed at 5%.