构建一个简单 Seq2Seq 模型,通过更改训练数据比例、调整训练参数等对比模型预测效果。

class Encoder(nn.Module):
    """Single-layer, batch-first LSTM that encodes an input sequence.

    Args:
        input_size: Feature count per time step, passed to ``nn.LSTM``.
        hidden_size: Width of the LSTM hidden and cell states.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, batch_first=True)

    def forward(self, input_seq, hidden=None):
        """Run the LSTM over ``input_seq``.

        Args:
            input_seq: Batch-first tensor; its first dimension is used as the
                batch size when building the initial state.
            hidden: Optional ``(h_0, c_0)`` tuple. When omitted, both states
                start as zeros on the same device as ``input_seq``.

        Returns:
            ``(output, (h_n, c_n))`` exactly as produced by ``nn.LSTM``.
        """
        if hidden is None:
            # One layer, one direction -> the leading state dimension is 1.
            state_shape = (1, input_seq.size(0), self.hidden_size)
            hidden = (
                torch.zeros(state_shape, device=input_seq.device),
                torch.zeros(state_shape, device=input_seq.device),
            )
        return self.lstm(input_seq, hidden)
class Decoder(nn.Module):
    """Single-step LSTM decoder followed by a linear projection.

    Args:
        input_size: Feature count of each decoder input step.
        hidden_size: Width of the LSTM hidden and cell states.
        output_size: Number of values produced by the linear head per step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_directions = 1
        self.lstm = nn.LSTM(input_size, self.hidden_size, batch_first=True)
        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input_seq, hidden):
        """Decode one time step.

        Args:
            input_seq: Tensor for a single step; a length-1 sequence axis is
                inserted at position 1 before it reaches the batch-first LSTM
                (so a ``(batch, input_size)`` tensor is expected).
            hidden: ``(h, c)`` state tuple carried over from the previous step.

        Returns:
            ``(pred, hidden)`` where ``pred`` is the linear projection of the
            LSTM output at the last time step and ``hidden`` is the new state.
        """
        step_input = input_seq.unsqueeze(1)
        lstm_out, next_hidden = self.lstm(step_input, hidden)
        last_step = lstm_out[:, -1, :]
        return self.linear(last_step), next_hidden
class Seq2Seq(nn.Module):
    """Encoder-decoder LSTM that predicts ``output_size`` steps autoregressively.

    The encoder's final state seeds the decoder, which is then run once per
    prediction step. Each step's decoder output is written into one column of
    the result, so the decoder is expected to emit a single value per step.

    NOTE(review): the first decoder input is a ``(batch, 1)`` zero tensor and
    each step's output is stored in a single column, so this wiring appears to
    assume ``input_size == 1`` and ``output_size == 1`` for the decoder head --
    confirm against the training script before using other sizes.

    Args:
        input_size: Feature count per time step for encoder and decoder.
        output_size: Number of prediction steps (also passed to ``Decoder``).
        hidden_size: Width of the LSTM states; defaults to 64.
    """

    def __init__(self, input_size, output_size, hidden_size=64):
        super().__init__()
        self.output_size = output_size
        self.hidden_size = hidden_size
        # Attribute names are kept as-is: they are part of the state_dict keys.
        self.Encoder = Encoder(input_size, self.hidden_size)
        self.Decoder = Decoder(input_size, self.hidden_size, output_size)

    def forward(self, encoder_inputs, decoder_inputs=None, option=1, teacher_forcing_ratio=0.5):
        """Encode ``encoder_inputs`` and decode ``output_size`` steps.

        Args:
            encoder_inputs: Batch-first input tensor; its first dimension is
                the batch size.
            decoder_inputs: Optional ground-truth targets indexed along
                dimension 1 by step; enables teacher forcing when given.
            option: Only ``1`` is accepted.
            teacher_forcing_ratio: Probability of using teacher forcing for
                the whole call (drawn once, not per step).

        Returns:
            Tensor of shape ``(batch_size, output_size)`` with one prediction
            per step.

        Raises:
            ValueError: If ``option`` is not ``1``.
        """
        if option != 1:
            raise ValueError("option 输入不合法")

        target_len = self.output_size  # number of prediction steps
        batch_size = encoder_inputs.shape[0]
        device = encoder_inputs.device
        dtype = encoder_inputs.dtype

        _, decoder_hidden = self.Encoder(encoder_inputs)

        # The random draw happens first so the RNG is consumed on every call,
        # whether or not targets were supplied.
        use_teacher_forcing = (
            random.random() < teacher_forcing_ratio and decoder_inputs is not None
        )

        # The decoder's first input is an all-zero start-of-sequence step.
        decoder_input_t = torch.zeros(batch_size, 1, device=device, dtype=dtype)
        outputs = torch.zeros(batch_size, target_len, device=device, dtype=dtype)

        for t in range(target_len):
            decoder_output, decoder_hidden = self.Decoder(decoder_input_t, decoder_hidden)
            # Drop only the trailing feature axis; a bare squeeze() would also
            # collapse the batch axis when batch_size == 1.
            outputs[:, t] = decoder_output.squeeze(-1)
            if use_teacher_forcing:
                # Feed the true value of step t as the next input. narrow()
                # needs no index tensor, so it works on any device (the old
                # CPU index tensor broke index_select for CUDA targets).
                decoder_input_t = decoder_inputs.narrow(1, t, 1)
            else:
                # Feed the model's own prediction back in.
                decoder_input_t = decoder_output
        return outputs

# 查看模型效果

效果如下:

层数 活性数据 1 活性数据 2 色谱数据
1 iloc1_epoch100_seq7_layer1_predict iloc3_epoch100_seq7_layer1_predict iloc5_epoch100_seq7_layer1_predict

🚩 Encoder 和 Decoder 各使用一层 LSTM。与直接堆叠两层 LSTM 的模型相比,预测效果仍有一定差距。

# 调整训练参数

主要修改的是 seq_length 长度

长度 活性数据 1 活性数据 2 色谱数据
7 iloc1_epoch100_seq7_layer1_predict iloc3_epoch100_seq7_layer1_predict iloc5_epoch100_seq7_layer1_predict
9 iloc1_epoch100_seq9_layer1_predict iloc3_epoch100_seq9_layer1_predict iloc5_epoch100_seq9_layer1_predict
11 iloc1_epoch100_seq11_layer1_predict iloc3_epoch100_seq11_layer1_predict iloc5_epoch100_seq11_layer1_predict
15 iloc1_epoch100_seq15_layer1_predict iloc3_epoch100_seq15_layer1_predict iloc5_epoch100_seq15_layer1_predict

🚩 增加 seq_length 整体上对预测结果影响不大;当 seq_length 等于 11 时,活性数据的预测结果出现了波动,预测曲线也逐渐趋于平滑。为了检测到更多异常值,下面将 seq_length 设为 9,继续讨论训练数据比例的影响。

# 改变训练数据比例

本节只改变训练数据比例,分别使用 5%、10%、15%、20% 的数据进行训练;训练数据的分配方式参考「FNN 改变训练数据比例」部分。本小节只展示不同训练比例对该模型的影响。

根据不同比例训练得到的结果图像如下所示:

比例 活性数据 1 活性数据 2 色谱数据
5 iloc1_epoch100_seq9_layer1_predict iloc3_epoch100_seq9_layer1_predict iloc5_epoch100_seq9_layer1_predict
10 iloc1_epoch100_seq9_layer1_predict iloc3_epoch100_seq9_layer1_predict iloc5_epoch100_seq9_layer1_predict
15 iloc1_epoch100_seq9_layer1_predict iloc3_epoch100_seq9_layer1_predict iloc5_epoch100_seq9_layer1_predict
20 iloc1_epoch100_seq9_layer1_predict iloc3_epoch100_seq9_layer1_predict iloc5_epoch100_seq9_layer1_predict

🚩 可以看到,随着训练比例增大,模型的预测结果越来越贴合真实值。