Purpose

An RNN accumulates the memory of all previous time steps and analyzes them together.
LSTM

A plain RNN is forgetful over long sequences; LSTM acts like a remedy that slows this memory decay and usually gives better results.
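The difference is easy to see directly in PyTorch. The short sketch below is my addition (not part of the original post; the tensor sizes and variable names are only illustrative): it feeds the same (batch, time_step, input_size) tensor through nn.RNN and nn.LSTM, and the LSTM returns an extra cell state c_n, the long-term "main line" memory that helps it forget less.

# Minimal comparison sketch (my addition), assuming PyTorch is installed.
import torch
from torch import nn

x = torch.randn(2, 28, 28)                   # (batch, time_step, input_size)

vanilla = nn.RNN(input_size=28, hidden_size=64, batch_first=True)
r_out, h_n = vanilla(x)                      # r_out: (2, 28, 64), h_n: (1, 2, 64)

lstm = nn.LSTM(input_size=28, hidden_size=64, batch_first=True)
r_out, (h_n, c_n) = lstm(x)                  # extra c_n: the long-term "main line" memory
print(r_out.shape, h_n.shape, c_n.shape)     # (2, 28, 64) (1, 2, 64) (1, 2, 64)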
Dataset

import torch
import torch.utils.data as Data
import torchvision

torch.manual_seed(1)    # reproducible

# Hyper Parameters
EPOCH = 1               # train the whole dataset this many times; keep it at 1 to save time
BATCH_SIZE = 64
TIME_STEP = 28          # rnn time steps / image height
INPUT_SIZE = 28         # rnn input size per step / pixels per image row
LR = 0.01               # learning rate
DOWNLOAD_MNIST = True   # set to False if you have already downloaded MNIST

# MNIST handwritten digits
train_data = torchvision.datasets.MNIST(
    root='./mnist/',                                # where to save / load the data
    train=True,                                     # this is training data
    transform=torchvision.transforms.ToTensor(),    # convert PIL.Image or numpy.ndarray to
                                                    # torch.FloatTensor (C x H x W), normalized to [0.0, 1.0]
    download=DOWNLOAD_MNIST,                        # download only if not already present
)

test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)

# batch training: 64 samples, 1 channel, 28x28 -> (64, 1, 28, 28)
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

# to save time, only the first 2000 test samples are used for evaluation
test_x = torch.unsqueeze(test_data.data, dim=1).type(torch.FloatTensor)[:2000]/255.   # shape (2000, 28, 28) -> (2000, 1, 28, 28), values in [0, 1]
test_y = test_data.targets[:2000]
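To make the shapes concrete, a quick sanity check like the one below (my addition, run in the same session after the dataset code) shows how each 28x28 image is read row by row: the image height becomes TIME_STEP and the pixels in a row become INPUT_SIZE.

# Shape check (my addition); assumes train_loader, test_x, test_y from above.
x, y = next(iter(train_loader))
print(x.shape)                      # torch.Size([64, 1, 28, 28])  (batch, channel, height, width)
print(x.view(-1, 28, 28).shape)     # torch.Size([64, 28, 28])     (batch, time_step, input_size)
print(test_x.shape, test_y.shape)   # torch.Size([2000, 1, 28, 28]) torch.Size([2000])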
RNN model

from torch import nn

class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()

        self.rnn = nn.LSTM(         # an LSTM works much better here than nn.RNN()
            input_size=28,          # pixels per image row
            hidden_size=64,         # rnn hidden unit
            num_layers=1,           # number of stacked RNN layers
            batch_first=True,       # input & output have batch size as the first dimension, e.g. (batch, time_step, input_size)
        )

        self.out = nn.Linear(64, 10)    # output layer

    def forward(self, x):
        # x shape (batch, time_step, input_size)
        # r_out shape (batch, time_step, hidden_size)
        # h_n shape (n_layers, batch, hidden_size)   the LSTM has two hidden states: h_n is the hidden state ("branch line"), h_c is the cell state ("main line")
        # h_c shape (n_layers, batch, hidden_size)
        r_out, (h_n, h_c) = self.rnn(x, None)   # None means the initial hidden state is all zeros

        # take the output of the last time step
        # here r_out[:, -1, :] equals the value of h_n
        out = self.out(r_out[:, -1, :])
        return out

rnn = RNN()
print(rnn)
"""
RNN(
  (rnn): LSTM(28, 64, batch_first=True)
  (out): Linear(64 -> 10)
)
"""
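Before training, it is worth confirming the input/output contract of the model. The snippet below is my addition (dummy tensor and its size are arbitrary): pushing a fake batch through the untrained network should map (batch, time_step, input_size) to (batch, 10).

# Forward-pass shape check (my addition); assumes rnn from above.
import torch

dummy = torch.zeros(5, 28, 28)    # 5 fake "images", each read as 28 time steps of 28 pixels
logits = rnn(dummy)
print(logits.shape)               # torch.Size([5, 10]) -- one score per digit class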
Training

import torch
from 数据集 import EPOCH, train_loader, LR    # the Dataset module above
from RNN模型 import rnn, nn                   # the RNN model module above

optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all parameters
loss_func = nn.CrossEntropyLoss()                       # the target label is not one-hotted

# training and testing
for epoch in range(EPOCH):
    for step, (x, b_y) in enumerate(train_loader):   # gives batch data
        b_x = x.view(-1, 28, 28)          # reshape x to (batch, time_step, input_size)

        output = rnn(b_x)                 # rnn output
        loss = loss_func(output, b_y)     # cross entropy loss
        optimizer.zero_grad()             # clear gradients for this training step
        loss.backward()                   # backpropagation, compute gradients
        optimizer.step()                  # apply gradients

"""
...
Epoch: 0 | train loss: 0.0945 | test accuracy: 0.94
Epoch: 0 | train loss: 0.0984 | test accuracy: 0.94
Epoch: 0 | train loss: 0.0332 | test accuracy: 0.95
Epoch: 0 | train loss: 0.1868 | test accuracy: 0.96
"""

import 预测   # run the prediction module after training
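Note that the "test accuracy" values in the log above are not produced by the loop as written here; they would come from a periodic evaluation step inside the loop. A possible sketch of such a step (my addition; the every-50-steps interval is an assumption, and test_x / test_y come from the 数据集 module) looks like this:

# Optional evaluation block (my addition), placed inside the inner training loop;
# add `from 数据集 import test_x, test_y` at the top of the file.
if step % 50 == 0:
    test_output = rnn(test_x.view(-1, 28, 28))                  # (2000, 1, 28, 28) -> (2000, 10)
    pred_y = torch.max(test_output, 1)[1].data.numpy()
    accuracy = float((pred_y == test_y.data.numpy()).sum()) / float(test_y.size(0))
    print('Epoch:', epoch, '| train loss: %.4f' % loss.data.numpy(),
          '| test accuracy: %.2f' % accuracy)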
Prediction

import torch
from RNN模型 import rnn
from 数据集 import test_x, test_y

# print 10 predictions from the test data
test_output = rnn(test_x[:10].view(-1, 28, 28))
pred_y = torch.max(test_output, 1)[1].data.numpy().squeeze()
print(pred_y, 'prediction number')
print(test_y[:10], 'real number')
"""
[7 2 1 0 4 1 4 9 5 9] prediction number
[7 2 1 0 4 1 4 9 5 9] real number
"""