Code

import torch
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms  # image transforms

# Use mini-batch stochastic gradient descent with batch_size = 64
batch_size = 64
# Compose chains several image transforms (here ToTensor followed by Normalize)
transform = transforms.Compose([
    # ToTensor() converts an (H, W, C) numpy.ndarray or PIL image into a
    # (C, H, W) tensor and scales every value into [0, 1]
    transforms.ToTensor(),
    # Standardize each element with (x - mean) / std, using the MNIST mean and std
    transforms.Normalize(mean=(0.1307,), std=(0.3081,))  # MNIST images are grayscale (single channel), so mean and std each take a single value
])
# Get the training set
train_dataset = datasets.MNIST(
    # storage path
    root="./mnist",
    # load the training split
    train=True,
    # download automatically if the files are not found under root
    download=True,
    # apply the transform pipeline defined above
    transform=transform
)
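
# Optional sanity check (an illustrative addition, not in the original post):
# after ToTensor + Normalize, each sample is a (1, 28, 28) float tensor
sample, label = train_dataset[0]
print(sample.shape, label)  # expected: torch.Size([1, 28, 28]) and an int class label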
# Get the test set
test_dataset = datasets.MNIST(
    root="./mnist",
    train=False,
    download=True,
    transform=transform
)
# Define the data loaders
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)
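
# Optional sanity check (an illustrative addition): one batch from the loader is
# a (64, 1, 28, 28) image tensor plus a (64,) label tensor
images, labels = next(iter(train_loader))
print(images.shape, labels.shape)  # torch.Size([64, 1, 28, 28]) torch.Size([64])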

# Define the network model
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # First convolutional layer (Conv2d): 1 input channel, 10 output channels,
        # 5x5 kernel (the channel counts are what "input/output dimensions" refers to),
        # no padding, no bias
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5, padding=0, bias=False)
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
        # Pooling layer (MaxPool2d): kernel_size=2 gives a 2x2 pooling window
        self.pooling = torch.nn.MaxPool2d(kernel_size=2)
        self.fc = torch.nn.Linear(320, 10)

    def forward(self, x):
        # batch_size is dim 0 of the input tensor, i.e. the number of samples per batch
        batch_size = x.size(0)
        # Convolution, then pooling, then activation
        x = F.relu(self.pooling(self.conv1(x)))
        x = F.relu(self.pooling(self.conv2(x)))
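        # Shape trace for a 1x28x28 input (added derivation; this is where the
        # 320 below comes from):
        #   conv1: (1, 28, 28) -> (10, 24, 24); pool -> (10, 12, 12)
        #   conv2: (10, 12, 12) -> (20, 8, 8); pool -> (20, 4, 4), and 20*4*4 = 320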
        # Flatten x to match the FC layer's expected 2-D input
        x = x.view(batch_size, -1)
        # Feed the flattened features into the fully connected layer
        x = self.fc(x)
        return x

# Instantiate the model
model = Net()

# Construct the multi-class cross-entropy loss
criterion = torch.nn.CrossEntropyLoss()
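# Note: CrossEntropyLoss applies log-softmax internally, which is why
# forward() returns raw logits with no softmax of its own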
# Construct the optimizer: optimize all model parameters, learning rate 0.01, momentum 0.5
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# Define the training loop
def train(epoch):
    running_loss = 0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # Report the average loss every 300 batches
        if (batch_idx + 1) % 300 == 0:
            print(f'[Epoch:{epoch+1}, Batch_idx:{batch_idx+1}], loss:{running_loss / 300:.3f}')
            running_loss = 0

# Define the evaluation loop
def test():
    # Number of correctly classified samples so far (initialized to 0)
    correct = 0
    # Number of samples evaluated so far (initialized to 0)
    total = 0
    with torch.no_grad():  # no gradients are needed during evaluation
        for data in test_loader:
            images, labels = data
            outputs = model(images)
            # The model outputs a (batch_size, 10) tensor; torch.max along dim 1
            # returns (max values, indices); the index is the predicted class,
            # so the values go into the throwaway variable _
            _, predicted = torch.max(outputs.data, dim=1)
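            # e.g. torch.max(torch.tensor([[0.1, 2.0, 0.3]]), dim=1)
            # returns (values=tensor([2.0]), indices=tensor([1]))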
            # Update the number of samples evaluated
            total += labels.size(0)
            # Update the number of correct predictions
            correct += (predicted == labels).sum().item()
    # Print the accuracy
    print(f'Accuracy on test dataset: {100 * (correct/total):.2f}%')

# Run
if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        test()

Output

[Epoch:1, Batch_idx:300], loss:0.627
[Epoch:1, Batch_idx:600], loss:0.190
[Epoch:1, Batch_idx:900], loss:0.143
Accuracy on test dataset: 96.71%
[Epoch:2, Batch_idx:300], loss:0.115
[Epoch:2, Batch_idx:600], loss:0.097
[Epoch:2, Batch_idx:900], loss:0.086
Accuracy on test dataset: 97.69%
[Epoch:3, Batch_idx:300], loss:0.080
[Epoch:3, Batch_idx:600], loss:0.073
[Epoch:3, Batch_idx:900], loss:0.069
Accuracy on test dataset: 97.86%
[Epoch:4, Batch_idx:300], loss:0.062
[Epoch:4, Batch_idx:600], loss:0.064
[Epoch:4, Batch_idx:900], loss:0.061
Accuracy on test dataset: 98.44%
[Epoch:5, Batch_idx:300], loss:0.052
[Epoch:5, Batch_idx:600], loss:0.051
[Epoch:5, Batch_idx:900], loss:0.059
Accuracy on test dataset: 98.50%
[Epoch:6, Batch_idx:300], loss:0.049
[Epoch:6, Batch_idx:600], loss:0.048
[Epoch:6, Batch_idx:900], loss:0.050
Accuracy on test dataset: 98.45%
[Epoch:7, Batch_idx:300], loss:0.047
[Epoch:7, Batch_idx:600], loss:0.041
[Epoch:7, Batch_idx:900], loss:0.045
Accuracy on test dataset: 98.36%
[Epoch:8, Batch_idx:300], loss:0.040
[Epoch:8, Batch_idx:600], loss:0.042
[Epoch:8, Batch_idx:900], loss:0.041
Accuracy on test dataset: 98.73%
[Epoch:9, Batch_idx:300], loss:0.032
[Epoch:9, Batch_idx:600], loss:0.041
[Epoch:9, Batch_idx:900], loss:0.038
Accuracy on test dataset: 98.57%
[Epoch:10, Batch_idx:300], loss:0.033
[Epoch:10, Batch_idx:600], loss:0.035
[Epoch:10, Batch_idx:900], loss:0.036
Accuracy on test dataset: 98.59%

Supplement

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # First convolutional layer (Conv2d): 1 input channel, 10 output channels,
        # 5x5 kernel, no padding, no bias
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5, padding=0, bias=False)
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
        # Pooling layer (MaxPool2d): kernel_size=2 gives a 2x2 pooling window
        self.pooling = torch.nn.MaxPool2d(kernel_size=2)
        self.fc = torch.nn.Linear(320, 10)

    def forward(self, x):
        # batch_size is dim 0 of the input tensor
        batch_size = x.size(0)
        # Convolution, then pooling, then activation
        x = F.relu(self.pooling(self.conv1(x)))
        x = F.relu(self.pooling(self.conv2(x)))
        # Flatten x to match the FC layer's expected 2-D input
        x = x.view(batch_size, -1)
        # Fully connected layer
        x = self.fc(x)
        return x

Q: How can the 320 in self.fc = torch.nn.Linear(320, 10) be determined without working it out by hand?

A: Fill in an arbitrary number, run the code, and read the FC layer's true input dimension off the resulting error message.
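
A minimal sketch of a related trick (an addition, not from the original post): instead of provoking an error, pass a dummy batch through the convolutional part and print the flattened size directly. The layer definitions below mirror those in Net above.

import torch
import torch.nn.functional as F

conv1 = torch.nn.Conv2d(1, 10, kernel_size=5, padding=0, bias=False)
conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
pooling = torch.nn.MaxPool2d(kernel_size=2)

dummy = torch.zeros(1, 1, 28, 28)   # one fake 28x28 grayscale image
x = F.relu(pooling(conv1(dummy)))
x = F.relu(pooling(conv2(x)))
print(x.view(1, -1).size(1))        # prints 320

On recent PyTorch versions, torch.nn.LazyLinear(10) is another option: it infers its input dimension automatically on the first forward pass.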