[PyTorch notes] torch.nn vs torch.nn.functional; model.eval() vs torch.no_grad(); nn.Sequential() vs nn.ModuleList


1. The difference and relationship between torch.nn and torch.nn.functional

https://blog.csdn.net/GZHermit/article/details/78730856

The difference between nn and nn.functional can be seen by comparing the definition of conv2d in each:

torch.nn.Conv2d

import torch.nn.functional as F
class Conv2d(_ConvNd):

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True):
        kernel_size = _pair(kernel_size)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)
        super(Conv2d, self).__init__(
            in_channels, out_channels, kernel_size, stride, padding,
            dilation, False, _pair(0), groups, bias)

    def forward(self, input):
        return F.conv2d(input, self.weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)

torch.nn.functional.conv2d

def conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1,
           groups=1):
    if input is not None and input.dim() != 4:
        raise ValueError("Expected 4D tensor as input, got {}D tensor instead."
                         .format(input.dim()))
    f = _ConvNd(_pair(stride), _pair(padding), _pair(dilation), False,
                _pair(0), groups, torch.backends.cudnn.benchmark,
                torch.backends.cudnn.deterministic, torch.backends.cudnn.enabled)
    return f(input, weight, bias)

Difference:

nn.Conv2d is a class, while F.conv2d is a function.

Relationship:

The forward() function of nn.Conv2d is implemented with F.conv2d(), so the two are functionally identical.

(The __call__ method of the Module class invokes forward(), so when an nn.Conv2d instance is called on an input, forward() is executed; see the torch source for details.)
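To make the equivalence concrete, here is a small check (shapes chosen arbitrarily): calling an nn.Conv2d instance gives the same result as calling F.conv2d with that instance's weight and bias.

import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.rand(1, 3, 8, 8)
conv = nn.Conv2d(3, 6, kernel_size=3, padding=1)

out_module = conv(x)                                   # class API: __call__ -> forward -> F.conv2d
out_functional = F.conv2d(x, conv.weight, conv.bias,   # functional API: weight and bias passed explicitly
                          stride=1, padding=1)

print(torch.allclose(out_module, out_functional))      # True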

Why do both implementations exist at the same time?

The reason is to balance flexibility and convenience.

When building a network, there are generally two kinds of layers: those that hold Variables (learnable parameters), such as fully connected and convolutional layers, and those that do not, such as pooling and ReLU layers.

If every layer were defined with nn.functional, all Variables such as weights and biases would have to be created and managed by the user manually, which is very inconvenient;

if every layer were defined with nn, even simple computations would require writing a class, when a plain function would do.

In summary, when defining a network, use nn for layers that contain Variables (parameters), and nn.functional otherwise.
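A minimal sketch of this convention (a toy network with arbitrary layer sizes): parameterized layers are declared with nn in __init__, while parameter-free operations are called from nn.functional inside forward().

import torch
import torch.nn as nn
import torch.nn.functional as F

class TinyNet(nn.Module):
    def __init__(self):
        super(TinyNet, self).__init__()
        # layers with learnable parameters -> nn classes
        self.conv = nn.Conv2d(3, 8, 3, padding=1)
        self.fc = nn.Linear(8 * 16 * 16, 10)

    def forward(self, x):
        # parameter-free operations -> nn.functional
        x = F.relu(self.conv(x))
        x = F.max_pool2d(x, 2)
        x = x.view(x.size(0), -1)
        return self.fc(x)

net = TinyNet()
out = net(torch.rand(1, 3, 32, 32))  # calling net invokes forward() via __call__
print(out.shape)                     # torch.Size([1, 10])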

2. ‘model.eval()’ vs ‘with torch.no_grad()’

https://discuss.pytorch.org/t/model-eval-vs-with-torch-no-grad/19615

1. model.eval() will notify all your layers that you are in eval mode; that way, batchnorm or dropout layers will work in eval mode instead of training mode.

2. torch.no_grad() impacts the autograd engine and deactivates it. It will reduce memory usage and speed up computations, but you won't be able to backprop (which you don't want in an eval script).
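As a minimal sketch, an evaluation loop typically combines both (model, data_loader, and device here are placeholder names):

import torch

def evaluate(model, data_loader, device="cpu"):
    model.eval()                      # switch batchnorm/dropout to eval behaviour
    total, correct = 0, 0
    with torch.no_grad():             # disable autograd: less memory, faster, no backprop
        for inputs, targets in data_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            correct += (outputs.argmax(dim=1) == targets).sum().item()
            total += targets.size(0)
    model.train()                     # restore training mode afterwards if needed
    return correct / total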

3. nn.Sequential() vs nn.ModuleList

https://blog.csdn.net/e01528/article/details/84397174

For a feed-forward CNN, writing out the forward pass layer by layer in forward() can be tedious. There are two ways to simplify this: ModuleList and Sequential.

Sequential is a special Module that contains several sub-Modules; during the forward pass it passes the input through them one layer after another.

ModuleList is also a special Module that can hold several sub-Modules and can be used like a Python list, but an input cannot be passed to a ModuleList directly.

3.1 nn.Sequential()

1. Ways to build the model:

import torch 
import torch.nn as nn 
from torch.autograd import Variable

''' nn.Sequential
'''

net1 = nn.Sequential()
net1.add_module('conv', nn.Conv2d(3, 3, 3))
# net1.add_module('conv2', nn.Conv2d(3, 3, 2))
net1.add_module('batchnorm', nn.BatchNorm2d(3))
net1.add_module('activation_layer', nn.ReLU())

print("net1:")
print(net1)

# net1:
# Sequential(
#   (conv): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
#   (batchnorm): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
#   (activation_layer): ReLU()
# )


net2 = nn.Sequential(
    nn.Conv2d(3, 3, 3),
    nn.BatchNorm2d(3),
    nn.ReLU()
    )

print("net2:")
print(net2)

# net2:
# Sequential(
#   (0): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
#   (1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
#   (2): ReLU()
# )


from collections import OrderedDict
net3 = nn.Sequential(OrderedDict([
        ('conv', nn.Conv2d(3, 3, 3)),
        ('batchnorm', nn.BatchNorm2d(3)),
        ('activation_layer', nn.ReLU())
    ]))

print("net3:")
print(net3)

# net3:
# Sequential(
#   (conv): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
#   (batchnorm): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
#   (activation_layer): ReLU()
# )

2. Getting sub-Module objects

# get the sub module by the name or index
print("Get the sub module by the name or index:")
print(net1.conv)
print(net2[0])
print(net3.conv)

# Get the sub module by the name or index:
# Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
# Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
# Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))

3. Calling the model

# use the model
input = Variable(torch.rand(1, 3, 4, 4))
output1 = net1(input)
output2 = net2(input)
output3 = net3(input)
output4 = net3.activation_layer(net1.batchnorm(net1.conv(input)))
print("output1:", output1)
print("output2:", output2)
print("output3:", output3)
print("output4:", output4)
# output1: tensor([[[[0.0000, 0.1066],
#           [0.0075, 0.1379]],

#          [[0.0558, 0.9517],
#           [0.0000, 0.0000]],

#          [[0.5355, 0.0000],
#           [0.4478, 0.0000]]]], grad_fn=<ThresholdBackward0>)
# output2: tensor([[[[0.4227, 0.3509],
#           [0.0868, 0.0000]],

#          [[0.0000, 0.0034],
#           [0.0038, 0.0000]],

#          [[0.0000, 0.0000],
#           [0.4002, 0.1882]]]], grad_fn=<ThresholdBackward0>)
# output3: tensor([[[[0.0000, 0.0000],
#           [0.4779, 0.0000]],

#          [[0.0000, 1.5064],
#           [0.0000, 0.1515]],

#          [[0.7417, 0.0000],
#           [0.3366, 0.0000]]]], grad_fn=<ThresholdBackward0>)
# output4: tensor([[[[0.0000, 0.1066],
#           [0.0075, 0.1379]],

#          [[0.0558, 0.9517],
#           [0.0000, 0.0000]],

#          [[0.5355, 0.0000],
#           [0.4478, 0.0000]]]], grad_fn=<ThresholdBackward0>)

3.2 nn.ModuleList

It is designed to hold an arbitrary number of nn.Module objects.

Whenever a list, tuple, dict, or similar container of sub-modules is used in the constructor __init__, consider whether it should be replaced with a ModuleList or ParameterList.

1. Modules can be accessed by iteration or by index

# 1. support index and enumerate
class MyModule(nn.Module):
    def __init__(self):
        super(MyModule, self).__init__()
        self.linears = nn.ModuleList([nn.Linear(10, 10) for i in range(10)])

    def forward(self, x):
        for i, l in enumerate(self.linears):
            x = self.linears[i // 2](x) + l(x)
        return x
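A quick check (using the imports from the earlier examples; the input shape is arbitrary) confirms that both iteration and indexing work:

module = MyModule()
x = torch.rand(1, 10)
print(module(x).shape)    # torch.Size([1, 10])
print(module.linears[3])  # index access: Linear(in_features=10, out_features=10, bias=True)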

2. The extend and append methods

After an nn.ModuleList is created, it has extend and append methods, which are used just like their Python list counterparts:

extend adds the modules from another iterable (e.g. another ModuleList);

append adds a single module.

# 2. extend with a modulelist; append a single module
class LinearNet(nn.Module):
    """docstring for LinearNet"""
    def __init__(self, input_size, num_layers, layers_size, output_size):
        super(LinearNet, self).__init__()
        self.linears = nn.ModuleList([nn.Linear(input_size, layers_size)])
        self.linears.extend([nn.Linear(layers_size, layers_size) for i in range(1, num_layers - 1)])
        self.linears.append(nn.Linear(layers_size, output_size))

model1 = LinearNet(5, 3, 4, 2)
print("---model LinearNet---")
print(model1)
print()

# ---model LinearNet---
# LinearNet(
#   (linears): ModuleList(
#     (0): Linear(in_features=5, out_features=4, bias=True)
#     (1): Linear(in_features=4, out_features=4, bias=True)
#     (2): Linear(in_features=4, out_features=2, bias=True)
#   )
# )

3. Creating and using a ModuleList

# 3. create and use -- forward is not implemented
modellist = nn.ModuleList([nn.Linear(3, 4), nn.ReLU(), nn.Linear(4, 2)])
input = Variable(torch.randn(1, 3))
for model in modellist:
    input = model(input)

# output = modellist(input) --> wrong, because modellist does not implement a forward method

4. The difference between ModuleList and a plain list

Sub-modules stored in an ordinary Python list are not recognized by the parent Module, whereas sub-modules in a ModuleList are. This means that if sub-modules are kept in a plain list, their parameters are not registered with the parent Module (they do not appear in its parameters()) and therefore cannot be optimized.

Besides ModuleList there is also ParameterList, a list-like class that can hold multiple Parameters; in practice it is used in much the same way as ModuleList (see the sketch at the end of this section).

class MyModule_list(nn.Module):
    """docstring for MyModule_list"""
    def __init__(self):
        super(MyModule_list, self).__init__()
        self.list = [nn.Linear(3, 4), nn.ReLU()]
        self.module_list = nn.ModuleList([nn.Conv2d(3, 3, 3), nn.ReLU()])

    def forward(self):
        pass
model = MyModule_list()
print(model)

# MyModule_list(
#   (module_list): ModuleList(
#     (0): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
#     (1): ReLU()
#   )
# )
# Only the ModuleList shows up; the plain list does not

for name, param in model.named_parameters():
    print(name, param.size())

# module_list.0.weight torch.Size([3, 3, 3, 3])
# module_list.0.bias torch.Size([3])
# Only the parameters of the ModuleList are registered; the plain list contributes none
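As mentioned above, ParameterList plays the same role for raw nn.Parameter objects; a minimal sketch (the weight shapes here are arbitrary):

class MyParamModule(nn.Module):
    """Registers every nn.Parameter stored in the ParameterList."""
    def __init__(self):
        super(MyParamModule, self).__init__()
        self.weights = nn.ParameterList(
            [nn.Parameter(torch.randn(4, 4)) for _ in range(3)])

    def forward(self, x):
        # chain the stored weight matrices
        for w in self.weights:
            x = x @ w
        return x

m = MyParamModule()
print(len(list(m.parameters())))  # 3 -- every entry appears in parameters()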
