
An improved VDSR in PyTorch (built on FSRCNN)

This post analyzes my experiments with FSRCNN plus a residual connection. It does not reuse the VDSR code wholesale; only the residual part is taken as a reference.

My earlier post "Study Notes: Deep-Learning-Based Image Super-Resolution Reconstruction" also introduced VDSR. VDSR is an improvement on SRCNN; what is done here is a VDSR-style network built on FSRCNN instead.

The VDSR network structure is as follows:

So this post essentially replaces the SR part of that structure, swapping SRCNN for FSRCNN, while keeping the global residual connection.
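To make the residual idea concrete, here is a minimal sketch of VDSR-style global residual learning (the class name ResidualSR and the body argument are illustrative only, not code from this post): the trunk predicts only the high-frequency detail, which is added to an upscaled copy of the input.

import torch
import torch.nn as nn
import torch.nn.functional as F

class ResidualSR(nn.Module):
    """Sketch of VDSR-style global residual learning wrapped around an SR trunk."""
    def __init__(self, body, upscale=2):
        super(ResidualSR, self).__init__()
        self.body = body        # e.g. an FSRCNN that outputs an image at the target size
        self.upscale = upscale

    def forward(self, x):
        # The interpolated input serves as the residual base ...
        base = F.interpolate(x, scale_factor=self.upscale, mode='bilinear', align_corners=False)
        # ... so the trunk only has to learn the missing high-frequency detail.
        return self.body(x) + base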

PyTorch VDSR implementation: https://github.com/twtygqyy/pytorch-vdsr

Paper: https://arxiv.org/pdf/1511.04587.pdf

The modified code is given below. Training is launched with:

python train.py -opt options/train/train_sr.json

#######################################################################################################
# FSRCNN with a deconvolution-based residual branch
import torch
import torch.nn as nn

class FSRCNN(nn.Module):
    def __init__(self, in_nc, out_nc, nf, nb, upscale=4, norm_type='batch', act_type='relu', \
            mode='NAC', res_scale=1, upsample_mode='upconv'):  # pay attention to the upscale factor
        super(FSRCNN, self).__init__()
        # Feature extraction (nf=56); padding keeps the spatial size unchanged
        self.conv1 = nn.Conv2d(in_channels=in_nc, out_channels=nf, kernel_size=5, stride=1, padding=2)
        self.prelu1 = nn.PReLU()

        # Shrinking
        self.conv2 = nn.Conv2d(in_channels=nf, out_channels=12, kernel_size=1, stride=1, padding=0)
        self.prelu2 = nn.PReLU()

        # Non-linear mapping
        self.conv3 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.prelu3 = nn.PReLU()
        self.conv4 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.prelu4 = nn.PReLU()
        self.conv5 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.prelu5 = nn.PReLU()
        self.conv6 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.prelu6 = nn.PReLU()

        # Expanding
        self.conv7 = nn.Conv2d(in_channels=12, out_channels=nf, kernel_size=1, stride=1, padding=0)
        self.prelu7 = nn.PReLU()

        # Deconvolution (these settings give exactly 2x upscaling when stride=2)
        self.last_part = nn.ConvTranspose2d(in_channels=nf, out_channels=in_nc, kernel_size=9, stride=upscale, padding=4, output_padding=1)
        # Deconvolution for the residual branch: a learned upscaling of the input itself
        self.DECO_part = nn.ConvTranspose2d(in_channels=in_nc, out_channels=in_nc, kernel_size=9, stride=upscale, padding=4, output_padding=1)

    def forward(self, x):
        residual = self.DECO_part(x)  # residual branch
        out = self.prelu1(self.conv1(x))
        out = self.prelu2(self.conv2(out))
        out = self.prelu3(self.conv3(out))
        out = self.prelu4(self.conv4(out))
        out = self.prelu5(self.conv5(out))
        out = self.prelu6(self.conv6(out))
        out = self.prelu7(self.conv7(out))
        out = self.last_part(out)
        out = torch.add(out, residual)  # global residual connection

        return out
##########################################################################################################
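As a quick sanity check (a standalone sketch, not part of the training code; it assumes in_nc=3 and the x2 setting actually used in the experiments below), the transposed-convolution parameters above (kernel_size=9, stride=2, padding=4, output_padding=1) give exactly twice the input resolution, so the two branches can be added element-wise:

import torch
import torch.nn as nn

# Hypothetical x2 configuration matching the interpolation scale used later in this post.
deconv = nn.ConvTranspose2d(3, 3, kernel_size=9, stride=2, padding=4, output_padding=1)
x = torch.randn(1, 3, 24, 24)
y = deconv(x)
# (H_in - 1) * stride - 2 * padding + kernel_size + output_padding = 2 * H_in
print(y.shape)  # torch.Size([1, 3, 48, 48])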

The results are as follows:

Training runs extremely slowly with this version.

Comparison of results:

Next, drop the deconvolution in the residual branch and use interpolation (intended to be bicubic) instead; the modified code is as follows:

#######################################################################################################
# FSRCNN with an interpolation-based residual branch
class FSRCNN(nn.Module):
    def __init__(self, in_nc, out_nc, nf, nb, upscale=4, norm_type='batch', act_type='relu', \
            mode='NAC', res_scale=1, upsample_mode='upconv'):  # pay attention to the upscale factor
        super(FSRCNN, self).__init__()
        # Feature extraction (nf=56); padding keeps the spatial size unchanged
        self.conv1 = nn.Conv2d(in_channels=in_nc, out_channels=nf, kernel_size=5, stride=1, padding=2)
        self.prelu1 = nn.PReLU()

        # Shrinking
        self.conv2 = nn.Conv2d(in_channels=nf, out_channels=12, kernel_size=1, stride=1, padding=0)
        self.prelu2 = nn.PReLU()

        # Non-linear mapping
        self.conv3 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.prelu3 = nn.PReLU()
        self.conv4 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.prelu4 = nn.PReLU()
        self.conv5 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.prelu5 = nn.PReLU()
        self.conv6 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.prelu6 = nn.PReLU()

        # Expanding
        self.conv7 = nn.Conv2d(in_channels=12, out_channels=nf, kernel_size=1, stride=1, padding=0)
        self.prelu7 = nn.PReLU()

        # Deconvolution
        self.last_part = nn.ConvTranspose2d(in_channels=nf, out_channels=in_nc, kernel_size=9, stride=upscale, padding=4, output_padding=1)

    def forward(self, x):
        residual = x
        # NOTE: nn.Upsample with no mode argument defaults to 'nearest', not bicubic
        m = nn.Upsample(scale_factor=2)
        residual = m(residual)
        # print(residual.size())
        # exit()

        out = self.prelu1(self.conv1(x))
        out = self.prelu2(self.conv2(out))
        out = self.prelu3(self.conv3(out))
        out = self.prelu4(self.conv4(out))
        out = self.prelu5(self.conv5(out))
        out = self.prelu6(self.conv6(out))
        out = self.prelu7(self.conv7(out))
        out = self.last_part(out)
        out = torch.add(out, residual)  # global residual connection

        return out
##########################################################################################################

 

This network fails to converge, and I am not sure why. My guess is that it comes from using PyTorch's built-in upsampling this way for the residual (note that nn.Upsample without a mode argument uses nearest-neighbour interpolation, not bicubic). Perhaps it is better to go back to deconvolution for the residual.
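For reference, here is a minimal sketch of the different interpolation modes (it assumes a PyTorch version whose F.interpolate supports 'bicubic'); to actually get bicubic upsampling for the residual branch, the mode has to be requested explicitly:

import torch
import torch.nn.functional as F

x = torch.randn(1, 3, 24, 24)

# Default behaviour is nearest-neighbour interpolation.
nearest  = F.interpolate(x, scale_factor=2)  # same as mode='nearest'
bilinear = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False)
bicubic  = F.interpolate(x, scale_factor=2, mode='bicubic', align_corners=False)

print(nearest.shape, bilinear.shape, bicubic.shape)  # all torch.Size([1, 3, 48, 48])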

Let's try again with the following code:

#######################################################################################################
# FSRCNN with a bilinear-interpolation residual branch
class FSRCNN(nn.Module):
    def __init__(self, in_nc, out_nc, nf, nb, upscale=4, norm_type='batch', act_type='relu', \
            mode='NAC', res_scale=1, upsample_mode='upconv'):  # pay attention to the upscale factor
        super(FSRCNN, self).__init__()
        # Feature extraction (nf=56); padding keeps the spatial size unchanged
        self.conv1 = nn.Conv2d(in_channels=in_nc, out_channels=nf, kernel_size=5, stride=1, padding=2)
        self.prelu1 = nn.PReLU()

        # Shrinking
        self.conv2 = nn.Conv2d(in_channels=nf, out_channels=12, kernel_size=1, stride=1, padding=0)
        self.prelu2 = nn.PReLU()

        # Non-linear mapping
        self.conv3 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.prelu3 = nn.PReLU()
        self.conv4 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.prelu4 = nn.PReLU()
        self.conv5 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.prelu5 = nn.PReLU()
        self.conv6 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.prelu6 = nn.PReLU()

        # Expanding
        self.conv7 = nn.Conv2d(in_channels=12, out_channels=nf, kernel_size=1, stride=1, padding=0)
        self.prelu7 = nn.PReLU()

        # Deconvolution
        self.last_part = nn.ConvTranspose2d(in_channels=nf, out_channels=in_nc, kernel_size=9, stride=upscale, padding=4, output_padding=1)

        # Upsample: bilinear interpolation for the residual branch
        self.m = nn.UpsamplingBilinear2d(scale_factor=2)

    def forward(self, x):
        residual_x = x
        residual = self.m(residual_x)
        # print(residual.size())
        # exit()

        out = self.prelu1(self.conv1(x))
        out = self.prelu2(self.conv2(out))
        out = self.prelu3(self.conv3(out))
        out = self.prelu4(self.conv4(out))
        out = self.prelu5(self.conv5(out))
        out = self.prelu6(self.conv6(out))
        out = self.prelu7(self.conv7(out))
        out = self.last_part(out)
        out = torch.add(out, residual)  # global residual connection

        return out
##########################################################################################################

This version also fails to converge at first, but improves gradually as training goes on; unfortunately the final PSNR is still fairly low.
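(For reference, a minimal sketch of how PSNR is typically computed for image tensors in the [0, 1] range; this is not the evaluation code of the training framework.)

import torch

def psnr(sr, hr, max_val=1.0):
    """Peak signal-to-noise ratio between two image tensors in [0, max_val]."""
    mse = torch.mean((sr - hr) ** 2)
    return 10 * torch.log10(max_val ** 2 / mse)

# Example with two random "images"
sr = torch.rand(1, 3, 48, 48)
hr = torch.rand(1, 3, 48, 48)
print(psnr(sr, hr).item())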

Next, change the setting again, this time with nearest-neighbour upsampling for the residual:

#######################################################################################################
# FSRCNN with a nearest-neighbour-interpolation residual branch
class FSRCNN(nn.Module):
    def __init__(self, in_nc, out_nc, nf, nb, upscale=4, norm_type='batch', act_type='relu', \
            mode='NAC', res_scale=1, upsample_mode='upconv'):  # pay attention to the upscale factor
        super(FSRCNN, self).__init__()
        # Feature extraction (nf=56); padding keeps the spatial size unchanged
        self.conv1 = nn.Conv2d(in_channels=in_nc, out_channels=nf, kernel_size=5, stride=1, padding=2)
        self.prelu1 = nn.PReLU()

        # Shrinking
        self.conv2 = nn.Conv2d(in_channels=nf, out_channels=12, kernel_size=1, stride=1, padding=0)
        self.prelu2 = nn.PReLU()

        # Non-linear mapping
        self.conv3 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.prelu3 = nn.PReLU()
        self.conv4 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.prelu4 = nn.PReLU()
        self.conv5 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.prelu5 = nn.PReLU()
        self.conv6 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.prelu6 = nn.PReLU()

        # Expanding
        self.conv7 = nn.Conv2d(in_channels=12, out_channels=nf, kernel_size=1, stride=1, padding=0)
        self.prelu7 = nn.PReLU()

        # Deconvolution
        self.last_part = nn.ConvTranspose2d(in_channels=nf, out_channels=in_nc, kernel_size=9, stride=upscale, padding=4, output_padding=1)

        # Upsample: nearest-neighbour interpolation for the residual branch
        self.m = nn.UpsamplingNearest2d(scale_factor=2)

    def forward(self, x):
        residual_x = x
        residual = self.m(residual_x)
        # print(residual.size())
        # exit()

        out = self.prelu1(self.conv1(x))
        out = self.prelu2(self.conv2(out))
        out = self.prelu3(self.conv3(out))
        out = self.prelu4(self.conv4(out))
        out = self.prelu5(self.conv5(out))
        out = self.prelu6(self.conv6(out))
        out = self.prelu7(self.conv7(out))
        out = self.last_part(out)
        out = torch.add(out, residual)  # global residual connection

        return out
##########################################################################################################

From these results it looks as if the initial learning rate was set too high; let's try a lower learning rate and see.
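In this codebase the learning rate is set in the JSON options file passed to train.py; purely as a generic illustration, here is a minimal plain-PyTorch sketch of using a smaller initial learning rate with a step decay (the placeholder model, values and milestones are assumptions, not the settings actually used in these experiments):

import torch
import torch.nn as nn

model = nn.Conv2d(3, 3, 3, padding=1)  # placeholder for the FSRCNN model
# Smaller initial learning rate, halved at the (placeholder) milestones below.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[200, 400], gamma=0.5)

for step in range(500):
    out = model(torch.rand(1, 3, 24, 24))
    loss = out.mean()          # placeholder loss
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()           # decay the learning rate per iteration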

 

 

Addendum

An error message encountered along the way: "can't convert CUDA tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first."
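The fix is what the message says: copy the tensor back to host memory (and detach it from the autograd graph if it requires grad) before calling .numpy(). A minimal sketch:

import torch

x = torch.rand(2, 3, device='cuda') if torch.cuda.is_available() else torch.rand(2, 3)

# x.numpy() would raise the error above for a CUDA tensor;
# move it to the CPU (and detach it) first.
x_np = x.detach().cpu().numpy()
print(x_np.shape)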

 

On upsampling in PyTorch: https://blog.csdn.net/g11d111/article/details/82855946

 

class torch.nn.UpsamplingBilinear2d(size=None, scale_factor=None)

https://pytorch-cn.readthedocs.io/zh/latest/package_references/torch-nn/#class-torchnnupsamplingbilinear2dsizenone-scale_factornonesource
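UpsamplingBilinear2d is a thin wrapper that is deprecated in current PyTorch; the equivalent call is F.interpolate with mode='bilinear' and align_corners=True. A minimal sketch of the equivalence:

import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.randn(1, 3, 24, 24)

up = nn.UpsamplingBilinear2d(scale_factor=2)
a = up(x)
b = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)

print(torch.allclose(a, b))  # True: same result as bilinear interpolate with align_corners=True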