1. 程式人生 > >使用訓練好的caffe模型分類圖片(python版)

使用訓練好的caffe模型分類圖片(python版)

 英文官方文件:http://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/00-classification.ipynb

  • 匯入python caffe包
import numpy as np
import matplotlib.pyplot as plt
# display plots in this notebook
%matplotlib inline

# Notebook-wide plotting defaults (the three settings are independent of each other).

# Render images as grayscale rather than with a (potentially misleading) color heatmap.
plt.rcParams['image.cmap'] = 'gray'
# Do not interpolate, so that every pixel is drawn as a square.
plt.rcParams['image.interpolation'] = 'nearest'
# Use large figures.
plt.rcParams['figure.figsize'] = (10, 10)


import sys
import os
caffe_root = './'  #指定caffe的根目錄 
sys.path.insert(0, caffe_root + 'python')    #將caffe python介面檔案路徑新增到python path中
import caffe

# 判斷model檔案是否存在
if os.path.isfile(caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'):
    print 'CaffeNet found.'
else:
    print 'Downloading pre-trained CaffeNet model...'

  • 載入網路,建立輸入處理

使用python的caffe.io.load_image介面讀取圖片,返回的是數值範圍在[0, 1]的np.float32陣列

def load_image(filename, color=True):
    """
    Read an image file and normalise it to a float32 H x W x C array.

    Parameters
    ----------
    filename : string
        path of the image to read.
    color : boolean
        True (default) requests a color (RGB) result; False requests a
        single-channel intensity result (the file is read as grayscale).

    Returns
    -------
    image : np.float32 array with values in [0, 1]
        of size (H x W x 3) in RGB or
        of size (H x W x 1) in grayscale.
    """
    raw = skimage.io.imread(filename, as_grey=not color)
    img = skimage.img_as_float(raw).astype(np.float32)

    if img.ndim != 2:
        # Already has a channel axis: only a 4-channel image needs its
        # last (alpha) channel dropped.
        return img[:, :, :3] if img.shape[2] == 4 else img

    # 2-D (grayscale) data: add the channel axis, and replicate the single
    # channel three times when a color result was requested.
    img = img[:, :, np.newaxis]
    return np.tile(img, (1, 1, 3)) if color else img

python Transformer介面會對load_image讀取的圖片做處理,注意raw_scale是在減去均值之前套用,而input_scale是在減去均值之後套用

    def preprocess(self, in_, data):
        """
        Format input for Caffe:
        - convert to single
        - resize to input dimensions (preserving number of channels)
        - transpose dimensions to K x H x W
        - reorder channels (for instance color to BGR)
        - scale raw input (e.g. from [0, 1] to [0, 255] for ImageNet models)
        - subtract mean
        - scale feature

        The array passed in as `data` is never modified: if the intermediate
        result still shares memory with `data` when the in-place scaling /
        mean-subtraction steps are reached, it is copied first.

        Parameters
        ----------
        in_ : name of input blob to preprocess for
        data : (H' x W' x K) ndarray

        Returns
        -------
        caffe_in : (K x H x W) ndarray for input to a Net
        """
        self.__check_input(in_)
        # copy=False: no copy is made when data is already float32, so
        # caffe_in may alias the caller's array from here on.
        caffe_in = data.astype(np.float32, copy=False)
        transpose = self.transpose.get(in_)
        channel_swap = self.channel_swap.get(in_)
        raw_scale = self.raw_scale.get(in_)
        mean = self.mean.get(in_)
        input_scale = self.input_scale.get(in_)
        in_dims = self.inputs[in_][2:]

        # 1. resize to the spatial size of the input blob
        if caffe_in.shape[:2] != in_dims:
            caffe_in = resize_image(caffe_in, in_dims)

        # 2. reorder the axes, e.g. H x W x C -> C x H x W (this yields a view)
        if transpose is not None:
            caffe_in = caffe_in.transpose(transpose)

        # 3. reorder the channels, e.g. RGB -> BGR
        if channel_swap is not None:
            caffe_in = caffe_in[channel_swap, :, :]

        # The remaining steps operate in place. Detach from the caller's
        # buffer first so that `data` is not silently overwritten when none
        # of the steps above produced a fresh array.
        modifies_in_place = (raw_scale is not None or mean is not None
                             or input_scale is not None)
        if modifies_in_place and np.may_share_memory(caffe_in, data):
            caffe_in = caffe_in.copy()

        # 4. raw scale: e.g. with raw_scale = 255, values read in [0, 1]
        #    are stretched to [0, 255]
        if raw_scale is not None:
            caffe_in *= raw_scale

        # 5. subtract the mean
        if mean is not None:
            caffe_in -= mean

        # 6. input scale: e.g. input_scale = 0.00390625 (1/256) maps
        #    [0, 255] data back to roughly [0, 1]
        if input_scale is not None:
            caffe_in *= input_scale
        return caffe_in
# Run the computation on the CPU
caffe.set_mode_cpu()

model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'

# Load the network
net = caffe.Net(model_def,      # model definition file
                model_weights,  # model parameter (weights) file
                caffe.TEST)     # use test mode (e.g., don't perform dropout)

# Load the mean file. mu is expected to have shape (3, 256, 256): the first mean(1)
# averages over axis 1 and gives shape (3, 256); the second mean(1) gives shape (3,),
# i.e. one mean value per channel, in B, G, R order (see the print below).
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1) 
print 'mean-subtracted values:', zip('BGR', mu)
#mean-subtracted values: [('B', 104.0069879317889), ('G', 116.66876761696767), ('R', 122.6789143406786)]

# create transformer for the input called 'data',
# sized from the shape of the net's 'data' blob
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})

# move the channel axis to the front, i.e. (H, W, C) becomes (C, H, W)
transformer.set_transpose('data', (2,0,1))  
transformer.set_mean('data', mu)            # subtract the per-channel mean
transformer.set_raw_scale('data', 255)      # rescale from [0, 1] to [0, 255]
transformer.set_channel_swap('data', (2,1,0))  # reorder the channels from RGB to BGR
  • 使用CPU分類
# To demonstrate batching, set the input batch size to 50
net.blobs['data'].reshape(50,        # batch size
                          3,         # 3 channels
                          227, 227)  # image size is 227x227

# caffe.io.load_image returns pixel values in [0, 1] (cv2.imread, by contrast, returns values in [0, 255])
image = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')
# preprocess the image with the transformer (this includes rescaling the values to [0, 255])
transformed_image = transformer.preprocess('data', image)
plt.imshow(image)



# copy the image data into the memory allocated for the net's input blob
net.blobs['data'].data[...] = transformed_image

### forward pass: run the image classification
output = net.forward()
# there may be several top blobs; index by 'prob', and the trailing [0] selects the output for the first image
output_prob = output['prob'][0]  
# print the predicted class index
print 'predicted class is:', output_prob.argmax()
# sample output: predicted class is: 281
# 輸出predicted class is: 281
  • 驗證分類是否正確
# 載入imageNet的label檔案
labels_file = caffe_root + 'data/ilsvrc12/synset_words.txt'
if not os.path.exists(labels_file):
    !../data/ilsvrc12/get_ilsvrc_aux.sh
    
labels = np.loadtxt(labels_file, str, delimiter='\t')

print 'output label:', labels[output_prob.argmax()]
# 輸出內容   output label: n02123045 tabby, tabby cat


# sort預設升序排列,反轉後全最大前五個
top_inds = output_prob.argsort()[::-1][:5]  # reverse sort and take five largest items

print 'probabilities and labels:'
zip(output_prob[top_inds], labels[top_inds])

'''[(0.31243637, 'n02123045 tabby, tabby cat'),
 (0.2379719, 'n02123159 tiger cat'),
 (0.12387239, 'n02124075 Egyptian cat'),
 (0.10075711, 'n02119022 red fox, Vulpes vulpes'),
 (0.070957087, 'n02127052 lynx, catamount')]  
'''
  • 使用GPU模式
# Time a forward pass on the CPU
%timeit net.forward()
# 1 loop, best of 3: 1.42 s per loop


# Switch to GPU mode; when several GPUs are present, the device id selects which one is used
caffe.set_device(0)  # if we have multiple GPUs, pick the first one
caffe.set_mode_gpu()
net.forward()  # run once before timing to set up memory
%timeit net.forward()
# 10 loops, best of 3: 70.2 ms per loop