
Computer Vision - Custom Object Detectors

Tags: custom object detector training, using HOG together with an SVM, imglab usage example

1. Template Matching

Run: python template_matching.py --source 3.jpg --template 2.jpg


import argparse
import cv2

ap = argparse.ArgumentParser()
ap.add_argument("-s", "--source", required=True, help="Path to the source image")
ap.add_argument("-t", "--template", required=True, help="Path to the template image")
args = vars(ap.parse_args())
  
source = cv2.imread(args["source"])
template = cv2.imread(args["template"])
(tempH, tempW) = template.shape[:2]
 
result = cv2.matchTemplate(source, template, cv2.TM_CCOEFF) # arg 1: source image, arg 2: template image, arg 3: matching method
(minVal, maxVal, minLoc, (x, y)) = cv2.minMaxLoc(result) # grab the (x, y) coordinates of the best match

cv2.rectangle(source, (x, y), (x + tempW, y + tempH), (0, 255, 0), 2) # draw the bounding box on the source image
cv2.imshow("Source", source)
cv2.waitKey(0)


2. Training Your Own Object Detector

Purpose: train an object detector on a CALTECH-101 class, using its .mat annotation files, and produce a linear SVM detector

train_detector.py

Run: python train_detector.py --class stop_sign_images --annotations stop_sign_annotations \

--output output/stop_sign_detector.svm


from __future__ import print_function
from imutils import paths
from scipy.io import loadmat
from skimage import io
import argparse
import dlib
 
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--class", required=True,
    help="Path to the CALTECH-101 class images")#要訓練一個物件檢測器的具體CALTECH-101(資料集)類的路徑
ap.add_argument("-a", "--annotations", required=True,
    help="Path to the CALTECH-101 class annotations")#指定我們正在訓練的特定類的邊界框的路徑(caltech101資料集中對應的.mat資料夾)
ap.add_argument("-o", "--output", required=True,
    help="Path to the output detector")#輸出分類器的路徑
args = vars(ap.parse_args())

print("[INFO] gathering images and bounding boxes...")
options = dlib.simple_object_detector_training_options()
images = []
boxes = []
 
for imagePath in paths.list_images(args["class"]): # loop over the images we are training on
    # extract the image ID from the path, then use it to load the corresponding
    # annotation (i.e. bounding box) from disk
    imageID = imagePath[imagePath.rfind("/") + 1:].split("_")[1]
    imageID = imageID.replace(".jpg", "")
    p = "{}/annotation_{}.mat".format(args["annotations"], imageID)
    annotations = loadmat(p)["box_coord"]

    # build dlib rectangle objects to represent the bounding boxes
    bb = [dlib.rectangle(left=int(x), top=int(y), right=int(w), bottom=int(h))
            for (y, h, x, w) in annotations]

    # update the bounding boxes and add the image to the list; dlib needs both
    # the images and the boxes to train the detector
    boxes.append(bb)
    images.append(io.imread(imagePath))

# train the object detector and write it to disk
print("[INFO] training detector...")
detector = dlib.train_simple_object_detector(images, boxes, options)
detector.save(args["output"])


 test_detector.py

Run: python test_detector.py --detector output/stop_sign_detector.svm --testing stop_sign_testing

Purpose: test the custom object detector


from imutils import paths
import argparse
import dlib
import cv2
 
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--detector", required=True, help="Path to trained object detector")#訓練出的SVM線性檢測器
ap.add_argument("-t", "--testing", required=True, help="Path to directory of testing images")#包含停止標誌影象進行測試的目錄的路徑
args = vars(ap.parse_args())
 
detector = dlib.simple_object_detector(args["detector"])

for testingPath in paths.list_images(args["testing"]): # loop over the testing images
    image = cv2.imread(testingPath)
    boxes = detector(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    for b in boxes:
        # note: "w" and "h" here are really the right/bottom coordinates of the box
        (x, y, w, h) = (b.left(), b.top(), b.right(), b.bottom())
        cv2.rectangle(image, (x, y), (w, h), (0, 255, 0), 2)
 
    cv2.imshow("Image", image)
    cv2.waitKey(0)


3.1 Image Pyramids


Purpose: repeatedly scale an image down by a fixed ratio, yielding each layer.

Key point: the yield keyword returns a value without terminating the function; think of it as deferred, resumable returns. A minimal illustration follows.
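A minimal, standalone sketch of that behavior (not part of the detector code):

def count_up(n):
    # each yield hands one value back to the caller and pauses the function here
    for i in range(n):
        yield i

gen = count_up(3)
print(next(gen))   # 0 -- the generator is paused, not finished
print(list(gen))   # [1, 2] -- it resumes where it left off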

helper.py:


import imutils
# custom pyramid generator
def pyramid(image, scale=1.5, minSize=(30, 30)): # arg 1: source image, arg 2: scale factor per step, arg 3: minimum layer size
    yield image # the first layer of the pyramid is the original image

    while True:
        # compute the new width and resize the image, keeping the aspect ratio
        w = int(image.shape[1] / scale)
        image = imutils.resize(image, width=w)

        # stop once the resized image no longer meets the minimum size
        if image.shape[0] < minSize[1] or image.shape[1] < minSize[0]:
            break

        yield image

# sliding window generator
def sliding_window(image, stepSize, windowSize): # arg 1: image to scan, arg 2: pixels to skip per step, arg 3: window size to examine
    for y in xrange(0, image.shape[0], stepSize):
        for x in xrange(0, image.shape[1], stepSize):
            yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])


test_pyramid.py

Run: python test_pyramid.py --image florida_trip.png --scale 1.5


# exercising the pyramid function
from pyimagesearch.object_detection.helpers import pyramid
import argparse
import cv2
 
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="path to the input image")
ap.add_argument("-s", "--scale", type=float, default=1.5, help="scale factor size") #每次影象縮小比例
args = vars(ap.parse_args())
 
image = cv2.imread(args["image"])
 
for (i, layer) in enumerate(pyramid(image, scale=args["scale"])):
    cv2.imshow("Layer {}".format(i + 1), layer)
    cv2.waitKey(0)


3.2 Sliding Windows

test_sliding_window.py

Purpose: combine the image pyramid with the sliding window

Run: python test_sliding_window.py --image florida_trip.png --width 64 --height 64


from pyimagesearch.object_detection.helpers import sliding_window
from pyimagesearch.object_detection.helpers import pyramid
import argparse
import time
import cv2
 
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="path to the input image")#需要處理的影象
ap.add_argument("-w", "--width", type=int, help="width of sliding window")#滑動視窗的寬度
ap.add_argument("-t", "--height", type=int, help="height of sliding window")#滑動視窗的高度
ap.add_argument("-s", "--scale", type=float, default=1.5, help="scale factor size")#影象金字塔的調整大小因子
args = vars(ap.parse_args())
 
image = cv2.imread(args["image"])
(winW, winH) = (args["width"], args["height"])

for layer in pyramid(image, scale=args["scale"]):
    for (x, y, window) in sliding_window(layer, stepSize=32, windowSize=(winW, winH)):
        
        if window.shape[0] != winH or window.shape[1] != winW:
            continue

        clone = layer.copy()
        cv2.rectangle(clone, (x, y), (x + winW, y + winH), (0, 255, 0), 2)
        cv2.imshow("Window", clone)
 
        cv2.waitKey(1)
        time.sleep(0.025)


4.1 The 6 Steps to Building a Custom Detection Framework

The problem with Haar cascades (the Viola-Jones detector): detecting faces/people/objects/anything with OpenCV means spending a great deal of time tuning the cv2.detectMultiScale parameters.

The Viola-Jones detector is not our only option for object detection. There are also keypoint-based object detection, local invariant descriptors, and bag-of-visual-words models.

The six-step framework:

Step 1: Sample p positive examples from the training data of the object you want to detect and extract HOG descriptors from them. Crop the object's bounding box out of each training image, then compute HOG features on that ROI; these features serve as the positive examples.

Step 2: Build a negative training set that contains no instances of the object, and extract HOG descriptors from these samples as well. In practice the negative set is far larger than the positive set.

Step 3: Train a linear SVM on the positive and negative samples.

Step 4: Apply hard-negative mining. For every image in the negative training set, and for every possible scale of each image (i.e. the image pyramid), slide a window across the image; at each window, extract HOG features and apply the classifier, recording any window falsely classified as the object. This reduces the number of false positives in the final detector.

Step 5: Take the false-positive samples found during hard-negative mining, sort them by their confidence (i.e. probability), and re-train the classifier using these hard negatives.

Step 6: The classifier is now trained and can be applied to the test dataset. Again, just as in Step 4, for every image in the test set and every scale of each image, apply the sliding window technique. At each window, extract the HOG descriptor and apply the classifier. If the classifier detects the object with sufficiently high probability, record the bounding box of the window. After finishing the scan, apply non-maximum suppression to remove redundant, overlapping bounding boxes.

Extensions and alternative approaches:

The HOG + Linear SVM approach to object detection is simple and easy to understand. dlib's implementation, however, differs slightly from the standard six-step framework above.

The first change concerns HOG sliding windows and non-maximum suppression. Instead of extracting features from separate positive and negative datasets, dlib optimizes the HOG sliding window so that the number of mistakes on each training image is minimized. This means the entire training image is used both to (1) extract the positive examples and (2) extract negative samples from every other region of the image. This completely removes the need for a negative training set and for hard-negative mining, and it is one of the reasons the Max-Margin Object Detection method is so fast.

Second, dlib also takes non-maximum suppression into account during the actual training phase. We normally apply NMS only to obtain the final bounding boxes, but here it can be used while training. This substantially reduces false positives and, again, lessens the need for hard-negative mining.

Finally, dlib uses a very accurate algorithm to find the optimal hyperplane separating the two image classes. The method achieves higher accuracy (with a lower false-positive rate) than many other state-of-the-art object detectors.

5. Preparing the Experiment and Training Data

The complete directory structure of the framework: (alongside pyimagesearch there is also a conf directory holding the JSON files, and a datasets directory holding the datasets)

Experiment configuration: JSON configuration files

Advantages of a JSON configuration file:

1. There is no need to define a never-ending list of command-line arguments; all we have to supply is the path to the configuration file.

2. The configuration file gathers all relevant parameters in a single place.

3. It ensures we never forget which command-line options each Python script needs: every option is defined in the configuration file.

4. It lets us create one configuration file per object detector we want to build. This is a huge advantage: defining a new object detector becomes a matter of editing a single file.

cars.json:


{
    #######
    # DATASET PATHS
    #######
    "image_dataset": "datasets/caltech101/101_ObjectCategories/car_side",#我們的“正例”影象的路徑,需要訓練的基礎資料
    "image_annotations": "datasets/caltech101/Annotations/car_side",#包含與image_dataset中每個影象相關聯的邊界框的目錄的路徑
    "image_distractions": "datasets/sceneclass13",#不包含我們想要檢測的物件的任何示例的“否定示例”  
}


explore_dims.py 

Purpose: read the .mat annotation information from the caltech101 data, loop over the bounding boxes of all images, and derive a suitable sliding-window size

Key points: 1. How to work with the caltech101 dataset and read the .mat file information

2. Using glob.glob() to iterate over the files in a directory

Run: python explore_dims.py --conf conf/cars.json


from __future__ import print_function
from pyimagesearch.utils import Conf
from scipy import io
import numpy as np
import argparse
import glob
 

ap = argparse.ArgumentParser()
ap.add_argument("-c", "--conf", required=True, help="path to the configuration file")
args = vars(ap.parse_args())

conf = Conf(args["conf"])#載入配置檔案
widths = []#初始化檢測物件的寬度
heights = []#初始化檢測物件的高度
 
for p in glob.glob(conf["image_annotations"] + "/*.mat"): # loop over the annotation files for the object
    # load the bounding box associated with each annotation file and update
    # the corresponding width and height lists
    (y, h, x, w) = io.loadmat(p)["box_coord"][0]
    widths.append(w - x)
    heights.append(h - y)
 
# compute the average width and height
(avgWidth, avgHeight) = (np.mean(widths), np.mean(heights))
print("[INFO] avg. width: {:.2f}".format(avgWidth))
print("[INFO] avg. height: {:.2f}".format(avgHeight))
print("[INFO] aspect ratio: {:.2f}".format(avgWidth / avgHeight))


conf.py: the class that parses the JSON configuration file

Purpose: the class that parses cars.json. On the role of Python's built-in __getitem__:

When a class defines a __getitem__() method, an instance of it (call it P) can be indexed as P[key]; evaluating P[key] calls the class's __getitem__() method.


import commentjson as json
 
class Conf:
    def __init__(self, confPath):
        conf = json.loads(open(confPath).read())
        self.__dict__.update(conf)
 
    def __getitem__(self, k):
        return self.__dict__.get(k, None)    

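A quick usage sketch (assuming the cars.json above sits at conf/cars.json):

from pyimagesearch.utils import Conf

conf = Conf("conf/cars.json")
print(conf["image_dataset"])  # dictionary-style lookup goes through __getitem__
print(conf["missing_key"])    # returns None instead of raising a KeyError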

6. Building the HOG Descriptor

cars.json:


{
    #######
    # DATASET PATHS
    #######
    "image_dataset": "datasets/caltech101/101_ObjectCategories/car_side",
    "image_annotations": "datasets/caltech101/Annotations/car_side",
    "image_distractions": "datasets/sceneclass13",
 
    #######
    # FEATURE EXTRACTION
    #######
    "features_path": "output/cars/car_features.hdf5",
    "percent_gt_images": 0.5,
    "offset": 5,
    "use_flip": true,
    "num_distraction_images": 500,
    "num_distractions_per_image": 10,
 
    #######
    # HISTOGRAM OF ORIENTED GRADIENTS DESCRIPTOR
    #######
    "orientations": 9,
    "pixels_per_cell": [4, 4],  #能被滑動視窗尺寸整除
    "cells_per_block": [2, 2],
    "normalize": true,
 
    #######
    # OBJECT DETECTOR (sliding window settings)
    #######
    "window_step": 4,
    "overlap_thresh": 0.3,
    "pyramid_scale": 1.5,
    "window_dim": [96, 32],
    "min_probability": 0.7
}


 dataset.py

Purpose: define the helper methods for the h5py database

Key points: 1. Working with an h5py database

Question: what do the create_dataset() arguments do?

Arguments: 1. the dataset name, 2. the dataset dimensions, 3. the data type

Background: an HDF5 file is a container for two kinds of objects: datasets and groups. A dataset is an array-like collection of data, much like a numpy array. A group is a folder-like container that behaves like a Python dict, with keys and values: a group can hold datasets or other groups, and the "keys" are the names of its members.
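A small standalone sketch of those two object types (the file name is arbitrary):

import h5py
import numpy as np

with h5py.File("example.hdf5", "w") as db:
    grp = db.create_group("cars")                             # group: a folder-like container
    ds = grp.create_dataset("hog", (10, 128), dtype="float")  # dataset: array-like storage
    ds[0] = np.ones(128)                                      # indexed just like a numpy array
    print(list(db.keys()))                                    # ['cars'] -- keys are member names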


import numpy as np
import h5py
 
# dump a dataset of feature vectors and labels to disk
def dump_dataset(data, labels, path, datasetName, writeMethod="w"):
    # arg 1: the list of feature vectors to write to the HDF5 dataset
    # arg 2: the labels, one per feature vector
    # arg 3: where on disk to store the HDF5 database
    # arg 4: the name of the dataset inside the HDF5 file
    # arg 5: the write method for the HDF5 database
    db = h5py.File(path, writeMethod)
    dataset = db.create_dataset(datasetName, (len(data), len(data[0]) + 1), dtype="float")
    dataset[0:len(data)] = np.c_[labels, data]
    db.close()
 
def load_dataset(path, datasetName): # load the feature vectors and labels associated with datasetName
    db = h5py.File(path, "r")
    (labels, data) = (db[datasetName][:, 0], db[datasetName][:, 1:])
    db.close()
    return (data, labels)


helpers.py:

Purpose: crop and return each image's ROI (its minimal bounding rectangle, plus padding)


import imutils
import cv2
 
def crop_ct101_bb(image, bb, padding=10, dstSize=(32, 32)):
    (y, h, x, w) = bb
    (x, y) = (max(x - padding, 0), max(y - padding, 0))
    roi = image[y:h + padding, x:w + padding]
 
    roi = cv2.resize(roi, dstSize, interpolation=cv2.INTER_AREA)
 
    return roi


extract_features.py:

Purpose: extract HOG feature vectors from the images, providing the data for the SVC classifier

Key points: 1. Iterating over files with the paths module from imutils

Questions: 1. What does the progressbar module do?

It creates a progress-bar display object.

Meaning of the optional widgets arguments:

'Progress: ': the text shown in front of the bar

Percentage(): shows the percentage complete

Bar('#'): sets the shape of the bar

ETA(): shows the estimated time remaining

Timer(): shows the elapsed time

2. The HOG function in detail:

https://blog.csdn.net/zhazhiqiang/article/details/20221143

https://baike.baidu.com/item/HOG/9738560?fr=aladdin

3. What does random.sample do?

sample(seq, n) picks n random, distinct elements from the sequence seq (see the sketch after this list);

4. What does random.choice do?

choice(seq) returns a single random element from the sequence seq

More on the random module:

1) random() returns a random float n with 0 <= n < 1;

2) getrandbits(n) returns n random bits as a long integer; 3) shuffle(seq[, random]) shuffles the sequence seq in place;

5. What does extract_patches_2d from the sklearn.feature_extraction.image module do? It cuts fixed-size patches out of an image; with max_patches set, it returns that many randomly positioned patches.

6) Warning message: Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15 (emitted by scikit-image's hog function; also, fewer features extracted than expected?)
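A quick sketch of the random and extract_patches_2d behaviors referenced above (toy values):

import random
import numpy as np
from sklearn.feature_extraction.image import extract_patches_2d

paths = ["a.jpg", "b.jpg", "c.jpg", "d.jpg"]
print(random.sample(paths, 2))  # two distinct random elements; paths is left untouched
print(random.choice(paths))     # a single random element

# extract_patches_2d cuts fixed-size sub-windows out of an image; with
# max_patches set it returns that many randomly positioned patches
image = np.zeros((100, 100), dtype="uint8")
patches = extract_patches_2d(image, (32, 32), max_patches=5)
print(patches.shape)            # (5, 32, 32)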

Run: python extract_features.py --conf conf/cars.json


# import the necessary packages
from __future__ import print_function
from sklearn.feature_extraction.image import extract_patches_2d
from pyimagesearch.object_detection import helpers
from pyimagesearch.descriptors import HOG
from pyimagesearch.utils import dataset
from pyimagesearch.utils import Conf
from imutils import paths
from scipy import io
import numpy as np
import progressbar
import argparse
import random
import cv2

ap = argparse.ArgumentParser()
ap.add_argument("-c", "--conf", required=True, help="path to the configuration file")
args = vars(ap.parse_args())
 
conf = Conf(args["conf"])#載入配置檔案

# initialize the HOG descriptor
hog = HOG(orientations=conf["orientations"], pixelsPerCell=tuple(conf["pixels_per_cell"]),
    cellsPerBlock=tuple(conf["cells_per_block"]), normalize=conf["normalize"])
data = []
labels = []


# randomly sample the car training images
trnPaths = list(paths.list_images(conf["image_dataset"]))
trnPaths = random.sample(trnPaths, int(len(trnPaths) * conf["percent_gt_images"]))
print("[INFO] describing training ROIs...")

widgets = ["Extracting: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=len(trnPaths), widgets=widgets).start()
# loop over the training images
for (i, trnPath) in enumerate(trnPaths):
    image = cv2.imread(trnPath)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    imageID = trnPath[trnPath.rfind("_") + 1:].replace(".jpg", "") # extract the image ID from the filename
 
    p = "{}/annotation_{}.mat".format(conf["image_annotations"], imageID)
    bb = io.loadmat(p)["box_coord"][0]
    roi = helpers.crop_ct101_bb(image, bb, padding=conf["offset"], dstSize=tuple(conf["window_dim"]))
    # decide whether the horizontal flip of the ROI should be used as extra training data
    rois = (roi, cv2.flip(roi, 1)) if conf["use_flip"] else (roi,)

    # extract HOG features from each ROI and update the data and labels lists
    for roi in rois:
        features = hog.describe(roi)
        data.append(features)
        labels.append(1)
 
    pbar.update(i)

dstPaths = list(paths.list_images(conf["image_distractions"]))
pbar = progressbar.ProgressBar(maxval=conf["num_distraction_images"], widgets=widgets).start()
print("[INFO] describing distraction ROIs...")
 
# sample patches from the negative (distraction) images
for i in np.arange(0, conf["num_distraction_images"]):
    image = cv2.imread(random.choice(dstPaths))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    patches = extract_patches_2d(image, tuple(conf["window_dim"]),
        max_patches=conf["num_distractions_per_image"])
 
    for patch in patches:
        features = hog.describe(patch)
        data.append(features)
        labels.append(-1)
 
    pbar.update(i)

pbar.finish()
print("[INFO] dumping features and labels to file...")
dataset.dump_dataset(data, labels, conf["features_path"], "features")

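Note that extract_features.py imports a HOG class from pyimagesearch.descriptors that is never listed in this post. A minimal sketch of what such a wrapper might look like, built on skimage's feature.hog (mapping the normalize flag to transform_sqrt is an assumption, not the author's file):

from skimage import feature

class HOG:
    def __init__(self, orientations=9, pixelsPerCell=(4, 4),
                 cellsPerBlock=(2, 2), normalize=True):
        # store the HOG parameters taken from the configuration file
        self.orientations = orientations
        self.pixelsPerCell = pixelsPerCell
        self.cellsPerBlock = cellsPerBlock
        self.normalize = normalize

    def describe(self, image):
        # compute the HOG feature vector for a single grayscale ROI
        return feature.hog(image, orientations=self.orientations,
            pixels_per_cell=self.pixelsPerCell,
            cells_per_block=self.cellsPerBlock,
            transform_sqrt=self.normalize)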

7. The Initial Training Phase

cars.json:


{
    #######
    # DATASET PATHS
    #######
    "image_dataset": "datasets/caltech101/101_ObjectCategories/car_side",
    "image_annotations": "datasets/caltech101/Annotations/car_side",
    "image_distractions": "datasets/sceneclass13",
 
    #######
    # FEATURE EXTRACTION
    #######
    "features_path": "output/cars/car_features.hdf5",
    "percent_gt_images": 0.5,
    "offset": 5,
    "use_flip": true,
    "num_distraction_images": 500,
    "num_distractions_per_image": 10,
 
    #######
    # HISTOGRAM OF ORIENTED GRADIENTS DESCRIPTOR
    #######
    "orientations": 9,
    "pixels_per_cell": [4, 4],
    "cells_per_block": [2, 2],
    "normalize": true,
 
    #######
    # OBJECT DETECTOR
    #######
    "window_step": 4,
    "overlap_thresh": 0.3,
    "pyramid_scale": 1.5,
    "window_dim": [96, 32],
    "min_probability": 0.7,
 
    #######
    # LINEAR SVM
    #######
    "classifier_path": "output/cars/model.cpickle",#分類器被儲存的位置
    "C": 0.01,
}


train_model.py

Purpose: train a linear SVC classifier on the extracted HOG feature vectors

Questions: 1. Where are the sklearn modules used here documented in detail?

2、args["hard_negatives"]引數作用?

3. What does numpy.stack() do?

It joins a sequence of arrays along a new axis, changing the dimensionality of the data

arg 1: the sequence of arrays, arg 2: the axis for the new dimension

4. What does numpy.hstack() do?

It stacks arrays horizontally (column-wise); vstack() does the opposite, stacking row-wise (see the sketch below)

The tup argument can be a tuple, a list, or a numpy array; the result is a numpy array.
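A toy sketch of the two stacking calls as the training script uses them:

import numpy as np

data = np.array([[1.0, 2.0], [3.0, 4.0]])   # existing feature matrix
hardData = np.array([[5.0, 6.0]])           # hard-negative features
print(np.vstack([data, hardData]).shape)    # (3, 2) -- rows appended

labels = np.array([1, 1])
hardLabels = np.array([-1])
print(np.hstack([labels, hardLabels]))      # [ 1  1 -1] -- labels joined end to end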

Run: python train_model.py --conf conf/cars.json


from __future__ import print_function
from pyimagesearch.utils import dataset
from pyimagesearch.utils import Conf
from sklearn.svm import SVC
import numpy as np
import argparse
import cPickle
 
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--conf", required=True,
    help="path to the configuration file")
ap.add_argument("-n", "--hard-negatives", type=int, default=-1,
    help="flag indicating whether or not hard negatives should be used")
args = vars(ap.parse_args())

print("[INFO] loading dataset...")
conf = Conf(args["conf"])
(data, labels) = dataset.load_dataset(conf["features_path"], "features") # grab the extracted feature vectors and labels
 
if args["hard_negatives"] > 0:
    print("[INFO] loading hard negatives...")
    (hardData, hardLabels) = dataset.load_dataset(conf["features_path"], "hard_negatives")
    data = np.vstack([data, hardData])
    labels = np.hstack([labels, hardLabels])

print("[INFO] training classifier...")
model = SVC(kernel="linear", C=conf["C"], probability=True, random_state=42)
model.fit(data, labels)
 
print("[INFO] dumping classifier...")
f = open(conf["classifier_path"], "w")
f.write(cPickle.dumps(model))#將分類器轉儲成檔
f.close()


objectdetector.py:

Purpose: run the image pyramid and sliding window over an image and return the list of bounding boxes whose probability clears the threshold.
Open question: changing the probability parameter did not change the number of matching boxes?
The pyramid function (3.1) and the sliding_window function (3.2) are written in helpers.py


import helpers
 
class ObjectDetector:
    def __init__(self, model, desc):
        self.model = model
        self.desc = desc

    def detect(self, image, winDim, winStep=4, pyramidScale=1.5, minProb=0.7): # image: the image to scan; winDim: the sliding window dimensions
        boxes = []
        probs = []
 
        for layer in helpers.pyramid(image, scale=pyramidScale, minSize=winDim): # loop over the layers of the image pyramid
            scale = image.shape[0] / float(layer.shape[0])
 
            for (x, y, window) in helpers.sliding_window(layer, winStep, winDim):
                (winH, winW) = window.shape[:2]
 
                if winH == winDim[1] and winW == winDim[0]:
                    features = self.desc.describe(window).reshape(1, -1)
                    prob = self.model.predict_proba(features)[0][1]
 
                    if prob > minProb:
                        (startX, startY) = (int(scale * x), int(scale * y))
                        endX = int(startX + (scale * winW))
                        endY = int(startY + (scale * winH))
 
                        boxes.append((startX, startY, endX, endY))
                        probs.append(prob)
 
        return (boxes, probs)


test_model_no_nms.py (at the same level as the pyimagesearch directory)

Purpose: verify that the bounding boxes found by the detector are correct

疑問:

1. SVC from the sklearn.svm module in detail

Parameter reference: https://blog.csdn.net/szlcw1/article/details/52336824

2. Warning message: Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15

Run: python test_model_no_nms.py --conf conf/cars.json --image datasets/caltech101/101_ObjectCategories/car_side/image_0004.jpg


from pyimagesearch.object_detection import ObjectDetector
from pyimagesearch.descriptors import HOG
from pyimagesearch.utils import Conf
import imutils
import argparse
import cPickle
import cv2
 
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--conf", required=True, help="path to the configuration file")
ap.add_argument("-i", "--image", required=True, help="path to the image to be classified")
args = vars(ap.parse_args())
 
conf = Conf(args["conf"])

model = cPickle.loads(open(conf["classifier_path"]).read()) # load the trained linear SVM
hog = HOG(orientations=conf["orientations"], pixelsPerCell=tuple(conf["pixels_per_cell"]),
    cellsPerBlock=tuple(conf["cells_per_block"]), normalize=conf["normalize"]) # the HOG descriptor used to extract features
od = ObjectDetector(model, hog)

image = cv2.imread(args["image"])
image = imutils.resize(image, width=min(260, image.shape[1]))
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 
(boxes, probs) = od.detect(gray, conf["window_dim"], winStep=conf["window_step"],
    pyramidScale=conf["pyramid_scale"], minProb=conf["min_probability"])
 

for (startX, startY, endX, endY) in boxes:
    cv2.rectangle(image, (startX, startY), (endX, endY), (0, 0, 255), 2)
 
cv2.imshow("Image", image)
cv2.waitKey(0)


Question 1: why does changing the minimum probability of interest return the same candidate regions?

8. Non-Maximum Suppression

Purpose: resolve overlapping bounding boxes and keep the best-matching one

nms.py (in the object_detection directory):

Questions: 1. What does numpy's argsort() do?

It returns the indices that would sort the array in ascending order

2. What does numpy's concatenate() do?

It joins arrays together (toy example after this list)

3. The logic of how idxs is handled inside the while loop? The syntax rules?

Worked explanation: https://blog.csdn.net/scut_salmon/article/details/79318387
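A toy sketch of the two numpy calls (values are made up):

import numpy as np

probs = np.array([0.9, 0.2, 0.7])
idxs = np.argsort(probs)        # indices sorted by ascending probability: [1 2 0]
print(idxs)

last = len(idxs) - 1            # the last index points at the highest probability
print(np.concatenate(([last], np.where(probs > 0.5)[0])))  # [2 0 2] -- arrays joined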

nms.py


import numpy as np

def non_max_suppression(boxes, probs, overlapThresh): # arg 1: list of bounding boxes, arg 2: probability for each box, arg 3: overlap threshold
    # if the list of boxes is empty, there is nothing to suppress
    if len(boxes) == 0:
        return []

    # convert the bounding boxes from integers to floats
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")

    # grab the coordinates of each corner of the bounding boxes
    pick = []
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    # compute the area of each box and sort the indices by probability
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(probs)

    # keep looping while indices remain in the list
    while len(idxs) > 0:
        # pick the index with the highest probability
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        # find the largest top-left and smallest bottom-right coordinates of the
        # intersection between the picked box and every remaining box
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])

        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # compute the ratio of overlap
        overlap = (w * h) / area[idxs[:last]]

        # drop the picked index and every index whose overlap exceeds the threshold
        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(overlap > overlapThresh)[0])))

    return boxes[pick].astype("int")

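A toy call to sanity-check the function (the boxes and probabilities are made up):

import numpy as np

boxes = np.array([(10, 10, 60, 60), (12, 12, 62, 62), (100, 100, 150, 150)])
probs = np.array([0.9, 0.8, 0.7])
print(non_max_suppression(boxes, probs, overlapThresh=0.3))
# the two heavily-overlapping boxes collapse into the single highest-probability
# one, so only two boxes survive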

test_model.py (at the same level as the pyimagesearch directory):

Purpose: verify that overlapping bounding boxes are suppressed correctly

Run: python test_model.py --conf conf/cars.json --image datasets/caltech101/101_ObjectCategories/car_side/image_0004.jpg


from pyimagesearch.object_detection import non_max_suppression
from pyimagesearch.object_detection import ObjectDetector
from pyimagesearch.descriptors import HOG
from pyimagesearch.utils import Conf
import numpy as np
import imutils
import argparse
import cPickle
import cv2
 
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--conf", required=True, help="path to the configuration file")
ap.add_argument("-i", "--image", required=True, help="path to the image to be classified")
args = vars(ap.parse_args())
 
conf = Conf(args["conf"])
 
model = cPickle.loads(open(conf["classifier_path"]).read())
hog = HOG(orientations=conf["orientations"], pixelsPerCell=tuple(conf["pixels_per_cell"]),
    cellsPerBlock=tuple(conf["cells_per_block"]), normalize=conf["normalize"])
od = ObjectDetector(model, hog)

image = cv2.imread(args["image"])
image = imutils.resize(image, width=min(260, image.shape[1]))
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 
(boxes, probs) = od.detect(gray, conf["window_dim"], winStep=conf["window_step"],
    pyramidScale=conf["pyramid_scale"], minProb=conf["min_probability"])
pick = non_max_suppression(np.array(boxes), probs, conf["overlap_thresh"])
orig = image.copy()
 
for (startX, startY, endX, endY) in boxes:
    cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 0, 255), 2)
 
for (startX, startY, endX, endY) in pick:
    cv2.rectangle(image, (startX, startY), (endX, endY), (0, 255, 0), 2)
 
cv2.imshow("Original", orig)
cv2.imshow("Image", image)
cv2.waitKey(0)


9. Hard-Negative Mining

Purpose: mine features that contain nothing related to the target object, typically background scenes, which is why the sceneclass13 dataset is used here. Training on these hard-negative features reduces false detections. The hard negatives are also written into the h5py database.
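Note: hard_negative_mine.py below reads several hn_* keys that do not appear in the cars.json listings above. Plausible additions (assumed values, echoing the detector settings) would look like:

    #######
    # HARD NEGATIVE MINING
    #######
    "hn_num_distraction_images": 500,
    "hn_window_step": 4,
    "hn_pyramid_scale": 1.5,
    "hn_min_probability": 0.5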

Run: python hard_negative_mine.py --conf conf/cars.json

hard_negative_mine.py


from __future__ import print_function
from pyimagesearch.object_detection.objectdetector import ObjectDetector
from pyimagesearch.descriptors.hog import HOG
from pyimagesearch.utils import dataset
from pyimagesearch.utils.conf import Conf
from imutils import paths
import numpy as np
import progressbar
import argparse
import cPickle
import random
import cv2

ap = argparse.ArgumentParser()
ap.add_argument("-c", "--conf", required = True, help = "path to the configuration file")
args = vars(ap.parse_args())

conf =Conf(args["conf"])
data =[]

model =cPickle.loads(open(conf["classifier_path"]).read())
hog =HOG(orientations = conf["orientations"],  pixelsPerCell = tuple(conf["pixels_per_cell"]), 
    cellsPerBlock = tuple(conf["cells_per_block"]),  normalize = conf["normalize"])


od = ObjectDetector(model, hog)

dstPaths = list(paths.list_images(conf["image_distractions"]))
dstPaths = random.sample(dstPaths, conf["hn_num_distraction_images"])

widgets = ["Mining:", progressbar.Percentage(), " ", progressbar.Bar(), "", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval = len(dstPaths), widgets = widgets).start()
myindex = 0
for (i, imagePath) in enumerate(dstPaths):
    image = cv2.imread(imagePath)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    (boxes, probs) = od.detect(gray, conf["window_dim"], winStep=conf["hn_window_step"],
        pyramidScale=conf["hn_pyramid_scale"], minProb=conf["hn_min_probability"])

    # every detection in a distraction image is a false positive; store its
    # probability alongside its HOG feature vector
    for (prob, (startX, startY, endX, endY)) in zip(probs, boxes):
        roi = cv2.resize(gray[startY:endY, startX:endX], tuple(conf["window_dim"]),
            interpolation=cv2.INTER_AREA)
        features = hog.describe(roi)
        data.append(np.hstack([[prob], features]))

    pbar.update(i)

pbar.finish()
print("[INFO] sorting by probability...")
data = np.array(data)
data = data[data[:, 0].argsort()[::-1]]

print("[INFO] dmping hard negatives to file...")
dataset.dump_dataset(data[:, 1:], [-1] * len(data), conf["features_path"], "hard_negatives", writeMethod = "a")


10. Re-training the Object Detector

Purpose: add the hard negatives to the SVM training set to reduce spurious detections

Run: python train_model.py --conf conf/cars.json --hard-negatives 1

train_model.py


from __future__ import print_function
from pyimagesearch.utils import dataset
from pyimagesearch.utils.conf import Conf
from sklearn.svm import SVC
import argparse
import pickle
import numpy as np

ap = argparse.ArgumentParser()
ap.add_argument("-c", "--conf", required=True, help="path to the configuration file")
ap.add_argument("-n", "--hard-negatives", type=int, default=-1,
    help="flag indicating whether or not hard negatives should be used")
args = vars(ap.parse_args())

print("[INFO] loading dataset...")
conf = Conf(args["conf"])
(data, labels) = dataset.load_dataset(conf["features_path"], "features")

if args["hard_negatives"] > 0:
    print("[INFO] loading hard negatives...")
    (hardData, hardLabels) = dataset.load_dataset(conf["features_path"], "hard_negatives")
    data = np.vstack([data, hardData])
    labels = np.hstack([labels, hardLabels])

print("[INFO] training classifier...")
model = SVC(kernel = "linear", C = conf["C"], probability = True, random_state = 42)
model.fit(data, labels)

print("[INFO] dumping classifier...")
f = open(conf["classifier_path"], "wb")
f.write(pickle.dumps(model))
f.close()


11. Using imglab

Preparation: use the imglab tool to generate an XML file and hand-select the object regions.

Step 1: imglab -c <path to output XML file> <image folder> generates the XML file.
Step 2: imglab <XML file> opens the tool so the object regions can be drawn by hand.

Purpose: extract the images' bounding boxes and train an SVM classifier on their features.

Run: python train_detector.py --xml face_detector/faces_annotations.xml --detector face_detector/detector.svm


from __future__ import print_function
import argparse
import dlib

ap = argparse.ArgumentParser()
ap.add_argument("-x", "--xml", required = True, help = "path to input XML file")
ap.add_argument("-d", "--detector", required = True, help = "path to output director")
args = vars(ap.parse_args())

print("[INFO] training detector....")
options = dlib.simple_object_detector_training_options()
options.C = 1.0              # SVM regularization parameter
options.num_threads = 4
options.be_verbose = True
dlib.train_simple_object_detector(args["xml"], args["detector"], options)

print("[INFO] training accuracy:{}".format(dlib.test_simple_object_detector(args["xml"], args["detector"])))

# load the trained detector and visualize the learned HOG filter
detector = dlib.simple_object_detector(args["detector"])
win = dlib.image_window()
win.set_image(detector)
dlib.hit_enter_to_continue()


test_detector.py

Purpose: test the trained linear SVM detector

Run: python test_detector.py --detector face_detector/detector.svm --testing face_detector/testing


from imutils import paths
import argparse
import dlib
import cv2

ap = argparse.ArgumentParser()
ap.add_argument("-d", "--detector", required = True, help = "Path to train object detector")
ap.add_argument("-t", "--testing", required = True, help = "Path to directory of testing images")
args = vars(ap.parse_args())

detector = dlib.simple_object_detector(args["detector"])

for testingPath in paths.list_images(args["testing"]):
    image = cv2.imread(testingPath)
    boxes = detector(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    for b in boxes:
        # note: "w" and "h" here are really the right/bottom coordinates of the box
        (x, y, w, h) = (b.left(), b.top(), b.right(), b.bottom())
        cv2.rectangle(image, (x, y), (w, h), (0, 255, 0), 2)

    cv2.imshow("Image", image)
    cv2.waitKey(0)
