1. 程式人生 > >Mask Rcnn使用篇-訓練自己的資料集

Mask Rcnn使用篇-訓練自己的資料集

首先膜拜一下何愷明大神,其實首次知道FCN做語義分割的時候,也產生過是否可以與Faster Rcnn結合的想法,不過也就那麼一個念頭閃過而已,沒具體想估計也想不明白。看了Mask Rcnn後有種豁然開朗的感覺,除了膜拜沒別的想法了。這篇只寫怎麼使用,原理後面再寫吧。
必要的開發環境我就不囉嗦了,在程式碼連結裡有,如果只是訓練自己的訓練集,coco tools就不用下了,windows下安裝還挺煩。
一、 資料集標註
使用labelme來標註影象,安裝前需要先安裝pyqt5,然後就ok了。安裝的時候,可能會出現諸如:“XXX不是合法的utf-8編碼”之類的錯誤,這是中文版windows的問題,定位到報錯的那個地方,然後把編碼方式"utf-8"修改成"gb2312"。忘截圖了,如果還報錯,就百度吧。 安裝好後,直接啟動labelme.exe即可。選擇資料集所在的資料夾,然後一張張的手動標註吧,存的時候標籤注意下,如果圖中有同一個類的不同物體,則標註成label_1,label_2。比如兩隻小貓就標註成cat_1,cat_2。我這裡每張圖片只有一個目標,所以沒那麼麻煩了。
在這裡插入圖片描述


二、 資料處理
labelme儲存的都是xxx.json檔案,需要用labelme_json_to_dataset.exe轉換一下,在cmd下定位到labelme_json_to_dataset.exe檔案所在的資料夾下,其中‘E:\code\Tongue_detect\dataset’就是json檔案所在的資料夾

for /r E:\code\Tongue_detect\dataset %i in (*.json) do labelme_json_to_dataset %i  

這樣會生成一個同名資料夾,裡面包含了我們需要的mask檔案,label.png,不過這個檔案是16bit點,而cv2中使用的都是8bit點,所以需要轉換一下。說明:這樣轉換後,開啟轉換後的圖片一片漆黑,如果想看效果可以把"img = Image.fromarray(np.uint8(np.array(img)))"改成“img = Image.fromarray(np.uint8(np.array(img)) * 20 )”,不過這樣不符合mask rcnn的要求,看看效果即可,後面執行還是需要不乘倍數的!

def img_16to8(src_dir=r'E:\code\Tongue_detect\train_data\labelme_json',
              dest_dir=r'E:\code\Tongue_detect\train_data\cv2_mask'):
    """Convert the labelme ``label.png`` masks to 8-bit PNG files.

    Every sub-directory of ``src_dir`` that contains a ``label.png`` is
    processed; the converted mask is written to ``dest_dir`` as
    ``<name>.png``.

    Args:
        src_dir: Directory holding the ``*_json`` folders produced by
            ``labelme_json_to_dataset``.
        dest_dir: Directory that receives the 8-bit masks. Created when it
            does not exist yet.
    """
    from PIL import Image
    import numpy as np
    import os

    os.makedirs(dest_dir, exist_ok=True)
    for child_dir in os.listdir(src_dir):
        old_mask = os.path.join(src_dir, child_dir, 'label.png')
        # Skip stray files or folders that hold no label image.
        if not os.path.isfile(old_mask):
            continue

        # Strip only the trailing "_json" so image names that themselves
        # contain underscores keep their full stem; the training script
        # looks the mask up as "<image stem>.png".
        if child_dir.endswith('_json'):
            stem = child_dir[:-len('_json')]
        else:
            stem = child_dir.split('_')[0]
        new_mask = os.path.join(dest_dir, stem + '.png')

        with Image.open(old_mask) as img:
            # np.uint8 keeps only the low 8 bits of each pixel value.
            Image.fromarray(np.uint8(np.array(img))).save(new_mask)

最後,把得到的檔案統一一下,便於後續程式執行,最後資料夾如下:
在這裡插入圖片描述
三、 修改mask rcnn
在mask rcnn的根目錄下,新建兩個資料夾“models”和“logs” ,models用於儲存已經預訓練好的coco模型,可以在這裡下載,logs用於儲存訓練產生的模型。在samples資料夾下新建一個“tongue”資料夾,建立tongue.py,程式碼中的 init_with = "last" 第一次訓練時請改成 init_with = "coco",程式碼主要參考 https://blog.csdn.net/disiwei1012/article/details/79928679 ,略微修改

import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt

import yaml
from PIL import Image

# Root directory of the project
ROOT_DIR = os.path.abspath("../../")
sys.path.append(ROOT_DIR)  # To find local version of the library

from mrcnn.config import Config
from mrcnn import utils
from mrcnn import model as modellib

# Directory to save logs and trained models
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Module-level counter kept for compatibility; no code in this file reads or
# updates its value.
iter_num = 0

# Local path to the COCO pre-trained weights file. The path components are
# passed separately so no backslash ends up inside a string literal ("\m" is
# an invalid escape sequence) and the path also works on non-Windows systems.
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "models", "mask_rcnn_coco.h5")
# Download COCO trained weights from Releases if needed
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)


class ShapesConfig(Config):
    """Training configuration for the single-class dataset of this script.

    Inherits from the base ``Config`` class and overrides only the values
    listed below.
    """
    # Name that identifies this configuration
    NAME = "shapes"

    # One GPU processing 4 images at a time
    # (batch size = GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 4

    # Total number of classes: background plus one foreground class
    NUM_CLASSES = 2

    # Limits for the small and the large image side; together they
    # determine the image shape.
    IMAGE_MIN_DIM = 256
    IMAGE_MAX_DIM = 256

    # Anchor side lengths in pixels: the base scales 8, 16, 32, 64 and 128,
    # each multiplied by 6.
    RPN_ANCHOR_SCALES = (48, 96, 192, 384, 768)

    # Number of ROIs sampled per image during training
    TRAIN_ROIS_PER_IMAGE = 32

    # Training steps per epoch
    STEPS_PER_EPOCH = 100

    # Validation steps run at the end of each epoch
    VALIDATION_STEPS = 5

class DrugDataset(utils.Dataset):
    """Dataset of labelme-annotated images with one class named "tongue".

    Each registered image carries three extra entries in ``image_info``:
    ``path`` (the picture), ``mask_path`` (the 8-bit label image) and
    ``yaml_path`` (the ``info.yaml`` that lists the label names).
    """

    def get_obj_index(self, image):
        """Return the number of instances stored in a label image.

        Instance ``k`` is drawn with pixel value ``k`` (see ``draw_mask``),
        so the largest pixel value equals the instance count.

        Args:
            image: Label image (PIL image or array-like).

        Returns:
            The maximum pixel value as a plain ``int``.
        """
        return int(np.max(np.asarray(image)))

    def from_yaml_get_class(self, image_id):
        """Read the label names of one image from its ``info.yaml``.

        Args:
            image_id: Index into ``self.image_info``.

        Returns:
            The ``label_names`` list without its first entry.
            NOTE(review): the first entry is presumably labelme's background
            label - confirm against the generated files.
        """
        info = self.image_info[image_id]
        with open(info['yaml_path']) as f:
            # safe_load is used because yaml.load() without an explicit
            # Loader is rejected by PyYAML >= 6, and only a mapping with a
            # list of strings is read from this file.
            temp = yaml.safe_load(f)
        return temp['label_names'][1:]

    def draw_mask(self, num_obj, mask, image, image_id):
        """Fill ``mask`` with one binary layer per instance.

        Layer ``index`` is set to 1 wherever the label image has the pixel
        value ``index + 1``. Only the area covered by the registered image
        width/height is considered.

        Args:
            num_obj: Number of instance layers to fill.
            mask: Array of shape (height, width, num_obj), modified in place.
            image: Label image (PIL image or array-like).
            image_id: Index into ``self.image_info``.

        Returns:
            The same ``mask`` array.
        """
        info = self.image_info[image_id]
        pixels = np.asarray(image)
        if pixels.ndim != 2:
            # Multi-channel pixels never compare equal to an integer label,
            # so there is nothing to draw.
            return mask

        region = pixels[:info['height'], :info['width']]
        rows, cols = region.shape
        for index in range(num_obj):
            # One vectorised comparison replaces the per-pixel getpixel loop.
            mask[:rows, :cols, index][region == index + 1] = 1
        return mask

    def load_shapes(self, count, img_floder, mask_floder, imglist, dataset_root_path):
        """Register the class and up to ``count`` images with the dataset.

        Args:
            count: Maximum number of entries of ``imglist`` to register.
            img_floder: Folder that contains the pictures.
            mask_floder: Folder that contains the 8-bit masks, one
                ``<stem>.png`` per picture.
            imglist: File names of the pictures inside ``img_floder``.
            dataset_root_path: Dataset root; its ``labelme_json`` folder must
                hold one ``<stem>_json`` directory per picture.

        Raises:
            FileNotFoundError: If the ``img.png`` of an entry cannot be read.
        """
        # Add classes
        self.add_class("shapes", 1, "tongue")
        for i, img_name in enumerate(imglist[:count]):
            filestr = img_name.split(".")[0]
            json_dir = dataset_root_path + "/labelme_json/" + filestr + "_json"
            mask_path = mask_floder + "/" + filestr + ".png"
            yaml_path = json_dir + "/info.yaml"
            img_path = json_dir + "/img.png"
            print(img_path)

            # The image is read only to obtain its width and height.
            cv_img = cv2.imread(img_path)
            if cv_img is None:
                # cv2.imread signals a missing/unreadable file with None.
                raise FileNotFoundError("Cannot read image: " + img_path)

            self.add_image("shapes", image_id=i, path=img_floder + "/" + img_name,
                           width=cv_img.shape[1], height=cv_img.shape[0],
                           mask_path=mask_path, yaml_path=yaml_path)

    def load_mask(self, image_id):
        """Build the instance masks and class ids of one image.

        Args:
            image_id: Index into ``self.image_info``.

        Returns:
            A tuple ``(mask, class_ids)``: ``mask`` is a uint8 array of shape
            (height, width, instances) and ``class_ids`` an int32 array with
            one class id per label name that contains "tongue".
        """
        print("image_id", image_id)
        info = self.image_info[image_id]
        with Image.open(info['mask_path']) as img:
            num_obj = self.get_obj_index(img)
            mask = np.zeros([info['height'], info['width'], num_obj], dtype=np.uint8)
            mask = self.draw_mask(num_obj, mask, img, image_id)

        # No occlusion pass is needed: every pixel of the label image holds
        # exactly one instance id, so the layers cannot overlap. (The former
        # occlusion loop never executed and its setup failed on images
        # without any instance.)
        labels = self.from_yaml_get_class(image_id)
        labels_form = ["tongue" for label in labels if "tongue" in label]
        class_ids = np.array([self.class_names.index(s) for s in labels_form])
        return mask, class_ids.astype(np.int32)


def get_ax(rows=1, cols=1, size=8):
    """Create the Matplotlib axes used by the visualizations.

    Gives a single place to control figure sizes: every grid cell is
    ``size`` x ``size``, so the figure measures ``size * cols`` by
    ``size * rows``.

    Args:
        rows: Number of subplot rows.
        cols: Number of subplot columns.
        size: Edge length of one grid cell.

    Returns:
        Whatever ``plt.subplots`` returns as its axes object.
    """
    figure_size = (size * cols, size * rows)
    _figure, axes = plt.subplots(rows, cols, figsize=figure_size)
    return axes

def train_model(init_with="last",
                dataset_root_path=r"E:\code\Tongue_detect\train_data"):
    """Train Mask R-CNN on the labelme dataset in two stages.

    Stage one trains only the head layers, stage two fine-tunes all layers
    with a ten times smaller learning rate.

    Args:
        init_with: Which weights to start from: "imagenet", "coco" or
            "last". Use "coco" for the very first run; "last" resumes from
            the most recent checkpoint found under MODEL_DIR.
        dataset_root_path: Dataset root containing the ``pic``, ``cv2_mask``
            and ``labelme_json`` folders.

    Raises:
        ValueError: If ``init_with`` is not one of the supported values.
    """
    if init_with not in ("imagenet", "coco", "last"):
        # Fail early instead of silently training from random weights.
        raise ValueError(
            "init_with must be 'imagenet', 'coco' or 'last', got %r" % (init_with,))

    # Basic settings
    img_floder = os.path.join(dataset_root_path, "pic")
    mask_floder = os.path.join(dataset_root_path, "cv2_mask")
    imglist = os.listdir(img_floder)
    count = len(imglist)

    # Training dataset: every picture in the folder
    dataset_train = DrugDataset()
    dataset_train.load_shapes(count, img_floder, mask_floder, imglist, dataset_root_path)
    dataset_train.prepare()

    # Validation dataset: the first 7 pictures, clamped so that datasets with
    # fewer pictures do not index past the end of the list.
    # NOTE(review): these pictures are also part of the training set, so the
    # validation loss is not an independent estimate.
    val_count = min(7, count)
    dataset_val = DrugDataset()
    dataset_val.load_shapes(val_count, img_floder, mask_floder, imglist, dataset_root_path)
    dataset_val.prepare()

    # Create model in training mode
    config = ShapesConfig()
    config.display()
    model = modellib.MaskRCNN(mode="training", config=config, model_dir=MODEL_DIR)

    if init_with == "imagenet":
        model.load_weights(model.get_imagenet_weights(), by_name=True)
    elif init_with == "coco":
        # Load weights trained on MS COCO, but skip layers that
        # are different due to the different number of classes
        model.load_weights(COCO_MODEL_PATH, by_name=True,
                           exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                    "mrcnn_bbox", "mrcnn_mask"])
    else:
        # "last": load the most recent checkpoint and continue training
        checkpoint_file = model.find_last()
        model.load_weights(checkpoint_file, by_name=True)

    # Stage 1: train the head branches.
    # Passing layers="heads" freezes all layers except the head layers.
    model.train(dataset_train, dataset_val,
                learning_rate=config.LEARNING_RATE,
                epochs=10,
                layers='heads')

    # Stage 2: fine tune all layers with a reduced learning rate.
    # NOTE(review): `epochs` is presumably the cumulative epoch index, so
    # this stage would add 20 epochs on top of stage 1 - confirm.
    model.train(dataset_train, dataset_val,
                learning_rate=config.LEARNING_RATE / 10,
                epochs=30,
                layers="all")

class TongueConfig(ShapesConfig):
    """Configuration used by predict(): ShapesConfig with one image per GPU.

    All other settings are inherited unchanged from ShapesConfig.
    """
    # NOTE(review): presumably set to 1 x 1 because predict() passes a single
    # image to model.detect() - confirm the batch-size requirement.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

def predict(image_path=r'E:\code\Tongue_detect\temp\038.jpg'):
    """Run the most recently trained model on one image and show the result.

    Args:
        image_path: Path of the image to run detection on.

    Returns:
        The detection result for the image: the first element of what
        ``model.detect`` returns, with the keys 'rois', 'masks',
        'class_ids' and 'scores' used for the visualization.

    Raises:
        FileNotFoundError: If no trained weights path is available.
    """
    import skimage.io
    from mrcnn import visualize

    # Create model in inference mode
    config = TongueConfig()
    config.display()
    model = modellib.MaskRCNN(mode="inference", config=config, model_dir=MODEL_DIR)
    model_path = model.find_last()

    # Explicit check instead of assert, which is stripped under "python -O".
    if not model_path:
        raise FileNotFoundError("Provide path to trained weights")
    print("Loading weights from ", model_path)
    model.load_weights(model_path, by_name=True)

    class_names = ['BG', 'tongue']

    image = skimage.io.imread(image_path)

    # Run detection
    results = model.detect([image], verbose=1)

    # Visualize results
    r = results[0]
    visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                                class_names, r['scores'])
    return r

if __name__ == "__main__":
    # Script entry point: runs training by default. To run inference on a
    # trained model instead, call predict() in place of train_model().
    train_model()
    # predict()


修改 mrcnn/model.py檔案中的第26行,修改成:

from mrcnn import utils
# import utils

在訓練了20個epoch後,總的loss降到0.0524,得到測試影象如下:
在這裡插入圖片描述
如果使用矩形框中的影象做下一步的輸入已經滿足要求了。