1. 程式人生 > >CNN Data Augmentation(資料增強)-旋轉

CNN Data Augmentation(資料增強)-旋轉

1、原始狀態

最初的影象是這個樣子的
 
.xml檔案長下面這個樣子

<annotation>
    <object>
        <name>face</name>
        <difficult>0</difficult>
        <bndbox>
            <xmin>315.307918</xmin>
            <ymin>240.234604</ymin>
            <xmax>693.677419</xmax>
            <ymax>699.683284</ymax>
        </bndbox>
    </object>
</annotation>

2、旋轉影象並修改對應的xml檔案

import math
import os
# pdb is only used for debugging; it can be ignored
import pdb
import xml.etree.ElementTree as ET

import cv2
import numpy as np

#旋轉影象的函式
def rotate_image(src, angle, scale=1.):
    """Rotate an image without cropping its corners.

    The output canvas is sized to the bounding rectangle of the rotated
    (and scaled) input, and the affine transform is shifted so the source
    image ends up centred on that canvas.

    Args:
        src: Image array; ``src.shape[0]`` is the height and
            ``src.shape[1]`` the width.
        angle: Rotation angle in degrees, passed on unchanged to
            ``cv2.getRotationMatrix2D``.
        scale: Scale factor applied together with the rotation.

    Returns:
        The warped image produced by ``cv2.warpAffine``.
    """
    height, width = src.shape[:2]
    theta = np.deg2rad(angle)
    sin_t = np.sin(theta)
    cos_t = np.cos(theta)

    # Size of the axis-aligned rectangle that encloses the rotated image.
    new_width = (abs(sin_t*height) + abs(cos_t*width))*scale
    new_height = (abs(cos_t*height) + abs(sin_t*width))*scale

    # Rotation about the centre of the enlarged canvas.
    matrix = cv2.getRotationMatrix2D(
        (new_width*0.5, new_height*0.5), angle, scale)

    # Offset from the old centre to the new one, pushed through the
    # rotation; the trailing 0 means only the linear part is applied.
    centre_offset = np.array(
        [(new_width-width)*0.5, (new_height-height)*0.5, 0])
    shift = np.dot(matrix, centre_offset)

    # Only the translation column of the transform needs adjusting.
    matrix[:, 2] += shift

    canvas_size = (int(math.ceil(new_width)), int(math.ceil(new_height)))
    return cv2.warpAffine(src, matrix, canvas_size,
                          flags=cv2.INTER_LANCZOS4)

# Update the bounding box from the XML annotation to match the rotated image
def rotate_xml(src, xmin, ymin, xmax, ymax, angle, scale=1.):
    """Map a bounding box into the coordinate system of the rotated image.

    The same affine transform that ``rotate_image`` uses is rebuilt here,
    applied to the midpoints of the four box edges, and the bounding
    rectangle of the transformed points is returned.

    Args:
        src: The original (unrotated) image; only its shape is read.
        xmin, ymin, xmax, ymax: Box corners in the original image.
        angle: Rotation angle in degrees.
        scale: Scale factor applied together with the rotation.

    Returns:
        A tuple ``(x, y, width, height)`` as produced by
        ``cv2.boundingRect`` for the transformed edge midpoints.
    """
    w = src.shape[1]
    h = src.shape[0]
    rangle = np.deg2rad(angle)  # angle in radians
    # Width and height of the canvas that holds the rotated image.
    nw = (abs(np.sin(rangle)*h) + abs(np.cos(rangle)*w))*scale
    nh = (abs(np.cos(rangle)*h) + abs(np.sin(rangle)*w))*scale
    # Ask OpenCV for the rotation matrix about the new canvas centre.
    rot_mat = cv2.getRotationMatrix2D((nw*0.5, nh*0.5), angle, scale)
    # Move from the old centre to the new centre, combined with the
    # rotation (the trailing 0 drops the translation part).
    rot_move = np.dot(rot_mat, np.array([(nw-w)*0.5, (nh-h)*0.5, 0]))
    # The move only affects the translation part of the transform.
    rot_mat[0, 2] += rot_move[0]
    rot_mat[1, 2] += rot_move[1]
    # rot_mat is now the final transform. Take the midpoints of the four
    # edges of the original box and map them into the rotated image.
    point1 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymin, 1]))
    point2 = np.dot(rot_mat, np.array([xmax, (ymin+ymax)/2, 1]))
    point3 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymax, 1]))
    point4 = np.dot(rot_mat, np.array([xmin, (ymin+ymax)/2, 1]))
    # Stack the points into one array and convert it to int32 before
    # handing it to cv2.boundingRect.
    concat = np.vstack((point1, point2, point3, point4))
    concat = concat.astype(np.int32)
    # Single-argument call form works on both Python 2 and Python 3.
    print(concat)
    rx, ry, rw, rh = cv2.boundingRect(concat)
    return rx, ry, rw, rh

# Rotate every source image by 60, 90, 120, 150, 210, 240 and 300 degrees
# and, when an annotation with the same base name exists, write a rotated
# annotation for each rotated image.
image_dir = "/home/username/image"      # source .jpg files
xml_dir = "/home/username/xml"          # source and rotated .xml files
rotated_dir = "/home/yourname/rotate/"  # rotated .jpg files

for filename in os.listdir(image_dir):
    # Split the base name from the extension; only .jpg files are images.
    stem, ext = os.path.splitext(filename)
    if ext != ".jpg":
        continue
    img = cv2.imread(os.path.join(image_dir, filename))
    if img is None:
        # cv2.imread returns None for an unreadable file instead of raising.
        print("log: %s could not be read, skipped." % filename)
        continue
    xml_path = os.path.join(xml_dir, stem + ".xml")
    has_xml = os.path.isfile(xml_path)
    for angle in (60, 90, 120, 150, 210, 240, 300):
        rotated_img = rotate_image(img, angle)
        # Write the rotated image.
        cv2.imwrite(rotated_dir + stem + "_" + str(angle) + "d.jpg",
                    rotated_img)
        print("log: [%sd] %s is processed." % (angle, filename))
        if not has_xml:
            continue
        # Re-parse for every angle: the boxes are modified in place below,
        # so each rotation must start from the original coordinates.
        tree = ET.parse(xml_path)
        root = tree.getroot()
        for box in root.iter('bndbox'):
            xmin = float(box.find('xmin').text)
            ymin = float(box.find('ymin').text)
            xmax = float(box.find('xmax').text)
            ymax = float(box.find('ymax').text)
            x, y, w, h = rotate_xml(img, xmin, ymin, xmax, ymax, angle)
            # Store the rotated box; xmax gets x+w and ymax gets y+h.
            box.find('xmin').text = str(x)
            box.find('ymin').text = str(y)
            box.find('xmax').text = str(x+w)
            box.find('ymax').text = str(y+h)
            box.set('updated', 'yes')
        # Use the same "<stem>_<angle>d" base name as the rotated image so
        # the two files can be paired by name.
        tree.write(os.path.join(xml_dir, stem + "_" + str(angle) + "d.xml"))
        print("[%s] %s is processed." % (angle, stem + ".xml"))

3、結果

我們來看下旋轉後的影象和修改後的.xml檔案
太多了,這裡只放兩張~
   
修改後的xml檔案如下

<annotation>
    <object>
        <name>face</name>
        <difficult>0</difficult>
        <bndbox updated="yes">
            <xmin>460</xmin>
            <ymin>521</ymin>
            <xmax>693.677419</xmax>
            <ymax>849</ymax>
        </bndbox>
    </object>
</annotation>

效果怎麼樣呢?我們來寫個小程式看一下

import cv2
import os
import xml.etree.ElementTree as ET
import pdb

# Draw the annotated boxes on each rotated image so the result can be
# checked by eye; press any key to advance to the next image.
rotated_dir = "/home/yourname/rotate/"  # rotated .jpg files
xml_dir = "/home/username/xml/"         # rotated .xml annotations

for filename in os.listdir(rotated_dir):
    stem, ext = os.path.splitext(filename)
    if ext != ".jpg":
        continue
    xml_path = xml_dir + stem + ".xml"
    if not os.path.isfile(xml_path):
        print("%s has no annotation, skipped." % filename)
        continue
    img = cv2.imread(rotated_dir + filename)
    if img is None:
        # cv2.imread returns None for an unreadable file instead of raising.
        print("%s could not be read, skipped." % filename)
        continue
    root = ET.parse(xml_path).getroot()
    for box in root.iter('bndbox'):
        # Coordinates may be stored as floats; cv2.rectangle needs ints.
        x1 = int(float(box.find('xmin').text))
        y1 = int(float(box.find('ymin').text))
        x2 = int(float(box.find('xmax').text))
        y2 = int(float(box.find('ymax').text))
        cv2.rectangle(img, (x1, y1), (x2, y2), [0,255,0], 2)
    # Show the image once, after all of its boxes have been drawn.
    cv2.imshow("test", img)
    cv2.waitKey(0)
cv2.destroyAllWindows()

(end)