ocr影象預處理-影象分割、文字方向校正

阿新 • • 發佈：2018-11-08

說明：文字方向校正(fft方式和仿射變換方式)參考了網上的程式碼，只做了少量修改
只針對醫療影像影象，自然場景下的另說
因為處理的影象都很大很大，居然有11000*12000這種解析度的，有90M大小，我也是醉了，絕大部分都是6000左右解析度的影象，這種影象直接送到CTPN裡的話，效果不是太好，太大了而且效率感人，所以必須做一下預處理。大部分的X光影象很簡單，直接縮放送CTPN即可，而CT和MRI影象雖然一張上有很多小影象，但好在要麼有虛線分割要麼中間都會留有空白的地方，於是就可以利用直線檢測和投影檢測來把圖片分割成若干小影象了。(吐槽一下之前老外寫的程式碼，不管三七二十一把所有影象都是分成上下兩部分，然後上下再各分成上下兩部分，四個部分再分別迴圈他的N個演算法，搞的整個系統70%以上的資源都在跑OCR，一張很簡單的圖片最低也要幾分鐘才能出結果，複雜一點的都是10幾分鐘真想知道這是怎麼過驗收的！)
1. 影象分割，思想很簡單：有虛線的直接做直線檢測，有空白的做X、Y軸的投影，都沒有的就是X光影象了，直接把整張影象當做ROI送CTPN。

def _img_split_with_hough(img, min=100, max=220):
    """Detect long horizontal and vertical separator lines via a Hough transform.

    Edges are extracted with ``cv2.Canny`` and passed to ``cv2.HoughLinesP``.
    Only exactly axis-aligned segments are kept, and only when they span more
    than half of the image width (horizontal) or height (vertical). A segment
    lying within 10 pixels of an already accepted line of the same orientation
    is treated as a duplicate and dropped.

    :param img: input image; presumably single-channel 8-bit (the visible
        caller passes a grayscale image) -- TODO confirm
    :param min: lower threshold passed to ``cv2.Canny``
    :param max: upper threshold passed to ``cv2.Canny``
    :return: ``(h_line, v_line)``, two lists of ``(x1, y1, x2, y2)`` tuples
        holding the accepted horizontal and vertical segments
    """
    h, w = img.shape[:2]
    edges = cv2.Canny(img, min, max)
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 30,
                            minLineLength=100, maxLineGap=10)
    h_line = []
    v_line = []
    # HoughLinesP yields None (not an empty array) when no segment is found;
    # indexing it would raise, so report "no lines" instead.
    if lines is None:
        return h_line, v_line

    for x1, y1, x2, y2 in lines[:, 0, :]:
        if y2 == y1:
            # Horizontal segment: reject near-duplicates of an accepted row.
            duplicate = any(abs(kept[1] - y1) < 10 for kept in h_line)
            if not duplicate and abs(x1 - x2) > w * 0.5:
                h_line.append((x1, y1, x2, y2))
        elif x1 == x2:
            # Vertical segment: reject near-duplicates of an accepted column.
            duplicate = any(abs(kept[0] - x1) < 10 for kept in v_line)
            if not duplicate and abs(y1 - y2) > h * 0.5:
                v_line.append((x1, y1, x2, y2))
    return h_line, v_line

def _img_split_with_shadow(gray_img, threshold_value=180):
    """Locate blank separator bands from the row and column projection sums.

    A row (column) counts as blank when the sum of its pixel values is below
    10. For each axis the result contains a line at the first blank index, a
    line at the last blank index, and a line at every blank index that is
    followed by a jump of more than 2 to the next blank index.

    :param gray_img: 2-D grayscale image array
    :param threshold_value: not used by this function
    :return: ``(h_line, v_line)``, two lists of ``(x1, y1, x2, y2)`` tuples
        spanning the full image width / height; both empty lists when no
        blank row / column exists
    """
    rows = gray_img.shape[0]
    cols = gray_img.shape[1]
    last_x = cols - 1
    last_y = rows - 1

    # Indices of rows / columns whose projection sum is (almost) zero.
    blank_rows = np.flatnonzero(np.sum(gray_img, axis=1) < 10)
    blank_cols = np.flatnonzero(np.sum(gray_img, axis=0) < 10)

    h_line = []
    if blank_rows.size > 0:
        for y in (blank_rows[0], blank_rows[-1]):
            h_line.append((0, y, last_x, y))
    v_line = []
    if blank_cols.size > 0:
        for x in (blank_cols[0], blank_cols[-1]):
            v_line.append((x, 0, x, last_y))

    # A blank index followed by a gap larger than 2 closes a blank band.
    for y in blank_rows[:-1][np.diff(blank_rows) > 2]:
        h_line.append((0, y, last_x, y))
    for x in blank_cols[:-1][np.diff(blank_cols) > 2]:
        v_line.append((x, 0, x, last_y))

    return h_line, v_line


def _combine_rect(h_lines, v_lines, w, h):
    """Turn separator lines into the grid of rectangles they enclose.

    The x coordinates of the vertical lines and the y coordinates of the
    horizontal lines, together with the image borders ``0`` and ``w - 1`` /
    ``h - 1``, form the grid axes. Every cell between two neighbouring axis
    values becomes a rectangle, except cells that are 10 pixels or less in
    width or height.

    :param h_lines: horizontal lines as ``(x1, y1, x2, y2)`` tuples; only
        ``y1`` is used
    :param v_lines: vertical lines as ``(x1, y1, x2, y2)`` tuples; only
        ``x1`` is used
    :param w: image width
    :param h: image height
    :return: list of ``(y_start, x_start, y_end, x_end)`` tuples in row-major
        order
    """
    # Always include the first and last column / row as grid boundaries.
    x_axis = sorted({0, w - 1} | {item[0] for item in v_lines})
    y_axis = sorted({0, h - 1} | {item[1] for item in h_lines})

    rects = []
    for y_start, y_end in zip(y_axis, y_axis[1:]):
        if y_end - y_start <= 10:
            continue  # band too thin to be a real sub-image
        for x_start, x_end in zip(x_axis, x_axis[1:]):
            if x_end - x_start <= 10:
                continue
            rects.append((y_start, x_start, y_end, x_end))
    return rects


def img_split(img_file, threshold_value=180, img_show=False):
    """Split a composite image into sub-images along blank bands or ruled lines.

    The image is loaded in colour, converted to grayscale and passed through
    ``color_nomal``. Separator lines are first searched with the projection
    method (``_img_split_with_shadow``); if that finds none, Hough line
    detection (``_img_split_with_hough``) is used instead. The lines are
    turned into rectangles by ``_combine_rect`` and each rectangle is cut out
    of the colour image. With no separators at all, the result is a single
    rectangle covering the image.

    :param img_file: path of the input image
    :param threshold_value: not used by this function
    :param img_show: when True, show a preview window with the detected
        rectangles and block until a key is pressed
    :return: list of sub-images; each one is a NumPy slice (view) of the
        loaded colour image
    :raises OSError: if the image cannot be read
    """
    img = cv2.imread(img_file, 1)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise OSError('cannot read image file: {}'.format(img_file))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # NOTE(review): color_nomal is defined outside this section; presumably a
    # colour/contrast normalisation step -- confirm.
    gray = color_nomal(gray)
    h, w = img.shape[:2]

    h_line, v_line = _img_split_with_shadow(gray)
    if not h_line and not v_line:
        h_line, v_line = _img_split_with_hough(gray)

    rects = _combine_rect(h_line, v_line, w, h)
    split_imgs = [img[top:bottom, left:right] for top, left, bottom, right in rects]

    if img_show:
        # Draw on a copy: the sub-images above share memory with img, so
        # drawing on img itself would paint the borders into the results.
        preview = img.copy()
        for top, left, bottom, right in rects:
            cv2.rectangle(preview, (int(left), int(top)), (int(right), int(bottom)),
                          (0, 255, 0), 2)
        # cv2.resize takes (width, height); scale both sides equally so the
        # preview keeps the aspect ratio of the source image.
        preview = cv2.resize(preview, (int(w * 0.7), int(h * 0.7)))
        cv2.imshow('cece', preview)
        cv2.waitKey()
    return split_imgs

分割結果
這裡寫圖片描述

這裡寫圖片描述

2. 文字方向校正，可以使用FFT變換後校正然後再逆變換回來，也可以直接使用查詢包含文字區域的矩形，旋轉這個矩形，但是這種方法對於垂直的影象就沒效果了，因為會發現包含文字的矩形區域就是方方正正的不用校正。在二值化的時候採用了自適應二值化，這樣做的好處是能更精確的定位文字區域，全域性二值化可能會造成有些地方一團黑。

def rotated_img_with_fft(gray, is_show=True):
    """Deskew an image using the dominant line direction of its Fourier spectrum.

    The image is zero-padded to an optimal DFT size, transformed with a 2-D
    FFT, and the normalised log-magnitude spectrum is thresholded at 150.
    Line segments in the thresholded spectrum are detected with
    ``cv2.HoughLinesP``; the first segment that is neither (near) horizontal
    nor (near) vertical defines the skew angle. The input is then rotated by
    that angle on a canvas enlarged to hold the whole rotated image. If no
    usable segment is found the angle is 0.

    :param gray: grayscale image to deskew
    :param is_show: when True (the default, matching the previous behaviour)
        display the result in a window and block until a key is pressed
    :return: the rotated image
    """
    # Pad on the right/bottom up to the optimal DFT size.
    h, w = gray.shape[:2]
    new_h = cv2.getOptimalDFTSize(h)
    new_w = cv2.getOptimalDFTSize(w)
    right = new_w - w
    bottom = new_h - h
    nimg = cv2.copyMakeBorder(gray, 0, bottom, 0, right,
                              borderType=cv2.BORDER_CONSTANT, value=0)

    # Fourier transform, shifted so the zero frequency sits in the centre.
    f = np.fft.fft2(nimg)
    fshift = np.fft.fftshift(f)

    # Log-magnitude spectrum normalised to 0..255, then binarised.
    fft_img = np.log(np.abs(fshift))
    fft_img = (fft_img - np.amin(fft_img)) / (np.amax(fft_img) - np.amin(fft_img))
    fft_img *= 255
    ret, thresh = cv2.threshold(fft_img, 150, 255, cv2.THRESH_BINARY)

    # Hough line detection on the binarised spectrum.
    thresh = thresh.astype(np.uint8)
    lines = cv2.HoughLinesP(thresh, 1, np.pi / 180, 30, minLineLength=40, maxLineGap=100)
    # HoughLinesP yields None when nothing is detected.
    lines1 = [] if lines is None else lines[:, 0, :]

    one_degree = np.pi / 180
    half_pi = np.pi / 2
    angle = 0
    for x1, y1, x2, y2 in lines1:
        if x2 - x1 == 0:
            continue  # exactly vertical: slope undefined
        # Compare the line's angle (not its slope) against the axis
        # directions, so that near-vertical lines are really skipped.
        line_angle = math.atan((y2 - y1) / (x2 - x1))
        if abs(line_angle) < one_degree or abs(abs(line_angle) - half_pi) < one_degree:
            continue
        # NOTE(review): the sign of the skew is discarded here, so the image
        # is always rotated in the same direction -- confirm this is intended.
        angle = abs(line_angle)
        break

    angle = angle * (180 / np.pi)
    print(angle)

    # Rotate about the centre and grow the canvas so nothing is cropped.
    center = (w // 2, h // 2)
    abs_sin = fabs(sin(radians(angle)))
    abs_cos = fabs(cos(radians(angle)))
    height_1 = int(w * abs_sin + h * abs_cos)
    width_1 = int(h * abs_sin + w * abs_cos)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    M[0, 2] += (width_1 - w) / 2
    M[1, 2] += (height_1 - h) / 2
    rotated = cv2.warpAffine(gray, M, (width_1, height_1), flags=cv2.INTER_CUBIC,
                             borderMode=cv2.BORDER_REPLICATE)
    if is_show:
        cv2.imshow('rotated', rotated)
        cv2.waitKey(0)
    return rotated


def rotated_img_with_radiation(gray, is_show=False):
    """Deskew an image with an affine rotation derived from its text bounding box.

    The image is binarised with an inverted Gaussian adaptive threshold, the
    minimum-area rectangle around all foreground pixels is computed, and the
    image is rotated about its centre by the angle derived from that
    rectangle. The output has the same size as the input.

    :param gray: grayscale image to deskew
    :param is_show: when True, display the thresholded and the rotated image
        (with the angle written on the preview) and block until a key is
        pressed
    :return: the rotated image; an unrotated copy of ``gray`` when the
        threshold yields no foreground pixels
    """
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)
    if is_show:
        cv2.imshow('thresh', thresh)

    # Coordinates of every foreground pixel. Cast to int32: np.where yields
    # platform-sized ints, while cv2.minAreaRect is documented for 32-bit
    # int / float point sets.
    coords = np.column_stack(np.where(thresh > 0)).astype(np.int32)
    if coords.shape[0] == 0:
        # Nothing to fit a rectangle to; leave the image as it is.
        return gray.copy()

    # The rotation of the minimum-area rectangle follows the text rotation.
    angle = cv2.minAreaRect(coords)[-1]
    print(angle)
    # NOTE(review): this normalisation assumes minAreaRect reports angles in
    # [-90, 0); newer OpenCV releases reportedly use a different range --
    # confirm against the installed version.
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle

    # Affine rotation about the image centre.
    h, w = gray.shape[:2]
    center = (w // 2, h // 2)
    print(angle)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(gray, M, (w, h), flags=cv2.INTER_CUBIC,
                             borderMode=cv2.BORDER_REPLICATE)
    if is_show:
        # Annotate a copy so the returned image stays free of overlay text.
        preview = rotated.copy()
        cv2.putText(preview, 'Angle: {:.2f} degrees'.format(angle), (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        print('[INFO] angel :{:.3f}'.format(angle))
        cv2.imshow('Rotated', preview)
        cv2.waitKey()
    return rotated

這裡寫圖片描述
仿射校正結果：

原圖：

仿射校正：

fft校正，計算的時候有大概2度的誤差

ocr影象預處理-影象分割、文字方向校正

ocr影象預處理-影象分割、文字方向校正

資料科學和人工智慧技術筆記四、影象預處理

《影象處理、分析與機器視覺》（第4版）閱讀筆記——第五章影象預處理

tf.data.Dataset影象預處理詳解

pytorch 目標檢測影象預處理

影象預處理 | 【附高清經典影象處理書籍下載】

計算機視覺基礎~影象預處理（中）

影象預處理 && C實現

Tensorflow資料輸入---TFRecords詳解\TFRecords影象預處理

halcon影象預處理之影象增強

matlab影象預處理中值濾波y與雙邊濾波

halcon影象預處理之影象銳化

openai/gym中的影象預處理

ResNet--影象預處理

【OpenCV筆記】影象預處理

C++ Opencv——影象預處理——濾波

Tensorflow影象預處理（2）大小調整

Tensorflow常見問題處理 TensorFlow 影象預處理（一）影象編解碼，影象尺寸調整 tensorflow(一)：圖片處理 TensorFlow 處理圖片

計算機視覺（1）影象預處理

用python（PIL庫）影象預處理

ocr影象預處理-影象分割、文字方向校正

相關推薦