1. 程式人生 > >Jacketinsysu的專欄

Jacketinsysu的專欄

最近在知乎上看到一個帖子,總結了各種常見的排序演算法,並用python一一實現了,不過歸併排序的迭代寫法,題主說他不會寫,我就試了一下,其實很簡單。下面會先分析遞迴的時候實際上做了哪些事,然後迭代如何重現這些事。先用C++寫,因為估計看這篇部落格的大部分人對C++比較熟,最後會分享python的版本,實現過程基本一模一樣。

遞迴的時候做了什麼?

先po一下遞迴的虛擬碼:

// 區間[head1, head2-1]和[head2, tail2]都是排好序的,現在需要合併
void mergeSorted(arr, head1, head2, tail2) {
    // balabala...
}

void mergeSort(arr, left, right) {
    if (left >= right) return;
    mid = (left + right) >> 1;
    mergeSort(arr, left, mid);
    mergeSort(arr, mid+1, right);
    mergeSorted(arr, left, mid+1, right);
}

可以看出,遞迴的時候,並沒有做什麼特別的事,只是從中間分成兩半,每一半自己去做排序,最後合併起來,是後序遍歷,從葉子節點往回看:
1. 區間的長度都為1,直接返回,不用合併;
2. 區間的長度為2,兩個子區間都排好序了,將它們合併起來;
3. 區間的長度為4,兩個子區間都排好序了,將它們合併起來;
4. ……

迭代怎麼寫?

從上面的分析可以看出,其實只需要列舉步長1,2,4,……,對由每個步長分開的區間,都合併一下。
比如,一開始陣列為[8 7 6 5 4 3 2 1]。
第一遍,步長為1,將相鄰的兩個區間合併(注意加粗黑體):
**7 8** 6 5 4 3 2 1
7 8 **5 6** 4 3 2 1
7 8 5 6 **3 4** 2 1
7 8 5 6 3 4 **1 2**

第二遍,步長為2,將相鄰的兩個區間合併(注意加粗黑體):
**5 6 7 8** 3 4 1 2
5 6 7 8 **1 2 3 4**

第三遍,步長為4,將相鄰的兩個區間合併(注意加粗黑體):
**1 2 3 4 5 6 7 8**

應該很簡單就寫出來吧?注意一下邊界即可:

// 區間[head1, head2-1]和[head2, tail2]都是排好序的,現在需要合併
/// Merges two adjacent, already sorted runs of `v` in place.
///
/// The left run is v[head1 .. head2-1] and the right run is v[head2 .. tail2]
/// (all bounds inclusive). On return v[head1 .. tail2] holds the same values
/// in non-decreasing order; when two values compare equal, the one from the
/// left run is placed first.
///
/// @param v      vector holding both runs; only v[head1 .. tail2] is rewritten
/// @param head1  index of the first element of the left run
/// @param head2  index of the first element of the right run
/// @param tail2  index of the last element of the right run
void mergeSortHelper(vector<int>& v, int head1, int head2, int tail2) {
    const int first = head1;
    const int total = tail2 - head1 + 1;
    const int leftEnd = head2;        // one past the last element of the left run
    const int rightEnd = tail2 + 1;   // one past the last element of the right run

    // Scratch buffer: the merged result is built here, then copied back.
    vector<int> merged(total);
    int out = 0;
    int left = head1;
    int right = head2;

    // While both runs still have elements, take the smaller head; the left
    // run wins ties.
    while (left < leftEnd && right < rightEnd) {
        if (v[right] < v[left])
            merged[out++] = v[right++];
        else
            merged[out++] = v[left++];
    }

    // At most one of the two runs still has elements; drain it.
    while (left < leftEnd)
        merged[out++] = v[left++];
    while (right < rightEnd)
        merged[out++] = v[right++];

    for (int k = 0; k < total; ++k)
        v[first + k] = merged[k];
}

/// Sorts `v` in non-decreasing order with an iterative (bottom-up) merge sort.
///
/// Runs of width 1, 2, 4, ... are merged pairwise with mergeSortHelper until a
/// single run covers the whole vector. Empty and single-element vectors are
/// left untouched.
///
/// Indices are handed to mergeSortHelper as `int`, so the vector size must
/// fit in an `int`.
///
/// @param v  vector to sort in place
void mergeSort(vector<int>& v) {
    const size_t len = v.size();
    // Double the run width each pass: 1, 2, 4, ...
    // Stop as soon as one run spans the whole vector (step >= len); a further
    // pass would have nothing left to merge.
    for (size_t step = 1; step < len; step *= 2) {
        const size_t offset = step * 2;
        // Only merge where a right-hand run actually exists (index + step < len).
        // A trailing run without a partner is already sorted from the previous
        // pass and is carried over unchanged.
        for (size_t index = 0; index + step < len; index += offset) {
            // The right run may be shorter than `step` at the end of the vector.
            const size_t tail = min(index + offset, len) - 1;
            mergeSortHelper(v,
                            static_cast<int>(index),
                            static_cast<int>(index + step),
                            static_cast<int>(tail));
        }
    }
}

總體的測試程式碼:

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>
using namespace std;


// 注意被我註釋掉的地方,解開來,很直觀可以看到排序的過程是怎麼做的!
/// Prints every element of `v` to standard output on a single line.
///
/// Each element is followed by one space; the line is then terminated with
/// `endl`, so an empty vector prints just a newline.
///
/// @param v  vector whose contents are printed
void display(const vector<int>& v) {
    for (vector<int>::const_iterator it = v.begin(); it != v.end(); ++it)
        cout << *it << ' ';
    cout << endl;
}

/// Reports whether `v` is in non-decreasing order.
///
/// The check is done against a reference: a copy of `v` is sorted with the
/// standard library and compared with `v` element by element.
///
/// @param v  vector to check
/// @return   true when `v` equals its sorted copy, false otherwise
bool isSorted(const vector<int>& v) {
    vector<int> reference = v;
    sort(reference.begin(), reference.end());
    // Both vectors have the same size, so equality is an element-wise match.
    return reference == v;
}

/// Merges two adjacent, already sorted runs of `v` in place.
///
/// The left run is v[head1 .. head2-1] and the right run is v[head2 .. tail2]
/// (all bounds inclusive). On return v[head1 .. tail2] holds the same values
/// in non-decreasing order; when two values compare equal, the one from the
/// left run is placed first.
///
/// The commented-out statements print the vector before and after the merge;
/// enable them to watch the sort progress step by step.
///
/// @param v      vector holding both runs; only v[head1 .. tail2] is rewritten
/// @param head1  index of the first element of the left run
/// @param head2  index of the first element of the right run
/// @param tail2  index of the last element of the right run
void mergeSortHelper(vector<int>& v, int head1, int head2, int tail2) {
    const int base = head1;
    const int leftLast = head2 - 1;
    const int count = tail2 - head1 + 1;
    // cout << "Before " << head1 << ' ' << leftLast << ' ' << head2 << ' ' << tail2 << endl;
    // display(v);

    // Scratch buffer: the merged result is built here, then copied back.
    vector<int> buffer(count);
    int left = head1;
    int right = head2;
    for (int out = 0; out < count; ++out) {
        const bool leftEmpty = left > leftLast;
        const bool rightEmpty = right > tail2;
        // Take from the left run when the right run is used up, or when both
        // runs still have elements and the left head is not the larger one.
        const bool takeLeft = rightEmpty || (!leftEmpty && v[left] <= v[right]);
        if (takeLeft)
            buffer[out] = v[left++];
        else
            buffer[out] = v[right++];
    }

    for (int k = 0; k < count; ++k)
        v[base + k] = buffer[k];
    // cout << "After ";
    // display(v);
    // cout << endl;
}

/// Sorts `v` in non-decreasing order with an iterative (bottom-up) merge sort.
///
/// Runs of width 1, 2, 4, ... are merged pairwise with mergeSortHelper until a
/// single run covers the whole vector. Empty and single-element vectors are
/// left untouched.
///
/// Indices are handed to mergeSortHelper as `int`, so the vector size must
/// fit in an `int`.
///
/// @param v  vector to sort in place
void mergeSort(vector<int>& v) {
    const size_t len = v.size();
    // Stop as soon as one run spans the whole vector (step >= len); a further
    // pass would have nothing left to merge.
    for (size_t step = 1; step < len; step *= 2) {
        const size_t offset = step * 2;
        // Only merge where a right-hand run actually exists (index + step < len).
        // A trailing run without a partner is already sorted from the previous
        // pass and is carried over unchanged.
        for (size_t index = 0; index + step < len; index += offset) {
            // The right run may be shorter than `step` at the end of the vector.
            const size_t tail = min(index + offset, len) - 1;
            mergeSortHelper(v,
                            static_cast<int>(index),
                            static_cast<int>(index + step),
                            static_cast<int>(tail));
        }
    }
}


/// Replaces the contents of `v` with `size` pseudo-random integers.
///
/// Every element is produced as `rand() % 99997`, one call to rand() per
/// element in index order. Whatever `v` held before is discarded.
///
/// @param v     output vector; resized to exactly `size` elements
/// @param size  number of elements to generate
void gen(vector<int>& v, size_t size) {
    static const int MAX = 99997;
    v.assign(size, 0);
    for (vector<int>::iterator it = v.begin(); it != v.end(); ++it)
        *it = rand() % MAX;
}



/// Randomised self-test for mergeSort.
///
/// For every size from 0 to 9999 a random vector is generated, sorted with
/// mergeSort and checked with isSorted. A "GOOD" line is printed per passing
/// size; on the first failure a "FAIL" line is printed and the run stops.
///
/// @return 0 when every size passed, 1 as soon as one size fails, so that the
///         process exit status reflects the test outcome.
int main() {
    // Manual smoke test on a small descending array; uncomment to try it.
    // vector<int> v;
    // for (int i = 0; i < 10; ++i)
    //  v.push_back(10-i);
    // mergeSort(v);

    // Seed from the clock so each run exercises different data.
    srand(time(0));
    for (size_t size = 0; size < 10000; ++size) {
        vector<int> v;
        gen(v, size);
        mergeSort(v);
        if (!isSorted(v)) {
            cout << "FAIL with size = " << size << endl;
            // Report the failure through the exit status instead of
            // falling through to a successful return.
            return 1;
        }
        cout << "GOOD with size = " << size << endl;
    }

    return 0;
}

用python來實現

實現原理跟上面說的一樣,直接po程式碼了:

# -*- coding:utf-8 -*-
import random

# 合併兩個已排好序的區間:[head1, tail1]與[head2, tail2]
def mergeSortHelper(v, head1, head2, tail2):
    """Merge two adjacent, already sorted runs of ``v`` in place.

    The left run is ``v[head1 .. head2-1]`` and the right run is
    ``v[head2 .. tail2]`` (all bounds inclusive). Afterwards
    ``v[head1 .. tail2]`` holds the same elements in non-decreasing order;
    on ties the element from the left run comes first.

    Args:
        v: mutable sequence holding both runs; only ``v[head1 .. tail2]``
            is rewritten.
        head1: index of the first element of the left run.
        head2: index of the first element of the right run.
        tail2: index of the last element of the right run.
    """
    tail1 = head2 - 1
    start = head1
    tmp = []

    # While both runs still have elements, take the smaller head. The cursor
    # is advanced in the same branch that picked the element, so the choice
    # never depends on comparing the copied value back against the source
    # (which goes wrong for values that are not equal to themselves, e.g. NaN).
    while head1 <= tail1 and head2 <= tail2:
        if v[head1] <= v[head2]:
            tmp.append(v[head1])
            head1 += 1
        else:
            tmp.append(v[head2])
            head2 += 1

    # At most one of the two runs still has elements; drain it.
    while head1 <= tail1:
        tmp.append(v[head1])
        head1 += 1
    while head2 <= tail2:
        tmp.append(v[head2])
        head2 += 1

    # Copy the merged result back over the original range.
    for shift, value in enumerate(tmp):
        v[start + shift] = value


def mergeSort(v):
    """Sort ``v`` in place with an iterative (bottom-up) merge sort.

    Runs of width 1, 2, 4, ... are merged pairwise with ``mergeSortHelper``
    until a single run covers the whole sequence. Empty and single-element
    sequences are left untouched.

    Args:
        v: mutable sequence to sort in non-decreasing order.
    """
    length = len(v)
    step = 1
    # Run widths 1, 2, 4, 8, ...; stop once one run spans the whole sequence,
    # because a further pass would have nothing left to merge.
    while step < length:
        offset = step << 1
        # Only visit starts that have a right-hand run (index + step < length).
        # A trailing run without a partner is already sorted from the previous
        # pass and is carried over unchanged.
        for index in range(0, length - step, offset):
            # The right run may be shorter than ``step`` at the end.
            tail = min(index + offset, length) - 1
            mergeSortHelper(v, index, index + step, tail)
        step = offset


# 隨機生成大小為size的陣列
def genData(size):
    """Return a new list of ``size`` pseudo-random integers.

    Each element is drawn with ``random.randrange(0, 99997)``, i.e. from
    0 up to and including 99996.

    Args:
        size: number of elements to generate.

    Returns:
        A list of length ``size``.
    """
    upper_bound = 99997
    return [random.randrange(0, upper_bound) for _ in range(size)]


# 驗證v是否真的排好序了
def isSorted(v):
    """Report whether ``v`` is in non-decreasing order.

    The check is done against a reference: ``v`` is compared position by
    position with the result of the built-in ``sorted``.

    Args:
        v: sequence to check.

    Returns:
        True when every element matches the sorted reference, else False.
    """
    reference = sorted(v)
    return not any(actual != expected for actual, expected in zip(v, reference))


if __name__ == '__main__':
    # Randomised self-test: sort one random list of every size from 0 to 9999
    # and print one result line per size.
    for size in range(10000):
        v = genData(size)
        mergeSort(v)
        template = 'Good at size = {0}' if isSorted(v) else 'Fail at size = {0}'
        print(template.format(size))