Jacketinsysu的專欄
阿新 • • 發佈:2018-12-23
最近在知乎上看到一個帖子,總結了各種常見的排序演算法,並用python一一實現了,不過歸併排序的迭代寫法,題主說他不會寫,我就試了一下,其實很簡單。下面會先分析遞迴的時候實際上做了哪些事,然後迭代如何重現這些事。先用C++寫,因為估計看這篇部落格的大部分人對C++比較熟,最後會分享python的版本,實現過程基本一模一樣。
遞迴的時候做了什麼?
先po一下遞迴的虛擬碼:
// Merge step (pseudocode): the ranges [head1, head2-1] and [head2, tail2] are
// each already sorted and now have to be merged into one sorted range.
void mergeSorted(arr, head1, head2, tail2) {
// Body intentionally elided in this pseudocode.
}
// Recursive merge sort (pseudocode) over the inclusive range [left, right].
void mergeSort(arr, left, right) {
// A range holding at most one element needs no work.
if (left >= right)
return;
// Split point: the midpoint of the range, computed with a right shift.
mid = (left + right) >> 1;
// Sort each half on its own, then merge the two sorted halves.
mergeSort(arr, left, mid);
mergeSort(arr, mid+1, right);
mergeSorted(arr, left, mid+1, right);
}
可以看出,遞迴的時候,並沒有做什麼特別的事,只是從中間分成兩半,每一半自己去做排序,最後合併起來,是後序遍歷,從葉子節點往回看:
1. 區間的長度都為1,直接返回,不用合併;
2. 區間的長度為2,兩個子區間都排好序了,將它們合併起來;
3. 區間的長度為4,兩個子區間都排好序了,將它們合併起來;
4. ……
迭代怎麼寫?
從上面的分析可以看出,其實只需要列舉步長1,2,4,……,對由每個步長分開的區間,都合併一下。
比如,一開始陣列為[8 7 6 5 4 3 2 1]。
第一遍,步長為1,將相鄰的兩個區間合併(方括號內是該次剛合併完成的區間):
[7 8] 6 5 4 3 2 1
7 8 [5 6] 4 3 2 1
7 8 5 6 [3 4] 2 1
7 8 5 6 3 4 [1 2]
第二遍,步長為2,將相鄰的兩個區間合併(方括號內是該次剛合併完成的區間):
[5 6 7 8] 3 4 1 2
5 6 7 8 [1 2 3 4]
第三遍,步長為4,將相鄰的兩個區間合併(方括號內是該次剛合併完成的區間):
[1 2 3 4 5 6 7 8]
應該很簡單就寫出來吧?注意一下邊界即可:
// Merge two adjacent, already-sorted ranges of v in place:
// [head1, head2-1] and [head2, tail2] (all bounds inclusive).
void mergeSortHelper(vector<int>& v, int head1, int head2, int tail2) {
    const int leftEnd = head2 - 1;
    const int total = tail2 - head1 + 1;
    vector<int> merged(total);
    int left = head1, right = head2, out = 0;

    // Both runs still have elements: copy the smaller front element.
    // On ties the left run wins, which keeps the merge stable.
    while (left <= leftEnd && right <= tail2) {
        if (v[right] < v[left])
            merged[out++] = v[right++];
        else
            merged[out++] = v[left++];
    }
    // Exactly one of the runs may have leftovers; append them unchanged.
    while (left <= leftEnd)
        merged[out++] = v[left++];
    while (right <= tail2)
        merged[out++] = v[right++];

    // Copy the merged result back over the original range.
    for (int k = 0; k < total; ++k)
        v[head1 + k] = merged[k];
}
// Bottom-up (iterative) merge sort of v in ascending order.
//
// Runs of length `step` (1, 2, 4, ...) are assumed sorted and neighbouring
// runs are merged pairwise with mergeSortHelper until one run covers v.
// The element count is handled as an int, as mergeSortHelper takes int bounds.
void mergeSort(vector<int>& v) {
    const int len = static_cast<int>(v.size());
    // Once step >= len the whole vector is a single sorted run, so stop there
    // instead of doing one more full-length pass that merges nothing.
    for (int step = 1; step < len; step <<= 1) {
        const int offset = step + step;
        // Only merge where a right-hand run actually exists (index + step < len);
        // a trailing left-only chunk is already sorted from the previous pass.
        for (int index = 0; index + step < len; index += offset)
            mergeSortHelper(v, index, index + step, min(index + offset - 1, len - 1));
    }
}
總體的測試程式碼:
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>
using namespace std;
// Tip: un-comment the debug output inside mergeSortHelper to watch, step by
// step, how the sort proceeds.

// Print every element of v followed by a space, then end the line.
void display(const vector<int>& v) {
    for (vector<int>::const_iterator it = v.begin(); it != v.end(); ++it)
        cout << *it << ' ';
    cout << endl;
}
// Return true when v is identical to an ascending-sorted copy of itself.
bool isSorted(const vector<int>& v) {
    vector<int> expected(v);
    sort(expected.begin(), expected.end());
    // Same length by construction, so == is an element-by-element comparison.
    return v == expected;
}
// Merge two adjacent, already-sorted ranges of v in place:
// [head1, head2-1] and [head2, tail2] (all bounds inclusive).
void mergeSortHelper(vector<int>& v, int head1, int head2, int tail2) {
    const int leftEnd = head2 - 1;
    const int total = tail2 - head1 + 1;
    // Debug output (un-comment to trace each merge):
    // cout << "Before " << head1 << ' ' << leftEnd << ' ' << head2 << ' ' << tail2 << endl;
    // display(v);
    vector<int> merged(total);
    int left = head1, right = head2, out = 0;

    while (left <= leftEnd || right <= tail2) {
        const bool leftDone = left > leftEnd;
        const bool rightDone = right > tail2;
        // Take from the right run when the left one is exhausted, or when both
        // have elements and the right front is strictly smaller (ties go left).
        if (leftDone || (!rightDone && v[right] < v[left]))
            merged[out++] = v[right++];
        else
            merged[out++] = v[left++];
    }

    // Copy the merged result back over the original range.
    for (int k = 0; k < total; ++k)
        v[head1 + k] = merged[k];
    // cout << "After ";
    // display(v);
    // cout << endl;
}
// Bottom-up (iterative) merge sort of v in ascending order.
//
// Runs of length `step` (1, 2, 4, ...) are assumed sorted and neighbouring
// runs are merged pairwise with mergeSortHelper until one run covers v.
// The element count is handled as an int, as mergeSortHelper takes int bounds.
void mergeSort(vector<int>& v) {
    const int len = static_cast<int>(v.size());
    // Once step >= len the whole vector is a single sorted run, so stop there
    // instead of doing one more full-length pass that merges nothing.
    for (int step = 1; step < len; step <<= 1) {
        const int offset = step + step;
        // Only merge where a right-hand run actually exists (index + step < len);
        // a trailing left-only chunk is already sorted from the previous pass.
        for (int index = 0; index + step < len; index += offset)
            mergeSortHelper(v, index, index + step, min(index + offset - 1, len - 1));
    }
}
// Replace the contents of v with `size` pseudo-random values drawn from
// rand(), each reduced into the range [0, MAX).
void gen(vector<int>& v, size_t size) {
    static const int MAX = 99997;
    v.assign(size, 0);
    for (vector<int>::iterator it = v.begin(); it != v.end(); ++it)
        *it = rand() % MAX;
}
// Self-test driver: sorts random vectors of every size in [0, 10000) with
// mergeSort and checks each result with isSorted.
//
// Returns 0 when every size passes and 1 as soon as one size fails, so the
// outcome is visible to scripts as well as in the printed log.
int main() {
    // Manual smoke test on a small reversed array; un-comment to try it:
    // vector<int> v;
    // for (int i = 0; i < 10; ++i)
    //     v.push_back(10-i);
    // mergeSort(v);

    // Seed from the clock; the cast makes the time_t -> unsigned narrowing explicit.
    srand(static_cast<unsigned int>(time(0)));
    for (size_t size = 0; size < 10000; ++size) {
        vector<int> v;
        gen(v, size);
        mergeSort(v);
        if (!isSorted(v)) {
            cout << "FAIL with size = " << size << endl;
            // Stop at the first failure and report it through the exit status.
            return 1;
        }
        cout << "GOOD with size = " << size << endl;
    }
    return 0;
}
用python來實現
實現原理跟上面說的一樣,直接po程式碼了:
# -*- coding:utf-8 -*-
import random
# Merge two adjacent, already-sorted ranges: [head1, tail1] and [head2, tail2],
# where tail1 == head2 - 1 and all bounds are inclusive.
def mergeSortHelper(v, head1, head2, tail2):
    """Merge the sorted runs v[head1..head2-1] and v[head2..tail2] in place.

    Args:
        v: Mutable, index-addressable sequence whose two runs are each sorted.
        head1: Index of the first element of the left run.
        head2: Index of the first element of the right run; the left run
            ends at head2 - 1.
        tail2: Index of the last element of the right run (inclusive).

    Returns:
        None. v[head1..tail2] is overwritten with the merged result.
    """
    tail1 = head2 - 1
    start = head1
    tmp = []
    while head1 <= tail1 and head2 <= tail2:
        # Each branch advances its own cursor. Deciding which cursor to move by
        # re-comparing the copied value with v[head1] breaks for values that do
        # not equal themselves (e.g. NaN) and costs an extra comparison.
        # "<=" takes from the left run on ties, which keeps the merge stable.
        if v[head1] <= v[head2]:
            tmp.append(v[head1])
            head1 += 1
        else:
            tmp.append(v[head2])
            head2 += 1
    # At most one run still has elements; append its remainder unchanged.
    tmp.extend(v[i] for i in range(head1, tail1 + 1))
    tmp.extend(v[i] for i in range(head2, tail2 + 1))
    # Copy the merged values back over the original range.
    for position, value in enumerate(tmp):
        v[start + position] = value
def mergeSort(v):
    """Sort v in place, ascending, with a bottom-up (iterative) merge sort.

    Runs of length 1, 2, 4, 8, ... are assumed sorted and neighbouring runs
    are merged pairwise with mergeSortHelper until one run covers all of v.

    Args:
        v: Mutable sequence supporting len() and index access.

    Returns:
        None. v is sorted in place.
    """
    length = len(v)
    step = 1
    # Once step >= length the whole sequence is one sorted run, so stop there
    # instead of doing one more full-length pass that merges nothing.
    while step < length:
        offset = step << 1
        # Stopping at length - step guarantees index + step is a valid start of
        # a right-hand run; a trailing left-only chunk is already sorted.
        for index in range(0, length - step, offset):
            mergeSortHelper(v, index, index + step,
                            min(index + offset - 1, length - 1))
        step = offset
# Randomly generate a list holding `size` elements.
def genData(size):
    """Return a new list of `size` random integers drawn from [0, 99997)."""
    MAX = 99997
    return [random.randrange(0, MAX) for _ in range(size)]
# Verify that v really is sorted.
def isSorted(v):
    """Return True when v matches sorted(v) element for element, else False."""
    expected = sorted(v)
    # Same length by construction, so zip pairs up every position.
    return not any(actual != wanted for actual, wanted in zip(v, expected))
# Self-test: sort random lists of every size in [0, 10000) and report, per
# size, whether the result came out sorted.
if __name__ == '__main__':
    for size in range(0, 10000):
        v = genData(size)
        mergeSort(v)
        template = 'Good at size = {0}' if isSorted(v) else 'Fail at size = {0}'
        print(template.format(size))