1. 程式人生 > >幾大常用排序演算法編寫及正確性、效率測試

幾大常用排序演算法編寫及正確性、效率測試

排序演算法寫了幾遍,總是過段時間就忘,故在此彙總一下。
寫排序演算法最重要的是理解它的原理,弄清楚如何遍歷,以及遍歷的終止條件。

插入排序

從左建立有序區,將右側的值依次插入該有序區,有序區中從插入的位置開始依次後移一位;從左往右遍歷

// Insertion sort, ascending, in place.
// The prefix [0, next) is always sorted; each round takes the element at
// `next` and slides it leftwards into its place inside that prefix.
void InsertSort(std::vector<int>& datas)
{
  for(size_t next = 1; next < datas.size(); ++next)
  {
    // Element being inserted into the sorted prefix.
    const int key = datas[next];

    // Shift every prefix element that is strictly greater than key one slot
    // to the right; equal elements stay in front of key.
    size_t slot = next;
    while(slot > 0 && key < datas[slot-1])
    {
      datas[slot] = datas[slot-1];
      --slot;
    }
    datas[slot] = key;
  }
}

氣泡排序

從左到右依次比較相鄰的兩個數,每一輪將最大(或最小)值冒到未排序區的末端;未排序區的右邊界從右往左收縮

// Bubble sort, ascending, in place.
// After each outer round the largest element of the unsorted prefix
// [0, unsorted) has been moved to position unsorted-1, so the prefix shrinks
// from the right by one element per round.
void BubbleSort(std::vector<int>& datas)
{
  for(size_t unsorted = datas.size(); unsorted > 1; --unsorted)
  {
    bool swapped = false;

    // Walk adjacent pairs from the front and push the larger value rightwards.
    for(size_t j = 0; j + 1 < unsorted; ++j)
    {
      if(datas[j] > datas[j+1])
      {
        // Plain temporary swap: the add/subtract trick overflows (undefined
        // behaviour) when the two values have a large combined magnitude.
        const int value = datas[j];
        datas[j] = datas[j+1];
        datas[j+1] = value;
        swapped = true;
      }
    }

    // A round without any swap means the prefix is already ordered.
    if(!swapped)
      break;
  }
}

選擇排序

從第一個位置起,依次選擇出該位置到結束的最小或最大值,放在當前位置

// Selection sort, ascending, in place.
// For every position i, find the smallest element in [i, size) and move it
// to position i with a single swap.
void SelectSort(std::vector<int>& datas)
{
  for(size_t i = 0; i + 1 < datas.size(); ++i)
  {
    // Index of the smallest element seen so far in the unsorted suffix.
    size_t min_pos = i;
    for(size_t j = i+1; j < datas.size(); ++j)
    {
      if(datas[j] < datas[min_pos])
        min_pos = j;
    }

    // One swap per position, using a temporary instead of the XOR trick.
    if(min_pos != i)
    {
      const int value = datas[i];
      datas[i] = datas[min_pos];
      datas[min_pos] = value;
    }
  }
}

歸併排序

兩兩比較排序;以2的倍數逐漸歸併

// Bottom-up (iterative) merge sort, ascending, in place with one scratch
// buffer of the same size as the input.
// Sorted runs of length 2, 4, 8, ... are merged pairwise until a single run
// covers the whole vector.
void MergeSort(std::vector<int>& datas)
{
  const size_t count = datas.size();

  // First pass: order every adjacent pair so that all runs of length 2 are sorted.
  for(size_t i = 0; i + 1 < count; i += 2)
  {
    if(datas[i] > datas[i+1])
    {
      const int value = datas[i];
      datas[i] = datas[i+1];
      datas[i+1] = value;
    }
  }

  // Scratch buffer owned by this call; released automatically on every exit path.
  std::vector<int> merged(count, 0);

  // gap is the length of the already sorted runs: 2, 4, 8, 16, ...
  for(size_t gap = 2; gap < count; gap *= 2)
  {
    // Number of elements written to the scratch buffer in this round.
    size_t merged_count = 0;

    // Merge run [idx, idx+gap) with its right neighbour. A trailing run that
    // has no right neighbour is not visited and simply stays where it is.
    for(size_t idx = 0; idx + gap < count; idx += 2*gap)
    {
      const size_t mid = idx + gap;
      // The right run may be shorter than gap at the end of the vector.
      const size_t end = (count - mid < gap) ? count : mid + gap;

      size_t left = idx;
      size_t right = mid;
      while(left < mid && right < end)
      {
        // Take from the right run only when it is strictly smaller.
        if(datas[right] < datas[left])
          merged[merged_count++] = datas[right++];
        else
          merged[merged_count++] = datas[left++];
      }
      while(left < mid)
        merged[merged_count++] = datas[left++];
      while(right < end)
        merged[merged_count++] = datas[right++];
    }

    // Copy back only what was merged; the untouched trailing run (if any)
    // is not in the scratch buffer, so a whole-buffer swap would lose it.
    for(size_t i = 0; i < merged_count; ++i)
      datas[i] = merged[i];
  }
}

堆排序

1. 第一次先從最後一個擁有子節點的節點開始,往前逐一調整,建立大根堆 2. 將堆頂依次與堆底(未排序區的最後一個元素)交換,新的堆頂再與較大的子節點交換並逐層下沉,最多調整 lg(n) 層,該堆又成為大根堆

// Sift-down step of a max-heap stored in `data`.
// Positions are 1-based: node p lives in data[p-1] and its children are the
// nodes 2p and 2p+1.
//   cur - 1-based position of the node that may violate the heap property
//   max - 1-based position of the last node that still belongs to the heap
// The value at `cur` sinks until neither child inside the heap is larger.
void HeapOrder(std::vector<int>& data, size_t cur, size_t max)
{
  for(;;)
  {
    const size_t left_child = 2*cur;
    const size_t right_child = left_child + 1;

    // Pick the position holding the largest value among the node and its children.
    size_t top = cur;
    if(left_child <= max && data[top-1] < data[left_child-1])
      top = left_child;
    if(right_child <= max && data[top-1] < data[right_child-1])
      top = right_child;

    // The node already dominates its children: the heap property holds here.
    if(top == cur)
      return;

    // Move the larger child up and keep sinking from the child's position.
    const int sinking = data[cur-1];
    data[cur-1] = data[top-1];
    data[top-1] = sinking;
    cur = top;
  }
}


// Heap sort, ascending, in place, built on HeapOrder (1-based sift-down).
void HeapSort(std::vector<int>& datas)
{
  const size_t count = datas.size();

  // Build the initial max-heap: sift down every node that has at least one
  // child, starting from the last such node (1-based position count/2).
  size_t node = count / 2;
  while(node > 0)
  {
    HeapOrder(datas, node, count);
    --node;
  }

  // Repeatedly move the current maximum (heap top) behind the shrinking heap.
  size_t heap_size = count;
  while(heap_size > 1)
  {
    --heap_size;  // the slot at index heap_size now leaves the heap

    const int top_value = datas[0];
    datas[0] = datas[heap_size];
    datas[heap_size] = top_value;

    // Restore the heap property for the remaining heap_size elements.
    HeapOrder(datas, 1, heap_size);
  }
}

快速排序

1.從第一個位置開始,從最右往左遍歷(如果從左往右比較的結果將無效):比它大或者等於,右側值遞減;比它小交換兩個數,再從左往右遍歷
2.從左往右遍歷:比它小或者等於,左側值遞增;比它大交換兩個數,再從右往左遍歷
3.直到左右兩個數相等,當前該數左側的值小於等於它,右側的值大於等於它,這個數已排序好
4.遞迴排序它的左側,它的右側

// Quick sort of the inclusive range [low, high], ascending, in place.
// Positions are 1-based: position p refers to datas[p-1], so callers must pass
// low >= 1 and high <= datas.size(). A range with low >= high is left untouched.
//
// The first element of the range is the pivot. It starts at the left end and
// jumps to the opposite end every time it is swapped with an out-of-order
// element, until both cursors meet at its final position.
//
// Only the smaller partition is handled recursively; the larger one is handled
// by the surrounding loop. This keeps the recursion depth logarithmic even for
// already sorted input, where the pivot choice makes the partitions maximally
// unbalanced.
void QuickSort(std::vector<int>& datas, size_t low, size_t high)
{
  while(low < high)
  {
    size_t left = low;
    size_t right = high;
    // true while the pivot sits at `left` (so the scan advances from the right).
    bool pivot_on_left = true;

    while(left != right)
    {
      if(datas[left-1] <= datas[right-1])
      {
        // Pair is in order: move the cursor on the side opposite the pivot.
        if(pivot_on_left)
          --right;
        else
          ++left;
      }
      else
      {
        // Pair is out of order: swap, which moves the pivot to the other end.
        const int value = datas[left-1];
        datas[left-1] = datas[right-1];
        datas[right-1] = value;

        // Step the cursor that just received the non-pivot element.
        if(pivot_on_left)
          ++left;
        else
          --right;
        pivot_on_left = !pivot_on_left;
      }
    }

    // left == right is the pivot's final position; left >= low >= 1, so
    // left-1 cannot wrap around.
    if(left - low < high - left)
    {
      QuickSort(datas, low, left-1);
      low = left + 1;
    }
    else
    {
      QuickSort(datas, left+1, high);
      high = left - 1;
    }
  }
}

/// Quick sort of the whole vector, ascending, in place.
void QuickSort(std::vector<int>& datas)
{
  // The ranged overload works on 1-based inclusive positions, so the whole
  // vector is the range from position 1 to position size().
  const size_t first_pos = 1;
  const size_t last_pos = datas.size();
  QuickSort(datas, first_pos, last_pos);
}

計數排序

分配較大空間數,出現該數遞增,遍歷空間

// Counting sort, ascending, in place.
//   datas - values to sort; every value must lie in [0, max]
//   max   - largest value that may occur in datas
// Throws std::out_of_range (from std::vector::at) when a value is negative or
// greater than max; in that case datas has not been modified.
// The counter buffer is a function-local static that is reused between calls,
// so this function must not be called concurrently from several threads.
void CountSort(std::vector<int>& datas, int max)
{
  // One counter per representable value 0..max (value 0 included).
  const size_t bucket_count = max < 0 ? 0 : static_cast<size_t>(max) + 1;

  static std::vector<int> count_array;
  count_array.assign(bucket_count, 0);

  // Counting phase. A negative value converts to a huge index, so at()
  // rejects it together with values above max.
  for(size_t i = 0; i < datas.size(); ++i)
    ++count_array.at(static_cast<size_t>(datas[i]));

  // Output phase: write every value as many times as it was counted.
  size_t cur_pos = 0;
  for(size_t value = 0; value < bucket_count; ++value)
  {
    for(int repeat = count_array[value]; repeat > 0; --repeat)
      datas[cur_pos++] = static_cast<int>(value);
  }
}

// Counting sort with the largest supported value fixed at 1000000, so that the
// function has the same one-argument signature as the other sort routines.
// Every value in datas must lie in [0, 1000000]; otherwise std::out_of_range is
// thrown (from std::vector::at) and datas is left unmodified.
// The counter buffer is a function-local static that is reused between calls,
// so this function must not be called concurrently from several threads.
void CountSort(std::vector<int>& datas)
{
  // Counters for the values 0..1000000 inclusive.
  const size_t bucket_count = 1000000 + 1;

  static std::vector<int> count_array;
  count_array.assign(bucket_count, 0);

  // Counting phase. A negative value converts to a huge index, so at()
  // rejects it together with values above the limit.
  for(size_t i = 0; i < datas.size(); ++i)
    ++count_array.at(static_cast<size_t>(datas[i]));

  // Output phase: write every value as many times as it was counted.
  size_t cur_pos = 0;
  for(size_t value = 0; value < bucket_count; ++value)
  {
    for(int repeat = count_array[value]; repeat > 0; --repeat)
      datas[cur_pos++] = static_cast<int>(value);
  }
}

基數排序

開始分配基數個空間,當前位在哪個基數上,就新增給那個基數空間;從左到右,每次排序後將當前排序值賦值給datas;從低位到高位依次排序

// LSD radix sort, ascending, in place, for non-negative integers.
//   datas - values to sort; every value must be >= 0
//   radix - number base used to split values into digits; must be >= 2
// Each pass distributes the values into `radix` buckets by the current digit
// (least significant first) and gathers them back in bucket order. Passes stop
// once no value has a digit above the current one.
// Throws std::invalid_argument when radix < 2 (a smaller radix would never
// terminate or would divide by zero). Throws std::out_of_range (from
// std::vector::at) when a negative value is met; datas may then be left
// reordered but still holds the same elements.
// The bucket storage is a function-local static that is reused between calls,
// so this function must not be called concurrently from several threads.
void RadixSort(std::vector<int>& datas, const int radix)
{
  if(radix < 2)
    throw std::invalid_argument("RadixSort: radix must be at least 2");

  static std::vector<std::vector<int> > radix_vecs;
  radix_vecs.assign(static_cast<size_t>(radix), std::vector<int>());

  // radix^pass, kept as an exact integer instead of a rounded std::pow result.
  // It never exceeds INT_MAX * radix, which fits into long long.
  long long divisor = 1;

  bool is_run = true;
  while(is_run)
  {
    is_run = false;
    for(size_t i = 0; i < datas.size(); ++i)
    {
      const long long div_value = datas[i] / divisor;
      // Another pass is needed while any value still has higher digits.
      if(div_value / radix != 0)
        is_run = true;

      // Digit selected by this pass. A negative digit converts to a huge
      // index and is rejected by at().
      const long long mod_value = div_value % radix;
      radix_vecs.at(static_cast<size_t>(mod_value)).push_back(datas[i]);
    }

    // Gather the buckets back in digit order; the order inside a bucket is
    // the order of arrival, which is what makes the passes build on each other.
    size_t idx = 0;
    for(size_t digit = 0; digit < radix_vecs.size(); ++digit)
    {
      for(size_t j = 0; j < radix_vecs[digit].size(); ++j)
        datas[idx++] = radix_vecs[digit][j];
      radix_vecs[digit].clear();
    }

    divisor *= radix;
  }
}

// LSD radix sort with the radix fixed at 10, so that the function has the same
// one-argument signature as the other sort routines.
// Sorts ascending, in place; every value in datas must be >= 0.
// Each pass distributes the values into ten buckets by the current decimal
// digit (least significant first) and gathers them back in bucket order.
// Throws std::out_of_range (from std::vector::at) when a negative value is met;
// datas may then be left reordered but still holds the same elements.
// The bucket storage is a function-local static that is reused between calls,
// so this function must not be called concurrently from several threads.
void RadixSort(std::vector<int>& datas)
{
  const int radix = 10;

  static std::vector<std::vector<int> > radix_vecs;
  radix_vecs.assign(static_cast<size_t>(radix), std::vector<int>());

  // 10^pass, kept as an exact integer instead of a rounded std::pow result.
  long long divisor = 1;

  bool is_run = true;
  while(is_run)
  {
    is_run = false;
    for(size_t i = 0; i < datas.size(); ++i)
    {
      const long long div_value = datas[i] / divisor;
      // Another pass is needed while any value still has higher digits.
      if(div_value / radix != 0)
        is_run = true;

      // Digit selected by this pass. A negative digit converts to a huge
      // index and is rejected by at().
      const long long mod_value = div_value % radix;
      radix_vecs.at(static_cast<size_t>(mod_value)).push_back(datas[i]);
    }

    // Gather the buckets back in digit order, keeping arrival order inside
    // each bucket.
    size_t idx = 0;
    for(size_t digit = 0; digit < radix_vecs.size(); ++digit)
    {
      for(size_t j = 0; j < radix_vecs[digit].size(); ++j)
        datas[idx++] = radix_vecs[digit][j];
      radix_vecs[digit].clear();
    }

    divisor *= radix;
  }
}

測試程式碼

#include <sys/time.h>
#include <time.h>

#include <algorithm>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <limits>
#include <map>
#include <stdexcept>
#include <vector>
// Brings every name of namespace std into the global namespace for the rest of this file.
using namespace std;

// Pointer to a sort routine that takes the vector to sort by reference and returns nothing.
typedef void (*SortFun)(std::vector<int>&);

// Writes a vector to any stream-like object: every element is followed by a
// comma, then the line is ended (and the stream flushed) with endl.
// Returns the stream that was passed in, so calls can be chained.
template<class SStream>
SStream& operator << (SStream& os, const std::vector<int>& vec)
{
  typedef std::vector<int>::const_iterator ConstIter;
  for(ConstIter it = vec.begin(); it != vec.end(); ++it)
    os << *it << ",";
  os << std::endl;
  return os;
}

// Appends random test data to `datas`.
//   datas     - receives the generated values (existing content is kept)
//   count_max - upper bound for the number of generated values; between 1 and
//               count_max values are appended
//   value_max - exclusive upper bound for the values; each value lies in
//               [0, value_max)
// Throws std::invalid_argument when count_max or value_max is not positive,
// because the modulo operations below would otherwise divide by zero or
// produce meaningless ranges.
// Uses rand(), so the sequence depends on the seed set via srand().
void GetData(std::vector<int>& datas, int count_max = 10000, int value_max = 1000000)
{
  if(count_max <= 0 || value_max <= 0)
    throw std::invalid_argument("GetData: count_max and value_max must be positive");

  const int rand_count = std::rand()%count_max+1;

  // The final size is known up front, so grow the storage once.
  datas.reserve(datas.size() + static_cast<size_t>(rand_count));
  for(int i = 0; i < rand_count; ++i)
    datas.push_back(std::rand()%value_max);
}

// Checks that `datas` is in non-decreasing order.
// Returns true when it is. Otherwise prints the whole vector, then the index
// and the two values of the first out-of-order neighbouring pair, and returns
// false.
bool CheckSort(const std::vector<int>& datas)
{
  // First position whose element is greater than its right neighbour.
  const std::vector<int>::const_iterator drop =
      std::adjacent_find(datas.begin(), datas.end(), std::greater<int>());
  if(drop == datas.end())
    return true;

  const size_t idx = static_cast<size_t>(drop - datas.begin());
  std::cout << datas << std::endl;
  std::cout << "idx:" << idx << ", " << datas[idx] << ", " << datas[idx+1] << std::endl;
  return false;
}

// Tells whether two result vectors are identical: same length and the same
// value at every index.
bool CheckResult(const std::vector<int>& datas1, const std::vector<int>& datas2)
{
  // Vector equality compares the sizes first and then the elements pairwise.
  return datas1 == datas2;
}
int main()
{
  srand(time(NULL));
  //測試多少次
  int test_times = 1000;
  //獲取資料
  vector<vector<int> *>* total_datas = new vector<vector<int> *>();
  //產生1000個這樣的測試資料
  for(int i = 0; i < test_times; ++i)
  {
    vector<int> *tmp = new vector<int>();
    GetData(*tmp);
    total_datas->push_back(tmp);
  }
  cout << "Create Data success!" << endl;

  //檢驗正確性
  for(size_t i = 0; i < total_datas->size(); ++i)
  {
    vector<int> *tmp = new vector<int>();
    vector<int> *tmp1 = new vector<int>();

    //前三種排序速度太慢,可先將test_times設成較小值測試正確性
    //*tmp = *((*total_datas)[i]);
    //InsertSort(*tmp);
    //CheckSort(*tmp);

    //*tmp = *((*total_datas)[i]);
    //BubbleSort(*tmp);
    //CheckSort(*tmp);

    //*tmp = *((*total_datas)[i]);
    //SelectSort(*tmp);
    //CheckSort(*tmp);

    *tmp = *((*total_datas)[i]);
    QuickSort(*tmp);
    CheckSort(*tmp);

    *tmp = *((*total_datas)[i]);
    HeapSort(*tmp);
    //如果資料不正確,可將資料輸出後進行斷點除錯
    if(!CheckSort(*tmp))
    {
      cout << *((*total_datas)[i]) << endl;
    }

    *tmp = *((*total_datas)[i]);
    MergeSort(*tmp);
    CheckSort(*tmp);


    *tmp = *((*total_datas)[i]);
    CountSort(*tmp, 1000000);
    CheckSort(*tmp);


    *tmp = *((*total_datas)[i]);
    RadixSort(*tmp);
    CheckSort(*tmp);

    *tmp = *((*total_datas)[i]);
    QuickSort(*tmp);
    *tmp1 = *((*total_datas)[i]);
    MergeSort(*tmp1);
    //比較兩種排序的結果是否一致
    if(!CheckResult(*tmp, *tmp1))
    {
      cout << *((*total_datas)[i]) << endl;
      break;
    }
    delete tmp;
    delete tmp1;

    cout << i << endl;
  }

  //檢驗時間
  vector<SortFun> funs(5);
  funs[0] = MergeSort;
  funs[1] = RadixSort;
  funs[2] = QuickSort;
  funs[3] = CountSort;
  funs[4] = HeapSort;

  for(size_t idx = 0; idx < funs.size(); ++idx)
  {
    struct timeval start_time, end_time;
    gettimeofday(&start_time, NULL);

    for(size_t i = 0; i < total_datas->size(); ++i)
    {
      vector<int> *tmp = new vector<int>();
      *tmp = *((*total_datas)[i]);
      funs[idx](*tmp);
    }

    gettimeofday(&end_time, NULL);
    cout << "runtime:" << (end_time.tv_sec-start_time.tv_sec)*1000 + (end_time.tv_usec-start_time.tv_usec)/1000 << endl;
  }

  return 0;
}

結果如下:(原文此處為執行結果截圖)
速度依次是:快速排序>歸併排序>堆排序>基數排序>計數排序
特殊情況下基數排序和計數排序可能更快,歸併排序和堆排序效率接近相等但都小於快排,其它三種蝸牛排序忽略