1. 程式人生 > 海量不重複資料的生成

海量不重複資料的生成


    前幾天看到了一個專案需求，自己嘗試寫了一下：生成 3 億行資料大概需要 20 分鐘的時間，普通硬碟應該會更慢一些，改用記憶體對映或者記憶體盤應該會更快一點（懶得驗證了）。放出程式碼供大家學習。演算法是以前偶然看到的一個很巧妙的演算法；二次獲取隨機數是為了讓資料更隨機（待驗證，我覺得不需要）。

#include "stdafx.h"
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <exception>
#include <iostream>
#include <memory>
#include <random>

#include <strsafe.h>

using namespace std;


/// Returns a pseudo-random integer in the half-open range [min, max).
///
/// The result is a pure function of (min, max, seed): the same arguments
/// always yield the same value on a given standard-library implementation.
///
/// @param min  Inclusive lower bound of the result.
/// @param max  Exclusive upper bound of the result.
/// @param seed Value used to seed the generator for this single draw.
/// @return A value v with min <= v < max, or min when the range is empty
///         (max <= min).
///
/// The previous implementation used `srand(seed); rand() % (max - min)`.
/// That can never produce an offset above RAND_MAX (only guaranteed to be
/// 32767), divides by zero when max == min, and resets the process-wide
/// rand() state on every call. A locally seeded <random> engine avoids all
/// three problems.
///
/// Constructing an engine per call is comparatively expensive; callers that
/// draw many numbers in a loop should keep one engine alive instead.
int GetRandomNum(int min, int max,int seed)
{
	// An empty or inverted range has no valid draw; return the lower bound
	// rather than dividing by zero or building an invalid distribution.
	if (max <= min)
	{
		return min;
	}

	std::mt19937 engine(static_cast<std::mt19937::result_type>(seed));

	// uniform_int_distribution takes a closed interval, hence max - 1.
	std::uniform_int_distribution<int> pick(min, max - 1);
	return pick(engine);
}

/// Writes 300,000,000 distinct numbers to D:\num.txt in random order, one per
/// line, zero-padded to 10 digits, then prints the elapsed whole seconds to
/// standard output.
///
/// The numbers are the consecutive integers starting at 283823423. Uniqueness
/// comes from drawing without replacement: each step picks a random slot among
/// the values not yet written, emits it, and overwrites that slot with the
/// last remaining value so the pool shrinks by one.
///
/// @return 0 on success, 1 if the file could not be opened, a write failed,
///         or an exception (e.g. the ~1.2 GB allocation failing) was raised.
int main()
{
	const std::chrono::steady_clock::time_point t1 = std::chrono::steady_clock::now();

	const std::int64_t nArrayLength = 300000000;
	const int nFirstValue = 283823423;

	int nExitCode = 0;
	FILE* pFile = nullptr;

	try
	{
		if (_tfopen_s(&pFile, _T("D:\\num.txt"), _T("wt")) != 0 || pFile == nullptr)
		{
			pFile = nullptr;
			std::cerr << "failed to open D:\\num.txt for writing" << std::endl;
			nExitCode = 1;
		}
		else
		{
			std::unique_ptr<int[]> needArray(new int[nArrayLength]);

			for (std::int64_t idx = 0; idx < nArrayLength; idx++)
			{
				needArray[idx] = static_cast<int>(idx + nFirstValue);
			}

			// Seed one 64-bit engine once. Calling random_device for every
			// line is slow, and the index must be able to reach every slot of
			// the array, which rand() cannot (RAND_MAX may be only 32767).
			std::random_device rd;
			const std::uint64_t seedHigh = rd();
			const std::uint64_t seedLow = rd();
			std::mt19937_64 engine((seedHigh << 32) | seedLow);

			typedef std::uniform_int_distribution<std::int64_t> IndexDist;
			IndexDist dist;

			// 'end' is the index of the last value that has not been written.
			for (std::int64_t end = nArrayLength - 1; end >= 0; end--)
			{
				const std::int64_t num = dist(engine, IndexDist::param_type(0, end));

				// Narrow fprintf keeps the output plain ASCII digits in both
				// ANSI and UNICODE builds; writing a TCHAR buffer with a
				// character count as the byte count truncated UNICODE output.
				if (fprintf(pFile, "%010d\n", needArray[num]) < 0)
				{
					std::cerr << "write to D:\\num.txt failed" << std::endl;
					nExitCode = 1;
					break;
				}

				// Move the last remaining value into the consumed slot so the
				// same number can never be picked twice.
				needArray[num] = needArray[end];
			}
		}
	}
	catch (const std::exception& e)
	{
		std::cerr << "generation failed: " << e.what() << std::endl;
		nExitCode = 1;
	}
	catch (...)
	{
		std::cerr << "generation failed: unknown exception" << std::endl;
		nExitCode = 1;
	}

	// Close outside the try block so the handle is released on every path;
	// fclose also reports errors from flushing the final buffered data.
	if (pFile != nullptr && fclose(pFile) != 0)
	{
		std::cerr << "closing D:\\num.txt failed" << std::endl;
		nExitCode = 1;
	}

	const std::chrono::steady_clock::time_point t2 = std::chrono::steady_clock::now();

	std::cout << std::chrono::duration_cast<std::chrono::seconds>(t2 - t1).count() << std::endl;

	return nExitCode;
}