1. 程式人生 > >[原創]大資料:布隆過濾器C#版簡單實現。

[原創]大資料:布隆過濾器C#版簡單實現。

    /// <summary>
    /// A minimal Bloom filter over strings. Each item sets <see cref="BitIndexCount"/>
    /// positions in a bit array; the positions are drawn from a <see cref="Random"/>
    /// seeded with the item's hash code, so the same item always maps to the same
    /// sequence of positions within one process.
    /// </summary>
    public class BloomFilter
    {
        /// <summary>Backing bit array. Kept as a public field for compatibility with existing callers.</summary>
        public BitArray _BloomArray;

        /// <summary>Number of bits in the filter (m).</summary>
        public Int64 BloomArryLength { get; }

        /// <summary>Number of items the filter is expected to hold (n); only used for the probability estimate.</summary>
        public Int64 DataArrayLeng { get; }

        /// <summary>Number of bit positions set/tested per item (k).</summary>
        public Int64 BitIndexCount { get; }

        /// <summary>
        /// Creates an empty filter.
        /// </summary>
        /// <param name="BloomArryLength">Size of the bit array; must be greater than zero.</param>
        /// <param name="DataArrayLeng">Expected number of items to be added.</param>
        /// <param name="bitIndexCount">Number of bit positions used per item.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="BloomArryLength"/> is zero or negative.
        /// </exception>
        public BloomFilter(int BloomArryLength, int DataArrayLeng, int bitIndexCount)
        {
            // Fail at construction instead of on the first Add: an empty bit array has no valid index.
            if (BloomArryLength <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(BloomArryLength), "The bit array must contain at least one bit.");
            }

            _BloomArray = new BitArray(BloomArryLength);
            this.BloomArryLength = BloomArryLength;
            this.DataArrayLeng = DataArrayLeng;
            this.BitIndexCount = bitIndexCount;
        }

        /// <summary>
        /// Records <paramref name="str"/> in the filter.
        /// </summary>
        /// <param name="str">Item to add; must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public void Add(string str)
        {
            Random random = CreateIndexGenerator(str);
            for (int i = 0; i < BitIndexCount; i++)
            {
                _BloomArray[NextIndex(random)] = true;
            }
        }

        /// <summary>
        /// Tests whether <paramref name="str"/> may have been added.
        /// </summary>
        /// <param name="str">Item to look up; must not be null.</param>
        /// <returns>
        /// <c>false</c> if at least one of the item's bit positions is unset (the item was not added);
        /// <c>true</c> if all of them are set (the item was added, or this is a false positive).
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public bool isExist(string str)
        {
            Random random = CreateIndexGenerator(str);
            for (int i = 0; i < BitIndexCount; i++)
            {
                if (!_BloomArray[NextIndex(random)])
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Returns the hash code of <paramref name="value"/>, used as the seed for its bit positions.
        /// </summary>
        /// <param name="value">Object to hash; must not be null.</param>
        /// <returns>The result of <c>value.GetHashCode()</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
        public int GetHashCode(object value)
        {
            if (value == null)
            {
                throw new ArgumentNullException(nameof(value));
            }

            return value.GetHashCode();
        }

        /// <summary>
        /// Estimates the false-positive rate from the configured sizes using
        /// (1 - e^(-k * n / m)) ^ k, where k = <see cref="BitIndexCount"/>,
        /// n = <see cref="DataArrayLeng"/> and m = <see cref="BloomArryLength"/>.
        /// </summary>
        /// <returns>The estimated probability that a lookup of an absent item returns <c>true</c>.</returns>
        public double getFalsePositiveProbability()
        {
            double exponent = -BitIndexCount * (double)DataArrayLeng / BloomArryLength;
            return Math.Pow(1 - Math.Exp(exponent), BitIndexCount);
        }

        // Builds the deterministic position generator for one item; Add and isExist must share it.
        private Random CreateIndexGenerator(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException(nameof(str));
            }

            return new Random(GetHashCode(str));
        }

        // Random.Next(max) excludes max, so passing the full length covers every index 0..length-1.
        private int NextIndex(Random random)
        {
            return random.Next((int)BloomArryLength);
        }
    }

 

        /// <summary>
        /// Demo driver: fills a filter with the decimal strings of 0..DataArrayLeng-1, then
        /// answers membership queries read from standard input until "e" or end of input.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // 200 million bits, sized for fifty million entries, 3 bit positions per entry.
            Bloom_Filter.BloomFilter bloom = new Bloom_Filter.BloomFilter(200000000, 50000000, 3);

            // Insert fifty million entries.
            for (int i = 0; i < bloom.DataArrayLeng; i++)
            {
                bloom.Add(i.ToString());
            }

            while (true)
            {
                string c = Console.ReadLine();

                // ReadLine returns null when input is exhausted (EOF / redirected stdin);
                // treat that like the explicit "e" exit command instead of querying with null.
                if (c == null || c == "e")
                {
                    break;
                }

                Stopwatch sw = Stopwatch.StartNew();
                var temp = bloom.isExist(c);
                sw.Stop();

                // The elapsed time is reported in Stopwatch ticks.
                Console.WriteLine($"查詢:{c}\n結果:{temp}\n總耗時:{sw.ElapsedTicks}\n錯誤概率:{bloom.getFalsePositiveProbability()}");
            }
        }

結果:使用記憶體約27MB(2億個位元的位元陣列本身約佔25MB),單次查詢一般在100毫秒以內(程式輸出的耗時單位為 Stopwatch 的 ticks,而非毫秒)。