1. 程式人生 > >gj6 深入python的set和dict

gj6 深入python的set和dict

key display 存在 aps 哈希沖突 gif per 文件 clas

6.1 collections中的abc

from collections.abc import Mapping, MutableMapping
# dict is a mapping type

a = {}
print (isinstance(a, MutableMapping))
# True

6.2 dict的常見用法

a = {
    "lewen1": {"company": "imooc"},
    "lewen2": {"company": "imooc2"},
}
# clear: remove all items from the dict.
# a.clear()

# copy: returns a shallow copy.
new_dict = a.copy()
# A shallow copy only copies the references to the nested dicts, so this
# assignment also changes the value seen through ``a``.
new_dict["lewen1"]["company"] = "imooc3"
print(new_dict)
print(a)
# ---
# {'lewen1': {'company': 'imooc3'}, 'lewen2': {'company': 'imooc2'}}
# {'lewen1': {'company': 'imooc3'}, 'lewen2': {'company': 'imooc2'}}

---
import copy

a = {
    "lewen1": {"company": "imooc"},
    "lewen2": {"company": "imooc2"},
}
# Deep copy: allocates independent nested objects and copies the values.
new_dict = copy.deepcopy(a)
# This change does not affect the values stored in ``a``.
new_dict["lewen1"]["company"] = "imooc3"
print(new_dict)
print(a)

---
{‘lewen1‘: {‘company‘: ‘imooc3‘}, ‘lewen2‘: {‘company‘: ‘imooc2‘}}
{‘lewen1‘: {‘company‘: ‘imooc‘}, ‘lewen2‘: {‘company‘: ‘imooc2‘}}
---
 
# fromkeys
new_list = ["lewen1", "lewen2"]
# NOTE: every key is bound to the same dict object that is passed as the value.
new_dict = dict.fromkeys(new_list, {"company": "imooc"})
print(new_dict)

# new_dict["kevin"]  # KeyError: indexing a missing key raises
ret = new_dict.get("kevin", "None")  # missing key: the given default is returned
print(ret)

# items()
for key, value in new_dict.items():
    print(key, value)

# The key is missing, so setdefault stores the value and returns it.
ret_set = new_dict.setdefault("kevin", "new mem")
print(ret_set)
print(new_dict)

# update() accepts an iterable of (key, value) pairs.
new_dict.update(
    (("lewen", "imooc"),)
)
---
{‘lewen1‘: {‘company‘: ‘imooc‘}, ‘lewen2‘: {‘company‘: ‘imooc‘}}
None
lewen1 {‘company‘: ‘imooc‘}
lewen2 {‘company‘: ‘imooc‘}
new mem
{‘lewen1‘: {‘company‘: ‘imooc‘}, ‘lewen2‘: {‘company‘: ‘imooc‘}, ‘kevin‘: ‘new mem‘}
?

6.3 dict的子類

# Subclassing the built-in list and dict directly is not recommended.
class Mydict(dict):
    """dict subclass whose item assignment stores ``value * 2``."""

    def __setitem__(self, key, value):
        doubled = value * 2
        super().__setitem__(key, doubled)


# The value is not doubled when it is passed to the constructor.
my_dict = Mydict(one=1)
# my_dict["one"] = 1  # item assignment does go through the override
print(my_dict)
{‘one‘: 1}
---

from collections import UserDict


class Mydict(UserDict):
    """UserDict subclass whose item assignment stores ``value * 2``."""

    def __setitem__(self, key, value):
        doubled = value * 2
        super().__setitem__(key, doubled)


# With UserDict the value passed to the constructor is doubled as well.
my_dict = Mydict(one=1)
# my_dict["one"] = 1
print(my_dict)
{‘one‘: 2}

---
# defaultdict
from collections import defaultdict

my_dict = defaultdict(dict)
my_value = my_dict["bobby"]  # missing key: an empty dict is returned
print(my_value)
{}

6.4 set和frozenset

# set and frozenset (immutable set): unordered, no duplicate elements
s = set('abcdee')
print(s)

s2 = set(['a', 'b', 'c', 'd', 'e'])
print(s2)

s3 = {'a', 'b', 'c'}
print(type(s3))

# frozenset is immutable, so it can be used as a dict key;
# elements cannot be added to it.
s = frozenset("abcde")
print(s)
# ---
{‘a‘, ‘e‘, ‘c‘, ‘d‘, ‘b‘}
{‘a‘, ‘e‘, ‘c‘, ‘d‘, ‘b‘}
<class ‘set‘>
frozenset({‘a‘, ‘e‘, ‘c‘, ‘d‘, ‘b‘})

# ---
# Adding data to a set
s = set('abcdee')
another_set = set("cef")
s.update(another_set)
print(s)

re_set = s.difference(another_set)  # {'b', 'd', 'a'}
re_set = s - another_set            # {'b', 'd', 'a'}
re_set = s & another_set            # {'c', 'f', 'e'}
re_set = s | another_set            # {'a', 'f', 'c', 'e', 'd', 'b'}

# Sets perform very well; | & - are the set operators.
print(re_set)

print(s.issubset(re_set))
if "c" in re_set:
    print("i am in set")
# ---
{‘a‘, ‘f‘, ‘e‘, ‘c‘, ‘d‘, ‘b‘}
{‘a‘, ‘f‘, ‘c‘, ‘e‘, ‘d‘, ‘b‘}
True
i am in set

6.5 dict和set實現原理

技術分享圖片
from random import randint


def load_list_data(total_nums, target_nums,
                   file_name=r"D:\電子書\Python面試寶典Version8.1.pdf"):
    """
    Read lines from a text file into a list and draw a random lookup sample.

    :param total_nums: maximum number of lines to read from the file
    :param target_nums: number of random draws used to build the sample
    :param file_name: path of the UTF-8 text file to read; defaults to the
        path that was previously hard-coded in this function
    :return: ``(all_data, target_data)`` - the list of lines read and a list
        of distinct lines drawn from it. ``target_data`` can hold fewer than
        ``target_nums`` entries because a repeated draw is skipped, and it is
        empty when no line was read.
    """
    all_data = []
    with open(file_name, encoding="utf8", mode="r") as f_open:
        for count, line in enumerate(f_open):
            if count >= total_nums:
                break
            all_data.append(line)

    target_data = []
    if not all_data:
        return all_data, target_data

    # randint is inclusive on both ends, and the file may hold fewer lines
    # than total_nums, so the upper bound comes from what was actually read.
    last_index = len(all_data) - 1
    for _ in range(target_nums):
        candidate = all_data[randint(0, last_index)]
        if candidate not in target_data:
            target_data.append(candidate)

    return all_data, target_data

def load_dict_data(total_nums, target_nums,
                   file_name=r"D:\電子書\Python面試寶典Version8.1.pdf"):
    """
    Read lines from a text file into a dict and draw a random lookup sample.

    :param total_nums: maximum number of lines to read from the file
    :param target_nums: number of random draws used to build the sample
    :param file_name: path of the UTF-8 text file to read; defaults to the
        path that was previously hard-coded in this function
    :return: ``(all_data, target_data)`` - a dict mapping each line read to
        ``0`` and a list of distinct keys drawn from it. ``target_data`` can
        hold fewer than ``target_nums`` entries because a repeated draw is
        skipped, and it is empty when no line was read.
    """
    all_data = {}
    with open(file_name, encoding="utf8", mode="r") as f_open:
        for count, line in enumerate(f_open):
            if count >= total_nums:
                break
            all_data[line] = 0

    target_data = []
    if not all_data:
        return all_data, target_data

    # Duplicate lines collapse into one key and the file may be short, so the
    # index range comes from the number of keys, not from total_nums.
    all_data_list = list(all_data)
    last_index = len(all_data_list) - 1
    for _ in range(target_nums):
        candidate = all_data_list[randint(0, last_index)]
        if candidate not in target_data:
            target_data.append(candidate)

    return all_data, target_data


def find_test(all_data, target_data, test_times=100):
    """
    Measure the average time needed to look up every target in a container.

    :param all_data: container searched with ``in`` (e.g. a list or a dict)
    :param target_data: values to look up; iterated once per timing round,
        so it must be re-iterable
    :param test_times: number of timing rounds to average over
    :return: mean duration, in seconds, of one full pass over ``target_data``
    :raises ValueError: if ``test_times`` is not positive
    """
    import time

    if test_times <= 0:
        raise ValueError("test_times must be a positive integer")

    total_times = 0
    for _ in range(test_times):
        # The hit counter is discarded; incrementing it is kept so the timed
        # workload is the same as before.
        find = 0
        # perf_counter is the high-resolution clock meant for short
        # intervals; time.time() can be too coarse to measure a fast pass.
        start_time = time.perf_counter()
        for data in target_data:
            if data in all_data:
                find += 1
        total_times += time.perf_counter() - start_time
    return total_times / test_times


if __name__ == "__main__":
    # List-based runs at increasing sizes, kept for comparison:
    # all_data, target_data = load_list_data(10000, 1000)
    # all_data, target_data = load_list_data(100000, 1000)
    # all_data, target_data = load_list_data(1000000, 1000)

    # Dict-based runs at increasing sizes:
    # all_data, target_data = load_dict_data(10000, 1000)
    # all_data, target_data = load_dict_data(100000, 1000)
    # all_data, target_data = load_dict_data(1000000, 1000)
    all_data, target_data = load_dict_data(2000000, 1000)
    last_time = find_test(all_data, target_data)

    # Lookups in a dict are far faster than in a list.
    # Lookup time in a list grows as the list grows;
    # lookup time in a dict does not grow with the size of the dict.
    print(last_time)

#1.dict的key或者set的值 都必須是可以hash的
#不可變對象 都是可hash的, str, frozenset, tuple,自己實現的類 __hash__
#2. dict的內存花銷大(有大量空余的表元),但是查詢速度快, 自定義的對象 或者python內部的對象都是用dict包裝的
# 3. dict的存儲順序和元素添加順序有關
# 4. 添加數據有可能改變已有數據的順序(適用於 set 以及 Python 3.6 之前的 dict;Python 3.7 起 dict 保證保持插入順序)

技術分享圖片

哈希沖突後重新計算位置

在剩余空間小於三分之一時,申請更大的空間,然後數據搬遷,有可能會改變順序

技術分享圖片

gj6 深入python的set和dict