利用python製作馬賽克圖
阿新 • • 發佈:2019-01-01
目錄
先是看到〈英雄聯盟大拼圖〉這篇文章,作者用爬蟲抓取了LOL官網所有英雄的皮膚(造型)圖片,再拼成一張大拼圖,不過他的拼圖是用其他軟體做的。我就想到python應該也能做拼圖,於是搜到〈使用Python簡單實現馬賽克拼圖〉這篇技術文,照貓畫虎實踐了一番。
爬蟲
因為目前課題主要做資料清洗、視覺化,對爬蟲沒什麼要求,就暫時沒深入學習,這段程式碼也沒徹底看懂。。。233,因此只對部分程式碼做筆記整理
import requests
import json
import time
import re
# HTTP headers passed to every requests.get call below; the User-Agent value
# is the identification string of a desktop Chrome 63 browser.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}
def get_hero(url):
    """Download the champion index script and fetch the skins of every hero in it.

    Args:
        url: Address of the script that embeds the champion table
            (``champion.js`` in this file's ``main``).

    Raises:
        ValueError: If the champion table cannot be located in the response
            (``json.loads`` also raises a ``ValueError`` subclass when the
            extracted fragment is not valid JSON).
    """
    # A timeout keeps one stalled connection from blocking the crawl forever.
    response = requests.get(url=url, headers=headers, timeout=10)
    html = response.text
    result = re.findall('if.*?champion=(.*?)data', html, re.S)
    if not result:
        raise ValueError('champion table not found in response from ' + url)
    # The last two captured characters are dropped and a closing brace is
    # appended so that the fragment parses as a JSON object.
    data = json.loads(result[0][:-2] + '}')
    for hero in data['keys'].values():
        get_skin(hero)
        # Pause between heroes; presumably to avoid anti-crawler blocking.
        time.sleep(2)
def get_skin(hero):
    """Download every skin image of one hero into the current directory.

    Args:
        hero: Hero identifier used to build the URL of the hero's ``.js``
            description file.

    Each skin id found in that file is fetched as a JPEG and written to a
    file named ``"images" + <skin id> + ".jpg"``.
    """
    url = 'http://lol.qq.com/biz/hero/' + hero + '.js'
    response = requests.get(url=url, headers=headers, timeout=10)
    html = response.text
    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    skin_ids = re.findall(r'"id":"(\d{4,6})","num"', html, re.S)
    for skin_id in skin_ids:
        url_image = "http://ossweb-img.qq.com/images/lol/web201310/skin/big" + skin_id + ".jpg"
        res = requests.get(url=url_image, headers=headers, timeout=10).content
        # NOTE(review): "images" is a file-name prefix here, not a directory;
        # possibly "images/" was intended - confirm before changing the path.
        # The with-statement closes the file, so no explicit close() is needed.
        with open("images" + str(skin_id) + ".jpg", "wb") as ob:
            ob.write(res)
        print(str(skin_id) + "is Done")
def main():
    """Announce the start of the crawl and process the champion index script."""
    print('start the work')
    get_hero('http://lol.qq.com/biz/hero/champion.js')


# Run the crawler only when this file is executed as a script.
if __name__ == '__main__':
    main()
re.findall('if.*?champion=(.*?)data', html, re.S)
re 正則模組很多地方都用到,很重要,長文詳解python正則表示式這篇很詳細,深入淺出,很棒。
compile函式、match函式、search函式、findall函式、finditer函式、split函式、sub函式、subn函式小結:
- 函式辨析:match和search的區別
re.match只匹配字串的開始,如果字串開始不符合正則表示式,則匹配失敗,函式返回None;
re.search匹配整個字串,直到找到一個匹配。
- 函式辨析:3個匹配函式match、search、findall
match 和 search 只匹配一次,匹配不到返回None;findall 查詢所有匹配結果。
- 函式返回值
函式re.match和re.search 返回匹配物件,re.finditer 返回逐一產生匹配物件的迭代器,而findall、split返回列表。
- re.compile函式是個謎(它的作用是把正則表示式預先編譯成 Pattern 物件,方便重複使用)。
對於小資料來說,re.search 比re.match 好用
re.split 在不同的資料提取裡面相對有用
如之前要對2017-1-2 這樣的時間序列做處理,只提取當中的月份‘1’,可以用split函式(注意:下面的程式碼用的是字串本身的 str.split 方法,而不是 re.split)
# Keep only the two columns of interest from the existing DataFrame `df`.
dom = df[['Date', 'AQI']]
# Each date looks like "2017-1-2": split on "-" and take index 1, the month.
# The loop variable is called `date` rather than `time` so that the `time`
# module is not shadowed.
list1 = [date.split('-')[1] for date in dom['Date']]
df['month'] = list1
time.sleep(t) 函式,t代表秒數
time.sleep(2) #推遲執行的秒數,應該是為了防止被反爬
拼圖
程式碼非常多,有些細節也沒仔細理解,大概的思路是先將要處理的目標圖片分割成若干小圖片,然後根據若干小圖片的灰度值或RGB可以得到唯一的雜湊值,然後去圖片庫中找雜湊值近似的圖片代替就可以了
但是這個程式碼有一個小問題,只能用jpg格式的圖片,但很多時候我們處理的圖片都是png格式的,這就需要我們手動轉化一下,可以考慮用格式工廠轉化。具體案例如下:
原圖
處理後的馬賽克圖片
完整程式碼:
import os
import time
from functools import reduce
from threading import Thread
from PIL import Image
class MosaicMaker:
    """Build a photo mosaic of a target image from a directory of tile images.

    The target image is cut into equally sized cells. For every cell a key is
    computed (average RGB, average gray level, or a 64-bit average hash,
    depending on ``mode``) and the library image with the closest key is
    pasted into that cell. The result is finally blended with the target.

    Args:
        db_path: Directory containing the library (tile) images.
        aim_path: Path of the target image.
        out_path: Path the finished mosaic is saved to.
        sub_width: Tile width; recomputed by ``make()`` from the target size.
        sub_height: Tile height; recomputed by ``make()`` from the target size.
        min_unit: Scale factor used when the tile size is derived.
        mode: Matching strategy: ``"RGB"``, ``"gray"`` or ``"hash"``.
        default_w: Width the target is resized to when its own dimensions
            give too small a common divisor.
        default_h: Height used together with ``default_w``.
    """

    class __SubTask:
        """One mosaic cell: find the best tile for it and paste it."""

        def __init__(self, n, cur_sub_im, new_im, m, box):
            self.n = n                    # sequence number of the cell
            self.cur_sub_im = cur_sub_im  # region cropped from the target
            self.new_im = new_im          # canvas the mosaic is assembled on
            self.m = m                    # owning MosaicMaker
            self.box = box                # (left, upper, right, lower)

        def work(self):
            """Compute the cell's key, look up the closest tile, paste it."""
            cur_sub_key = self.m.cal_key(self.cur_sub_im)
            fit_sub = self.m.find_key(cur_sub_key)
            self.new_im.paste(fit_sub, self.box)

    class __ReadTask:
        """One library image: load it, key it, and store it at tile size."""

        def __init__(self, n, full_path, fin_w, fin_h, m):
            self.n = n                  # sequence number of the image
            self.full_path = full_path  # file to open
            self.fin_w = fin_w          # final tile width
            self.fin_h = fin_h          # final tile height
            self.m = m                  # owning MosaicMaker

        def read(self):
            """Open the image and register its resized copy under its key."""
            print("開始讀取第%d張圖片" % self.n)
            cur = Image.open(self.full_path)
            # The key is computed on the full-size image, before resizing.
            key = self.m.cal_key(cur)
            # Image.LANCZOS is the filter formerly also exposed as
            # Image.ANTIALIAS, which newer Pillow releases no longer provide.
            cur = cur.resize((self.fin_w, self.fin_h), Image.LANCZOS)
            self.m.get_all_img()[key] = cur

    def __init__(self, db_path, aim_path, out_path, sub_width=64, sub_height=64, min_unit=5, mode="RGB",
                 default_w=1600, default_h=1280):
        self.__db_path = db_path
        self.__aim_path = aim_path
        self.__out_path = out_path
        self.__sub_width = sub_width
        self.__sub_height = sub_height
        self.__min_unit = min_unit
        self.__mode = mode
        self.__default_w = default_w
        self.__default_h = default_h
        # Maps key -> resized library image.
        self.__all_img = dict()

    def make(self):
        """Public entry point: read the target, load the library, build the mosaic."""
        aim_im = Image.open(self.__aim_path)
        # The canvas is RGB and Image.blend needs both images in the same
        # mode, so non-RGB targets (e.g. RGBA PNG files) are converted first.
        if aim_im.mode != "RGB":
            aim_im = aim_im.convert("RGB")
        aim_width, aim_height = aim_im.size
        print("計運算元圖尺寸")
        if not self.__divide_sub_im(aim_width, aim_height):
            print("使用預設尺寸")
            aim_im = aim_im.resize((self.__default_w, self.__default_h), Image.LANCZOS)
            aim_width, aim_height = aim_im.size
        print("讀取相簿")
        start = time.time()
        self.__read_all_img(self.__db_path, self.__sub_width, self.__sub_height)
        print("耗時:%f秒" % (time.time() - start))
        self.__core(aim_im, aim_width, aim_height)

    def __core(self, aim_im, width, height):
        """Tile the target, match every cell in worker threads, blend and save."""
        new_im = Image.new("RGB", (width, height))
        # Cells per row and per column. They are computed separately because
        # the two counts differ for some target sizes.
        w = width // self.__sub_width
        h = height // self.__sub_height
        print("原始檔尺寸為:(w:%d h:%d)" % (width, height))
        print("子圖的尺寸為:(w:%d h:%d)" % (self.__sub_width, self.__sub_height))
        print("w:%d" % w)
        print("開始拼圖,請稍等...")
        start = time.time()
        n = 1
        thread_list = list()
        # One thread per column of cells.
        for i in range(w):
            task_list = list()
            for j in range(h):
                left = i * self.__sub_width
                up = j * self.__sub_height
                right = (i + 1) * self.__sub_width
                down = (j + 1) * self.__sub_height
                box = (left, up, right, down)
                cur_sub_im = aim_im.crop(box)
                task_list.append(self.__SubTask(n, cur_sub_im, new_im, self, box))
                n += 1
            thread_list.append(Thread(target=self.__sub_mission, args=(task_list,)))
        for t in thread_list:
            t.start()
        for t in thread_list:
            t.join()
        print("拼圖完成,共耗時%f秒" % (time.time() - start))
        # Blend the mosaic with the target image to improve the look.
        new_im = Image.blend(new_im, aim_im, 0.35)
        new_im.show()
        new_im.save(self.__out_path)

    @staticmethod
    def __sub_mission(missions):
        """Thread body for tiling: run every task of one column in order."""
        for task in missions:
            task.work()

    def __divide_sub_im(self, width, height):
        """Derive the tile size from the target dimensions.

        Returns:
            True when the target's own size is used, False when the caller
            must resize the target to the default size first.
        """
        flag = True
        g = self.__gcd(width, height)
        if g < 20:
            # Common divisor too small: fall back to the default size.
            flag = False
            width = self.__default_w
            height = self.__default_h
            g = 320
        if g == width:
            g = 320
        # Clamp to at least one unit so that a dimension smaller than g can
        # never yield a zero-sized tile (which would divide by zero later).
        self.__sub_width = self.__min_unit * max(1, width // g)
        self.__sub_height = self.__min_unit * max(1, height // g)
        return flag

    def __read_all_img(self, db_path, fin_w, fin_h):
        """Load every file of the library directory using five threads.

        Args:
            db_path: Library directory.
            fin_w: Final tile width.
            fin_h: Final tile height.
        """
        buckets = [[] for _ in range(5)]
        n = 1
        for file_name in os.listdir(db_path):
            # os.path.join keeps the path valid on every operating system.
            full_path = os.path.join(db_path, file_name)
            if os.path.isfile(full_path):
                buckets[n % 5].append(self.__ReadTask(n, full_path, fin_w, fin_h, self))
                n += 1
        workers = [Thread(target=self.__read_img, args=(bucket,)) for bucket in buckets]
        for t in workers:
            t.start()
        for t in workers:
            t.join()

    @staticmethod
    def __read_img(tasks):
        """Thread body for loading: run every read task of one bucket."""
        for task in tasks:
            task.read()

    def cal_key(self, im):
        """Return the key of image ``im`` for the configured mode.

        Returns an "r-g-b" string (RGB), an int (gray) or a 64-character
        bit string (hash); an empty string for an unknown mode.
        """
        if self.__mode == "RGB":
            return self.__cal_avg_rgb(im)
        elif self.__mode == "gray":
            return self.__cal_gray(im)
        elif self.__mode == "hash":
            return self.__cal_hash(im)
        else:
            return ""

    def find_key(self, im):
        """Return the library image whose key is closest to the key ``im``.

        Despite its name, ``im`` is a key as produced by ``cal_key``.
        Returns an empty string for an unknown mode.

        Raises:
            KeyError: If the library is empty.
        """
        if self.__mode == "RGB":
            return self.__find_by_rgb(im)
        elif self.__mode == "gray":
            return self.__find_by_gray(im)
        elif self.__mode == "hash":
            return self.__find_by_hash(im)
        else:
            return ""

    @staticmethod
    def __cal_gray(im):
        """Return the integer mean gray level of ``im``."""
        if im.mode != "L":
            im = im.convert("L")
        return sum(im.getdata()) // (im.size[0] * im.size[1])

    @staticmethod
    def __cal_avg_rgb(im):
        """Return the mean colour of ``im`` as the string "r-g-b"."""
        if im.mode != "RGB":
            im = im.convert("RGB")
        pix = im.load()
        width, height = im.size
        total_r, total_g, total_b = 0, 0, 0
        for i in range(width):
            for j in range(height):
                r, g, b = pix[i, j]
                total_r += r
                total_g += g
                total_b += b
        # Divide by the real pixel count; max() only guards an empty image.
        count = max(width * height, 1)
        return str(total_r / count) + "-" + str(total_g / count) + "-" + str(total_b / count)

    def __cal_hash(self, im):
        """Return a 64-character average hash: one bit per pixel of an 8x8 gray thumbnail."""
        im = im.resize((8, 8), Image.LANCZOS)
        im = im.convert("L")
        avg_gray = self.__cal_gray(im)
        return "".join("0" if px < avg_gray else "1" for px in im.getdata())

    @staticmethod
    def __gcd(a, b):
        """Return the greatest common divisor of ``a`` and ``b`` (Euclid's algorithm)."""
        while b:
            a, b = b, a % b
        return a

    def __nearest_key(self, distance):
        """Return the library key minimising ``distance(key)``; first one wins on ties."""
        if not self.__all_img:
            raise KeyError("image library is empty")
        return min(self.__all_img, key=distance)

    def __find_by_gray(self, gray):
        """Return the library image whose gray level is closest to ``gray``."""
        return self.__all_img[self.__nearest_key(lambda key: abs(key - gray))]

    def __find_by_hash(self, sub_hash):
        """Return the library image whose hash differs from ``sub_hash`` in the fewest bits."""
        return self.__all_img[self.__nearest_key(lambda key: self.__dif_num(sub_hash, key))]

    @staticmethod
    def __dif_num(hash1, hash2):
        """Return the number of differing positions between two 64-character hashes."""
        return sum(1 for i in range(64) if hash1[i] != hash2[i])

    def __find_by_rgb(self, sub_rgb):
        """Return the library image whose mean colour is closest to ``sub_rgb``.

        Distance is the sum of the absolute per-channel differences. No upper
        bound is imposed, so a tile is found even when every candidate is
        far away.
        """
        target = [float(c) for c in sub_rgb.split("-")]

        def distance(key):
            return sum(abs(t - float(c)) for t, c in zip(target, key.split("-")))

        return self.__all_img[self.__nearest_key(distance)]

    def get_all_img(self):
        """Return the dict mapping key -> resized library image."""
        return self.__all_img
# Script entry point: build one mosaic from the given library directory,
# target image path and output file name.
if __name__ == '__main__':
    maker = MosaicMaker("D:\\思維導圖\\LOL英雄圖片", r"C:\Users\hao\Desktop\程式設計\趣味案例\**.jpg",
                        "方形3.jpg")
    maker.make()