Python核心程式設計第三版第一章答案
阿新 • • 發佈:2018-12-16
包含了本人不斷嘗試更加嚴謹的過程: 1-1 re.match('[bh][aiu]t', 'bat').group() 1-2 re.match('[A-Z][a-z]+ [A-Z][a-z]+', 'Xhen Fiagu').group() 1-3 re.match('[A-Z][a-z-]+, [A-Z]', 'Fia, X').group() 1-4 re.match('^[a-zA-Z_]\w+', 'sendMssage').group() 1-5 >>> a = '\d+(\s\w+)+' >>> b = '2254 ddfv Jdwk JNs JKNB' >>> re.match(a, b) 1-6
import re
import webbrowser as web
# Exercise 1-30: write a small HTML page with three links, then open it
# in the system browser.
#
# Fixes vs. the original: the string used backslash line-continuations,
# which collapsed the whole document onto one physical line, and the
# closing </html> tag was missing entirely.
HTML_PAGE = """\
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Document</title>
</head>
<body>
<a href="http://www.ouc.edu.cn">Ocean University of China</a>
<a href="http://www.jandan.com/ooxx">Sister's picture</a>
<a href="http://www.w3school.com.cn">w3shcool</a>

</body>
</html>
"""

with open('1-30.html', 'w') as f:
    f.write(HTML_PAGE)
# NOTE(review): web.open with a relative path relies on the browser
# resolving it against the current working directory — works in practice
# on most platforms, but an absolute file:// URL would be more robust.
web.open('1-30.html')
1-31 不做,理由:不玩推特 1-32 在之前寫電影排行榜爬蟲的基礎上做了些修改,質量不是很高但基本功能可以實現
import requests
import bs4
import re
def open_url(url):
    """GET *url* pretending to be a desktop browser and return the response.

    Amazon serves a bot-blocking page to the default requests User-Agent,
    so a real browser UA string is sent instead.
    """
    browser_ua = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134'
    )
    return requests.get(url, headers={'User-Agent': browser_ua})
def find_books(res):
    """Parse one Amazon bestseller page into ranked result lines.

    Returns a list of strings of the form
    ``"<rank>.<title> 作者:<author-row> \\n"``, one per book found.

    Fixes vs. the original: ``str(each.img) != None`` was always true
    (``str()`` never returns ``None`` — the intent was to skip entries
    without a cover image); ``re.search(...).group()`` could raise
    ``AttributeError`` on a non-matching entry; ``ranks[2*i]`` could raise
    ``IndexError`` when fewer author rows than books were found; the
    unused ``messages`` computation (dead code) was removed.
    """
    soup = bs4.BeautifulSoup(res.text, 'html.parser')

    # Book titles live in the alt="" attribute of each cover image.
    books = []
    for each in soup.find_all("div", class_="a-section a-spacing-small"):
        if each.img is None:
            continue  # listing entry without a cover image
        match = re.search(r'alt="(.+)" height', str(each.img))
        if match is None:
            continue  # unexpected markup; skip rather than crash
        title = match.group(1)
        print(title)
        books.append(title)

    # Author rows; the page emits two such divs per book, hence the
    # 2*i indexing below.
    ranks = [' 作者:%s ' % each.text
             for each in soup.find_all("div", class_="a-row a-size-small")]

    result = []
    for i, title in enumerate(books):
        author = ranks[2 * i] if 2 * i < len(ranks) else ''
        result.append('%d.%s%s\n' % (i + 1, title, author))
    return result
def find_depth(res):
    """Return how many listing pages to crawl.

    The pager's last page number is located in the document (and the
    lookup is kept so its failure mode is unchanged), but the crawl is
    deliberately limited to a single page for now.
    """
    page = bs4.BeautifulSoup(res.text, 'html.parser')
    last_page = page.find('li', class_='a-last').previous_sibling.text
    # return int(last_page)  # enable to crawl every page
    return 1
def main():
    """Crawl the Amazon China book bestseller list and save it to a file.

    Fetches the first page to determine the crawl depth, scrapes each
    listing page, and writes the ranked lines to 圖書排名.txt (UTF-8).
    """
    host = "https://www.amazon.cn/gp/bestsellers/books/"
    first_page = open_url(host)
    result = []
    for page in range(find_depth(first_page)):
        page_url = host + '/?start=' + str(25 * page)
        result.extend(find_books(open_url(page_url)))
    with open("圖書排名.txt", 'w', encoding="utf-8") as f:
        f.writelines(result)

if __name__ == "__main__":
    main()