
Answers to Chapter 1 of Core Python Programming, 3rd Edition

These answers include my own process of repeatedly refining each pattern to make it more rigorous.

1-1
>>> re.match(r'[bh][aiu]t', 'bat').group()

1-2
>>> re.match(r'[A-Z][a-z]+ [A-Z][a-z]+', 'Xhen Fiagu').group()

1-3
>>> re.match(r'[A-Z][a-z-]+, [A-Z]', 'Fia, X').group()

1-4
>>> re.match(r'^[a-zA-Z_]\w+', 'sendMssage').group()

1-5
>>> a = r'\d+(\s\w+)+'
>>> b = '2254 ddfv Jdwk JNs JKNB'
>>> re.match(a, b)
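
To sanity-check 1-1 through 1-4 in one pass, the pattern/string pairs can be run through a small loop (a minimal sketch; the pairs are exactly the ones above):

import re

# Each answer pattern matched against its sample string.
tests = [
    (r'[bh][aiu]t', 'bat'),                      # 1-1
    (r'[A-Z][a-z]+ [A-Z][a-z]+', 'Xhen Fiagu'),  # 1-2
    (r'[A-Z][a-z-]+, [A-Z]', 'Fia, X'),          # 1-3
    (r'^[a-zA-Z_]\w+', 'sendMssage'),            # 1-4
]
for pattern, string in tests:
    m = re.match(pattern, string)
    print('%-30s -> %s' % (pattern, m.group() if m else 'no match'))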

1-6
>>> b = 'http://www.yahoo.com/'
>>> a = r'http[s]?://www.(\w+.)+com/'
>>> re.match(a, b)
<re.Match object; span=(0, 21), match='http://www.yahoo.com/'>
>>> re.match(a, b).group()
'http://www.yahoo.com/'

Optional part:
>>> a = r'http[s]?://www.(\w+.)+\w+/'
>>> b = 'https://www.ewid.dewo.edu.cn/'
>>> re.match(a, b).group()
'https://www.ewid.dewo.edu.cn/'
>>> b = 'http://127.0.0.1:88/zhandian/index.html'
>>> a = r'\w+://(\w+|\d+[./])*(\w+|\d+)(:\d+/)?(\w+|\d+[./])*(/)?'
>>> re.match(a, b).group()
'http://127.0.0.1:88/zhandian/'
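
To compare what the two optional patterns actually capture, here is a small check loop (a minimal sketch; the patterns and sample URLs are the ones from the transcript above):

import re

# Compare the www-specific pattern with the more general one from 1-6.
patterns = [
    r'http[s]?://www.(\w+.)+\w+/',
    r'\w+://(\w+|\d+[./])*(\w+|\d+)(:\d+/)?(\w+|\d+[./])*(/)?',
]
urls = [
    'http://www.yahoo.com/',
    'https://www.ewid.dewo.edu.cn/',
    'http://127.0.0.1:88/zhandian/index.html',
]
for pattern in patterns:
    for url in urls:
        m = re.match(pattern, url)
        print('%-45s -> %s' % (url, m.group() if m else 'no match'))
    print()
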
1-7
>>> re.match(r'[+-]?\d+', '-110')

1-8 and 1-9: the answers are in the back of the book.

1-10
>>> b = 2+4j
>>> a = r'(\d*[+-])?(\d+)?j?'
>>> re.match(a, b)
Traceback (most recent call last):
  File "<pyshell#12>", line 1, in <module>
    re.match(a, b)
  File "D:\python\lib\re.py", line 173, in match
    return _compile(pattern, flags).match(string)
TypeError: expected string or bytes-like object
>>> b = '2+4j'
>>> re.match(a, b)
<re.Match object; span=(0, 4), match='2+4j'>
>>> b = '-2j'
>>> re.match(a, b)
<re.Match object; span=(0, 3), match='-2j'>
>>> b = '5'
>>> re.match(a, b)
<re.Match object; span=(0, 1), match='5'>

1-11
>>> b = '[email protected]'
>>> a = '[\w\b][email protected]([\w\b]+.)+[\w\b]+'
>>> re.match(a, b)
<re.Match object; span=(0, 17), match='[email protected]'>

1-12: same as 1-6.

1-13
>>> a = "'"
>>> re.split(a, "<class 'builtin_function_or_method'>")[1]
'builtin_function_or_method'

1-14
>>> b = '11'
>>> a = r'1[0-2]|0?[1-9]'
>>> re.match(a, b)
<re.Match object; span=(0, 2), match='11'>
>>> b = '10'
>>> re.match(a, b)
<re.Match object; span=(0, 2), match='10'>
>>> b = '1'
>>> re.match(a, b)
<re.Match object; span=(0, 1), match='1'>
>>> b = '05'
>>> re.match(a, b)
<re.Match object; span=(0, 2), match='05'>

1-15
>>> b = '6259-0637-5561-1733'
>>> a = r'\d\d\d\d-\d\d\d\d-\d\d\d\d-\d\d\d\d|\d\d\d\d-\d\d\d\d\d\d-\d\d\d\d\d'
>>> re.match(a, b).group()
'6259-0637-5561-1733'
>>> b = '6259-063722-55612-'
>>> re.match(a, b).group()
'6259-063722-55612'

1-16 through 1-27: see https://blog.csdn.net/python_dream/article/details/78669906

1-28: once you can do 1-29, this one follows naturally.

1-29
>>> b = '(800)555-1212'
>>> a = r'((\d\d\d-)?\d\d\d-\d\d\d\d)|((\(\d{3}\))?\d\d\d-\d\d\d\d)'
>>> re.match(a, b).group()
'(800)555-1212'
>>> b = '800-555-1212'
>>> re.match(a, b).group()
'800-555-1212'
>>> b = '555-1212'
>>> re.match(a, b).group()
'555-1212'

1-30

import re
import webbrowser as web

# Write a small HTML page with a few links, then open it in the default browser.
with open('1-30.html', 'w') as f:
    f.write('''\
<!DOCTYPE html>
<html lang="en">
<head>
	<meta charset="UTF-8">
	<title>Document</title>
</head>
<body>
	<a href="http://www.ouc.edu.cn">Ocean University of China</a>
	<a href="http://www.jandan.com/ooxx">Sister's picture</a>
	<a href="http://www.w3school.com.cn">w3school</a>
</body>
</html>''')

web.open('1-30.html')
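
As a quick cross-check that ties this back to the chapter's topic (my own addition, not required by the exercise), the links can be pulled back out of the generated file with a simple pattern:

import re

# Read the page generated above and list every href target.
with open('1-30.html') as f:
    page = f.read()

for link in re.findall(r'href="(http[^"]+)"', page):
    print(link)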

1-31: skipped. Reason: I don't use Twitter.

1-32: Adapted, with some modifications, from a movie-ranking crawler I wrote earlier. The quality is not high, but the basic functionality works.

import requests
import bs4
import re

def open_url(url):
    # Use a desktop-browser User-Agent so the request looks like a normal visit.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134'}
    res = requests.get(url, headers=headers)
    return res

def find_books(res):
    soup = bs4.BeautifulSoup(res.text, 'html.parser')

    # Book titles come from the alt attribute of each cover image.
    books = []
    targets = soup.find_all("div", class_="a-section a-spacing-small")

    for each in targets:
        if each.img is not None:
            found = re.search('alt="(.+)" height', str(each.img))
            if found:
                print(found.group(1))
                books.append(found.group(1))

    # Author info comes from the small text rows under each entry.
    ranks = []
    targets = soup.find_all("div", class_="a-row a-size-small")

    for each in targets:
        ranks.append(' Author: %s ' % each.text)

    # Leftover from the movie-ranking crawler this was adapted from;
    # messages is filled (if any "hd" divs exist) but never used below.
    messages = []
    targets = soup.find_all("div", class_="hd")
    for each in targets:
        try:
            messages.append(each.p.text.split('\n')[1].strip() +
                            each.p.text.split('\n')[2].strip())
        except (AttributeError, IndexError):
            continue

    result = []
    length = len(books)

    for i in range(length):
        # The page yields two "a-row a-size-small" rows per book;
        # the author row for book i is ranks[2*i].
        result.append(str(i + 1) + '.' + books[i] + ranks[2 * i] + '\n')

    return result

def find_depth(res):
    # Read the page count from the pagination widget; hard-coded to one page
    # for now. Change "return 1" to "return int(depth)" to crawl every page.
    soup = bs4.BeautifulSoup(res.text, 'html.parser')
    depth = soup.find('li', class_='a-last').previous_sibling.text

    return 1

def main():
    host = "https://www.amazon.cn/gp/bestsellers/books/"
    res = open_url(host)
    depth = find_depth(res)

    result = []
    for i in range(depth):
        # Each results page lists 25 books.
        url = host + '?start=' + str(25 * i)
        res = open_url(url)
        result.extend(find_books(res))

    with open("圖書排名.txt", 'w', encoding = "utf-8") as f:
        for each in result:
            f.write(each)

if __name__ == "__main__":
    main()
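
Since the live page layout changes over time (and Amazon may refuse scripted requests), find_books can also be exercised offline. The sketch below fakes the one attribute the function reads from a response object; FakeResponse and the sample HTML are my own test fixtures, not part of the exercise, and it assumes the script above is already loaded:

class FakeResponse:
    # Minimal stand-in for a requests response: find_books only reads .text.
    def __init__(self, text):
        self.text = text

sample = '''
<div class="a-section a-spacing-small">
  <img alt="Sample Book Title" height="160" src="cover.jpg"/>
</div>
<div class="a-row a-size-small">Some Author</div>
<div class="a-row a-size-small">4.5 stars</div>
'''

print(find_books(FakeResponse(sample)))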