1. 程式人生 > 使用 BeautifulSoup 爬取 GitHub 內容示例

使用 BeautifulSoup 爬取 GitHub 內容示例

#!/usr/bin/env python
import urllib.request
import re
from bs4 import BeautifulSoup

def getdata(url: str = "http://github.com/racaljk/hosts/blob/master/hosts",
            hosts_path: str = "/etc/hosts") -> None:
    """Download a page, extract its table cells and write them as a hosts file.

    The page at *url* is decoded as UTF-8 and parsed with BeautifulSoup's
    ``lxml`` parser. The text of every element matching the CSS selector
    ``table > tr > td`` is written to *hosts_path*, one cell per line.

    Args:
        url: Address of the page to fetch.
        hosts_path: File that is overwritten with the extracted lines.

    Returns:
        None. Any failure is reported on stdout instead of being raised.
    """
    try:
        # Close the response deterministically, even if read() fails.
        with urllib.request.urlopen(url) as response:
            markup = response.read().decode('UTF-8')

        cells = BeautifulSoup(markup, 'lxml').select('table > tr > td')
        if not cells:
            # Opening the target with 'w' truncates it; never do that when
            # the page yielded nothing, or the existing file would be wiped.
            print('no table cells found in the page; '
                  + hosts_path + ' left unchanged')
            return

        # Build the full content first so a failure while extracting text
        # cannot leave a half-written file behind.
        content = "".join(cell.get_text() + "\n" for cell in cells)

        # newline='' disables newline translation on write.
        with open(hosts_path, 'w', newline='', encoding='utf-8') as hostsfile:
            hostsfile.write(content)
        print('hosts重新整理成功')
    except Exception as err:
        # Top-level boundary of the script: report the problem, do not raise.
        print(str(err))

# Run the update only when executed as a script, not when imported.
if __name__ == "__main__":
    getdata()