1. 程式人生 > Python爬蟲小案例

Python爬蟲小案例

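'''
Small web-scraping example: download a listing page, pull each entry's name
and number out of the HTML with regular expressions, and print the entries
sorted by number in descending order.
'''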
from urllib import request
import re

class Spider():
    '''
    Scrape a listing page and print its entries ranked by their number.

    The page at ``url`` is downloaded, cut into ``video-info`` blocks with
    regular expressions, and each block's name and number are extracted,
    cleaned up, sorted in descending numeric order and printed.
    '''
    # Page that gets downloaded.
    url = 'https://www.panda.tv/cate/lol'
    # Raw strings so that \s and \S reach the regex engine untouched instead
    # of being parsed as (invalid) string escape sequences.
    rootPattern = r'<div class="video-info">([\s\S]*?)</div>'
    namePattern = r'</i>([\s\S]*?)</span>'
    numberPattern = r'<span class="video-number">([\s\S]*?)</span>'

    def __fetchContent(self):
        '''
        Download ``Spider.url`` and return its body decoded as UTF-8 text.
        '''
        # The context manager closes the HTTP response even if read() raises.
        with request.urlopen(Spider.url) as response:
            raw = response.read()  # bytes
        return str(raw, encoding='utf-8')

    def __analysis(self, htmls):
        '''
        Extract the raw entries from the page text.

        Returns a list of dicts whose ``'name'`` and ``'number'`` values are
        the (possibly empty) lists of regex matches found in each
        ``video-info`` block.
        '''
        return [
            {
                'name': re.findall(Spider.namePattern, block),
                'number': re.findall(Spider.numberPattern, block),
            }
            for block in re.findall(Spider.rootPattern, htmls)
        ]

    def __refine(self, anchors):
        '''
        Flatten the raw entries into ``{'name': str, 'number': str}`` dicts.

        The first match of each field is kept and the name is stripped of
        surrounding whitespace. Entries missing either field are skipped so
        that one malformed block does not abort the whole run.
        '''
        return [
            {'name': anchor['name'][0].strip(), 'number': anchor['number'][0]}
            for anchor in anchors
            if anchor['name'] and anchor['number']
        ]

    def __sort(self, anchors):
        '''
        Return the entries as a new list, largest number first.
        '''
        return sorted(anchors, key=self.__sortSeed, reverse=True)

    def __sortSeed(self, anchor):
        '''
        Sort key: the numeric value of ``anchor['number']`` as a float.

        The first integer or decimal number in the text is used (so "1.5萬"
        is read as 1.5, not 1) and multiplied by 10000 when the text carries
        the ten-thousand unit. Text without any digits sorts as 0.0.
        '''
        text = anchor['number']
        match = re.search(r'\d+(?:\.\d+)?', text)
        if match is None:
            return 0.0
        number = float(match.group())
        # Accept both the traditional and the simplified form of the unit.
        if '萬' in text or '万' in text:
            number *= 10000
        return number

    def __show(self, anchors):
        '''
        Print one line per entry as ``<rank>:<name>---------<number>``.
        '''
        for rank, anchor in enumerate(anchors, start=1):
            print(str(rank) + ':' + anchor['name'] + '---------' + anchor['number'])

    def do(self):
        '''
        Run the whole pipeline: fetch, parse, clean up, sort and print.
        '''
        htmls = self.__fetchContent()
        anchors = self.__analysis(htmls)
        anchors = self.__refine(anchors)
        anchors = self.__sort(anchors)
        self.__show(anchors)


# Only hit the network when executed as a script, not when imported.
if __name__ == '__main__':
    spider = Spider()
    spider.do()