1. 程式人生 > >Python爬取雙色球資料進行分析

Python爬取雙色球資料進行分析

突發奇想:是否可以從雙色球的歷史開獎資料中分析出出現次數最多的組合?那就從 1+1(一個紅球加一個藍球)的組合開始分析,嘿嘿,經常中這種組合也挺不錯哦!現在我們開始獲取資料,從中彩網爬取資料應該比較靠譜,下面是指令碼。

# -*- coding:utf-8 -*-
import requests, bs4
import os, time
import operator
from itertools import combinations, permutations

class DoubleColorBall(object):
    """Scraper for historical Double Color Ball lottery draws.

    Downloads the per-year result pages from tubiao.zhcw.com, extracts the
    issue number, the red balls and the blue ball of every draw row, and
    appends them to a plain-text data file, one draw per line:
    ``<issue> <red> ... <red> <blue>``.
    """

    # Seconds to wait for the server before a request is abandoned.
    REQUEST_TIMEOUT = 10

    def __init__(self, dataFile='d:\\balls_data.txt'):
        """Initialise the scraper.

        :param dataFile: path of the text file that draws are appended to.
        """
        self.balls = {}
        self.baseUrl = 'http://tubiao.zhcw.com/tubiao/ssqNew/ssqJsp/ssqZongHeFengBuTuAsc.jsp'
        self.dataFile = dataFile
        # A single session is shared by every request so the underlying
        # connection can be reused instead of being re-opened per year.
        self.session = requests.Session()

    def getHtml(self, url):
        """Fetch *url* with browser-like headers and return the body as text.

        :param url: absolute URL of the page to download.
        :returns: the decoded response body.
        :raises requests.exceptions.RequestException: on network failure or
            when the server does not answer within ``REQUEST_TIMEOUT`` seconds.
        """
        headers = {
            'Referer': 'http://tubiao.zhcw.com/tubiao/ssqNew/ssqInc/ssqZongHeFengBuTuAsckj_year=2016.html',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
        }
        # Without a timeout a stalled server would block the script forever.
        response = self.session.get(url, headers=headers,
                                    timeout=self.REQUEST_TIMEOUT)
        return response.text

    def getBall(self, startYear=2003, endYear=2018):
        """Download and store the draws of every year in the given range.

        :param startYear: first year to fetch (inclusive).
        :param endYear: year at which to stop (exclusive).
        """
        for year in range(startYear, endYear):
            url = self.baseUrl + '?kj_year=%s' % (year, )
            print(url)
            html = self.getHtml(url)
            self.bs = bs4.BeautifulSoup(html, 'html.parser')
            data = self.bs.find_all(class_='hgt')
            self.parseBall(data)

    def parseBall(self, data):
        """Extract the draws from result rows and append them to the file.

        ``self.balls`` is reset on every call, so it only ever holds the
        draws of the most recently parsed page.

        :param data: iterable of elements as returned by ``find_all``;
            anything that is not a ``bs4.element.Tag`` is ignored.
        """
        self.balls = {}
        for row in data:
            if not isinstance(row, bs4.element.Tag):
                continue
            issueCell = row.find(class_="qh7")
            # Rows without a readable issue number cannot be keyed; skip
            # them instead of failing with AttributeError on ``None``.
            if issueCell is None or issueCell.string is None:
                continue
            center = issueCell.string.strip()
            print(center)
            # Rows from this marker onwards are not real draws.
            # NOTE(review): the page may be served in Simplified Chinese;
            # confirm this literal matches the actual cell text.
            if center.startswith("模擬"):
                break
            redBalls = row.find_all(class_="redqiu")
            blueCell = row.find(class_="blueqiu3")
            if blueCell is None or blueCell.string is None:
                continue
            blueBall = blueCell.string.strip()
            self.balls[center] = [r.string for r in redBalls] + [blueBall]
        self.saveBall(self.balls)

    def saveBall(self, data):
        """Append draws to ``self.dataFile``, highest issue number first.

        The file is opened in append mode, so running the scraper again
        adds the same draws a second time.

        :param data: mapping of issue number to the list of ball strings
            (red balls followed by the blue ball).
        """
        with open(self.dataFile, 'a') as f:
            for issue in sorted(data, reverse=True):
                f.write(str(issue) + ' ' + ' '.join(data[issue]) + '\n')


class Analysis(object):
    """Count how often each red-ball / blue-ball pair occurs in the data file."""

    def __init__(self, dataFile='d:\\balls_data.txt'):
        """Initialise the analysis.

        :param dataFile: path of the text file written by ``DoubleColorBall``.
        """
        self.redrst = {}
        self.bluerst = {}
        self.redbluerst = {}
        self.dataFile = dataFile

    def run(self):
        """Tally every "red-blue" pair and print the three most frequent.

        Each line of the data file is ``<issue> <red> ... <red> <blue>``.
        An issue number that appears more than once is counted only once,
        so duplicated lines in the file do not inflate the result.

        :returns: the (up to) three most frequent ``(pair, count)`` tuples in
            ascending order of count, i.e. the same list that is printed.
        """
        # Start from scratch so repeated calls do not accumulate counts.
        self.redbluerst = {}
        seenIssues = set()
        with open(self.dataFile, 'r') as f:
            for line in f:
                # split() without an argument also discards the trailing
                # newline, which otherwise ends up inside the blue ball.
                fields = line.split()
                if len(fields) < 3:
                    # Need an issue number, at least one red and one blue.
                    continue
                issue = fields[0]
                if issue in seenIssues:
                    continue
                seenIssues.add(issue)
                redBalls = fields[1:-1]
                blueBall = fields[-1]
                for combo in combinations(redBalls, 1):
                    key = '-'.join(combo) + '-' + blueBall
                    self.redbluerst[key] = self.redbluerst.get(key, 0) + 1
        sorted_redblue = sorted(self.redbluerst.items(),
                                key=operator.itemgetter(1))
        top = sorted_redblue[-3:]
        print(top)
        return top


if __name__ == '__main__':
    ball = DoubleColorBall()
    ball.getBall()
    # Analyse the very file the scraper has just written.
    anal = Analysis(ball.dataFile)
    anal.run()

輸出結果:
[('20-9\n', 38), ('8-16\n', 39), ('1-12\n', 40)]

總共爬取了2184期的資料,結果1+1組合中最多出現的也只有40次,概率為0.018。O(∩_∩)O哈哈~