1. 程式人生 > >爬蟲練習--爬取股票資料

爬蟲練習--爬取股票資料

爬取股票資料

步驟

  • 從東方財富網找到上市公司的股票代碼並儲存
  • 根據股票代碼到百度股市通查詢相關股票的具體資訊並儲存

程式碼

#-*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup
import traceback
import re
import xlwt
def getHTMLText(url, code="utf-8"):
    """Fetch *url* and return the response body decoded as *code*.

    Args:
        url: Address to request with HTTP GET.
        code: Encoding assigned to the response before its text is read.

    Returns:
        The decoded page text, or "" when the request or decoding fails
        (the traceback is printed so the failure stays visible).
    """
    try:
        # Without a timeout a single unresponsive server would stall the
        # whole crawl indefinitely.
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = code
        return r.text
    except Exception:
        # Best effort: callers treat "" as "page unavailable".  Catching
        # Exception (not a bare except) lets Ctrl-C still stop the script.
        traceback.print_exc()
        return ""


# Collect the list of stock codes.
def getStockList(lst, stockURL):
    """Append the stock code found in each link of the listing page to *lst*.

    Args:
        lst: List that receives the codes; it is extended in place.
        stockURL: URL of the listing page, fetched as GB2312 text.

    Returns:
        None. Links without an href, or whose href contains no code, are
        skipped.
    """
    html = getHTMLText(stockURL, "GB2312")
    soup = BeautifulSoup(html, 'html.parser')
    # A code is "sh" or "sz" followed by six digits; compiled once for the loop.
    codePattern = re.compile(r"[s][hz]\d{6}")
    for anchor in soup.find_all('a'):
        href = anchor.attrs.get('href')
        if not isinstance(href, str):
            continue
        match = codePattern.search(href)
        if match:
            lst.append(match.group(0))


def _parseStockInfo(html):
    """Return the label -> text mapping found on one stock page, or None.

    None is returned when the page has no <div class="stock-bets"> element.
    An IndexError propagates when that element lacks a usable 'bets-name'
    child; the caller treats that as "skip this stock".
    """
    soup = BeautifulSoup(html, 'html.parser')
    stockInfo = soup.find('div', attrs={'class': 'stock-bets'})
    if stockInfo is None:
        return None

    infoDict = {}
    # Stock name: first whitespace-separated token of the 'bets-name' element.
    name = stockInfo.find_all(attrs={'class': 'bets-name'})[0]
    infoDict['股票名稱'] = name.text.split()[0]
    # Each <dt> is paired with the <dd> at the same position (e.g. high, low).
    for key, val in zip(stockInfo.find_all('dt'), stockInfo.find_all('dd')):
        infoDict[key.text] = val.text
    return infoDict


# Look up the trading details for each stock code and save them to a file.
def getStockInfo(lst, stockURL):
    """Query every stock in *lst* and write the results to 'gupiao.xls'.

    Args:
        lst: Stock codes; each is appended to *stockURL* with ".html".
        stockURL: Base URL of the per-stock detail pages.

    Returns:
        None. The workbook is written to 'gupiao.xls' in the current
        directory and progress is printed on a single console line.
    """
    # Create the Excel workbook and its header row.
    book = xlwt.Workbook(encoding='utf-8')
    sheet1 = book.add_sheet('sheet1', cell_overwrite_ok=True)
    # NOTE(review): these labels are compared verbatim with the <dt> text of
    # the page; confirm they use the same wording/script as the site.
    heads = ['股票名稱', '最高', '最低', '今開', '昨收', '成交額', '成交量', '淨值', '折價率']
    for col, head in enumerate(heads):
        sheet1.write(0, col, head)
    book.save('gupiao.xls')

    row = 1  # next free spreadsheet row; advances only when a row is written
    length = len(lst)
    # Write each stock's data to the sheet as soon as it has been fetched.
    for done, stock in enumerate(lst, 1):
        html = getHTMLText(stockURL + stock + ".html")
        try:
            infoDict = _parseStockInfo(html) if html else None
            if infoDict:
                # Each value goes into the column of its own header, so a
                # field missing from the page leaves that cell empty instead
                # of shifting later values under the wrong heading.
                for col, head in enumerate(heads):
                    if head in infoDict:
                        sheet1.write(row, col, infoDict[head])
                # Saved after every row so an interrupted run keeps its data.
                book.save('gupiao.xls')
                row += 1
        except Exception:
            # Best effort: report the failure and move on to the next stock.
            traceback.print_exc()
        # Progress counts every processed stock, including skipped ones, so
        # it reaches 100% even when some pages fail.
        print("\r當前進度: {:.2f}%".format(done * 100 / length), end="")


if __name__ == '__main__':
    # Use sites that embed the data statically in the HTML page.
    stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
    stock_info_url = 'https://gupiao.baidu.com/stock/'
    slist = []
    getStockList(slist, stock_list_url)
    getStockInfo(slist, stock_info_url)

執行結果

  • 程式執行後將結果儲存在EXCEL中,部分結果截圖如下所示
    這裡寫圖片描述