1. 程式人生 > python爬取新浪股票資料—繪圖【原創分享】

python爬取新浪股票資料—繪圖【原創分享】

目標:不做蠟燭圖,只用折線圖繪圖,繪出四條線之間的關係。

注:未使用介面,僅爬蟲學習,不做任何違法操作。

 

  1 """
  2     新浪財經,爬取歷史股票資料
  3 """
  4 
  5 # -*- coding:utf-8 -*-
  6 
  7 import numpy as np
  8 import urllib.request, lxml.html
  9 from urllib.request import urlopen
 10 from bs4 import BeautifulSoup
 11 import re, time
 12 import
matplotlib.pyplot as plt 13 from datetime import datetime 14 # 繪圖顯示中文設定 15 plt.rcParams['font.sans-serif'] = ['SimHei'] 16 plt.rcParams['axes.unicode_minus'] = False 17 18 19 # 公共模組,請求頭資訊 20 def public(link): 21 r = urllib.request.Request(link) 22 23 ug = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0
' 24 25 r.add_header('User-Agent', ug) 26 27 cookie = "SUB=_2AkMsqZjif8NxqwJRmfkRxG7nZYpzyg_EieKa9Wk5JRMyHRl-yD83qkJatRB6Bym2DDqPE870e3uMsySIjHjrMbMNxNqk; " \ 28 "SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WFXmxLGpAG5k05lCJw6qgYe; " \ 29 "SINAGLOBAL=172.16.92.24_1542789082.401113;
" \ 30 "Apache=172.16.92.24_1542789082.401115; UOR=www.baidu.com,blog.sina.com.cn,; " \ 31 "ULV=1542789814434:1:1:1:172.16.92.24_1542789082.401115:; U_TRS1=000000d1.1f4d3546.5bf53673.955fa32e; " \ 32 "U_TRS2=000000d1.1f593546.5bf53673.736853cc; FINANCE2=661413ac85cadaab72ec7e3d842d6a3a; _s_upa=1" 33 34 r.add_header("Cookie", cookie) 35 36 html = urllib.request.urlopen(r, timeout=500).read() 37 38 bsObj = BeautifulSoup(html, "lxml") # 將html物件轉化為BeautifulSoup物件 39 40 return bsObj 41 42 43 # 獲取股票價格 44 def shares_price(code, year, quarter): 45 link = "http://money.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/%s.phtml?year=%d&jidu=%d" % (code, year, quarter) 46 47 bsObj = public(link) 48 # print(bsObj) 49 50 a = 0 51 # date_list為日期列表,open_list為開盤價列表,high_list為最高價列表,close_list為收盤價列表,low_list為最低價列表 52 price_list, date_list, open_list, high_list, close_list, low_list = [], [], [], [], [], [] 53 # 獲取股票資訊 54 jpg_title = re.findall("(.*?\))", bsObj.title.text) 55 56 prices_bs = bsObj.find_all(name='div', attrs={"align": 'center'}) 57 # 獲取並處理價格資訊 58 for price_bs in prices_bs: 59 # 去除空格 60 price_bs_1 = price_bs.text.replace("\n\r\n\t\t\t", "") 61 price_bs_2 = price_bs_1.replace("\t\t\t\n", "") 62 63 # 6個字串為一個列表 64 if a != 6: 65 price_list.append(price_bs_2) 66 a = a + 1 67 else: 68 date_list.append(price_list[0]) 69 open_list.append(price_list[1]) 70 high_list.append(price_list[2]) 71 close_list.append(price_list[3]) 72 low_list.append(price_list[4]) 73 a = 0 74 price_list = [] 75 # 刪除列表頭 76 for b in (date_list, open_list, high_list, close_list, low_list): 77 b.pop(0) 78 79 # 全部倒序排列(由日期遠到近,從左到右排列) 80 for c in (date_list, open_list, high_list, close_list, low_list): 81 c.reverse() 82 83 return date_list, open_list, high_list, close_list, low_list, jpg_title 84 85 86 # 輸入股票程式碼,年份,季度 87 code = "002925" 88 year = "2018" 89 quarter = 4 90 # 以下為手動輸入模式,因除錯方便預設上面固定模式。 91 # code = input("code:") # 002925 92 # year = input("year:") # 2018 93 # quarter = 
int(input("quarter:")) 94 95 # 列表字串轉為數值date 96 x = [datetime.strptime(d, '%Y-%m-%d').date() for d in shares_price(code, int(year), quarter)[0]] 97 # 將爬取的資料(字串)轉化為浮點型 98 open_list = [float(i) for i in shares_price(code, int(year), quarter)[1]] 99 high_list = [float(i) for i in shares_price(code, int(year), quarter)[2]] 100 close_list = [float(i) for i in shares_price(code, int(year), quarter)[3]] 101 low_list = [float(i) for i in shares_price(code, int(year), quarter)[4]] 102 103 # 線條設定 104 plt.plot(x, open_list, label='open', linewidth=1, color='red', marker='o', markerfacecolor='blue', markersize=2) 105 plt.plot(x, high_list, label='high', linewidth=1, color='green', marker='o', markerfacecolor='blue', markersize=2) 106 plt.plot(x, close_list, label='close', linewidth=1, color='blue', marker='o', markerfacecolor='blue', markersize=2) 107 plt.plot(x, low_list, label='low', linewidth=1, color='black', marker='o', markerfacecolor='blue', markersize=2) 108 109 # 取數列最大數值與最小值做圖表的邊界值。 110 plt.ylim(min(low_list)-1, max(high_list)+1) 111 plt.gcf().autofmt_xdate() # 自動旋轉日期標記 112 113 # 打印表頭 114 plt.xlabel('time') 115 plt.ylabel('price') 116 # shares_price(code, int(year), quarter)[5][0]為title中的股票名稱與程式碼 117 plt.title('gp_1_{0}.jpg'.format(shares_price(code, int(year), quarter)[5][0])) 118 plt.legend() 119 plt.show()

 

 

效果如下:

 

是不是有另一種看法的感覺?如:黑線下跌後向上的第一個大拐點為買入點。