程式人生 > Python selenium 爬取天天基金網股票型基金

Python selenium 爬取天天基金網股票型基金

對於股票市場長期的判斷,普通上班族沒有多少時間和資料可以分析。那麼,就應該藉助基金機構選擇的股票來分析,藉助各基金經理管理的股票基金來統計,哪些股票是基金經理或團隊分析後購買的。所以這裡選擇的是股票型基金,最終將分析得出哪類股票是機構最多選擇的、哪隻股票是機構購買最多的。利用基金經理們分析的結果,我們可以選擇相應的幾隻股票進行長期投資。

先看看股票型基金,然後遍歷某基金的股票持倉。



所以選擇股票型別的基金後,除了讀取頁面資訊,還應該讀取連結網址。比較好的是,在分頁的最右邊有一個“不分頁”的選項,點選後所有資料都在一頁中顯示,這就方便很多了!


以下是讀取頁面資訊的指令碼:

# -*- coding: utf-8 -*-
# python 3.5

import re
import time
import pymssql
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class FUND(object):
	"""Scrape the fund ranking page and store the rows in SQL Server.

	The instance drives a PhantomJS browser through Selenium and writes to
	the database through a pymssql connection. ``GetBaseInfo`` inserts one
	``TTStocks`` row per fund listed on the ranking page and then
	``GetDetialStocks`` visits every fund link to insert its holdings into
	``TTStocksDetial``.
	"""

	# Parameterized statements: the values come from scraped page text, so
	# they are passed to the driver as parameters instead of being
	# interpolated into the SQL string.
	_INSERT_FUND_SQL = (
		"INSERT INTO [TTStocks]([id],[code],[name],[link],[date],[dwjz],[ljjz],"
		"[rzzl],[jyz],[jsy],[jly],[jyn],[jln],[jsn],[jnl],[cll],[zdy],[sxf]) "
		"VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
	)
	_INSERT_DETAIL_SQL = "INSERT INTO TTStocksDetial(code,name,cczb) VALUES (%s,%s,%s)"

	def __init__(self):
		self.url = 'http://fund.eastmoney.com/data/fundranking.html'
		self.driver = webdriver.PhantomJS()
		#self.driver = webdriver.Chrome("D:/Python35/selenium/webdriver/chromedriver/chromedriver.exe")
		self._conn = self.GetConnect()
		# Always define the cursor attribute so later code can test it even
		# when the connection could not be opened.
		self._cur = self._conn.cursor() if self._conn else None

	def GetConnect(self, host="HZC", user="kk", password="kk", database="StockDB"):
		"""Open the pymssql connection.

		The defaults are the values the script has always used. Returns the
		connection object, or ``False`` when connecting fails (the error is
		printed, not raised).
		"""
		try:
			return pymssql.connect(host=host, user=user, password=password, database=database)
		except Exception as err:
			print("連線資料庫失敗, %s" % err)
			return False

	def ExecNonQuery(self, sql, params=None):
		"""Execute one statement and commit it.

		``params`` is an optional tuple of values for the ``%s`` placeholders
		in ``sql``; when it is omitted the statement is executed as-is, which
		keeps existing single-argument callers working.

		Returns ``True`` on success and ``False`` on any failure; a failed
		statement is rolled back and the error is printed.
		"""
		if not self._conn or self._cur is None:
			print("執行失敗, %s" % "no database connection")
			return False
		try:
			if params is None:
				self._cur.execute(sql)
			else:
				self._cur.execute(sql, params)
			self._conn.commit()
		except Exception as err:
			self._conn.rollback()
			print("執行失敗, %s" % err)
			return False
		return True

	def GetURL(self):
		"""Load ``self.url`` in the browser."""
		print("[-] 開啟網址: %s" % self.url)
		self.driver.get(self.url)

	def SetURL(self, url):
		"""Remember ``url`` as the address the next ``GetURL`` call loads."""
		print("[-] 設定網址: %s" % url)
		self.url = url

	def GetSelectStockType(self):
		"""Return ``(typetext, displaytext)`` describing the page state.

		``typetext`` is the stripped text of the currently selected entry of
		the ``types`` list; ``displaytext`` is the ``style`` attribute of the
		``pagebar`` element, or ``None`` when that element is not found.
		"""
		selected = self.driver.find_element(By.XPATH, "//ul[@id='types']/li[@class='at']")
		typetext = selected.text.strip()
		pagebars = self.driver.find_elements(By.XPATH, "//div[@id='pagebar']")
		# As before, the last matching element wins.
		displaytext = pagebars[-1].get_attribute('style') if pagebars else None
		return typetext, displaytext

	def DoSelectStockType(self):
		"""Click the second entry of the ``types`` list (the stock-type tab).

		Best effort: a failure is printed instead of raised, and
		``GetBaseInfo`` verifies the resulting page state afterwards.
		"""
		print("[-] 選擇股票型別")
		try:
			element = WebDriverWait(self.driver, 10).until(
				EC.presence_of_element_located((By.XPATH, "//ul[@id='types']/li[2]")))
			element.click()
			# Fixed pause so the page can redraw the table after the click.
			time.sleep(3)
		except Exception as err:
			print("[!] 選擇股票型別失敗, %s" % err)

	def DoSelectShowAll(self):
		"""Click the ``showall`` control so every fund is listed on one page.

		Best effort: a failure is printed instead of raised.
		"""
		print("[-] 顯示所有")
		try:
			element = WebDriverWait(self.driver, 10).until(
				EC.presence_of_element_located((By.ID, "showall")))
			element.click()
			# Fixed pause so the page can redraw the table after the click.
			time.sleep(3)
		except Exception as err:
			print("[!] 顯示所有失敗, %s" % err)

	def GetBaseInfo(self, timeout=60):
		"""Store every row of the ranking table, then crawl the fund pages.

		Polls once per second, for at most ``timeout`` seconds, until the
		stock-type tab is selected and the page bar no longer reports
		``block`` in its style. Raises ``RuntimeError`` if the stock-type tab
		never becomes selected; if only the page bar is still shown, a
		warning is printed and whatever rows are displayed are scraped.
		"""
		print("[-] 基本資訊")
		deadline = time.time() + timeout
		typetext, displaytext = self.GetSelectStockType()
		while not self._IsStockView(typetext, displaytext) and time.time() < deadline:
			print("   waiting……")
			time.sleep(1)
			typetext, displaytext = self.GetSelectStockType()
		if not typetext.startswith("股票型"):
			raise RuntimeError("stock-type tab was not selected within %s seconds" % timeout)
		if "block" in (displaytext or ""):
			print("[!] pagination is still visible; only the displayed rows are scraped")

		links = {}
		table = self.driver.find_element(By.XPATH, "//table[@id='dbtable']/tbody")
		for row in table.find_elements(By.XPATH, ".//tr"):
			cells = row.find_elements(By.TAG_NAME, "td")
			# Cells 1..17 are read below; skip rows that do not have them.
			if len(cells) < 18:
				continue
			anchors = cells[3].find_elements(By.TAG_NAME, "a")
			if not anchors:
				continue
			link = anchors[0].get_attribute("href")
			texts = [cell.text for cell in cells[1:18]]
			row_id, code, name = texts[0], texts[1], texts[2]
			# Column order: id, code, name, link, then the remaining cells.
			# NOTE(review): id/dwjz/ljjz used to be emitted unquoted; they are
			# now sent as string parameters, which presumably relies on the
			# server converting them for numeric columns - confirm the schema.
			values = tuple(texts[:3] + [link] + texts[3:])
			self.ExecNonQuery(self._INSERT_FUND_SQL, values)
			links[code] = link
			print(row_id, name)
		self.GetDetialStocks(links)

	@staticmethod
	def _IsStockView(typetext, displaytext):
		"""Tell whether the stock-type tab is selected and paging is hidden."""
		return typetext.startswith("股票型") and "block" not in (displaytext or "")

	def GetDetialStocks(self, dict):
		"""Visit every fund page and store its stock holdings.

		``dict`` maps a fund code to the URL of that fund's page. A fund whose
		page has no holdings table, or whose table reports no data, is
		skipped so that one page cannot abort the whole crawl.
		"""
		for code, link in dict.items():
			self.SetURL(link)
			self.GetURL()
			tables = self.driver.find_elements(
				By.XPATH, "//li[@id='position_shares']/div[@class='poptableWrap']/table/tbody")
			if not tables:
				print("[!] no holdings table for %s" % code)
				continue
			rows = tables[0].find_elements(By.XPATH, ".//tr")
			# The second row carries the "no data" marker when the fund has
			# no published holdings.
			if len(rows) < 2 or rows[1].text.strip() == "暫無資料":
				continue
			for row in rows:
				cells = row.find_elements(By.TAG_NAME, "td")
				# Rows without at least two data cells cannot be stored.
				if len(cells) < 2:
					continue
				print(code, cells[0].text)
				self.ExecNonQuery(self._INSERT_DETAIL_SQL, (code, cells[0].text, cells[1].text))

	def Close(self):
		"""Quit the browser and close the database connection, if any."""
		self.driver.quit()
		if self._conn:
			self._conn.close()
						
# Script entry point: open the ranking page, switch to the stock-type tab,
# show all rows on one page, then scrape and store everything.
if __name__ == "__main__":
	f = FUND()
	try:
		f.GetURL()
		f.DoSelectStockType()
		f.DoSelectShowAll()
		f.GetBaseInfo()
	finally:
		# Release the browser process and the database connection even when
		# scraping fails part-way through.
		f.driver.quit()
		if f._conn:
			f._conn.close()

接下來這些資料將存入資料庫,讀取每條記錄的同時,也將模擬開啟連結網址,讀取 “股票持倉”情況。