
Python web crawler: scraping stock data into MySQL

1. Creating the tables

mysql -u root -p123456
create database test default character set utf8;

create table stocks  -- A-shares
(
  code varchar(10) comment 'stock code',
  name varchar(30) comment 'name',
  score varchar(5) comment 'score',
  advise varchar(10) comment 'advice',
  survey varchar(500) comment 'summary',
  trend varchar(500) comment 'trend',
  tec_score varchar(5) comment 'technical score',
  tec_content varchar(500) comment 'technical summary',
  funds_score decimal(3,1) comment 'capital-flow score',
  funds_content varchar(500) comment 'capital-flow summary',
  msg_score varchar(5) comment 'news score',
  msg_content varchar(500) comment 'news summary',
  trade_score decimal(3,1) comment 'industry score',
  trade_content varchar(500) comment 'industry summary',
  basic_score varchar(5) comment 'fundamentals score',
  basic_content varchar(500) comment 'fundamentals summary',
  opt_trend varchar(500) comment 'stock-picking signals',
  tec_form varchar(500) comment 'technical pattern',
  buy_signal varchar(500) comment 'buy signal'
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

create table syl  -- per-stock P/E ratios
(
  id varchar(5) comment 'row number',
  code varchar(10) comment 'stock code',
  name varchar(30) comment 'name',
  sy_j varchar(10) comment 'P/E ratio',
  sy_d varchar(10) comment 'P/E ratio (dynamic)',
  price varchar(10) comment 'price',
  zdf varchar(10) comment 'price change (%)',
  cha_rate varchar(10) comment 'turnover rate (%)'
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
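
Before moving on, it is worth confirming that both tables exist and the column definitions took effect. A minimal check from Python, assuming the same root/123456 credentials used above:

import pymysql

# Quick sanity check on the schema created above.
db = pymysql.connect(host='localhost', user='root', passwd='123456',
                     db='test', charset='utf8')
cursor = db.cursor()
cursor.execute("show tables")
print(cursor.fetchall())          # expect (('stocks',), ('syl',))
cursor.execute("describe stocks")
for column in cursor.fetchall():
    print(column[0], column[1])   # column name and type
db.close()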

2. Scraping the 牛叉診股 stock-diagnosis pages

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author: xiaobao  time: 2018/4/5
import urllib.request
import re
from bs4 import BeautifulSoup
import pymysql
import traceback
import time

start = time.perf_counter()  # time.clock() is deprecated and was removed in Python 3.8
db = pymysql.connect(host='localhost', user='root',
                     passwd='123456', db='test', charset='utf8')
cursor = db.cursor()
cursor.execute("truncate table stocks")
cursor.execute("set names utf8")

# Collect every A-share code from the Eastmoney stock list. Link text has
# the form name(code); A-share codes start with 0 or 6.
page = urllib.request.urlopen('http://quote.eastmoney.com/stocklist.html#sz')
bs = BeautifulSoup(page, "html.parser")
links = bs.find_all('a', attrs={'target': '_blank'}, href=re.compile("quote"))
stocks = []
for s in links:
    parts = re.findall(r'[^()]+', s.get_text())
    if len(parts) == 2 and (parts[1].startswith('0') or parts[1].startswith('6')):
        stocks.append(parts[1])

for stock in stocks:
    try:
        url = 'http://doctor.300033.info/' + stock
        soup = BeautifulSoup(urllib.request.urlopen(url), "html.parser")
        stockname = soup.find('div', attrs={'class': 'stockname'}).get_text()
        name = re.findall(r'[^()]+', stockname)[0]
        code = re.findall(r'[^()]+', stockname)[1]
        score = soup.find('div', attrs={'class': 'stockvalue'}).get_text()
        advise = soup.find('span', attrs={'class': 'cur'}).get_text()
        title = soup.find('strong', attrs={'class': 'title'}).get_text()
        # Keep everything after the first comma, minus the 10-character tail.
        survey = ','.join(title.split(',')[1:])[:-10]
        short = soup.find('li', attrs={'class': 'short'}).get_text()
        mid = soup.find('li', attrs={'class': 'mid'}).get_text()
        trend = short[5:] + mid[5:]  # drop the 5-character label prefixes
        # One content paragraph and one score label per dimension:
        # technical, capital flow, news, industry, fundamentals.
        dim = [p.get_text() for p in soup.find_all('p', attrs={'class': 'content'})]
        grade = [d.get_text().replace('分', '') for d in soup.find_all('div', attrs={'class': 'label'})]
        techcont = soup.find('div', attrs={'class': 'techcont', 'display': ''}).get_text()
        tec = (techcont.replace('選股動向:', '').replace('技術形態:', '')
                       .replace('買入訊號:', '').strip().split('\n\n\n'))
        opt_trend = tec[0].replace('\n', ',')
        tec_form = tec[1].replace('\n', ',')[1:] if len(tec) > 1 else ''
        buy_signal = tec[2].replace('\n', ',')[1:] if len(tec) > 2 else ''
        # Parameterized insert: scraped text can contain quotes that would
        # break a string-formatted SQL statement.
        sql = "insert into stocks values(" + ",".join(["%s"] * 19) + ")"
        row = (code, name, score, advise, survey, trend,
               grade[0], dim[0], grade[1], dim[1], grade[2], dim[2],
               grade[3], dim[3], grade[4], dim[4],
               opt_trend, tec_form, buy_signal)
        try:
            cursor.execute(sql, row)
            db.commit()
        except Exception:
            db.rollback()
    except Exception:
        pass  # skip stocks whose diagnosis page is missing or malformed

db.close()
end = time.perf_counter()
print(end - start)
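
The stock-list parsing above leans entirely on re.findall(r'[^()]+', t), which splits link text of the form name(code) into two parts. A standalone illustration (the sample string is hypothetical but matches the page's format):

import re

# Anything that is not a parenthesis becomes one match.
t = '平安銀行(000001)'            # sample link text from the stock list page
parts = re.findall(r'[^()]+', t)
print(parts)                      # ['平安銀行', '000001']
name, code = parts[0], parts[1]
print(code.startswith('0') or code.startswith('6'))  # True: an A-share code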

3. Scraping per-stock P/E ratios

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author: xiaobao  time: 2018/4/6
import os
import time
import codecs
import pymysql
import traceback
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

pages = ['http://data.10jqka.com.cn/market/ggsyl/']

for url in pages:
    # Headless Chrome: the 10jqka ranking tables are rendered by
    # JavaScript, so a plain HTTP fetch would not see them.
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_experimental_option("excludeSwitches", ["ignore-certificate-errors"])
    chrome_options.binary_location = r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe'
    browser = webdriver.Chrome(chrome_options=chrome_options)
    browser.get(url)

    # The "尾頁" (last page) link carries the total page count.
    end = 1
    soup = BeautifulSoup(browser.page_source, 'html.parser')
    for link in soup.find_all('a', text='尾頁'):
        end = int(link.get('page'))

    tmp = 'D:\\temp.txt'
    count = 1
    with open(tmp, 'w') as f:
        while count <= end:
            # Scrape the current page before paging forward; clicking
            # first would skip page 1.
            soup = BeautifulSoup(browser.page_source, 'html.parser')
            tab = soup.find_all('table')[1]
            for tr in tab.find_all('tr'):
                for td in tr.find_all('td'):
                    print(td.get_text(), end=',', file=f)
                print(file=f)
            count = count + 1
            if count <= end:
                try:
                    browser.find_element_by_link_text('下一頁').click()
                    time.sleep(3)  # give the next page time to render
                except Exception:
                    break
    browser.quit()

    if url == 'http://data.10jqka.com.cn/market/ggsyl/':
        result = 'D:\\syl.txt'

    # Clean up: drop blank lines (header-only rows print as empty lines)
    # and the trailing comma on every data row.
    f1 = open(tmp, 'r')
    f2 = codecs.open(result, 'w', 'utf-8')
    for line in f1:
        data = line.strip()
        if len(data) != 0:
            f2.write(data[:-1])
            f2.write('\r\n')
    f1.close()
    f2.close()
    os.remove(tmp)

db = pymysql.connect(host='localhost', user='root', passwd='123456', db='test', charset='utf8')
cursor = db.cursor()
cursor.execute("truncate table syl")

try:
    # The rows above were written with \r\n line endings.
    cursor.execute('load data infile "D:/syl.txt" into table syl '
                   'fields terminated by "," lines terminated by "\r\n"')
    db.commit()
except Exception:
    # traceback.print_exc()
    db.rollback()

db.close()
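
Note that server-side load data infile requires the MySQL server itself to be able to read D:/syl.txt, and it is refused when secure_file_priv excludes that directory. If the server is locked down, the client-side LOCAL variant is an alternative; a sketch, assuming local_infile is enabled on both the server and the PyMySQL connection:

import pymysql

# LOCAL variant: the client reads the file, so secure_file_priv does not
# apply. Assumes the server allows it (SET GLOBAL local_infile = 1).
db = pymysql.connect(host='localhost', user='root', passwd='123456',
                     db='test', charset='utf8', local_infile=True)
cursor = db.cursor()
cursor.execute('load data local infile "D:/syl.txt" into table syl '
               'fields terminated by "," lines terminated by "\r\n"')
db.commit()
db.close()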

4. Analyzing the results

select
  a.code         'code',
  a.name         'name',
  score          'score',
  advise         'advice',
  survey         'summary',
  trend          'trend',
  tec_score      'technical score',
  tec_content    'technical summary',
  funds_score    'capital-flow score',
  funds_content  'capital-flow summary',
  msg_score      'news score',
  msg_content    'news summary',
  trade_score    'industry score',
  trade_content  'industry summary',
  basic_score    'fundamentals score',
  basic_content  'fundamentals summary',
  opt_trend      'stock-picking signals',
  tec_form       'technical pattern',
  buy_signal     'buy signal',
  b.price        'price',
  sy_j           'P/E ratio',
  sy_d           'P/E ratio (dynamic)',
  zdf            'price change (%)',
  cha_rate       'turnover rate (%)'
from stocks a
left join syl b
  on a.code = b.code;
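
The same join can also be driven from Python, for example to pull the highest-scoring stocks together with their P/E ratios. A small sketch against the tables built above (the + 0 casts the varchar score so MySQL sorts it numerically):

import pymysql

db = pymysql.connect(host='localhost', user='root', passwd='123456',
                     db='test', charset='utf8')
cursor = db.cursor()
cursor.execute("""
    select a.code, a.name, a.score, b.sy_d, b.price
    from stocks a
    left join syl b on a.code = b.code
    order by a.score + 0 desc
    limit 10
""")
for code, name, score, pe_dynamic, price in cursor.fetchall():
    print(code, name, score, pe_dynamic, price)
db.close()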