1. 程式人生 > >Python3.x:定時獲取頁面數據存入數據庫

Python3.x:定時獲取頁面數據存入數據庫

oda 關閉 font mysq 發生 one web jinfo tex

Python3.x:定時獲取頁面數據存入數據庫

import pymysql
import urllib.request
from bs4 import BeautifulSoup

# 數據入庫處理
def doDataWlpc(jjdm, jjmc, dwjz, dwjzrq):
    """Insert one fund record into ``pythondb.t_x01_wlpc`` unless it already exists.

    A row counts as "already existing" when a row with the same ``dwjz`` and
    ``dwjzrq`` is present in the table.

    Args:
        jjdm: Fund code (text).
        jjmc: Fund name (text).
        dwjz: Unit net value (text) -- presumably; verify against the caller.
        dwjzrq: Date of the unit net value (text).

    Returns:
        0 if the row was inserted or was already present, 1 on any failure.
    """
    print('基金信息:' + jjdm + ',' + jjmc + ',' + dwjz + ',' + dwjzrq)

    # Values are bound as query parameters instead of being concatenated into
    # the SQL text: they come from a scraped web page and must not be trusted.
    # NOTE(review): the existence check does not filter on jjdm; confirm that
    # the table is only ever used for a single fund.
    sql_check = ("SELECT 1 FROM pythondb.t_x01_wlpc "
                 "WHERE dwjz=%s AND dwjzrq=%s LIMIT 1;")
    sql_insert = ("INSERT INTO pythondb.t_x01_wlpc "
                  "(jjdm,jjmc,dwjz,dwjzrq,oprdate) "
                  "VALUES(%s,%s,%s,%s,sysdate());")

    conn = None
    try:
        # NOTE(review): credentials are hard-coded; consider moving them to
        # configuration or environment variables.
        conn = pymysql.connect(host='localhost', user='root', passwd='lizm',
                               db='pythondb', port=3306, charset='utf8')
        with conn.cursor() as cursor:
            print('sql_check>>>:' + sql_check)
            cursor.execute(sql_check, (dwjz, dwjzrq))
            if cursor.fetchone() is not None:
                print('基金' + jjmc + '數據已存在')
                return 0

            try:
                print('sql>>>:' + sql_insert)
                cursor.execute(sql_insert, (jjdm, jjmc, dwjz, dwjzrq))
                conn.commit()
            except Exception:
                # Undo the partial write, then let the outer handler report it.
                conn.rollback()
                raise
            return 0
    except Exception as exc:
        # Boundary handler: callers rely on a return code, not an exception.
        print("失敗,異常", exc)
        return 1
    finally:
        # Always release the connection, even when a query failed. A
        # connection that was already torn down must not be closed again.
        if conn is not None and conn.open:
            conn.close()


def getJjInfor(header_, url_):
    """Download a fund detail page and extract its headline figures.

    Args:
        header_: Mapping of HTTP request headers to send.
        url_: URL of the fund detail page.

    Returns:
        A list of strings in this order: fund code, fund name, every value
        found in the ``dd.dataNums`` blocks (in page order), the estimate
        timestamp with its parentheses removed, and the unit-net-value date
        taken from between the parentheses of the first ``<p>`` in
        ``dl.dataItem02``.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        AttributeError, IndexError: If the page does not have the expected
            layout.
    """
    req = urllib.request.Request(url=url_, headers=header_)
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(req) as res:
        html = res.read().decode('utf-8')
    soup = BeautifulSoup(html, 'html.parser')

    r_info = []

    # The title block holds "<name>(<code>)"; the code has its own span.
    title = soup.find('div', class_='fundDetail-tit')
    r_info.append(title.find('span', class_='ui-num').get_text())
    r_info.append(title.text.split('(')[0])

    # Estimated / unit / cumulative net values.
    for data_nums in soup.find_all('dd', class_='dataNums'):
        for value in data_nums.find_all(
                'span', class_='ui-font-large ui-color-red ui-num'):
            r_info.append(value.text)

    # Estimate timestamp, shown on the page wrapped in parentheses.
    gz_gztime = soup.find(id='gz_gztime')
    r_info.append(gz_gztime.text.replace('(', '').replace(')', ''))

    # Unit-net-value date: the text between the parentheses of the first <p>.
    dwjzrq_p = soup.find('dl', class_='dataItem02').p
    r_info.append(dwjzrq_p.text.split('(')[1].split(')')[0])

    return r_info


# Manual test run: fetch one fund page and store its unit net value.
if __name__ == '__main__':
    url = r'http://fund.eastmoney.com/340007.html?spm=search'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}
    jj_infor = getJjInfor(headers, url)
    # Indices: 0 = fund code, 1 = fund name, 3 = unit net value,
    # 6 = unit-net-value date (assumes the page yields three dataNums values).
    return_code = doDataWlpc(jj_infor[0], jj_infor[1], jj_infor[3], jj_infor[6])
    if return_code == 0:
        print('執行成功')
    else:
        print('執行失敗')

定時功能將於後續增加。

Python3.x:定時獲取頁面數據存入數據庫