Python3.x:定時獲取頁面數據存入數據庫
阿新 • • 發佈:2018-01-01
oda 關閉 font mysq 發生 one web jinfo tex
Python3.x:定時獲取頁面數據存入數據庫
import pymysql
import urllib.request
from bs4 import BeautifulSoup


# Number of fields the driver expects getJjInfor to return:
# code, name, three net-value figures, estimate time, unit-net-value date.
EXPECTED_FIELD_COUNT = 7


def doDataWlpc(jjdm, jjmc, dwjz, dwjzrq):
    """Store one fund record in ``pythondb.t_x01_wlpc`` unless it already exists.

    Args:
        jjdm: Fund code.
        jjmc: Fund name.
        dwjz: Unit net value, as text.
        dwjzrq: Date of the unit net value, as text.

    Returns:
        0 when the row was inserted or was already present; 1 when any
        database step failed (the failure is printed, not raised).
    """
    print('基金信息:' + jjdm + ',' + jjmc + ',' + dwjz + ',' + dwjzrq)
    conn = None
    try:
        # NOTE(review): connection settings and credentials are hard-coded;
        # consider moving them to configuration.
        conn = pymysql.connect(host='localhost', user='root', passwd='lizm',
                               db='pythondb', port=3306, charset='utf8')
        with conn.cursor() as cursor:
            # Check whether this fund/value/date combination is already stored.
            # Values are passed as parameters so that scraped text can never
            # alter the SQL statement. The fund code is part of the key so
            # that two funds sharing a value and date do not block each other.
            sql_check = ("SELECT * FROM pythondb.t_x01_wlpc "
                         "WHERE jjdm=%s AND dwjz=%s AND dwjzrq=%s;")
            check_args = (jjdm, dwjz, dwjzrq)
            print('sql_check>>>:' + cursor.mogrify(sql_check, check_args))
            cursor.execute(sql_check, check_args)
            if cursor.fetchall():
                print('基金' + jjmc + '數據已存在')
                return 0

            sql = ("INSERT INTO pythondb.t_x01_wlpc "
                   "(jjdm,jjmc,dwjz,dwjzrq,oprdate) "
                   "VALUES(%s,%s,%s,%s,sysdate());")
            insert_args = (jjdm, jjmc, dwjz, dwjzrq)
            try:
                print('sql>>>:' + cursor.mogrify(sql, insert_args))
                cursor.execute(sql, insert_args)
                conn.commit()
            except Exception as exc:
                # Undo the partial write and report failure to the caller.
                conn.rollback()
                print(repr(exc))
                return 1
        return 0
    except Exception as exc:
        print("失敗,異常")
        print(repr(exc))
        return 1
    finally:
        # Release the connection on every path, including failures.
        if conn is not None:
            conn.close()


def getJjInfor(header_, url_, timeout=30):
    """Download a fund page and scrape its headline figures.

    Args:
        header_: Mapping of HTTP request headers.
        url_: URL of the fund page.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        A list of strings in this order: fund code, fund name, one entry per
        matching net-value ``span`` (in document order), the estimate
        timestamp with its parentheses removed, and the date found in
        parentheses in the first ``p`` of the ``dataItem02`` block.

    Raises:
        AttributeError, IndexError: If an expected element or the expected
            parenthesised text is missing from the page.
        urllib.error.URLError: If the page cannot be fetched.
    """
    req = urllib.request.Request(url=url_, headers=header_)
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(req, timeout=timeout) as res:
        html = res.read().decode('utf-8')
    soup = BeautifulSoup(html, 'html.parser')

    r_info = []

    # Fund code and fund name both live in the title block.
    title = soup.find('div', class_='fundDetail-tit')
    r_info.append(title.find('span', class_='ui-num').get_text())
    r_info.append(title.text.split('(')[0])

    # Estimated, unit and accumulated net values.
    # NOTE(review): a multi-class string passed to class_ matches only that
    # exact class attribute value, so spans styled with a different colour
    # class would presumably be skipped - confirm against the live page.
    for data_nums in soup.find_all('dd', class_='dataNums'):
        for value in data_nums.find_all(
                'span', class_='ui-font-large ui-color-red ui-num'):
            r_info.append(value.text)

    # Timestamp of the estimated net value, without surrounding parentheses.
    gz_gztime = soup.find(id='gz_gztime')
    r_info.append(gz_gztime.text.replace('(', '').replace(')', ''))

    # Unit-net-value date: the parenthesised text of the first <p> inside
    # the element with class 'dataItem02'.
    dwjzrq_s = soup.find('dl', class_='dataItem02').p
    r_info.append(dwjzrq_s.text.split('(')[1].split(')')[0])

    return r_info


# Manual test run
if __name__ == '__main__':
    url = r'http://fund.eastmoney.com/340007.html?spm=search'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}
    jj_infor = getJjInfor(headers, url)
    if len(jj_infor) != EXPECTED_FIELD_COUNT:
        # The positional indexes below are only meaningful when the scrape
        # produced exactly the expected fields; otherwise report failure
        # instead of storing the wrong values.
        return_code = 1
    else:
        return_code = doDataWlpc(jj_infor[0], jj_infor[1],
                                 jj_infor[3], jj_infor[6])
    if return_code == 0:
        print('執行成功')
    else:
        print('執行失敗')
定時功能後續增加。
Python3.x:定時獲取頁面數據存入數據庫