
How to fetch weather data from a web page using only Python


Fetching data from a web page really takes just three steps!

Step 1: use Python to fetch the complete front-end HTML of the page (this needs the requests module).

Step 2: filter the fetched HTML and pull out the useful weather data (this needs BeautifulSoup from the bs4 module).

Step 3: save the extracted weather data locally.

PS: the other modules used are time, random, socket, csv and http.client.

Enough talk, straight to the code!

First, import the required modules:

from bs4 import BeautifulSoup
import requests, time, random, socket, csv
import http.client
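
requests and bs4 are third-party packages (typically installed as requests and beautifulsoup4); the standard-library modules need no installation. A minimal sketch, not part of the original post, to confirm that the two third-party modules are importable:

import importlib.util

# Report whether the two third-party modules used below can be imported
for module in ('requests', 'bs4'):
    found = importlib.util.find_spec(module) is not None
    print(module, 'OK' if found else 'missing -- install it with pip first')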

Step 1: use Python to fetch the complete front-end HTML of the page (this needs the requests module).

# Fetch the complete HTML of the requested URL
def htmlcontent(url, data=None):
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.235'
    }   # request headers
    timeout = random.choice(range(80, 180))
    while True:
        try:
            rep = requests.get(url, headers=header, timeout=timeout)   # request the URL and get the response
            rep.encoding = 'utf-8'
            break
        except socket.timeout as e:
            print('3:', e)
            time.sleep(random.choice(range(8, 15)))
        except socket.error as e:
            print('4:', e)
            time.sleep(random.choice(range(20, 60)))
        except http.client.BadStatusLine as e:
            print('5:', e)
            time.sleep(random.choice(range(30, 80)))
        except http.client.IncompleteRead as e:
            print('6:', e)
            time.sleep(random.choice(range(5, 15)))
    return rep.text   # the full HTML of the page
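
A quick way to sanity-check the function is to fetch the page once and look at the size and the start of the returned HTML. The URL below is the same one used later in the article; the exact output depends on the live page:

html = htmlcontent('http://www.weather.com.cn/weather/101010100.shtml')
print(len(html))      # total number of characters fetched
print(html[:200])     # peek at the beginning of the page source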

Step 2: filter the fetched HTML and pull out the useful weather data (this needs BeautifulSoup from the bs4 module).

# Filter out the useful data
def weatherdata(html_text):
    data_al = []
    bs = BeautifulSoup(html_text, "html.parser")   # build a BeautifulSoup object, parsed with html.parser
    li = bs.body.find('div', {'id': '7d'}).find('ul').find_all('li')   # locate the daily weather entries via the page's HTML tags

    for data in li:
        temp = []
        date = data.find('h1').string
        inf = data.find_all('p')
        weather = inf[0].string   # weather description
        temperature_highest = inf[1].find('span').string    # highest temperature
        temperature_low = inf[1].find('i').string   # lowest temperature
        temp.append(date)   # add the date
        temp.append(weather)    # add the weather
        temp.append(temperature_low)    # add the lowest temperature
        temp.append(temperature_highest)    # add the highest temperature
        data_al.append(temp)  # collect every row in one list
    return data_al
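
Each element of the returned list is itself a list in the order date, weather, lowest temperature, highest temperature. Continuing from the sketch under step 1 (so html has already been fetched), printing the result is an easy way to inspect it; the actual strings depend on the live page:

result = weatherdata(html)
for day in result:
    print(day)    # one row per day: [date, weather, lowest temperature, highest temperature]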

Step 3: save the extracted weather data locally.

# Write the data to a local file
def writedata(data, name):
    with open(name, 'a', errors='ignore', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerows(data)
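
The file is opened in append mode, so each run adds new rows to the end. If a header row is wanted, one option is to write it once before appending any data; writeheader below is a hypothetical helper, not part of the original script:

def writeheader(name):
    # Write a single header row; call this once before appending any data rows
    with open(name, 'a', errors='ignore', newline='') as f:
        csv.writer(f).writerow(['date', 'weather', 'low', 'high'])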

Finally, call the functions:

if __name__ == '__main__':
    url = 'http://www.weather.com.cn/weather/101010100.shtml'   # the page that provides the weather data
    html = htmlcontent(url)    # fetch the page HTML
    result = weatherdata(html)    # parse the HTML and extract the data we need
    writedata(result, 'C:/Users/LoveCounter/Desktop/天氣test.csv')  # write the data to a CSV file
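
One practical note: because open() is called without an explicit encoding, the CSV may show garbled Chinese characters when opened in Excel on some systems. If that happens, writing with the utf-8-sig encoding is a common workaround. A sketch that would replace the writedata call above, using the same path as the article:

# Assumption: result is the list returned by weatherdata(); utf-8-sig adds a BOM so Excel detects UTF-8
with open('C:/Users/LoveCounter/Desktop/天氣test.csv', 'a', encoding='utf-8-sig', errors='ignore', newline='') as f:
    csv.writer(f).writerows(result)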

The complete code is as follows:

from bs4 import BeautifulSoup
import requests, time, random, socket, csv
import http.client


# Fetch the complete HTML of the requested URL
def htmlcontent(url, data=None):
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.235'
    }   # request headers
    timeout = random.choice(range(80, 180))
    while True:
        try:
            rep = requests.get(url, headers=header, timeout=timeout)   # request the URL and get the response
            rep.encoding = 'utf-8'
            break
        except socket.timeout as e:
            print('3:', e)
            time.sleep(random.choice(range(8, 15)))

        except socket.error as e:
            print('4:', e)
            time.sleep(random.choice(range(20, 60)))

        except http.client.BadStatusLine as e:
            print('5:', e)
            time.sleep(random.choice(range(30, 80)))

        except http.client.IncompleteRead as e:
            print('6:', e)
            time.sleep(random.choice(range(5, 15)))
    return rep.text   # the full HTML of the page

# Filter out the useful data
def weatherdata(html_text):
    data_al = []
    bs = BeautifulSoup(html_text, "html.parser")   # build a BeautifulSoup object, parsed with html.parser
    li = bs.body.find('div', {'id': '7d'}).find('ul').find_all('li')   # locate the daily weather entries via the page's HTML tags

    for data in li:
        temp = []
        date = data.find('h1').string
        inf = data.find_all('p')
        weather = inf[0].string   # weather description
        temperature_highest = inf[1].find('span').string    # highest temperature
        temperature_low = inf[1].find('i').string   # lowest temperature
        temp.append(date)   # add the date
        temp.append(weather)    # add the weather
        temp.append(temperature_low)    # add the lowest temperature
        temp.append(temperature_highest)    # add the highest temperature
        data_al.append(temp)  # collect every row in one list
    return data_al


# Write the data to a local file
def writedata(data, name):
    with open(name, 'a', errors='ignore', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerows(data)


if __name__ == '__main__':
    url = 'http://www.weather.com.cn/weather/101010100.shtml'   # the page that provides the weather data
    html = htmlcontent(url)    # fetch the page HTML
    result = weatherdata(html)    # parse the HTML and extract the data we need
    writedata(result, 'C:/Users/LoveCounter/Desktop/天氣test.csv')  # write the data to a CSV file
