python3 爬蟲—爬取天氣預報多個城市七天資訊（三）

阿新 • • 發佈：2019-01-10

一、內容：

利用BeautifulSoup抓取中國天氣網各個城市7天的日期、天氣狀態、最高溫、最低溫等相關資訊，並記錄儲存在本地csv表格檔案中。

爬取的頁面截圖：

html獲取資訊截圖：

二、原理：

1.利用requests獲取請求、BeautifulSoup抓取資料。

2.通過readlines()讀取city.txt檔案的天氣介面來生成各城市的url

3.通過Python標準庫csv將抓取到的資料寫入weather.csv表格

獲取URL：

#  獲取每個城市對應天氣的url
def get_url(city_name, city_file=r'D:\py_project\weather\city.txt'):
    """Return the 7-day forecast URL for the city named *city_name*.

    The city list file is scanned line by line; the first line that contains
    *city_name* as a substring is used. The text before the first ``=`` on
    that line is taken as the city code.
    NOTE(review): this presumably means each line looks like ``<code>=<name>``
    -- confirm against the real city.txt.

    Args:
        city_name: City name to look up (surrounding whitespace is ignored).
        city_file: Path of the city list file. Defaults to the original
            hard-coded location.

    Returns:
        ``'http://www.weather.com.cn/weather/<code>.shtml'``.

    Raises:
        ValueError: If *city_name* is empty/blank or no line contains it.
    """
    base_url = 'http://www.weather.com.cn/weather/'

    # An empty string is a substring of every line, so without this guard a
    # blank name would silently resolve to the first city in the file.
    wanted = city_name.strip() if city_name else ''
    if not wanted:
        raise ValueError('invalid city name')

    # 'utf-8-sig' reads plain UTF-8 as well as files saved with a BOM; with
    # plain 'UTF-8' a BOM would end up inside the first city's code.
    with open(city_file, 'r', encoding='utf-8-sig') as fs:
        for line in fs:
            if wanted in line:
                code = line.split('=')[0].strip()
                return base_url + code + '.shtml'
    raise ValueError('invalid city name')

傳送URL GET請求，獲取response：

#  對網頁傳送get請求，返回的是response的文字內容（rep.text）
def get_content(url, data=None):
    """Download *url* with browser-like headers and return the body as text.

    The request is retried without limit: whenever one of the handled
    network errors occurs, the error is printed, the function sleeps for a
    random number of seconds and then tries again.

    Args:
        url: Address of the page to fetch.
        data: Not used by this function; kept so existing calls still work.

    Returns:
        ``rep.text`` of the successful response, with the response encoding
        set to UTF-8.
    """
    # Headers that make the request look like it comes from a desktop browser.
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.235'
    }
    # Request timeout in seconds, chosen once at random from 80..179.
    timeout = random.choice(range(80, 180))
    while True:
        try:
            rep = requests.get(url, headers=header, timeout=timeout)
            rep.encoding = 'utf-8'
            break
        except socket.timeout as e:
            print('3:', e)
            time.sleep(random.choice(range(8, 15)))
        except socket.error as e:
            print('4:', e)
            time.sleep(random.choice(range(20, 60)))
        except http.client.BadStatusLine as e:
            print('5:', e)
            time.sleep(random.choice(range(30, 80)))
        # The original repeated BadStatusLine here, which made this branch
        # unreachable; IncompleteRead is the other http.client error that
        # can interrupt a download.
        except http.client.IncompleteRead as e:
            print('6:', e)
            time.sleep(random.choice(range(5, 15)))

    return rep.text

抓取天氣資訊：

# 獲取html中我們所需要的欄位：
def get_data(html_text, city_name):
    """Extract the forecast rows from a weather page.

    The page is parsed with ``html.parser``; the ``<div id="7d">`` element
    inside ``<body>`` is located, then every ``<li>`` under its first
    ``<ul>`` is turned into one row.

    Args:
        html_text: HTML source of the forecast page.
        city_name: Name stored as the single cell of the first row.

    Returns:
        A list of lists: ``[city_name]`` first, then one
        ``[date, weather, highest, lowest]`` row per ``<li>``. ``highest`` is
        ``None`` when the temperature paragraph has no ``<span>``; the
        ``℃`` sign is removed from both temperatures.
    """
    soup = BeautifulSoup(html_text, "html.parser")
    forecast_div = soup.body.find('div', {'id': '7d'})
    day_items = forecast_div.find('ul').find_all('li')

    rows = [[city_name]]
    for item in day_items:
        date_text = item.find('h1').string

        paragraphs = item.find_all('p')
        weather_text = paragraphs[0].string  # first <p>: weather description
        temperature_p = paragraphs[1]  # second <p>: temperatures

        # The highest temperature lives in a <span> that may be absent
        # (the original author notes this happens in the evening).
        high_tag = temperature_p.find('span')
        if high_tag is None:
            highest = None
        else:
            highest = high_tag.string.replace('℃', '')

        # The lowest temperature lives in an <i> tag and ends with '℃'.
        lowest = temperature_p.find('i').string.replace('℃', '')

        rows.append([date_text, weather_text, highest, lowest])

    return rows

儲存資訊到CSV中：

# 將抓取出來的資料寫入檔案
def write_data(city_name, data, file_name, encoding=None):
    """Append rows to a CSV file and print a confirmation message.

    Args:
        city_name: Name inserted into the printed confirmation.
        data: Iterable of rows handed to ``csv.writer.writerows``.
        file_name: Path of the CSV file. It is opened in append mode, so
            existing content is kept.
        encoding: Text encoding used for the file. ``None`` (the default)
            keeps the previous behaviour of using the platform's default
            encoding; pass e.g. ``'utf-8'`` to get the same output on every
            machine.
    """
    # errors='ignore' silently drops characters the encoding cannot
    # represent, so choose an encoding that covers the data when that matters.
    # newline='' leaves line-ending handling to the csv module.
    with open(file_name, 'a', encoding=encoding, errors='ignore', newline='') as f:
        csv.writer(f).writerows(data)
        print('%s 天氣已新增成功' % city_name)

三、完整程式碼：

import requests
import csv
import random
import time
import socket
import http.client
from bs4 import BeautifulSoup


#  獲取每個城市對應天氣的url
def get_url(city_name, city_file=r'D:\py_project\weather\city.txt'):
    """Return the 7-day forecast URL for the city named *city_name*.

    The city list file is scanned line by line; the first line that contains
    *city_name* as a substring is used. The text before the first ``=`` on
    that line is taken as the city code.
    NOTE(review): this presumably means each line looks like ``<code>=<name>``
    -- confirm against the real city.txt.

    Args:
        city_name: City name to look up (surrounding whitespace is ignored).
        city_file: Path of the city list file. Defaults to the original
            hard-coded location.

    Returns:
        ``'http://www.weather.com.cn/weather/<code>.shtml'``.

    Raises:
        ValueError: If *city_name* is empty/blank or no line contains it.
    """
    base_url = 'http://www.weather.com.cn/weather/'

    # An empty string is a substring of every line, so without this guard a
    # blank name would silently resolve to the first city in the file.
    wanted = city_name.strip() if city_name else ''
    if not wanted:
        raise ValueError('invalid city name')

    # 'utf-8-sig' reads plain UTF-8 as well as files saved with a BOM; with
    # plain 'UTF-8' a BOM would end up inside the first city's code.
    with open(city_file, 'r', encoding='utf-8-sig') as fs:
        for line in fs:
            if wanted in line:
                code = line.split('=')[0].strip()
                return base_url + code + '.shtml'
    raise ValueError('invalid city name')


#  對網頁傳送get請求，返回的是response的文字內容（rep.text）
def get_content(url, data=None):
    """Download *url* with browser-like headers and return the body as text.

    The request is retried without limit: whenever one of the handled
    network errors occurs, the error is printed, the function sleeps for a
    random number of seconds and then tries again.

    Args:
        url: Address of the page to fetch.
        data: Not used by this function; kept so existing calls still work.

    Returns:
        ``rep.text`` of the successful response, with the response encoding
        set to UTF-8.
    """
    # Headers that make the request look like it comes from a desktop browser.
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.235'
    }
    # Request timeout in seconds, chosen once at random from 80..179.
    timeout = random.choice(range(80, 180))
    while True:
        try:
            rep = requests.get(url, headers=header, timeout=timeout)
            rep.encoding = 'utf-8'
            break
        except socket.timeout as e:
            print('3:', e)
            time.sleep(random.choice(range(8, 15)))
        except socket.error as e:
            print('4:', e)
            time.sleep(random.choice(range(20, 60)))
        except http.client.BadStatusLine as e:
            print('5:', e)
            time.sleep(random.choice(range(30, 80)))
        # The original repeated BadStatusLine here, which made this branch
        # unreachable; IncompleteRead is the other http.client error that
        # can interrupt a download.
        except http.client.IncompleteRead as e:
            print('6:', e)
            time.sleep(random.choice(range(5, 15)))

    return rep.text


# 獲取html中我們所需要的欄位：
def get_data(html_text, city_name):
    """Extract the forecast rows from a weather page.

    The page is parsed with ``html.parser``; the ``<div id="7d">`` element
    inside ``<body>`` is located, then every ``<li>`` under its first
    ``<ul>`` is turned into one row.

    Args:
        html_text: HTML source of the forecast page.
        city_name: Name stored as the single cell of the first row.

    Returns:
        A list of lists: ``[city_name]`` first, then one
        ``[date, weather, highest, lowest]`` row per ``<li>``. ``highest`` is
        ``None`` when the temperature paragraph has no ``<span>``; the
        ``℃`` sign is removed from both temperatures.
    """
    soup = BeautifulSoup(html_text, "html.parser")
    forecast_div = soup.body.find('div', {'id': '7d'})
    day_items = forecast_div.find('ul').find_all('li')

    rows = [[city_name]]
    for item in day_items:
        date_text = item.find('h1').string

        paragraphs = item.find_all('p')
        weather_text = paragraphs[0].string  # first <p>: weather description
        temperature_p = paragraphs[1]  # second <p>: temperatures

        # The highest temperature lives in a <span> that may be absent
        # (the original author notes this happens in the evening).
        high_tag = temperature_p.find('span')
        if high_tag is None:
            highest = None
        else:
            highest = high_tag.string.replace('℃', '')

        # The lowest temperature lives in an <i> tag and ends with '℃'.
        lowest = temperature_p.find('i').string.replace('℃', '')

        rows.append([date_text, weather_text, highest, lowest])

    return rows


# 將抓取出來的資料寫入檔案
def write_data(city_name, data, file_name, encoding=None):
    """Append rows to a CSV file and print a confirmation message.

    Args:
        city_name: Name inserted into the printed confirmation.
        data: Iterable of rows handed to ``csv.writer.writerows``.
        file_name: Path of the CSV file. It is opened in append mode, so
            existing content is kept.
        encoding: Text encoding used for the file. ``None`` (the default)
            keeps the previous behaviour of using the platform's default
            encoding; pass e.g. ``'utf-8'`` to get the same output on every
            machine.
    """
    # errors='ignore' silently drops characters the encoding cannot
    # represent, so choose an encoding that covers the data when that matters.
    # newline='' leaves line-ending handling to the csv module.
    with open(file_name, 'a', encoding=encoding, errors='ignore', newline='') as f:
        csv.writer(f).writerows(data)
        print('%s 天氣已新增成功' % city_name)


if __name__ == '__main__':
    # Script entry point: read city names from the user, then fetch, parse
    # and store the forecast for each one in turn.

    # split() with no argument splits on any run of whitespace and discards
    # empty strings, so repeated or trailing spaces no longer yield an empty
    # city name (split(' ') did).
    cities = input('請輸入城市名稱（一個或多個，以空格隔開）： ').split()
    for city in cities:
        url = get_url(city)  # forecast URL for this city
        html = get_content(url)  # HTML of the forecast page
        result = get_data(html, city)  # rows extracted from the page
        # Raw string so the backslashes in the Windows path are never
        # interpreted as escape sequences.
        write_data(city, result, r'D:\py_project\weather\weather.csv')  # append rows to the CSV file

四、執行結果：

python3 爬蟲—爬取天氣預報多個城市七天資訊（三）

一、內容：利用BeautifulSoup抓取中國天氣網各個城市7天的時間天氣狀態最高溫最低溫的相關資訊，並記錄儲存在本地csv表格檔案中。爬取的頁面截圖： html獲取資訊截圖：二、原理： 1.利用requests獲取請求

Java實現簡單爬蟲爬取天氣預報

爬蟲爬取網頁的主要流程是： 1.向目標網頁發起請求； 2.對於獲取到的html檔案進行解析； 3.對解析後的資料進行儲存。本次主要是爬取全國城市未來7天的天氣預報，爬取物件為中國天氣網，爬取的資料存入文字中。對於html檔案的解析採用Jsoup結合正則表示式。地區程

如何利用Python網絡爬蟲爬取微信朋友圈動態--附代碼（下）

CA external 令行 sta 項目程序 str 輸入 tar 前天給大家分享了如何利用Python網絡爬蟲爬取微信朋友圈數據的上篇（理論篇），今天給大家分享一下代碼實現（實戰篇），接著上篇往下繼續深入。一、代碼實現 1、修改Scrapy項目中的ite

python：爬蟲爬取資料的處理之Json字串的處理（2）

#Json字串的處理 Json字串轉化為Python資料型別 import json JsonStr ='{"name":"sunck","age":"18","hobby":["money","power","English"],"parames":{"a":1,"b":2}}' Js

python簡單爬蟲：爬取並統計自己部落格頁面的資訊（一）

1. 什麼是爬蟲也叫網路爬蟲，簡單來說，爬蟲就是從一個根網站出發，根據某種規則獲得更多的相關網站的url，自動下載這些網頁並自動解析這些網頁的內容，從中獲取需要的資料。例如爬取某種圖片、某類文字資訊等。爬蟲還可以用於編纂搜尋引擎的網路索引。爬蟲所涉及的知

python3實現爬取淘寶頁面的商品的資料資訊（selenium+pyquery+mongodb）

1.環境須知做這個爬取的時候需要安裝好python3.6和selenium、pyquery等等一些比較常用的爬取和解析庫，還需要安裝MongoDB這個分散式資料庫。 2.直接上程式碼 spider.py import re from config

Python3爬蟲爬取淘寶商品數據

表格 name 錯誤處理 from [0 https iat turn 感覺這次的主要的目的是從淘寶的搜索頁面獲取商品的信息。其實分析頁面找到信息很容易，頁面信息的存放都是以靜態的方式直接嵌套的頁面上的，很容易找到。主要困難是將信息從HTML源碼中剝離出來，數據和網頁源碼

Python爬取天氣預報

exc res http tee parser ror .cn date req 將持續更新…… 1.實現爬取一天的天氣預報 from urllib.request import urlopen from bs4 import BeautifulSoup import re

python3爬蟲爬取煎蛋網妹紙圖片

port 商業技術分享爬取其中 lar c函數 base 技術其實之前實現過這個功能，是使用selenium模擬瀏覽器頁面點擊來完成的，但是效率實際上相對來說較低。本次以解密參數來完成爬取的過程。首先打開煎蛋網http://jandan.net/ooxx，查看網頁

python3爬蟲 -----爬取鬥圖息-------www.doutula.com

run __init__ args gin uid == utf-8 date src 普通爬取： 1 # -*- coding:utf-8 -*- 2 # author:zxy 3 # Date:2018-10-21 4 import requests 5 f

python3爬蟲 -----爬取百思不得姐信息-------http://www.budejie.com/

chrom tree www cti mozilla from tar 2-0 sum 1 # -*- coding:utf-8 -*- 2 # author:zxy 3 # Date:2018-10-21 4 5 import request 6 from

python3爬蟲--爬取豆瓣Top250的圖書

from lxml import etree import requests import csv fp = open('doubanBook.csv', 'wt', newline='', encoding='utf-8') writer = csv.writer(fp) writer.

python3爬蟲爬取網頁圖片簡單示例

本人也是剛剛開始學習python的爬蟲技術，然後本來想在網上找點教程來看看，誰知道一搜索，大部分的都是用python2來寫的，新手嘛，一般都喜歡裝新版本。於是我也就寫一個python3簡單的爬蟲，爬蟲一下貼吧的圖片吧。話不多說，我們開始。首先簡單來說說一下知識。一

爬蟲基本介紹 && python3 爬蟲爬取網易新聞排行榜

爬蟲基本介紹 1. 什麼是爬蟲？爬蟲是請求⽹網站並提取資料的⾃自動化程式 2. 爬蟲的基本流程發起請求通過HTTP庫向目標站點發起請求，即傳送一個Request，請求可以包含額外的headers等資訊，等待伺服器器響應。解析內容

Python爬取天氣預報資料，並存入到本地EXCEL中

近期忙裡偷閒，搞了幾天python爬蟲，基本可以實現常規網路資料的爬取，比如糗事百科、豆瓣影評、NBA資料、股票資料、天氣預報等的爬取，整體過程其實比較簡單，有一些HTML+CSS+DOM樹等知識就很easy，我就以天氣預報資料的爬取為例，整理出來。需求：採

python3爬蟲 -----爬取大學資訊並通過matplotlib與numpy繪製結果-----from最好大學網

爬取大學資訊並通過matplotlib與numpy繪製多指標柱形圖就某一因素繪製餅圖並突出其中一個物件（本例中為江西理工大學） 1 # -*- coding:utf-8 -*- 2 # author:zxy 3 # date:2018-12-24 4 # upda

python3爬蟲爬取金庸小說所有角色

# -*- coding:utf-8 -*- import requests from bs4 import BeautifulSoup url = 'http://www.jinyongwang.c

python3爬蟲爬取圖片，爬取新聞網站文章並儲存到資料庫

2017年9月16日零基礎入門Python，第二天就給自己找了一個任務，做網站文章的爬蟲小專案，因為實戰是學程式碼的最快方式。所以從今天起開始寫Python實戰入門系列教程，也建議大家學Python時一定要多寫多練。目標 1，學習Python爬蟲 2

python3 爬蟲爬取深圳公租房輪候庫（深圳房網）

深圳公租房輪候庫已經朝著幾十萬人的規模前進了，這是截至16年10月之前的資料了，貼上來大家體會下所以17年已更新妥妥的10W+ 今天就拿這個作為爬蟲的練手專案 1、環境準備：作業系統：win10 python版本：python3.5.3 開發工具：sublime 3 python需要安裝的庫：　

python3爬蟲-爬取新浪新聞首頁所有新聞標題

準備工作：安裝requests和BeautifulSoup4。開啟cmd，輸入如下命令 pip install requests pip install BeautifulSoup4 按F12開啟開發人員工具，點選左上角的圖片，然後再頁面中點選你想檢

python3 爬蟲—爬取天氣預報多個城市七天資訊（三）

相關推薦