python BeautifulSoup的簡單用法

阿新 • • 發佈：2019-01-22

from bs4 import BeautifulSoup
import re
# Sample document used by every example below (the "three sisters" snippet
# from the Beautiful Soup documentation).  The <p>/<b> tags and the
# <!-- Elsie --> comment are required: the examples further down call
# soup.p, find_all('b') and select('p #link1'), and one note points out
# that "Elsie" lives inside an HTML comment.
html = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1"><!-- Elsie --></a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""

# Build the parse tree from the sample markup.  "html.parser" selects
# Python's built-in HTML parser, so no extra parser package is needed.
soup = BeautifulSoup(markup=html, features="html.parser")

#repr()函式，返回的值可以給直譯器呼叫，具體用法後續再研究，跟str()好像差不多.str()對使用者更友好一些？不能再次給直譯器呼叫
#print soup.a
# print type(soup)
#print soup.a.string
#子節點.contents .children 屬性
#tag 的.contents屬性可以將tag的子節點以列表的形式輸出
#print soup.head.contents#輸出列表[<title>The Dormouse's story</title>]
#print soup.head.contents[0]#拿出列表的第一個元素
#children = soup.a.children #輸出一個list生成器物件<listiterator object at 0x00000000020DDD30>
#for child in children:#生成器物件需要用for in 遍歷才能打印出每個子節點
# print child
#.descendants屬性，子孫節點
#for child in soup.descendants:#跟children類似，需要用for in 打印出
# print child
#print type(soup.a.children)
#如果一個tag裡有多個標籤,那麼 .string就會返回none,如果一個標籤裡只有一個標籤，那麼.string返回最裡面內容
#for string in soup.strings:#.strings的用法，需要遍歷。
# print repr(string)
#for string in soup.stripped_strings:#stripped_strings的方法去除空格可能會用的比較多
# print string#不是用print repr(string)，只使用print string也可以輸出
#5::父節點.parent屬性
#p = soup.p
#print p.parent.name#結果是body
#print soup.p.parent.name#這種寫法也可以輸出........幹嘛教程用上面的方法教？？？
#print soup.head.title.string.parent.name#這種寫法太長了。。。。還是教程的方法好?
#6:parents的用法，可以遞迴得到元素所有父節點
#content = soup.head.title.string#注意要用元素string
#for parent in content.parents:#要用遍歷的方法
# print parent.name
#7:兄弟節點next_sibling,previous_sibling屬性
#可以理解為獲取該節點的同一級節點.
#print soup.p.next_sibling#此處為空白，next_sibling獲取該節點的下一個兄弟節點，下一個是空白會返回空白
#print soup.p.previous_sibling#屬性名是previous_sibling；寫成prev_sibling會被bs4當成查詢名為prev_sibling的子標籤，所以總是返回none。previous_sibling獲取該節點的前一個兄弟節點，如果之前沒節點返回none
#8:全部兄弟節點next_siblings,previous_siblings。
#對當前節點的兄弟節點迭代輸出
#print type(soup.p.next_siblings)#屬性是generator????發生器。待理解
#for sibling in soup.p.next_siblings:#也是用遍歷的方式。
# print repr(sibling)#
#前後節點next_element previous_element.並不針對兄弟節點，而是前後節點都可以輸出，不分層級
#print soup.head.next_element#之前的節點title並不是一個層級依然輸出.
#print soup.head.previous_element#注意，這裡是用previous_element,不像之前用previous_sibling.而且輸出了所有內容？？？？？
#所有前後節點，next_elements,previous_elements屬性
#通過next_elements和previous_elements的迭代器就可以向前或向後輸出解析內容,注意輸出的是內容
#for element in soup.head.next_elements:#s輸出之後的內容
# print repr(element)
#for element in soup.head.previous_elements:
# print repr(element)
#last_a_tag = soup.find("a", id="link3")#last_a_tag需要先定義，否則會報NameError（並不是少引入了模組）
#for element in last_a_tag.next_elements:#遍歷最後一個a標籤之後的所有節點
# print element

#find_all(name,attrs,recursive,text,**kwargs)方法
#搜尋當前tag的所有tag子節點，並判斷是否符合過濾器條件
#name引數，可以查詢所有名字為name的tag，字串物件會被忽略
#傳字串
#print soup.find_all('b')#[The Dormouse's story],得到所有的標籤和內容
#傳正則表示式
#for tag in soup.find_all(re.compile("^b")):#傳入正則表示式
# print tag.name
#傳列表
#print soup.find_all(["a","b"])#任意一個與列表引數匹配的都會返回
#傳true
#for tag in soup.find_all(True):#傳不傳True的結果一樣
# print tag.name
#傳方法
#def has_class_but_no_id(tag):
# return tag.has_attr('class') and not tag.has_attr('id')#has_attr()是判斷物件是否含這個屬性，返回值true和false
#print soup.find_all(has_class_but_no_id)
#keyword引數搜尋tag中特定屬性使用，
#print soup.find_all(id='link2')#結果：[<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>]
#print soup.find_all(href=re.compile("elsie"))#傳入正則
#print soup.find_all(href=re.compile("elsie"),id='link1')#傳入兩個屬性或者多個屬性
#print soup.find_all('a',class_="sister")#class是python關鍵詞，在class後面加下劃線可以用
#data不能用，但是可以定義字典來使用，，例如print soup.find_all(attrs={"data-foo":"value"})
#text引數
#通過text引數可以搜尋文件中字串內容,與name引數一樣可以傳入字串，正則，列表，True
#print soup.find_all(text="Elsie")#Elsie在註釋中，是打印不出來的。。。。。例子有錯
#print soup.find_all(text=["Tillie","Elsie","Lacie"])
#print soup.find_all(text=re.compile("Dormouse"))#傳入正則
#limit引數
#限制傳回的數量
#print soup.find_all("a",limit=2)#只傳回兩個
#recursive引數
#當呼叫find_all()方法時，會檢索所有子孫tag,呼叫recursive引數可以只搜尋子節點
#print soup.html.find_all('title',recursive=False)

#find( name , attrs , recursive , text , **kwargs )

#它與 find_all() 方法唯一的區別是 find_all() 方法返回的是包含所有匹配結果的列表（即使只有一個元素）,而 find() 方法直接返回第一個匹配結果（找不到時返回None）

#find_parents() find_parent()

#find_all() 和 find() 只搜尋當前節點的所有子節點,孫子節點等. find_parents() 和 find_parent() 用來搜尋當前節點的父輩節點,搜尋方法與普通tag的搜尋方法相同,搜尋文件包含的內容

#find_next_siblings() find_next_sibling()

#這2個方法通過 .next_siblings 屬性對當前 tag 的所有後面解析的兄弟 tag 節點進行迭代, find_next_siblings() 方法返回所有符合條件的後面的兄弟節點,find_next_sibling() 只返回符合條件的後面的第一個tag節點

#find_previous_siblings() find_previous_sibling()

#這2個方法通過 .previous_siblings 屬性對當前 tag 的前面解析的兄弟 tag 節點進行迭代, find_previous_siblings() 方法返回所有符合條件的前面的兄弟節點, find_previous_sibling() 方法返回第一個符合條件的前面的兄弟節點

#find_all_next() find_next()

#這2個方法通過 .next_elements 屬性對當前 tag 的之後的 tag 和字串進行迭代, find_all_next() 方法返回所有符合條件的節點, find_next() 方法返回第一個符合條件的節點

#find_all_previous() 和 find_previous()

#這2個方法通過 .previous_elements 屬性對當前節點前面的 tag 和字串進行迭代, find_all_previous() 方法返回所有符合條件的節點, find_previous()方法返回第一個符合條件的節點

#css選擇器
#我們在寫 CSS 時，標籤名不加任何修飾，類名前加點，id名前加 #，在這裡我們也可以利用類似的方法來篩選元素，用到的方法是 soup.select()，返回型別是 list

#print soup.select('title')#通過標籤名查詢
#print soup.select('.sister')#通過類名查詢，記得加句號
#print soup.select('#link1')#通過id名查詢，記得加#

#組合查詢，用空格隔開
#print soup.select('p #link1')#p中的id為link1的內容
#print soup.select("head > title")#直接子標籤查詢

#屬性查詢屬性需要用中括號括起來，注意屬性和標籤屬於同一節點，所以中間不能加空格，否則會無法匹配到。

#print soup.select('a[class="sister"]')
#select 方法返回的結果都是列表形式，可以遍歷形式輸出，然後用 get_text() 方法來獲取它的內容
#print soup.select('title')[0].get_text()#第一種輸出方法

#for title in soup.select('title'):#第二種輸出方法
# print title.text

python BeautifulSoup的簡單用法

Python BeautifulSoup 簡單筆記

python BeautifulSoup的簡單用法

python爬蟲--BeautifulSoup的簡單用法

Python:第三方模組BeautifulSoup的安裝及簡單用法

python argpare 模塊的簡單用法

python lambda表達式簡單用法

Python圖形介面之Tkinter簡單用法

python datetime.datetime.strptime 模組的簡單用法

python中logging模組的一些簡單用法

爬蟲入門，爬蟲簡單的入門庫Beautifulsoup庫,解析網頁，簡單用法-案例篇（5）

Python使用BeautifulSoup簡單實現爬取妹子mm圖片--初級篇

python中turtle(海龜)模組的簡單用法

beautifulsoup的簡單用法

python-requests簡單介紹及用法

python BeautifulSoup的簡單使用

Python學習：時間處理工具--dateutil兩個簡單用法

（數據科學學習手劄54）Python中retry的簡單用法

【python】pandas的簡單用法

Python BeautifulSoup庫的用法

python 下載儲存圖片的urllib.urlretrieve()函式簡單用法

python BeautifulSoup的簡單用法

相關推薦