爬取動態網站資料(使用 BeautifulSoup 的 CSS 選擇器處理資料)
阿新 • • 發佈:2018-12-26
import requests
from bs4 import BeautifulSoup
# Base address of the listing pages; get_more appends the page number to it.
url = 'https://knewone.com/discover?page='
def get_info(url, data=None, timeout=10):
    """Download one listing page and print title, image and link per item.

    Args:
        url: Full URL of the page to download.
        data: Unused; accepted only so existing callers that pass it keep
            working.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with one dict per item, each holding the keys ``'title'``,
        ``'img'`` and ``'link'``. Every dict is also printed as it is built.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps a stalled server from blocking the crawl forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    # The same anchor carries both the title attribute and the href, so a
    # single query serves both fields.
    anchors = soup.select('section.content > h4 > a')
    images = soup.select('a.cover-inner > img')

    records = []
    # NOTE(review): anchors and images are paired purely by position, and
    # zip() stops at the shorter list; an item without a cover image would
    # shift the pairing -- confirm against the page markup.
    for anchor, image in zip(anchors, images):
        record = {
            'title': anchor.get('title'),
            'img': image.get('src'),
            'link': anchor.get('href'),
        }
        print(record)
        records.append(record)
    return records
def get_more(start, end):
    """Run get_info on every page number from start up to, but excluding, end.

    Each page address is the module-level ``url`` with the page number
    appended as text.
    """
    page_urls = (url + str(page_number) for page_number in range(start, end))
    for page_url in page_urls:
        get_info(page_url)
# Script entry: crawl listing pages 1 through 4 (the end bound is exclusive).
get_more(start=1, end=5)