1. 程式人生 > 爬取動態網站資料(soup的css方式處理資料)

爬取動態網站資料(soup的css方式處理資料)

import requests
from bs4 import BeautifulSoup

url = 'https://knewone.com/discover?page='

def get_info(url, data=None):
    """Download one listing page and print each item's title, image and link.

    Each item is printed as a dict with the keys ``'title'``, ``'img'`` and
    ``'link'``. Nothing is returned.

    Args:
        url: Full URL of the page to fetch.
        data: Unused. Kept only so callers that pass it keep working.
    """
    # Without a timeout a stalled connection would block the crawl forever.
    wd_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(wd_data.text, 'lxml')

    # The same <a> elements carry both the title and the href, so one
    # selection serves for both fields.
    anchors = soup.select('section.content > h4 > a')
    imgs = soup.select('a.cover-inner > img')

    # zip pairs the two lists by position and stops at the shorter one.
    for anchor, img in zip(anchors, imgs):
        item = {
            'title': anchor.get('title'),
            'img': img.get('src'),
            'link': anchor.get('href'),
        }
        print(item)


def get_more(start, end):
    """Print the items of every page numbered from ``start`` to ``end - 1``.

    The page number is appended to the module-level ``url`` prefix.

    Args:
        start: First page number to fetch (inclusive).
        end: Page number to stop before (exclusive).
    """
    for one in range(start, end):
        get_info(url + str(one))


# Crawl pages 1 through 4.
get_more(1, 5)