用python爬取新浪微博資料 (無需手動獲取cookie)
阿新 • 發佈:2019-02-18
從java 轉為python
import logging
import random
import re
import time
from urllib.parse import quote

import requests
import selenium
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
def parse_list(url=None):
    """Log in to the Weibo mobile site with PhantomJS, fetch *url*, and print
    the text and links of every element matching the '.c' selector.

    Parameters
    ----------
    url : str or None
        Page to scrape after logging in. When None, nothing is done.

    Returns
    -------
    None -- results are only printed to stdout.

    NOTE(review): credentials below are hard-coded placeholders; move them to
    configuration or environment variables before real use.
    """
    if url is None:
        # Nothing to fetch; avoid launching a browser for no reason.
        return None

    # ========================= login =========================
    # NOTE: PhantomJS and the find_element_by_* API are deprecated in newer
    # Selenium releases; this file targets an old Selenium version.
    driver = webdriver.PhantomJS()
    try:
        driver.get("http://login.weibo.cn/login/")
        time.sleep(3)  # crude wait for the login page to render
        driver.maximize_window()
        # Fill in username / password (placeholder strings).
        driver.find_element_by_name("mobile").send_keys("微博帳號")
        driver.find_element_by_xpath("//input[@type='password']").send_keys("密碼")
        # Tick the "remember password" checkbox.
        driver.find_element_by_name("remember").click()
        time.sleep(1)
        # Submit the login form.
        driver.find_element_by_name("submit").click()

        # ========================= scrape =========================
        driver.get(url)
        html = driver.page_source
        if not html:
            return None
        # Explicit parser avoids BeautifulSoup's "no parser was explicitly
        # specified" warning and makes parsing deterministic across machines.
        soup = BeautifulSoup(html, "html.parser")
        for item in soup.select('.c'):
            print(item.text)
            for a in item.select('a'):
                print(a['href'] + "-------" + a.text)
        return None
    finally:
        # Always release the headless-browser process, even on error;
        # the original leaked it.
        driver.quit()
if __name__ == '__main__':
    # Crawl page 1 of user 1789834424's timeline.
    # (Removed the stray Java-style trailing semicolon.)
    parse_list("http://weibo.cn/u/1789834424?page=1")