1. 程式人生 > >爬取淘寶評論時出現list index out of range問題

爬取淘寶評論時出現list index out of range問題

list index out of range  列表越界

Traceback (most recent call last):
  File "G:/workSpace/Python/TB_Crawler/Crawler_train.py", line 71, in <module>
    print(getCommentsList(ItemURL2))
  File "G:/workSpace/Python/TB_Crawler/Crawler_train.py", line 46, in getCommentsList
    comment = getComment(newURL, i)['content']
  File "G:/workSpace/Python/TB_Crawler/Crawler_train.py", line 27, in getComment
    comment = jd['comments'][num]

IndexError: list index out of range

提示錯誤在getComment方法的comment = jd['comments'][num]中

因為淘寶評論每一頁最多20條評論,所以爬取評論時對每頁的20條評論進行遍歷。當某頁評論少於20條時,num仍然會從0遍歷到19(即range(0, 20)),一旦num大於等於該頁實際的評論數,jd['comments'][num]就會出現列表越界的問題。

解決辦法:當遍歷到最後一條評論時break,跳出迴圈。

# Extract the item id from a product URL
def getItemID(url):
    """Return the numeric item id carried by the ``id`` query parameter.

    The id is taken as the full run of digits following ``id=``, so ids of
    any length are returned intact and trailing parameters (``&ns=1`` ...)
    are never included. The word boundary keeps parameters whose names
    merely end in ``id`` (for example ``spm_id=``) from being matched.

    Args:
        url: Product page URL containing an ``id=<digits>`` parameter.

    Returns:
        The item id as a string of digits.

    Raises:
        ValueError: If ``url`` contains no ``id=<digits>`` parameter.
    """
    m = re.search(r'\bid=(\d+)', url)
    if m is None:
        raise ValueError('no item id found in URL: {!r}'.format(url))
    return m.group(1)

# Review-list endpoint; placeholders are (item id, page number).
# The seller id (userNumId) is hard-coded, as in the original script.
_FEED_RATE_URL = (
    'https://rate.taobao.com/feedRateList.htm'
    '?auctionNumId={}&userNumId=352740130&currentPageNum={}'
)

# Auto-generated texts that stand in for a missing review; entries whose
# content equals one of these are skipped.
# NOTE(review): these are compared verbatim against the API response --
# confirm they use the same script/wording the service actually returns.
_PLACEHOLDER_COMMENTS = (
    '15天內買家未作出評價',
    '評價方未及時做出評價,系統預設好評!',
    '此使用者沒有填寫評價。',
)


def _fetchRateJson(url):
    """Download ``url`` and decode its parenthesis-wrapped JSON payload."""
    res = requests.get(url)
    # The body is JSON wrapped in parentheses: drop surrounding whitespace
    # and the enclosing "(" / ")" before decoding.
    return json.loads(res.text.strip().strip('()'))


# Get the number of reviews
def getCommentCount(url):
    """Return the ``total`` field reported for the item behind ``url``.

    Args:
        url: Product page URL; the item id is extracted with ``getItemID``.

    Returns:
        The value of the ``total`` key from the first review page.
    """
    countURL = _FEED_RATE_URL.format(getItemID(url), 1)
    return _fetchRateJson(countURL)['total']


# Get a single review
def getComment(url, num):
    """Return the review at index ``num`` of the review page at ``url``.

    Args:
        url: Fully built review-list URL (one page).
        num: Zero-based index into that page's ``comments`` list.

    Returns:
        A dict with a single key, ``'content'``, holding the review text.

    Raises:
        IndexError: If the page holds ``num`` or fewer reviews.
    """
    comment = _fetchRateJson(url)['comments'][num]
    return {'content': comment['content']}


# Collect the reviews into a list
def getCommentsList(url):
    """Collect the reviews of the item behind ``url``, page by page.

    Each page is downloaded once and only the entries it actually contains
    are visited, so a short last page cannot cause an ``IndexError``.
    Paging stops once ``getCommentCount(url)`` entries have been seen, or
    as soon as a page comes back empty (which guards against looping
    forever if the reported total is larger than what the pages deliver).

    Args:
        url: Product page URL.

    Returns:
        A list of ``{'content': <text>}`` dicts, in page order, excluding
        the auto-generated placeholder reviews.
    """
    commentList = []
    maxCount = getCommentCount(url)
    itemID = getItemID(url)  # loop-invariant, so resolve it once
    count = 0
    page = 1
    while count < maxCount:
        pageComments = _fetchRateJson(_FEED_RATE_URL.format(itemID, page))['comments']
        page += 1
        if not pageComments:
            break
        for comment in pageComments:
            content = comment['content']
            if content not in _PLACEHOLDER_COMMENTS:
                commentList.append({'content': content})
            # Every visited entry counts toward the total, kept or not.
            count += 1
            if count >= maxCount:
                break
    return commentList