1. 程式人生 > python 爬蟲抓豆瓣電影,並存入資料庫

python 爬蟲抓豆瓣電影,並存入資料庫

import urllib.request
import json    
import codecs  
def _sql_quote(value):
    """Return ``value`` as text with single quotes doubled for a SQL string literal."""
    # NOTE(review): doubling quotes is the standard-SQL escape. If the target
    # database also treats backslash as an escape character, backslashes in
    # titles would need handling as well -- confirm against the real target.
    return str(value).replace("'", "''")


def _render_inserts(items, table):
    """Build the SQL text: a leading CRLF, then one INSERT line per item."""
    statements = ["\r\n"]
    for item in items:
        statements.append(
            "insert into %s(title,rate) values ('%s','%s');\r\n"
            % (table, _sql_quote(item['title']), _sql_quote(item['rate']))
        )
    # Join once instead of growing a string inside the loop.
    return "".join(statements)


class info(object):
    """Download one page of Douban movie search results and append it to a SQL file."""

    def moviedown(self, url=None, out_path='tencent.sql', table='tencent'):
        """Fetch one JSON result page and append INSERT statements to a file.

        The response is decoded as UTF-8 JSON; its ``'data'`` entry is
        iterated as a list of dicts, each providing ``'title'`` and ``'rate'``.

        Args:
            url: Address of the JSON page. ``None`` (the default) selects the
                Douban search URL this script was originally hard-wired to.
            out_path: File the statements are appended to.
            table: Table name used in the generated INSERT statements.

        Raises:
            urllib.error.URLError: If the page cannot be fetched.
            KeyError: If the response has no ``'data'`` entry or an item
                lacks ``'title'``/``'rate'``.
        """
        if url is None:
            url = "https://movie.douban.com/j/new_search_subjects?sort=T&range=0,10&tags=%E7%94%B5%E5%BD%B1&start=9960"

        request = urllib.request.Request(url)

        # Close the connection even if reading fails.
        with urllib.request.urlopen(request) as response:
            raw = response.read()

        # json.loads turns the decoded text into a dict; payload['data'] is a
        # list whose elements are dicts.
        payload = json.loads(raw.decode('utf-8'))
        movies = payload['data']

        # Only echo the first rating when there is one; an empty page used to
        # raise IndexError here.
        if movies:
            print(movies[0]['rate'])

        sql_text = _render_inserts(movies, table)

        # newline='' writes the explicit "\r\n" terminators unchanged on every
        # platform.
        with open(out_path, 'a', encoding='utf-8', newline='') as sql_file:
            sql_file.write(sql_text)
        print("success")
# Create the scraper object, then invoke the method with that instance as its receiver.
test = info()
info.moviedown(test)

這只是最基本的實現,僅作為範例,之後會繼續優化。

上面只實現了 20 部電影的存入;下面的版本可以存入近萬部電影,只要更改網址,同樣的做法也能用來抓取綜藝、電視劇等內容。

import urllib.request  
import json      
import codecs    
def _sql_quote(value):
    """Return ``value`` as text with single quotes doubled for a SQL string literal."""
    # NOTE(review): doubling quotes is the standard-SQL escape. If the target
    # database also treats backslash as an escape character, backslashes in
    # titles would need handling as well -- confirm against the real target.
    return str(value).replace("'", "''")


def _render_inserts(items, table):
    """Build the SQL text: a leading CRLF, then one INSERT line per item."""
    statements = ["\r\n"]
    for item in items:
        statements.append(
            "insert into %s(title,rate) values ('%s','%s');\r\n"
            % (table, _sql_quote(item['title']), _sql_quote(item['rate']))
        )
    # Join once instead of growing a string inside the loop.
    return "".join(statements)


class info(object):
    """Download paginated Douban movie search results and append them to a SQL file."""

    def moviedown(self, url=None, out_path='mx_movie.sql', table='mx_movie',
                  max_start=9960, page_size=20):
        """Fetch every result page and append INSERT statements to a file.

        Pages are requested at ``url + str(start)`` for ``start`` in
        ``0, page_size, 2 * page_size, ...`` up to and including
        ``max_start``. Each response is decoded as UTF-8 JSON and its
        ``'data'`` list is collected; each item supplies ``'title'`` and
        ``'rate'``. Nothing is written until all pages have been fetched.

        Args:
            url: URL prefix that the start offset is appended to. ``None``
                (the default) selects the Douban movie search URL this script
                was originally hard-wired to; pointing it at a different
                address lets the same code collect other listings.
            out_path: File the statements are appended to.
            table: Table name used in the generated INSERT statements.
            max_start: Largest start offset to request (inclusive).
            page_size: Step between consecutive start offsets.

        Raises:
            ValueError: If ``page_size`` is smaller than 1.
            urllib.error.URLError: If a page cannot be fetched.
            KeyError: If a response has no ``'data'`` entry or an item lacks
                ``'title'``/``'rate'``.
        """
        if page_size < 1:
            raise ValueError("page_size must be at least 1")
        if url is None:
            url = "https://movie.douban.com/j/new_search_subjects?sort=T&range=0,10&tags=%E7%94%B5%E5%BD%B1&start="

        movies = []
        # With the defaults this requests offsets 0, 20, ..., 9960.
        for start in range(0, max_start + 1, page_size):
            page_url = url + '%d' % start
            print(page_url)
            request = urllib.request.Request(page_url)

            # Close each connection even if reading fails.
            with urllib.request.urlopen(request) as response:
                raw = response.read()

            # json.loads turns the decoded text into a dict; payload['data']
            # is a list whose elements are dicts.
            payload = json.loads(raw.decode('utf-8'))
            # extend() grows the list in place instead of copying it per page.
            movies.extend(payload['data'])

        print(movies)
        sql_text = _render_inserts(movies, table)

        # newline='' writes the explicit "\r\n" terminators unchanged on every
        # platform.
        with open(out_path, 'a', encoding='utf-8', newline='') as sql_file:
            sql_file.write(sql_text)
        print("success")
# Create the scraper object, then invoke the method with that instance as its receiver.
test = info()
info.moviedown(test)