1. 程式人生 > >批量下載matplotlib網站例項原始碼python指令碼

批量下載matplotlib網站例項原始碼python指令碼

模組功能描述:
該模組用於批量下載 matplotlib 網站上各例項的原始碼。

- getUrlList():取得每個例項頁面的 url 列表。
- GetDemoDownload():下載每個例項的原始碼,包括 py 與 ipynb 檔案。

#coding=utf8 
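'''
Author: ewang
Date: 2017/8/14
Module description:
    This module batch-downloads the example source code published on the
    matplotlib website.
    getUrlList(): builds the list of URLs of the example pages.
    GetDemoDownload(): downloads each example's source code, both the
    .py and the .ipynb file.
'''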
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait 
import os
import urllib2,re

def PATH(p):
    """Return the absolute path of ``p`` resolved against this script's directory.

    ``p`` is joined onto the directory containing this file and the result
    is normalised with ``os.path.abspath``.  If ``p`` is already absolute,
    ``os.path.join`` discards the script directory, so ``p`` is returned
    (normalised) unchanged.
    """
    # A real ``def`` instead of a name-bound lambda: it can carry a docstring
    # and shows up as ``PATH`` rather than ``<lambda>`` in tracebacks.
    scriptDir = os.path.dirname(__file__)
    return os.path.abspath(os.path.join(scriptDir, p))
class downMatplotlibDemo(object):
    """Batch-download the example sources linked from a matplotlib docs page.

    Instantiating the class runs the whole job: it collects the gallery
    page URLs, starts a Chrome WebDriver session, downloads every file
    behind a "Download" link on each page, and finally shuts the browser
    down.

    Attributes:
        urlList: list of gallery page URLs produced by ``getUrlList``
            (empty when the index page could not be fetched or parsed).
        driver: the selenium Chrome WebDriver used to render the pages.

    Relies on the module-level ``PATH`` helper and on the ``urllib2``,
    ``re``, ``os`` and selenium names imported by the file.
    """

    def __init__(self):
        """Collect the URLs, run the downloads and always release the browser."""
        self.urlList = self.getUrlList()
        self.driver = webdriver.Chrome()
        try:
            self.driver.maximize_window()
            self.GetDemoDownload()
        finally:
            # quit() (rather than close()) also ends the driver process, and
            # the finally guarantees this happens when a download step raises.
            self.driver.quit()

    def getUrlList(self):
        """Return the list of gallery page URLs referenced by the index page.

        The index page is fetched with ``urllib2`` and scanned for
        ``../../gallery/...`` reference links; each relative target is
        appended to the gallery base URL.

        Returns:
            A list of URL strings.  Any failure while fetching or parsing
            is reported on stdout and yields an empty list, so callers can
            always iterate over the result.
        """
        url = "http://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.subplots.html#matplotlib.pyplot.subplots"
        matutl = "http://matplotlib.org/devdocs/gallery/"
        try:
            response = urllib2.urlopen(url)
            try:
                pageContent = response.read()
            finally:
                response.close()
            linkList = re.findall('class="reference internal" href="../../gallery/(.*?)"><span class="std std-ref">(.*?)</span></a>', pageContent, re.S)
            # Each match is a (relative href, link text) pair; only the href is used.
            return [matutl + var[0] for var in linkList]
        except Exception as e:
            print("Create UrlList Error: %s" % e)
            return []

    def GetDemoDownload(self):
        """Visit every URL in ``self.urlList`` and save the files it links to.

        For each page the browser scrolls to the bottom, waits up to five
        seconds for links whose text contains "Download", and stores each
        linked file through ``_saveDemoFile``.  Problems with one link are
        reported and do not prevent the remaining links or pages from being
        processed.  After each page a progress line is printed with the
        running page count and the last download URL seen on that page
        (``None`` if there was none).
        """
        count = 0
        for url in self.urlList:
            # Reset per page so the progress line never reports a stale or
            # unbound value.
            downurl = None
            self.driver.get(url)
            js = "var q=document.body.scrollTop=200000"
            self.driver.execute_script(js)
            try:
                downLoadBtnList = WebDriverWait(self.driver, 5).until(
                    lambda driver: driver.find_elements_by_partial_link_text('Download'))
            except Exception as e:
                print("Download not exist: %s" % e)
                # No links for this page; never reuse the previous page's elements.
                downLoadBtnList = []

            for downLoad in downLoadBtnList:
                try:
                    downurl = downLoad.get_attribute("href")
                    self._saveDemoFile(downurl)
                except Exception as e:
                    print("Download List: %s" % e)
            count += 1
            print("%s \t url= %s" % (count, downurl))

    def _saveDemoFile(self, downurl):
        """Download ``downurl`` into the ``sourceCode`` directory unless already present."""
        if not downurl:
            print("the download url is null!")
            return
        fileName = downurl.split("/")[-1]
        if not fileName:
            print("The file name is null!")
            return
        filePath = PATH('./sourceCode/')
        if not os.path.exists(filePath):
            os.mkdir(filePath)
        # os.path.join picks the right separator on every platform.
        fileWithPath = os.path.join(filePath, fileName)
        if os.path.exists(fileWithPath):
            print("the file with path is exists....")
            return
        # Fetch before opening the target so a failed download does not leave
        # an empty file behind that later runs would treat as already done.
        response = urllib2.urlopen(downurl)
        try:
            pageConet = response.read()
        finally:
            response.close()
        with open(fileWithPath, "wb") as FH:
            FH.write(pageConet)
                                               
  
# Script entry point: constructing downMatplotlibDemo runs the whole job,
# because its __init__ collects the URLs, starts the browser and downloads.
if __name__=="__main__":
    downMatplotlibDemo()