1. 程式人生 > >python3基礎教程 專案3:萬能的XML

python3基礎教程 專案3:萬能的XML

模組介紹:

在python中使用sax方式處理xml要先引入xml.sax中的parse函式,還有xml.sax.handler中的ContentHandler

parse函式:用於解析xml檔案

幾個注意點:

getattr()函式:用於返回一個物件屬性值。

callable() 函式:用於檢查一個物件是否是可呼叫的。

os.path.join()函式:使用當前作業系統對應的路徑分隔符(例如 Linux/macOS 為 ‘/’,Windows 為 ‘\’)將多個路徑片段合併為一條路徑。

os.makedirs()函式:在指定的路徑中建立必要的目錄。

以下是程式碼及註釋:

import os
from xml.sax import parse
from xml.sax.handler import ContentHandler
from xml.sax.saxutils import escape, quoteattr

class Dispatcher:
    """Mixin that routes SAX element events to per-element handler methods.

    For an event prefix such as ``'start'`` and an element named ``page`` the
    dispatcher looks for a method called ``startPage``.  When no such method
    exists it falls back to ``defaultStart`` (``'default'`` plus the
    capitalized prefix) and additionally passes the element name.
    """

    def dispatch(self, prefix, name, attrs=None):
        """Find the handler for an event and call it with the right arguments.

        prefix -- event kind, e.g. ``'start'`` or ``'end'``.
        name   -- element name as reported by the parser.
        attrs  -- attributes object; forwarded only for ``'start'`` events.

        If neither the specific nor the default handler is callable, the
        event is silently ignored.
        """
        handler = getattr(self, prefix + name.capitalize(), None)
        call_args = []
        if not callable(handler):
            # No element-specific handler: use the default one, which needs
            # to be told which element it is handling.
            handler = getattr(self, 'default' + prefix.capitalize(), None)
            call_args.append(name)
        if prefix == 'start':
            call_args.append(attrs)
        if callable(handler):
            handler(*call_args)

    # The two basic SAX event handlers below simply delegate to dispatch().
    def startElement(self, name, attrs):
        """Route an element-start event through dispatch()."""
        self.dispatch('start', name, attrs)

    def endElement(self, name):
        """Route an element-end event through dispatch()."""
        self.dispatch('end', name)

class WebsiteConstructor(Dispatcher, ContentHandler):
    """SAX content handler that turns a site description into HTML files.

    ``<directory name="...">`` elements create nested directories and
    ``<page name="..." title="...">`` elements create ``<name>.html`` files
    inside the current directory.  Everything found inside a page element
    (child elements, their attributes and character data) is copied to the
    page file between a fixed HTML header and footer.
    """

    # True while the parser is inside a <page> element; only then are
    # elements and character data copied to the current output file.
    passthrough = False

    def __init__(self, directory):
        """Start building under *directory*, creating it if necessary."""
        # Initialise the ContentHandler base class as well.
        super().__init__()
        # Stack of path components leading to the directory being filled.
        self.directory = [directory]
        self.ensureDirectory()

    def ensureDirectory(self):
        """Create the directory described by the current path stack."""
        path = os.path.join(*self.directory)
        # exist_ok lets the generator be re-run over an existing tree.
        os.makedirs(path, exist_ok=True)

    def characters(self, chars):
        """Copy character data into the current page (called by the parser)."""
        if self.passthrough:
            # The parser hands over text with entities already decoded, so
            # '&', '<' and '>' must be re-escaped to keep the HTML valid.
            self.out.write(escape(chars))

    def defaultStart(self, name, attrs):
        """Write the opening tag of any element without a specific handler."""
        if self.passthrough:
            self.out.write('<' + name)
            for key, val in attrs.items():
                # quoteattr() adds the surrounding quotes and escapes
                # markup characters inside the attribute value.
                self.out.write(' {}={}'.format(key, quoteattr(val)))
            self.out.write('>')

    def defaultEnd(self, name):
        """Write the closing tag of any element without a specific handler."""
        if self.passthrough:
            self.out.write('</{}>'.format(name))

    def startDirectory(self, attrs):
        """Descend into (and create) the sub-directory named by ``attrs['name']``."""
        self.directory.append(attrs['name'])
        self.ensureDirectory()

    def endDirectory(self):
        """Return to the parent directory."""
        self.directory.pop()

    def startPage(self, attrs):
        """Open ``<name>.html`` in the current directory and write its header."""
        filename = os.path.join(*self.directory + [attrs['name'] + '.html'])
        # Explicit encoding: the platform default may be unable to encode
        # non-ASCII page content.
        self.out = open(filename, 'w', encoding='utf-8')
        self.writeHeader(attrs['title'])
        self.passthrough = True

    def endPage(self):
        """Write the footer and close the current page file."""
        self.passthrough = False
        try:
            self.writeFooter()
        finally:
            # Close the file even if writing the footer fails.
            self.out.close()

    def writeHeader(self, title):
        """Write the HTML preamble, using *title* as the page title."""
        self.out.write('<html>\n <head>\n   <title>')
        # The title comes from an (entity-decoded) XML attribute.
        self.out.write(escape(title))
        self.out.write('</title>\n </head>\n <body>\n')

    def writeFooter(self):
        """Write the closing HTML tags."""
        self.out.write('\n </body>\n</html>\n')

# Parse the site description and generate the pages under public_html.
parse(source='website.xml', handler=WebsiteConstructor('public_html'))

website.xml:

<website>
    <page name = "index" title = "Home Page">
        <h1> Welcome to My Home Page</h1>

        <p>Hi, there. My name is Mr.Gumby, and this is my home page.
            Here are some of my interests
        </p>

        <ul>
            <li><a href = "interests/shouting.html">Shouting</a></li>
            <li><a href = "interests/sleeping.html">Sleeping</a></li>
            <li><a href = "interests/eating.html">Eating</a></li>
        </ul>
    </page>
    <directory name="interests">
        <page name="shouting" title="Shouting">
            <h1>Mr.Gumby's Shouting Page</h1>
            <p>...</p>
        </page>
        <page name="sleeping" title="Sleeping">
            <h1>Mr.Gumby's Sleeping Page</h1>
            <p>...</p>
        </page>
        <page name="eating" title="Eating">
            <h1>Mr.Gumby's Eating Page</h1>
            <p>...</p>
        </page>
    </directory>
</website>