Python 解析万能的 XML（基于 SAX）

原创
2012/04/28 21:13
阅读数 448
xml文件
<website>
    <page name="index" title="Home Page">
	<h1>Welcome to My Home Page</h1>
	<p>Hi, there. My name is Mr. Gumby, and this is my home page. Here are some of my interests:</p>
	<ul>
	    <li><a href="interests/shouting.html">Shouting</a></li>
	    <li><a href="interests/sleeping.html">Sleeping</a></li>
	    <li><a href="interests/eating.html">Eating</a></li>
        </ul>
    </page>
    <directory name="interests">
	<page name="shouting" title="Shouting">
	    <h1>Mr. Gumby's Shouting Page</h1>
	    <p>...</p>
	</page>
	<page name="sleeping" title="Sleeping">
	    <h1>Mr. Gumby Sleeping</h1>
	    <p>...</p>
	</page>
	<page name="eating" title="Eating">
	    <h1>Mr. Gumby Eating</h1>
	    <p>...</p>
	</page>
    </directory>
</website>

python代码

from xml.sax.handler import ContentHandler
from xml.sax import parse
import os

class Dispatcher:
    """Mixin that routes SAX element events to per-element handler methods.

    For an element named ``foo`` the start event is sent to ``startFoo(attrs)``
    and the end event to ``endFoo()``.  When no such callable attribute exists,
    the event falls back to ``defaultStart(name, attrs)`` / ``defaultEnd(name)``;
    if that is missing too, the event is silently ignored.
    """

    def dispatch(self, prefix, name, attrs=None):
        """Call the handler for *name*, or the default handler for *prefix*.

        prefix -- 'start' or 'end'; selects the handler family.
        name   -- element name as reported by the parser.
        attrs  -- element attributes; only forwarded when prefix is 'start'.
        """
        # str.capitalize() upper-cases the first character and lower-cases
        # the rest, so 'page' -> 'startPage' and 'h1' -> 'startH1'.
        mname = prefix + name.capitalize()
        dname = 'default' + prefix.capitalize()
        method = getattr(self, mname, None)
        if callable(method):
            # Specific handlers already know which element they serve.
            args = ()
        else:
            # Default handlers receive the element name as first argument.
            method = getattr(self, dname, None)
            args = (name,)
        if prefix == 'start':
            args += (attrs,)
        if callable(method):
            method(*args)

    def startElement(self, name, attrs):
        """SAX callback: dispatch an element-start event."""
        self.dispatch('start', name, attrs)

    def endElement(self, name):
        """SAX callback: dispatch an element-end event."""
        self.dispatch('end', name)
class WebsiteConstructor(Dispatcher, ContentHandler):
    """SAX handler that writes one HTML file per ``<page>`` element.

    ``<directory>`` elements are mapped to nested directories below the root
    directory passed to the constructor.  Everything found inside a ``<page>``
    (tags, attributes and text) is copied to that page's file, wrapped in a
    generated HTML header and footer.
    """

    # True only between startPage and endPage; gates all pass-through output.
    passthrough = False

    def __init__(self, directory):
        """Remember *directory* as the output root and create it if needed."""
        ContentHandler.__init__(self)
        # Stack of path components; <directory> elements push and pop here.
        self.directory = [directory]
        self.ensureDirectory()

    def ensureDirectory(self):
        """Create the directory named by the current path stack if missing."""
        path = os.path.join(*self.directory)
        if not os.path.isdir(path):
            os.makedirs(path)

    def characters(self, chars):
        """SAX callback: copy text content into the current page."""
        if self.passthrough:
            self.out.write(chars)

    # Backward-compatible alias for the original, misspelled method name.
    # The parser only ever calls ``characters``, so under the old name text
    # content was never written.
    charaters = characters

    def defaultStart(self, name, attrs):
        """Write the opening tag of an element that has no specific handler."""
        if self.passthrough:
            self.out.write('<' + name)
            for key, val in attrs.items():
                # The leading space separates each attribute from what
                # precedes it (tag name or previous attribute).
                self.out.write(' %s="%s"' % (key, val))
            self.out.write('>')

    def defaultEnd(self, name):
        """Write the closing tag of an element that has no specific handler."""
        if self.passthrough:
            self.out.write('</%s>' % name)

    def startDirectory(self, attrs):
        """Enter a ``<directory>``: push its name and create the directory."""
        self.directory.append(attrs['name'])
        self.ensureDirectory()

    def endDirectory(self):
        """Leave a ``<directory>``: pop its name from the path stack."""
        self.directory.pop()

    def startPage(self, attrs):
        """Open ``<name>.html`` in the current directory and start copying."""
        filename = os.path.join(*self.directory + [attrs['name'] + '.html'])
        self.out = open(filename, 'w')
        self.writeHeader(attrs['title'])
        self.passthrough = True

    def endPage(self):
        """Stop copying, write the footer and close the page file."""
        self.passthrough = False
        self.writeFooter()
        self.out.close()

    def writeHeader(self, title):
        """Write the HTML prologue with *title* as the document title."""
        self.out.write('<html><head><title>')
        self.out.write(title)
        self.out.write('</title></head><body>')

    def writeFooter(self):
        """Write the HTML epilogue that closes body and html."""
        self.out.write('</body></html>')
# Build the site: read website.xml and write the generated pages below
# the public_html directory.
parse(source='website.xml', handler=WebsiteConstructor('public_html'))

 

 

 

 

展开阅读全文
加载中

作者的其它热门文章

打赏
0
0 收藏
分享
打赏
0 评论
0 收藏
0
分享
返回顶部
顶部