|
有如下的xml文件:
复制代码 代码如下: <?xml version="1.0" encoding="utf-8" ?> <root> <childs> <child name='first' >1</child> <child value="2">2</child> </childs> </root>
下面介绍python解析xml文件的几种方法,使用python模块实现。
方式1,python模块实现自动遍历所有节点:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Method 1: let the SAX parser walk every node and report each event."""
from xml.sax import parse
from xml.sax.handler import ContentHandler


class TestHandle(ContentHandler):
    """SAX handler that prints every event and collects all character data.

    Args:
        inlist: list that receives each chunk of character data, in
            document order (whitespace between elements included).
    """

    def __init__(self, inlist):
        ContentHandler.__init__(self)
        self.inlist = inlist

    def startElement(self, name, attrs):
        """Print the element name and the names of its attributes."""
        # The element name itself must be printed; the listing's sample
        # output ("name: root attrs: []") depends on it.
        print('name: %s attrs: %s' % (name, attrs.keys()))

    def endElement(self, name):
        """Print the name of the element that was just closed."""
        print('endname %s' % name)

    def characters(self, chars):
        """Print a chunk of character data and append it to ``inlist``."""
        print('chars %s' % chars)
        self.inlist.append(chars)


if __name__ == '__main__':
    lt = []
    parse('test.xml', TestHandle(lt))
    print(lt)
结果: [html] view plaincopy name: root attrs: [] chars
name: childs attrs: [] chars
name: child attrs: [u'name'] chars 1 endname child chars
name: child attrs: [u'value'] chars 2 endname child chars
endname childs chars
endname root [u'\n',u'\n',u'1',u'2',u'\n']
方式2,python模块实现获取根节点,按需查找指定节点:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Method 2: load the document with minidom, then look nodes up on demand."""
from xml.dom import minidom

# The XML declaration must be the very first characters of the string,
# so the literal starts directly after the opening triple quote.
xmlstr = '''<?xml version="1.0" encoding="UTF-8"?>
<hash>
<request name='first'>/2/photos/square/type.xml</request>
<error_code>21301</error_code>
<error>auth faild!</error>
</hash>
'''


def _first_text(node):
    """Return the data of ``node``'s first child if it is text/CDATA, else ''."""
    first = node.firstChild
    if first is not None and first.nodeType in (first.TEXT_NODE,
                                                first.CDATA_SECTION_NODE):
        return first.data
    return ''


def doxml(xmlstr, show_help=False):
    """Parse ``xmlstr`` and print the document, its root and each child element.

    Args:
        xmlstr: XML document as a string.
        show_help: when true, call ``help()`` on every public attribute of
            the inspected node instead of only listing the attribute names.
            Off by default because the output is very long.

    Raises:
        xml.parsers.expat.ExpatError: if ``xmlstr`` is not well-formed XML.
    """
    dom = minidom.parseString(xmlstr)
    print('Dom:')
    print(dom.toxml())

    # documentElement is the root element even when a comment or doctype
    # precedes it; firstChild would return that leading node instead.
    root = dom.documentElement
    print('root:')
    print(root.toxml())

    for child in root.childNodes:
        print(child.toxml())
        if child.nodeType != child.ELEMENT_NODE:
            # Whitespace text nodes, comments, etc. carry no attributes.
            continue
        print('child node attribute name: %s' % child.getAttribute('name'))
        print('child node name: %s' % child.nodeName)
        print('child node len: %s' % len(child.childNodes))
        # Empty elements have no first child, so go through the helper
        # rather than indexing childNodes[0] directly.
        print('child data: %s' % _first_text(child))

    print('=======================================')
    print('more help info to see:')
    node = root.lastChild if root.hasChildNodes() else root
    for attr_name in dir(node):
        if attr_name.startswith('_'):
            continue
        if show_help:
            # help() needs the attribute object, not its name string, and
            # prints by itself (its return value is always None).
            help(getattr(node, attr_name))
        else:
            print(attr_name)


if __name__ == '__main__':
    doxml(xmlstr)
结果: [html] view plaincopy Dom: <?xml version="1.0" ?><hash> <request name="first">/2/photos/square/type.xml</request> <error_code>21301</error_code> <error>auth faild!</error> </hash> root: <hash> <request name="first">/2/photos/square/type.xml</request> <error_code>21301</error_code> <error>auth faild!</error> </hash>
<request name="first">/2/photos/square/type.xml</request> child node attribute name: first child node name: request child node len: 1 child data: /2/photos/square/type.xml ======================================= more help info to see: 两种方法各有其优点,python的xml处理模块太多,目前只用到这2个。
=====补充分割线================ 实际工作中发现python的minidom无法解析其它编码的xml,只能解析utf-8的编码,而且xml文件的头部声明也必须是utf-8,为其它编码会报错误。 网上的解决办法都是替换xml文件头部的编码声明,然后转换编码为utf-8再用minidom解码,实际测试为可行,不过有点累赘的感觉。
本节是 python解析xml模块封装代码 的第二部分。 ====写xml内容的分割线=========
#!/usr/bin/env python
# encoding: utf-8
"""Small wrapper around minidom that writes nodes addressed by a simple path."""
from xml.dom import minidom


class xmlwrite:
    """Build an XML result document under an ``api`` root and save it.

    Parent nodes are addressed with a slash-separated path relative to the
    root element. Each segment is ``name`` or ``name[index]`` with a
    zero-based index among same-named siblings; attribute predicates are not
    supported.

    Args:
        resultfile: path of the file written by :meth:`save_xml`.
    """

    def __init__(self, resultfile):
        self.resultfile = resultfile
        self.rootname = 'api'
        self.__create_xml_dom()

    def __create_xml_dom(self):
        """Create the empty document and remember its root element."""
        xmlimpl = minidom.getDOMImplementation()
        self.dom = xmlimpl.createDocument(None, self.rootname, None)
        self.root = self.dom.documentElement

    def __get_spec_node(self, xpath):
        """Return the node addressed by ``xpath``, or None if it is missing.

        Raises:
            ValueError: if a ``[index]`` part is not an integer.
        """
        current = self.root
        for segment in xpath.split('/'):
            segment = segment.strip()
            if not segment:
                continue
            bracket = segment.find('[')
            if bracket > -1:
                name = segment[:bracket]
                index = int(segment[bracket + 1:-1])
            else:
                # No index given: use the whole segment as the name and
                # pick the first match.
                name = segment
                index = 0
            matches = [c for c in current.childNodes if c.nodeName == name]
            if not 0 <= index < len(matches):
                return None
            current = matches[index]
        return current

    def write_node(self, parent, nodename, value, attribute=None, CDATA=False):
        """Append a new ``nodename`` element under the node at path ``parent``.

        Args:
            parent: path of the parent node, e.g. ``'/'`` or ``'/Case[0]/'``.
            nodename: tag name of the element to create.
            value: text content (a string); falsy values create an empty
                element.
            attribute: optional dict of attribute names to values.
            CDATA: wrap ``value`` in a CDATA section instead of a text node.

        If the parent cannot be resolved, a message is printed and the
        element is attached to the root instead.
        """
        node = self.dom.createElement(nodename)
        if value:
            if CDATA:
                nodedata = self.dom.createCDATASection(value)
            else:
                nodedata = self.dom.createTextNode(value)
            node.appendChild(nodedata)
        if attribute and isinstance(attribute, dict):
            for key, attr_value in attribute.items():
                node.setAttribute(key, attr_value)
        try:
            parentnode = self.__get_spec_node(parent)
        except (AttributeError, ValueError):
            # Non-string path or malformed [index].
            parentnode = None
        if parentnode is None:
            print('Get parent Node Fail,Use the Root as parent Node')
            parentnode = self.root
        parentnode.appendChild(node)

    def __write_case_field(self, index, nodename, value, CDATA=False):
        """Append ``nodename`` under the ``index``-th ``Case`` element."""
        self.write_node('/Case[%s]/' % index, nodename, value, CDATA=CDATA)

    def write_start_time(self, time):
        """Append a ``StartTime`` element to the root."""
        self.write_node('/', 'StartTime', time)

    def write_end_time(self, time):
        """Append an ``EndTime`` element to the root."""
        self.write_node('/', 'EndTime', time)

    def write_pass_count(self, count):
        """Append a ``PassCount`` element to the root."""
        self.write_node('/', 'PassCount', count)

    def write_fail_count(self, count):
        """Append a ``FailCount`` element to the root."""
        self.write_node('/', 'FailCount', count)

    def write_case(self):
        """Append a new, empty ``Case`` element to the root."""
        self.write_node('/', 'Case', None)

    def write_case_no(self, index, value):
        """Append ``No`` to the ``index``-th case."""
        self.__write_case_field(index, 'No', value)

    def write_case_url(self, index, value):
        """Append ``URL`` to the ``index``-th case."""
        self.__write_case_field(index, 'URL', value)

    def write_case_dbdata(self, index, value):
        """Append ``DBData`` to the ``index``-th case."""
        self.__write_case_field(index, 'DBData', value)

    def write_case_apidata(self, index, value):
        """Append ``APIData`` to the ``index``-th case."""
        self.__write_case_field(index, 'APIData', value)

    def write_case_dbsql(self, index, value):
        """Append ``DBSQL`` (as a CDATA section) to the ``index``-th case."""
        self.__write_case_field(index, 'DBSQL', value, CDATA=True)

    def write_case_apixpath(self, index, value):
        """Append ``APIXPath`` to the ``index``-th case."""
        self.__write_case_field(index, 'APIXPath', value)

    def save_xml(self):
        """Serialize the document to ``resultfile`` as UTF-8."""
        # Writing pre-encoded bytes works the same on Python 2 and 3 and
        # keeps the declared encoding and the file's bytes in agreement.
        with open(self.resultfile, 'wb') as myfile:
            myfile.write(self.dom.toxml(encoding='utf-8'))


if __name__ == '__main__':
    xr = xmlwrite(r'D:\test.xml')
    xr.write_start_time('2223')
    xr.write_end_time('444')
    xr.write_pass_count('22')
    xr.write_fail_count('33')
    xr.write_case()
    xr.write_case()
    xr.write_case_no(0, '0')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_dbsql(0, 'select * from ')
    xr.write_case_dbdata(0, 'dbtata')
    xr.write_case_apixpath(0, '/xpath')
    xr.write_case_apidata(0, 'apidata')
    xr.write_case_no(1, '1')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_dbsql(1, 'select 1 from ')
    xr.write_case_dbdata(1, 'dbtata1')
    xr.write_case_apixpath(1, '/xpath1')
    xr.write_case_apidata(1, 'apidata1')
    xr.save_xml()
以上封装了minidom,支持通过xpath来写节点,不支持xpath带属性的匹配,但支持带索引的匹配。 比如:/root/child[1],表示root的第2个child节点。 (编辑:安卓应用网)
【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容!
|