|
下面用SAX将ddt.xml解析成HTML。当然啦,如果得到了XML对应的XSL文件,也可以直接用libxml2将其转换成HTML。
复制代码 代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#---------------------------------------
# 程序:XML解析器
# 版本:01.0
# 作者:mupeng
# 日期:2013-12-18
# 语言:Python 2.7
# 功能:将xml解析成对应的html
# 注解:该程序用xml.sax模块的parse函数解析XML,并生成事件
#       继承ContentHandler并重写其事件处理函数
#       Dispatcher主要用于相应标签的起始、结束事件的派发
#---------------------------------------
from xml.sax.handler import ContentHandler
from xml.sax import parse
class Dispatcher:
    """Mixin that routes SAX element events to per-tag handler methods.

    For an element named ``item`` the start event is forwarded to
    ``startItem()`` and the end event to ``endItem()``.  When no such
    method exists, ``defaultStart()`` / ``defaultEnd()`` is used instead,
    and when that is missing too the event is silently ignored.
    Handlers are always invoked without arguments.
    """

    def dispatch(self, prefix, name, attrs=None):
        """Call the handler for *name*, falling back to the default one.

        prefix -- 'start' or 'end'; selects the handler family.
        name   -- element name; it is passed through ``str.capitalize``,
                  so ``pubDate`` resolves to ``startPubdate``/``endPubdate``.
        attrs  -- element attributes; accepted for interface compatibility
                  but not forwarded to the handler.
        """
        handler = getattr(self, prefix + name.capitalize(), None)
        if not callable(handler):
            handler = getattr(self, 'default' + prefix.capitalize(), None)
        if callable(handler):
            handler()

    def startElement(self, name, attrs):
        """SAX callback for an opening tag.

        The parser calls this with both the element name and its
        attributes, so the signature must accept both.
        """
        self.dispatch('start', name, attrs)

    def endElement(self, name):
        """SAX callback for a closing tag."""
        self.dispatch('end', name)
class Website(Dispatcher, ContentHandler):
    """Render an RSS feed as an HTML page while it is being SAX-parsed.

    Every start/end tag is routed by ``Dispatcher`` to the matching
    ``start<Tag>`` / ``end<Tag>`` method below.  The handlers write HTML
    fragments straight to the output file in the order the tags arrive,
    so the page layout depends on the element order of the feed.

    NOTE(review): feed text is inserted into the page verbatim (no HTML
    escaping), as in the original; only use this on feeds you trust.
    """

    OUTPUT_ENCODING = 'gb2312'

    # Closes <head>, defines the two helper scripts and opens the banner
    # table.  The regex needs an escaped slash: /^\//.
    _HEAD_CLOSE = u'''
</title>
</head>
<body>
<center>
<script>
function copyLink()
{
    clipboardData.setData("Text",window.location.href);
    alert("RSS链接已经复制到剪贴板");
}

function subscibeLink()
{
    var str = window.location.pathname;
    while(str.match(/^\\//))
    {
        str = str.replace(/^\\//,"");
    }
    window.open("http://rss.sina.com.cn/my_sina_web_rss_news.html?url=" + str,"_self");
}
</script>
<table width="750" cellpadding="0" cellspacing="0">
<tr>
<td align="right" style="padding-right:15px;" valign="bottom">
'''

    # Toolbar row under the banner, followed by the table that holds the items.
    _CHANNEL_BAR = u'''
</td></tr>
<tr style="font-size:12px;" bgcolor="#eeeeff"><td colspan="2" style="font-size:14px;padding-top:5px;padding-bottom:5px;"><b><a href="javascript:copyLink();">复制此页链接</a> <a href="javascript:subscibeLink();">我要嵌入该新闻列表到我的页面(简单、快速、实时、免费)</a></b></td></tr>
</table>
<table width="750" cellpadding="0" cellspacing="0">
'''

    # Closes the item table and rewrites every <span> (publication date)
    # into "date (time elapsed since then)".
    _PAGE_FOOTER = u'''
<tr><td height="20"></td></tr>
</table>
</center>
<script>
function GetTimeDiff(str)
{
    if(str == '')
    {
        return '';
    }

    var pubDate = new Date(str);
    var nowDate = new Date();
    var diffMilSeconds = nowDate.valueOf()-pubDate.valueOf();
    var days = diffMilSeconds/86400000;
    days = parseInt(days);

    diffMilSeconds = diffMilSeconds-(days*86400000);
    var hours = diffMilSeconds/3600000;
    hours = parseInt(hours);

    diffMilSeconds = diffMilSeconds-(hours*3600000);
    var minutes = diffMilSeconds/60000;
    minutes = parseInt(minutes);

    diffMilSeconds = diffMilSeconds-(minutes*60000);
    var seconds = diffMilSeconds/1000;
    seconds = parseInt(seconds);

    var returnStr = "北京发布时间:" + pubDate.toLocaleString();

    if(days > 0)
    {
        returnStr = returnStr + " (距离现在" + days + "天" + hours + "小时" + minutes + "分钟)";
    }
    else if (hours > 0)
    {
        returnStr = returnStr + " (距离现在" + hours + "小时" + minutes + "分钟)";
    }
    else if (minutes > 0)
    {
        returnStr = returnStr + " (距离现在" + minutes + "分钟)";
    }

    return returnStr;
}

function GetSpanText()
{
    var pubDate;
    var pubDateArray;
    var spanArray = document.getElementsByTagName("span");

    for(var i = 0; i < spanArray.length; i++)
    {
        pubDate = spanArray[i].innerHTML;
        document.getElementsByTagName("span")[i].innerHTML = GetTimeDiff(pubDate);
    }
}

GetSpanText();
</script>
</body>
</html>
'''

    def __init__(self, path='ddt_SAX.html'):
        """Open the output file and reset all parsing state.

        path -- file the HTML is written to (defaults to the original
                hard-coded name).
        """
        ContentHandler.__init__(self)
        # Binary mode: every fragment is encoded explicitly in _emit().
        self.fout = open(path, 'wb')
        self.imagein = False    # inside <image>
        self.desflag = False
        self.item = False       # inside <item>
        self.title = ''
        self.link = ''
        self.guid = ''
        self.url = ''
        self.pubdate = ''
        self.description = ''
        self.temp = ''          # character data of the current element
        self.prx = ''

    # -- helpers ---------------------------------------------------------

    def _emit(self, text):
        """Write *text* to the output file as GB2312 bytes.

        Characters GB2312 cannot represent become numeric character
        references instead of aborting the whole conversion.
        """
        if not isinstance(text, bytes):
            text = text.encode(self.OUTPUT_ENCODING, 'xmlcharrefreplace')
        self.fout.write(text)

    def _take_text(self):
        """Return the buffered character data and clear the buffer."""
        text = self.temp
        self.temp = ''
        return text

    # -- generic SAX events ---------------------------------------------

    def characters(self, chars):
        """Accumulate character data, skipping whitespace-only chunks."""
        if chars.strip():
            self.temp += chars

    def endDocument(self):
        """Close the output file if no </channel> already did so."""
        if not self.fout.closed:
            self.fout.close()

    def defaultStart(self):
        """Start of an element without a dedicated handler: nothing to do."""
        pass

    def defaultEnd(self):
        """End of an element without a dedicated handler: drop its text."""
        self.temp = ''

    # -- <channel> -------------------------------------------------------

    def startChannel(self):
        """Open the HTML document and the page title."""
        self._emit('<html>\n<head>\n<title> RSS-')

    def endChannel(self):
        """Write the page footer and close the output file."""
        self._emit(self._PAGE_FOOTER)
        self.fout.close()

    # -- <title> ---------------------------------------------------------

    def startTitle(self):
        """Open the headline row when the title belongs to an item."""
        if self.item:
            self._emit(' <tr bgcolor="#eeeeee">\n'
                       '<td style="padding-top:5px;padding-left:5px;" height="30">\n'
                       '<B> ')

    def endTitle(self):
        """Emit the title text in the place that fits its parent element."""
        text = self._take_text()
        if self.item:
            # Item headline: close the bold cell, open the description cell.
            self.title = text
            self._emit(self.title)
            self._emit(' </B> </td> </tr> <tr bgcolor="#eeeeee"> '
                       '<td style="padding-left:5px;"> ')
        elif not self.imagein:
            # Channel title: finish <title>, then start the page body.
            self.title = text
            self._emit(self.title)
            self._emit(self._HEAD_CLOSE)
        # An <image> title is not rendered; its text was discarded above so
        # it cannot leak into the following <link>.

    # -- <image> ---------------------------------------------------------

    def startImage(self):
        """Mark that the parser is inside <image>."""
        self.imagein = True

    def endImage(self):
        """Mark that the parser has left <image>."""
        self.imagein = False

    # -- <link> ----------------------------------------------------------

    def startLink(self):
        """Open the anchor that wraps the channel logo."""
        if self.imagein:
            self._emit('<A href="')

    def endLink(self):
        """Store the link and, outside of items, emit the markup using it."""
        self.link = self._take_text()
        if self.imagein:
            self._emit(self.link)
            self._emit('" target="_blank">\n ')
        elif self.item:
            # Item links are written later by endItem().
            pass
        else:
            # Channel link: completes the banner anchor opened by endUrl().
            self._emit(self.link)
            self._emit('" target="_blank"> ')
            self._emit(self.title)
            self._emit(' </A></B></td> </tr> <tr><td colspan="2" align="center"> ')
            self._emit(self.description)
            self._emit(self._CHANNEL_BAR)

    # -- <url> -----------------------------------------------------------

    def startUrl(self):
        """Open the logo <IMG> tag."""
        if self.imagein:
            self._emit('<IMG src="')

    def endUrl(self):
        """Finish the logo and open the banner anchor for the channel link."""
        self.url = self._take_text()
        if self.imagein:
            self._emit(self.url)
            self._emit('" border="0">\n </A> </td> '
                       '<td align="left" valign="bottom" style="padding-bottom:8px;">'
                       '<B><A href="')

    # -- <description> ---------------------------------------------------

    def startDescription(self):
        """Nothing to emit when a description starts."""
        pass

    def endDescription(self):
        """Store the description; item descriptions are written at once."""
        self.description = self._take_text()
        if self.item:
            self._emit(self.description)

    # -- <guid> / <pubDate> ----------------------------------------------

    def endGuid(self):
        """Store the item's guid."""
        self.guid = self._take_text()

    def endPubdate(self):
        """Store the publication date, rejecting text that looks like a URL."""
        text = self._take_text()
        self.pubdate = '' if text.startswith('http') else text

    # -- <item> ----------------------------------------------------------

    def startItem(self):
        """Mark that the parser is inside <item>."""
        self.item = True

    def endItem(self):
        """Close the item's description cell and add its link and date rows."""
        self.item = False
        self._emit(' </td> </tr> <tr bgcolor="#eeeeee"> '
                   '<td style="padding-top:5px;padding-left:5px;"> <A href="')
        self._emit(self.link)
        self._emit('" target="_blank"> ')
        self._emit(self.guid)
        self._emit(' </A> </td> </tr> <tr bgcolor="#eeeeee"> '
                   '<td style="padding-top:5px;padding-left:5px;padding-bottom:5px;">'
                   '<span>')
        self._emit(self.pubdate)
        self._emit('</span></td> </tr> <tr height="10"><td></td></tr>')
# Program entry point: parse ddt.xml with a Website handler.
if __name__ == '__main__':
    handler = Website()
    parse('ddt.xml', handler)
(编辑:安卓应用网)
【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意中侵犯到您的权利,请及时联系站长删除相关内容!
|