重庆分公司,新征程启航
为企业提供网站建设、域名注册、服务器等服务
这期内容当中,小编将会给大家带来一个用 Python 编写的简单 RSS 阅读工具。文章内容丰富,并以专业的角度为大家分析和叙述,希望大家阅读完这篇文章后可以有所收获。
创新互联2013年开创至今,是专业互联网技术服务公司,拥有项目成都网站建设、做网站网站策划,项目实施与项目整合能力。我们以让每一个梦想脱颖而出为使命,1280元宣威做网站,已为上家服务,为宣威各地企业和个人服务,联系电话:13518219792
#!/usr/bin/env python
# -*- coding:UTF-8 -*-
"""Minimal RSS/Atom reader.

Downloads a feed, parses it with BeautifulSoup's "xml" parser and yields one
``Item`` (title, link, date, short plain-text description) per feed entry.

Usage from the command line::

    python <this file> FEED_URL

The code runs on both Python 2 and Python 3.
"""
import re
import sys

try:  # Python 2
    import urllib2
except ImportError:  # Python 3: same Request/urlopen API under another name
    import urllib.request as urllib2

try:  # Python 2 module; not used below, kept for anything importing it here
    import StringIO
except ImportError:  # Python 3: io.StringIO is reachable as StringIO.StringIO
    import io as StringIO

# Third-party packages are imported defensively so that the module (and
# ``Item``) stays importable without them; the failure is deferred to the
# point of use.
try:
    import requests
except ImportError:  # urlread() falls back to urllib when requests is absent
    requests = None

try:
    from bs4 import BeautifulSoup as sp
    from lxml import etree  # BeautifulSoup's "xml" parser is backed by lxml
except ImportError:
    sp = etree = None

if sys.version_info[0] == 2:
    # Python 2 only: make implicit str/unicode conversions use UTF-8 so that
    # Item.__str__ (which may return unicode) can be printed.
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding("utf-8")

# ``test`` is a command-line demo and is deliberately left out of star imports.
__all__ = ["headers", "urlread", "Item", "BSParser"]

headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}

# Non-greedy match of anything that looks like a markup tag.
_TAG_RE = re.compile(r"<.*?>")


def urlread(url, timeout=30):
    """Download *url* and return the response body as bytes.

    ``requests`` is tried first; if it is not installed or the request fails,
    the download is retried with urllib.

    Args:
        url: Address of the feed.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        The body as a byte string. The ``requests`` path re-encodes the text
        as UTF-8; the urllib path returns the bytes exactly as received.

    Raises:
        Whatever urllib raises when the fallback download fails as well.
    """
    if requests is not None:
        try:
            reply = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException:
            pass  # fall through to the urllib attempt below
        else:
            reply.encoding = "utf-8"
            return reply.text.encode("utf-8")

    request = urllib2.Request(url, headers=headers)
    response = urllib2.urlopen(request, timeout=timeout)
    try:
        # read() already yields bytes; re-encoding them was a latent error.
        return response.read()
    finally:
        response.close()


def _node_text(node):
    """Return ``node.text``, or an empty string when the node is missing."""
    if node is None:
        return ""
    return node.text or ""


class Item(object):
    """One feed entry with whitespace-trimmed fields.

    Attributes:
        title, link, pubDate: Stripped strings (``None`` becomes ``""``).
        description: Plain-text description, at most 240 characters + "...".
        decription: Same value under the historical (misspelled) name, kept
            so existing callers keep working.
    """

    def __init__(self, title, link, date, description):
        self.title = (title or "").strip()
        self.link = (link or "").strip()
        self.pubDate = (date or "").strip()
        self.description = self.filter(description or "").strip()
        self.decription = self.description

    def filter(self, description):
        """Strip markup and line breaks from *description* and shorten it.

        Tags, carriage returns and newlines are removed, ``&nbsp;`` entities
        become plain spaces, and text longer than 240 characters is cut to
        240 characters followed by ``"..."``.
        """
        description = _TAG_RE.sub("", description)
        description = description.replace("\r", "").replace("\n", "")
        # The original substitution replaced a space with a space (a no-op),
        # most likely a mangled "&nbsp;" entity.
        description = description.replace("&nbsp;", " ")
        if len(description) > 240:
            description = description[:240] + '...'
        return description

    def __str__(self):
        return "%s\n%s\n%s\n<%s>\n" % (
            self.title,
            self.link,
            self.description,
            self.pubDate,
        )

    __repr__ = __str__


class BSParser(object):
    """Feed parser built on a BeautifulSoup document.

    The document is stored in ``self.soup``; ``run()`` walks its entries.
    """

    def __init__(self, url):
        """Download the feed at *url* and parse it."""
        self.reset(urlread(url))

    def reset(self, xml=None):
        """Replace the parsed document with *xml* (empty when ``None``).

        Raises:
            ImportError: if BeautifulSoup or lxml is not installed.
        """
        if sp is None:
            raise ImportError("BSParser requires the 'bs4' and 'lxml' packages")
        self.soup = sp("" if xml is None else xml, "xml")

    def callback(self, method, obj, tags):
        """Call ``obj.<method>(tag)`` for each tag until one result is truthy.

        Args:
            method: Method name; it is lower-cased before the lookup
                (e.g. ``"FIND"`` -> ``obj.find``).
            obj: Object to search.
            tags: Candidate tag names, tried in order.

        Returns:
            The first truthy result, otherwise the last result obtained
            (``None`` when every attempt failed).
        """
        rst = None
        attr = method.lower()
        for tag in tags:
            try:
                rst = getattr(obj, attr)(tag)
            except (AttributeError, TypeError):
                # obj has no such method or it is not callable: next tag.
                continue
            if rst:
                break
        return rst

    def getfields(self, tags=("item", "entry")):
        """Return the feed entries (RSS ``item`` / Atom ``entry``).

        An empty list is returned when nothing matches.
        """
        return self.callback(method="FIND_ALL", obj=self.soup, tags=tags) or []

    def gettitle(self, obj, tags=("title",)):
        """Return the entry title text, or ``""`` when absent."""
        return _node_text(self.callback("FIND", obj, tags))

    def getlink(self, obj, tags=("link",)):
        """Return the entry link, or ``""`` when absent.

        The element text is preferred; when it is empty the ``href``
        attribute is used instead.
        """
        node = self.callback("FIND", obj, tags)
        if node is None:
            return ""
        return node.text or node.get("href") or ""

    def getdate(self, obj, tags=("pubDate", "published", "updated")):
        """Return the entry date text, or ``""`` when absent."""
        return _node_text(self.callback("FIND", obj, tags))

    def getdescription(self, obj, tags=("description", "content", "summary")):
        """Return the entry description text, or ``""`` when absent."""
        return _node_text(self.callback("FIND", obj, tags))

    def run(self):
        """Yield an ``Item`` for every entry of the parsed feed."""
        for entry in self.getfields():
            yield Item(
                self.gettitle(entry),
                self.getlink(entry),
                self.getdate(entry),
                self.getdescription(entry),
            )


def test(url=None):
    """Print every item of the feed at *url*.

    When *url* is omitted it is taken from the first command-line argument;
    the program exits with a usage message if none was given.
    """
    if url is None:
        if len(sys.argv) < 2:
            sys.exit("usage: %s FEED_URL" % sys.argv[0])
        url = sys.argv[1]
    for item in BSParser(url).run():
        print(item)


if __name__ == "__main__":
    test()
上述就是小编为大家分享的简单 RSS 阅读工具了。如果刚好有类似的疑惑,不妨参照上述分析进行理解。如果想知道更多相关知识,欢迎关注创新互联行业资讯频道。