summary refs log tree commit diff stats
diff options
context:
space:
mode:
author	Samuel Gaist <samuel.gaist@edeltech.ch>	2016-07-07 23:56:01 +0200
committer	Tero Kojo <tero.kojo@qt.io>	2016-08-02 09:15:21 +0000
commit	b58da2fc21b061a6490a6a97be9148d69f580510 (patch)
tree	e8c895990fa507375ae46701b639c3254abc0727
parent	f20f1041dce657ca84f900b8e28164ce6ac6a5f7 (diff)
Fix isFeedLink check
This patch fixes the .atom feed detection failing because of the hardcoded value used to extract the extension.

Change-Id: Ic7f840debd4a1286ea88132d08f28626b7524bfd
Reviewed-by: Tero Kojo <tero.kojo@qt.io>
-rw-r--r--	rawdoglib/feedfinder.py	34
1 files changed, 17 insertions, 17 deletions
diff --git a/rawdoglib/feedfinder.py b/rawdoglib/feedfinder.py
index a3abe95..b4fd28e 100644
--- a/rawdoglib/feedfinder.py
+++ b/rawdoglib/feedfinder.py
@@ -10,8 +10,8 @@ Usage:
'http://scripting.com/rss.xml'
>>>
>>> feedfinder.feeds('scripting.com')
- ['http://delong.typepad.com/sdj/atom.xml',
- 'http://delong.typepad.com/sdj/index.rdf',
+ ['http://delong.typepad.com/sdj/atom.xml',
+ 'http://delong.typepad.com/sdj/index.rdf',
'http://delong.typepad.com/sdj/rss.xml']
>>>
@@ -25,10 +25,10 @@ How it works:
1. If the URI points to a feed, it is simply returned; otherwise
the page is downloaded and the real fun begins.
2. Feeds pointed to by LINK tags in the header of the page (autodiscovery)
- 3. <A> links to feeds on the same server ending in ".rss", ".rdf", ".xml", or
+ 3. <A> links to feeds on the same server ending in ".rss", ".rdf", ".xml", or
".atom"
4. <A> links to feeds on the same server containing "rss", "rdf", "xml", or "atom"
- 5. <A> links to feeds on external servers ending in ".rss", ".rdf", ".xml", or
+ 5. <A> links to feeds on external servers ending in ".rss", ".rdf", ".xml", or
".atom"
6. <A> links to feeds on external servers containing "rss", "rdf", "xml", or "atom"
7. Try some guesses about common places for feeds (index.xml, atom.xml, etc.).
@@ -59,7 +59,7 @@ def timelimit(timeout):
threading.Thread.__init__(self)
self.result = None
self.error = None
-
+
self.setDaemon(True)
self.start()
@@ -78,7 +78,7 @@ def timelimit(timeout):
return c.result
return _2
return _1
-
+
# XML-RPC support allows feedfinder to query Syndic8 for possible matches.
# Python 2.3 now comes with this module by default, otherwise you can download it
try:
@@ -92,10 +92,10 @@ if not dict:
for k, v in aList:
rc[k] = v
return rc
-
+
def _debuglog(message):
if _debug: print message
-
+
class URLGatekeeper:
"""a class to track robots.txt rules across multiple servers"""
def __init__(self):
@@ -106,7 +106,7 @@ class URLGatekeeper:
self.urlopener.addheaders = [('User-agent', self.urlopener.version)]
robotparser.URLopener.version = self.urlopener.version
robotparser.URLopener.addheaders = self.urlopener.addheaders
-
+
def _getrp(self, url):
protocol, domain = urlparse.urlparse(url)[:2]
if self.rpcache.has_key(domain):
@@ -121,7 +121,7 @@ class URLGatekeeper:
pass
self.rpcache[domain] = rp
return rp
-
+
def can_fetch(self, url):
rp = self._getrp(url)
allow = rp.can_fetch(self.urlopener.version, url)
@@ -143,7 +143,7 @@ class BaseParser(sgmllib.SGMLParser):
sgmllib.SGMLParser.__init__(self)
self.links = []
self.baseuri = baseuri
-
+
def normalize_attrs(self, attrs):
def cleanattr(v):
v = sgmllib.charref.sub(lambda m: unichr(int(m.groups()[0])), v)
@@ -153,14 +153,14 @@ class BaseParser(sgmllib.SGMLParser):
attrs = [(k.lower(), cleanattr(v)) for k, v in attrs]
attrs = [(k, k in ('rel','type') and v.lower() or v) for k, v in attrs]
return attrs
-
+
def do_base(self, attrs):
attrsD = dict(self.normalize_attrs(attrs))
if not attrsD.has_key('href'): return
self.baseuri = attrsD['href']
-
+
def error(self, *a, **kw): pass # we're not picky
-
+
class LinkParser(BaseParser):
FEED_TYPES = ('application/rss+xml',
'text/xml',
@@ -207,7 +207,7 @@ def getLocalLinks(links, baseuri):
return [l for l in links if l.lower().startswith(baseuri)]
def isFeedLink(link):
- return link[-4:].lower() in ('.rss', '.rdf', '.xml', '.atom')
+ return link[link.rfind('.'):].lower() in ('.rss', '.rdf', '.xml', '.atom')
def isXMLRelatedLink(link):
link = link.lower()
@@ -246,7 +246,7 @@ def getFeedsFromSyndic8(uri):
except:
pass
return feeds
-
+
def feeds(uri, all=False, querySyndic8=False, _recurs=None):
if _recurs is None: _recurs = [uri]
fulluri = makeFullURI(uri)
@@ -350,7 +350,7 @@ def test():
uri = urlparse.urljoin(uri, data.split('<link rel="next" href="').pop().split('"')[0])
print
print count, 'tests executed,', len(failed), 'failed'
-
+
if __name__ == '__main__':
args = sys.argv[1:]
if args and args[0] == '--debug':