summary refs log tree commit diff stats
diff options
context:
space:
mode:
author	Samuel Gaist <samuel.gaist@edeltech.ch>	2016-07-07 23:56:01 +0200
committer	Tero Kojo <tero.kojo@qt.io>	2016-08-02 09:15:21 +0000
commit	b58da2fc21b061a6490a6a97be9148d69f580510 (patch)
tree	e8c895990fa507375ae46701b639c3254abc0727
parent	f20f1041dce657ca84f900b8e28164ce6ac6a5f7 (diff)
Fix isFeedLink check
This patch fixes the .atom feed detection failing because of the hardcoded value used to extract the extension.

Change-Id: Ic7f840debd4a1286ea88132d08f28626b7524bfd
Reviewed-by: Tero Kojo <tero.kojo@qt.io>
-rw-r--r--	rawdoglib/feedfinder.py	34
1 files changed, 17 insertions, 17 deletions
diff --git a/rawdoglib/feedfinder.py b/rawdoglib/feedfinder.py
index a3abe95..b4fd28e 100644
--- a/rawdoglib/feedfinder.py
+++ b/rawdoglib/feedfinder.py
@@ -10,8 +10,8 @@ Usage:
'http://scripting.com/rss.xml'
>>>
>>> feedfinder.feeds('scripting.com')
- ['http://delong.typepad.com/sdj/atom.xml',
- 'http://delong.typepad.com/sdj/index.rdf',
+ ['http://delong.typepad.com/sdj/atom.xml',
+ 'http://delong.typepad.com/sdj/index.rdf',
'http://delong.typepad.com/sdj/rss.xml']
>>>
@@ -25,10 +25,10 @@ How it works:
1. If the URI points to a feed, it is simply returned; otherwise
the page is downloaded and the real fun begins.
2. Feeds pointed to by LINK tags in the header of the page (autodiscovery)
- 3. <A> links to feeds on the same server ending in ".rss", ".rdf", ".xml", or
+ 3. <A> links to feeds on the same server ending in ".rss", ".rdf", ".xml", or
".atom"
4. <A> links to feeds on the same server containing "rss", "rdf", "xml", or "atom"
- 5. <A> links to feeds on external servers ending in ".rss", ".rdf", ".xml", or
+ 5. <A> links to feeds on external servers ending in ".rss", ".rdf", ".xml", or
".atom"
6. <A> links to feeds on external servers containing "rss", "rdf", "xml", or "atom"
7. Try some guesses about common places for feeds (index.xml, atom.xml, etc.).
@@ -59,7 +59,7 @@ def timelimit(timeout):
threading.Thread.__init__(self)
self.result = None
self.error = None
-
+
self.setDaemon(True)
self.start()
@@ -78,7 +78,7 @@ def timelimit(timeout):
return c.result
return _2
return _1
-
+
# XML-RPC support allows feedfinder to query Syndic8 for possible matches.
# Python 2.3 now comes with this module by default, otherwise you can download it
try:
@@ -92,10 +92,10 @@ if not dict:
for k, v in aList:
rc[k] = v
return rc
-
+
def _debuglog(message):
if _debug: print message
-
+
class URLGatekeeper:
"""a class to track robots.txt rules across multiple servers"""
def __init__(self):
@@ -106,7 +106,7 @@ class URLGatekeeper:
self.urlopener.addheaders = [('User-agent', self.urlopener.version)]
robotparser.URLopener.version = self.urlopener.version
robotparser.URLopener.addheaders = self.urlopener.addheaders
-
+
def _getrp(self, url):
protocol, domain = urlparse.urlparse(url)[:2]
if self.rpcache.has_key(domain):
@@ -121,7 +121,7 @@ class URLGatekeeper:
pass
self.rpcache[domain] = rp
return rp
-
+
def can_fetch(self, url):
rp = self._getrp(url)
allow = rp.can_fetch(self.urlopener.version, url)
@@ -143,7 +143,7 @@ class BaseParser(sgmllib.SGMLParser):
sgmllib.SGMLParser.__init__(self)
self.links = []
self.baseuri = baseuri
-
+
def normalize_attrs(self, attrs):
def cleanattr(v):
v = sgmllib.charref.sub(lambda m: unichr(int(m.groups()[0])), v)
@@ -153,14 +153,14 @@ class BaseParser(sgmllib.SGMLParser):
attrs = [(k.lower(), cleanattr(v)) for k, v in attrs]
attrs = [(k, k in ('rel','type') and v.lower() or v) for k, v in attrs]
return attrs
-
+
def do_base(self, attrs):
attrsD = dict(self.normalize_attrs(attrs))
if not attrsD.has_key('href'): return
self.baseuri = attrsD['href']
-
+
def error(self, *a, **kw): pass # we're not picky
-
+
class LinkParser(BaseParser):
FEED_TYPES = ('application/rss+xml',
'text/xml',
@@ -207,7 +207,7 @@ def getLocalLinks(links, baseuri):
return [l for l in links if l.lower().startswith(baseuri)]
def isFeedLink(link):
- return link[-4:].lower() in ('.rss', '.rdf', '.xml', '.atom')
+ return link[link.rfind('.'):].lower() in ('.rss', '.rdf', '.xml', '.atom')
def isXMLRelatedLink(link):
link = link.lower()
@@ -246,7 +246,7 @@ def getFeedsFromSyndic8(uri):
except:
pass
return feeds
-
+
def feeds(uri, all=False, querySyndic8=False, _recurs=None):
if _recurs is None: _recurs = [uri]
fulluri = makeFullURI(uri)
@@ -350,7 +350,7 @@ def test():
uri = urlparse.urljoin(uri, data.split('<link rel="next" href="').pop().split('"')[0])
print
print count, 'tests executed,', len(failed), 'failed'
-
+
if __name__ == '__main__':
args = sys.argv[1:]
if args and args[0] == '--debug':