Adding ./doc from boost

Reviewer: Lauro Moura <lauro.neto@openbossa.org> Luciano Wolf <luciano.wolf@openbossa.org>
author: Lauro Neto <lauro.neto@openbossa.org> 2010-04-27 18:12:50 -0300
committer: Renato Filho <renato.filho@openbossa.org> 2010-04-28 16:36:47 -0300
commit: aaece960d3cf1c9d19dd84fe8171769517647724 (patch)
tree: ea2e20d2c8738cc407f60b10b6cccfb4e1fc2b70 /doc/pyhtml2devhelp.py
parent: 685246809debe672a0007b67fe78f7c77bad6739 (diff)
1 files changed, 225 insertions, 0 deletions
diff --git a/doc/pyhtml2devhelp.py b/doc/pyhtml2devhelp.py
new file mode 100644
index 000000000..2d78896ef
--- /dev/null
+++ b/doc/pyhtml2devhelp.py
@@ -0,0 +1,225 @@
+#! /usr/bin/python
+
+# This script is based on the Python 2.6 Ubuntu package script by Marc Deslauriers
+
+import formatter, htmllib
+import os, sys, re
+
+class PyHTMLParser(htmllib.HTMLParser):
+    pages_to_include = set(('index.html', 'PySide/QtHelp/index.html', 'PySide/QtSvg/index.html',
+                        'PySide/QtGui/index.html', 'PySide/QtNetwork/index.html', 'PySide/QtWebKit/index.html',
+                        'PySide/QtUiTools/index.html', 'PySide/QtXml/index.html', 'PySide/QtCore/index.html',
+                        'PySide/QtScriptTools/index.html', 'PySide/QtOpenGL/index.html', 'PySide/QtScript/index.html',
+                        'PySide/QtSql/index.html', 'howto-build/index.html'))
+
+    def __init__(self, formatter, basedir, fn, indent, parents=set()):
+        htmllib.HTMLParser.__init__(self, formatter)
+        self.basedir = basedir
+        self.dir, self.fn = os.path.split(fn)
+        self.data = ''
+        self.parents = parents
+        self.link = {}
+        self.indent = indent
+        self.last_indent = indent - 1
+        self.sub_indent = 0
+        self.sub_count = 0
+        self.next_link = False
+
+    def process_link(self):
+        new_href = os.path.join(self.dir, self.link['href'])
+        text = self.link['text']
+        indent = self.indent + self.sub_indent
+        if self.last_indent == indent:
+            print '%s</sub>' % ('  ' * self.last_indent)
+            self.sub_count -= 1
+        print '%s<sub link="%s" name="%s">' % ('  ' * indent, new_href, text)
+        self.sub_count += 1
+        self.last_indent = self.indent + self.sub_indent
+
+    def start_li(self, attrs):
+        self.sub_indent += 1
+        self.next_link = True
+
+    def end_li(self):
+        indent = self.indent + self.sub_indent
+        if self.sub_count > 0:
+            print '%s</sub>' % ('  ' * self.last_indent)
+            self.sub_count -= 1
+            self.last_indent -= 1
+        self.sub_indent -= 1
+
+    def start_a(self, attrs):
+        self.link = {}
+        for attr in attrs:
+            self.link[attr[0]] = attr[1]
+        self.data = ''
+        
+    def end_a(self):
+        process = False
+        text = self.data.replace('\t', '').replace('\n', ' ').replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
+        self.link['text'] = text
+        # handle a tag without href attribute
+        try:
+            href = self.link['href']
+        except KeyError:
+            return
+
+        abs_href = os.path.join(self.basedir, href)
+        if abs_href in self.parents:
+            return
+        if href.startswith('..') or href.startswith('http:') \
+               or href.startswith('mailto:') or href.startswith('news:'):
+            return
+        if href in ('', 'about.html', 'modindex.html', 'genindex.html', 'glossary.html',
+                    'search.html', 'contents.html', 'download.html', 'bugs.html',
+                    'license.html', 'copyright.html'):
+            return
+
+        if self.link.has_key('class'):
+            if self.link['class'] in ('biglink'):
+                process = True
+            if self.link['class'] in ('reference external'):
+                if self.next_link:
+                    process = True
+                    next_link = False
+
+        if process == True:
+            self.process_link()
+            if href in self.pages_to_include:
+                self.parse_file(os.path.join(self.dir, href))
+
+    def finish(self):
+        if self.sub_count > 0:
+            print '%s</sub>' % ('  ' * self.last_indent)
+
+    def handle_data(self, data):
+        self.data += data
+
+    def parse_file(self, href):
+        # TODO basedir bestimmen
+        parent = os.path.join(self.basedir, self.fn)
+        self.parents.add(parent)
+        parser = PyHTMLParser(formatter.NullFormatter(),
+                              self.basedir, href, self.indent + 1,
+                              self.parents)
+        text = file(self.basedir + '/' + href).read()
+        parser.feed(text)
+        parser.finish()
+        parser.close()
+        if parent in self.parents:
+            self.parents.remove(parent)
+
+class PyIdxHTMLParser(htmllib.HTMLParser):
+    def __init__(self, formatter, basedir, fn, indent):
+        htmllib.HTMLParser.__init__(self, formatter)
+        self.basedir = basedir
+        self.dir, self.fn = os.path.split(fn)
+        self.data = ''
+        self.link = {}
+        self.indent = indent
+        self.active = False
+        self.indented = False
+        self.nolink = False
+        self.header = ''
+        self.last_letter = 'Z'
+        self.last_text = ''
+
+    def process_link(self):
+        new_href = os.path.join(self.dir, self.link['href'])
+        text = self.link['text']
+        if not self.active:
+            return
+        if text.startswith('['):
+            return
+        if self.link.get('rel', None) in ('prev', 'parent', 'next', 'contents', 'index'):
+            return
+        if self.indented:
+            text = self.last_text + ' ' + text
+        else:
+            # Save it in case we need it again
+            self.last_text = re.sub(' \([\w\-\.\s]+\)', '', text)
+        indent = self.indent
+        print '%s<function link="%s" name="%s"/>' % ('  ' * indent, new_href, text)
+
+    def start_dl(self, attrs):
+        if self.last_text:
+            # Looks like we found the second part to a command
+            self.indented = True
+
+    def end_dl(self):
+        self.indented = False
+
+    def start_dt(self, attrs):
+        self.data = ''
+        self.nolink = True
+
+    def end_dt(self):
+        if not self.active:
+            return
+        if self.nolink == True:
+            # Looks like we found the first part to a command
+            self.last_text = re.sub(' \([\w\-\.\s]+\)', '', self.data)
+            self.nolink = False
+
+    def start_h2(self, attrs):
+        for k, v in attrs:
+            if k == 'id':
+                self.header = v
+                if v == '_':
+                    self.active = True
+
+    def start_td(self, attrs):
+        self.indented = False
+        self.last_text = ''
+
+    def start_table(self, attrs):
+        pass
+
+    def end_table(self):
+        if self.header == self.last_letter:
+            self.active = False
+
+    def start_a(self, attrs):
+        self.nolink = False
+        self.link = {}
+        for attr in attrs:
+            self.link[attr[0]] = attr[1]
+        self.data = ''
+        
+    def end_a(self):
+        text = self.data.replace('\t', '').replace('\n', ' ').replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
+        self.link['text'] = text
+        # handle a tag without href attribute
+        try:
+            href = self.link['href']
+        except KeyError:
+            return
+        self.process_link()
+
+    def handle_data(self, data):
+        self.data += data
+
+def main():
+    base = sys.argv[1]
+    fn = sys.argv[2]
+
+    parser = PyHTMLParser(formatter.NullFormatter(), base, fn, indent=0)
+    print '<?xml version="1.0" encoding="iso-8859-1"?>'
+    print '<book title="PySide %s Documentation" name="PySide" version="%s" link="index.html">' % (sys.argv[3], sys.argv[3])
+    print '<chapters>'
+    parser.parse_file(fn)
+    print '</chapters>'
+
+    print '<functions>'
+
+    fn = 'genindex.html'
+    parser = PyIdxHTMLParser(formatter.NullFormatter(), base, fn, indent=1)
+    text = file(base + '/' + fn).read()
+    parser.feed(text)
+    parser.close()
+
+    print '</functions>'
+    print '</book>'
+
+main()
+
author	Lauro Neto <lauro.neto@openbossa.org>	2010-04-27 18:12:50 -0300
committer	Renato Filho <renato.filho@openbossa.org>	2010-04-28 16:36:47 -0300
commit	aaece960d3cf1c9d19dd84fe8171769517647724 (patch)
tree	ea2e20d2c8738cc407f60b10b6cccfb4e1fc2b70 /doc/pyhtml2devhelp.py
parent	685246809debe672a0007b67fe78f7c77bad6739 (diff)