diff options
author | Lauro Neto <lauro.neto@openbossa.org> | 2010-04-27 18:12:50 -0300 |
---|---|---|
committer | Renato Filho <renato.filho@openbossa.org> | 2010-04-28 16:36:47 -0300 |
commit | aaece960d3cf1c9d19dd84fe8171769517647724 (patch) | |
tree | ea2e20d2c8738cc407f60b10b6cccfb4e1fc2b70 /doc/pyhtml2devhelp.py | |
parent | 685246809debe672a0007b67fe78f7c77bad6739 (diff) |
Adding ./doc from boost
Reviewer: Lauro Moura <lauro.neto@openbossa.org>
Luciano Wolf <luciano.wolf@openbossa.org>
Diffstat (limited to 'doc/pyhtml2devhelp.py')
-rw-r--r-- | doc/pyhtml2devhelp.py | 225 |
1 files changed, 225 insertions, 0 deletions
#! /usr/bin/python

# This script is based on Python2.6 Ubuntu package script by Marc Deslauriers

"""Generate a devhelp book index from the PySide HTML documentation.

Usage: pyhtml2devhelp.py BASEDIR INDEX_FILE VERSION

The XML document is written to standard output.  Its ``<chapters>`` section
is built by following links starting at INDEX_FILE, and its ``<functions>``
section is built from ``genindex.html``.  Both files are read relative to
BASEDIR.
"""

import formatter
import htmllib
import os
import re
import sys

# NOTE: every print below is called with exactly one parenthesised argument,
# so it means the same thing under the Python 2 print statement.

# A space followed by a parenthesised run of word characters, '-', '.' or
# whitespace.  It is stripped from index entries before they are remembered
# as the prefix for nested entries.
_PAREN_SUFFIX_RE = re.compile(r' \([\w\-\.\s]+\)')


def _clean_link_text(data):
    """Return *data* flattened to one line and escaped for an XML attribute.

    Tabs are dropped and newlines become spaces.  '&' is escaped first so the
    entities produced for '<', '>' and '"' are not escaped a second time.
    """
    text = data.replace('\t', '').replace('\n', ' ')
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;').replace('>', '&gt;')
    # The text is emitted inside name="...", so a bare quote would end it.
    return text.replace('"', '&quot;')


class PyHTMLParser(htmllib.HTMLParser):
    """Print nested ``<sub>`` chapter elements for the links of one page.

    Links are selected by their ``class`` attribute; pages listed in
    ``pages_to_include`` are parsed recursively one indent level deeper.
    """

    pages_to_include = set(('index.html', 'PySide/QtHelp/index.html', 'PySide/QtSvg/index.html',
                            'PySide/QtGui/index.html', 'PySide/QtNetwork/index.html', 'PySide/QtWebKit/index.html',
                            'PySide/QtUiTools/index.html', 'PySide/QtXml/index.html', 'PySide/QtCore/index.html',
                            'PySide/QtScriptTools/index.html', 'PySide/QtOpenGL/index.html', 'PySide/QtScript/index.html',
                            'PySide/QtSql/index.html', 'howto-build/index.html'))

    def __init__(self, formatter, basedir, fn, indent, parents=None):
        """Set up the parser.

        formatter -- formatter object handed to htmllib.HTMLParser
        basedir   -- directory the documentation files are read from
        fn        -- path of the page, relative to basedir
        indent    -- indent level used for this page's <sub> elements
        parents   -- set of pages currently being parsed further up the
                     recursion; a fresh set is created when omitted
        """
        htmllib.HTMLParser.__init__(self, formatter)
        self.basedir = basedir
        self.dir, self.fn = os.path.split(fn)
        self.data = ''
        # A default of set() in the signature would be shared by every
        # instance created without an explicit argument.
        if parents is None:
            parents = set()
        self.parents = parents
        self.link = {}
        self.indent = indent
        self.last_indent = indent - 1
        self.sub_indent = 0
        self.sub_count = 0
        self.next_link = False

    def process_link(self):
        """Print the opening <sub> element for the current link."""
        new_href = os.path.join(self.dir, self.link['href'])
        text = self.link['text']
        indent = self.indent + self.sub_indent
        if self.last_indent == indent:
            # The previous element sits at the same level: close it first.
            print('%s</sub>' % (' ' * self.last_indent))
            self.sub_count -= 1
        print('%s<sub link="%s" name="%s">' % (' ' * indent, new_href, text))
        self.sub_count += 1
        self.last_indent = indent

    def start_li(self, attrs):
        """Enter a list item: go one level deeper and arm next_link."""
        self.sub_indent += 1
        self.next_link = True

    def end_li(self):
        """Leave a list item, closing one pending <sub> if there is any."""
        if self.sub_count > 0:
            print('%s</sub>' % (' ' * self.last_indent))
            self.sub_count -= 1
            # NOTE(review): the reviewed text had lost its indentation; this
            # decrement is assumed to belong to the "if" -- confirm against
            # the original file.
            self.last_indent -= 1
        self.sub_indent -= 1

    def start_a(self, attrs):
        """Start collecting the attributes and text of an anchor."""
        self.link = dict(attrs)
        self.data = ''

    def end_a(self):
        """Decide whether the anchor just closed becomes a chapter entry."""
        self.link['text'] = _clean_link_text(self.data)
        # handle a tag without href attribute
        if 'href' not in self.link:
            return
        href = self.link['href']

        # Never descend into a page that is already being parsed.
        if os.path.join(self.basedir, href) in self.parents:
            return
        if href.startswith(('..', 'http:', 'mailto:', 'news:')):
            return
        if href in ('', 'about.html', 'modindex.html', 'genindex.html', 'glossary.html',
                    'search.html', 'contents.html', 'download.html', 'bugs.html',
                    'license.html', 'copyright.html'):
            return

        process = False
        # Compared with ==: "x in ('biglink')" is a substring test on a plain
        # string and would also accept '' or any fragment of the class name.
        link_class = self.link.get('class')
        if link_class == 'biglink':
            process = True
        elif link_class == 'reference external' and self.next_link:
            process = True
            # Reset the instance flag; assigning a bare local here would
            # leave self.next_link set until the end of the run.
            self.next_link = False

        if process:
            self.process_link()
            if href in self.pages_to_include:
                self.parse_file(os.path.join(self.dir, href))

    def finish(self):
        """Close a <sub> element that is still open once the page is done."""
        if self.sub_count > 0:
            print('%s</sub>' % (' ' * self.last_indent))

    def handle_data(self, data):
        """Accumulate character data; start_a resets the buffer."""
        self.data += data

    def parse_file(self, href):
        """Parse the page *href* (relative to basedir) one level deeper."""
        # TODO determine basedir
        parent = os.path.join(self.basedir, self.fn)
        self.parents.add(parent)
        parser = PyHTMLParser(formatter.NullFormatter(),
                              self.basedir, href, self.indent + 1,
                              self.parents)
        # "with" closes the file even if reading fails.
        with open(self.basedir + '/' + href) as page:
            text = page.read()
        parser.feed(text)
        parser.finish()
        parser.close()
        self.parents.discard(parent)


class PyIdxHTMLParser(htmllib.HTMLParser):
    """Print one ``<function>`` element per link of the index page.

    Output is enabled by an <h2> whose id is '_' and disabled at the end of
    the table that follows the <h2> whose id equals ``last_letter``.
    """

    def __init__(self, formatter, basedir, fn, indent):
        """Set up the parser; see PyHTMLParser.__init__ for the arguments."""
        htmllib.HTMLParser.__init__(self, formatter)
        self.basedir = basedir
        self.dir, self.fn = os.path.split(fn)
        self.data = ''
        self.link = {}
        self.indent = indent
        self.active = False
        self.indented = False
        self.nolink = False
        self.header = ''
        self.last_letter = 'Z'
        self.last_text = ''

    def process_link(self):
        """Print a <function> element for the current link, if wanted."""
        new_href = os.path.join(self.dir, self.link['href'])
        text = self.link['text']
        if not self.active:
            return
        if text.startswith('['):
            return
        if self.link.get('rel', None) in ('prev', 'parent', 'next', 'contents', 'index'):
            return
        if self.indented:
            # Nested entry: prefix it with the entry it belongs to.
            text = self.last_text + ' ' + text
        else:
            # Save it in case we need it again
            self.last_text = _PAREN_SUFFIX_RE.sub('', text)
        print('%s<function link="%s" name="%s"/>' % (' ' * self.indent, new_href, text))

    def start_dl(self, attrs):
        """A <dl> opened after an entry was remembered holds sub-entries."""
        if self.last_text:
            # Looks like we found the second part to a command
            self.indented = True

    def end_dl(self):
        """Leaving a <dl> ends the nested-entry mode."""
        self.indented = False

    def start_dt(self, attrs):
        """Start a <dt>; assume it has no link until start_a says otherwise."""
        self.data = ''
        self.nolink = True

    def end_dt(self):
        """Remember the text of a <dt> that contained no link."""
        if not self.active:
            return
        if self.nolink:
            # Looks like we found the first part to a command
            self.last_text = _PAREN_SUFFIX_RE.sub('', self.data)
            self.nolink = False

    def start_h2(self, attrs):
        """Record the id of the section heading; the id '_' enables output."""
        for k, v in attrs:
            if k == 'id':
                self.header = v
                if v == '_':
                    self.active = True

    def start_td(self, attrs):
        """A new table cell starts without any remembered entry."""
        self.indented = False
        self.last_text = ''

    def start_table(self, attrs):
        """Tables need no action when they open."""
        pass

    def end_table(self):
        """Disable output after the table of the last index section."""
        if self.header == self.last_letter:
            self.active = False

    def start_a(self, attrs):
        """Start collecting the attributes and text of an anchor."""
        self.nolink = False
        self.link = dict(attrs)
        self.data = ''

    def end_a(self):
        """Hand an anchor that carries an href over to process_link."""
        self.link['text'] = _clean_link_text(self.data)
        # handle a tag without href attribute
        if 'href' not in self.link:
            return
        self.process_link()

    def handle_data(self, data):
        """Accumulate character data; start_a and start_dt reset the buffer."""
        self.data += data


def main():
    """Write the complete devhelp book for sys.argv[1:4] to stdout."""
    if len(sys.argv) < 4:
        sys.exit('usage: %s BASEDIR INDEX_FILE VERSION' % sys.argv[0])
    base, fn, version = sys.argv[1:4]

    parser = PyHTMLParser(formatter.NullFormatter(), base, fn, indent=0)
    print('<?xml version="1.0" encoding="iso-8859-1"?>')
    print('<book title="PySide %s Documentation" name="PySide" version="%s" link="index.html">' % (version, version))
    print('<chapters>')
    parser.parse_file(fn)
    print('</chapters>')

    print('<functions>')

    fn = 'genindex.html'
    parser = PyIdxHTMLParser(formatter.NullFormatter(), base, fn, indent=1)
    with open(base + '/' + fn) as page:
        text = page.read()
    parser.feed(text)
    parser.close()

    print('</functions>')
    print('</book>')


# Guarded so that importing the module does not start a conversion.
if __name__ == '__main__':
    main()