summaryrefslogtreecommitdiffstats
path: root/chromium/third_party/catapult/common/py_vulcanize/py_vulcanize/parse_html_deps.py
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/third_party/catapult/common/py_vulcanize/py_vulcanize/parse_html_deps.py')
-rw-r--r--chromium/third_party/catapult/common/py_vulcanize/py_vulcanize/parse_html_deps.py283
1 files changed, 283 insertions, 0 deletions
diff --git a/chromium/third_party/catapult/common/py_vulcanize/py_vulcanize/parse_html_deps.py b/chromium/third_party/catapult/common/py_vulcanize/py_vulcanize/parse_html_deps.py
new file mode 100644
index 00000000000..4a0888ca1dc
--- /dev/null
+++ b/chromium/third_party/catapult/common/py_vulcanize/py_vulcanize/parse_html_deps.py
@@ -0,0 +1,283 @@
+# Copyright (c) 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import sys
+
+from py_vulcanize import js_utils
+from py_vulcanize import module
+from py_vulcanize import strip_js_comments
+from py_vulcanize import html_generation_controller
+
+
+def _AddToPathIfNeeded(path):
+ if path not in sys.path:
+ sys.path.insert(0, path)
+
+
+def _InitBeautifulSoup():
+ catapult_path = os.path.abspath(
+ os.path.join(os.path.dirname(__file__),
+ os.path.pardir, os.path.pardir, os.path.pardir))
+ bs_path = os.path.join(catapult_path, 'third_party', 'beautifulsoup4')
+ _AddToPathIfNeeded(bs_path)
+
+ html5lib_path = os.path.join(catapult_path, 'third_party', 'html5lib-python')
+ _AddToPathIfNeeded(html5lib_path)
+
+ six_path = os.path.join(catapult_path, 'third_party', 'six')
+ _AddToPathIfNeeded(six_path)
+
+
+_InitBeautifulSoup()
+import bs4
+
+class Script(object):
+
+ def __init__(self, soup):
+ if not soup:
+ raise module.DepsException('Script object created without soup')
+ self._soup = soup
+
+ def AppendJSContentsToFile(self, f, *args, **kwargs):
+ raise NotImplementedError()
+
+class InlineScript(Script):
+
+ def __init__(self, soup):
+ super(InlineScript, self).__init__(soup)
+ self._stripped_contents = None
+ self._open_tags = None
+ self.is_external = False
+
+ @property
+ def contents(self):
+ return unicode(self._soup.string)
+
+ @property
+ def stripped_contents(self):
+ if not self._stripped_contents:
+ self._stripped_contents = strip_js_comments.StripJSComments(
+ self.contents)
+ return self._stripped_contents
+
+ @property
+ def open_tags(self):
+ if self._open_tags:
+ return self._open_tags
+ open_tags = []
+ cur = self._soup.parent
+ while cur:
+ if isinstance(cur, bs4.BeautifulSoup):
+ break
+
+ open_tags.append(_Tag(cur.name, cur.attrs))
+ cur = cur.parent
+
+ open_tags.reverse()
+ assert open_tags[-1].tag == 'script'
+ del open_tags[-1]
+
+ self._open_tags = open_tags
+ return self._open_tags
+
+ def AppendJSContentsToFile(self, f, *args, **kwargs):
+ js = self.contents
+ escaped_js = js_utils.EscapeJSIfNeeded(js)
+ f.write(escaped_js)
+ f.write('\n')
+
+class ExternalScript(Script):
+
+ def __init__(self, soup):
+ super(ExternalScript, self).__init__(soup)
+ if 'src' not in soup.attrs:
+ raise Exception("{0} is not an external script.".format(soup))
+ self.is_external = True
+ self._loaded_raw_script = None
+
+ @property
+ def loaded_raw_script(self):
+ if self._loaded_raw_script:
+ return self._loaded_raw_script
+
+ return None
+
+ @loaded_raw_script.setter
+ def loaded_raw_script(self, value):
+ self._loaded_raw_script = value
+
+ @property
+ def src(self):
+ return self._soup.attrs['src']
+
+ def AppendJSContentsToFile(self,
+ f,
+ use_include_tags_for_scripts,
+ dir_for_include_tag_root):
+ raw_script = self.loaded_raw_script
+ if not raw_script:
+ return
+
+ if use_include_tags_for_scripts:
+ rel_filename = os.path.relpath(raw_script.filename,
+ dir_for_include_tag_root)
+ f.write("""<include src="%s">\n""" % rel_filename)
+ else:
+ f.write(js_utils.EscapeJSIfNeeded(raw_script.contents))
+ f.write('\n')
+
+def _CreateSoupWithoutHeadOrBody(html):
+ soupCopy = bs4.BeautifulSoup(html, 'html5lib')
+ soup = bs4.BeautifulSoup()
+ soup.reset()
+ if soupCopy.head:
+ for n in soupCopy.head.contents:
+ n.extract()
+ soup.append(n)
+ if soupCopy.body:
+ for n in soupCopy.body.contents:
+ n.extract()
+ soup.append(n)
+ return soup
+
+
+class HTMLModuleParserResults(object):
+
+ def __init__(self, html):
+ self._soup = bs4.BeautifulSoup(html, 'html5lib')
+ self._inline_scripts = None
+ self._scripts = None
+
+ @property
+ def scripts_external(self):
+ tags = self._soup.findAll('script', src=True)
+ return [t['src'] for t in tags]
+
+ @property
+ def inline_scripts(self):
+ if not self._inline_scripts:
+ tags = self._soup.findAll('script', src=None)
+ self._inline_scripts = [InlineScript(t.string) for t in tags]
+ return self._inline_scripts
+
+ @property
+ def scripts(self):
+ if not self._scripts:
+ self._scripts = []
+ script_elements = self._soup.findAll('script')
+ for element in script_elements:
+ if 'src' in element.attrs:
+ self._scripts.append(ExternalScript(element))
+ else:
+ self._scripts.append(InlineScript(element))
+ return self._scripts
+
+ @property
+ def imports(self):
+ tags = self._soup.findAll('link', rel='import')
+ return [t['href'] for t in tags]
+
+ @property
+ def stylesheets(self):
+ tags = self._soup.findAll('link', rel='stylesheet')
+ return [t['href'] for t in tags]
+
+ @property
+ def inline_stylesheets(self):
+ tags = self._soup.findAll('style')
+ return [unicode(t.string) for t in tags]
+
+ def YieldHTMLInPieces(self, controller, minify=False):
+ yield self.GenerateHTML(controller, minify)
+
+ def GenerateHTML(self, controller, minify=False, prettify=False):
+ soup = _CreateSoupWithoutHeadOrBody(unicode(self._soup))
+
+ # Remove declaration.
+ for x in soup.contents:
+ if isinstance(x, bs4.Doctype):
+ x.extract()
+
+ # Remove declaration.
+ for x in soup.contents:
+ if isinstance(x, bs4.Declaration):
+ x.extract()
+
+ # Remove all imports.
+ imports = soup.findAll('link', rel='import')
+ for imp in imports:
+ imp.extract()
+
+ # Remove all script links.
+ scripts_external = soup.findAll('script', src=True)
+ for script in scripts_external:
+ script.extract()
+
+ # Remove all in-line scripts.
+ scripts_external = soup.findAll('script', src=None)
+ for script in scripts_external:
+ script.extract()
+
+ # Process all in-line styles.
+ inline_styles = soup.findAll('style')
+ for style in inline_styles:
+ html = controller.GetHTMLForInlineStylesheet(unicode(style.string))
+ if html:
+ ns = soup.new_tag('style')
+ ns.append(bs4.NavigableString(html))
+ style.replaceWith(ns)
+ else:
+ style.extract()
+
+ # Rewrite all external stylesheet hrefs or remove, as needed.
+ stylesheet_links = soup.findAll('link', rel='stylesheet')
+ for stylesheet_link in stylesheet_links:
+ html = controller.GetHTMLForStylesheetHRef(stylesheet_link['href'])
+ if html:
+ tmp = bs4.BeautifulSoup(html, 'html5lib').findAll('style')
+ assert len(tmp) == 1
+ stylesheet_link.replaceWith(tmp[0])
+ else:
+ stylesheet_link.extract()
+
+ # Remove comments if minifying.
+ if minify:
+ comments = soup.findAll(
+ text=lambda text: isinstance(text, bs4.Comment))
+ for comment in comments:
+ comment.extract()
+ if prettify:
+ return soup.prettify('utf-8').strip()
+
+ # We are done.
+ return unicode(soup).strip()
+
+ @property
+ def html_contents_without_links_and_script(self):
+ return self.GenerateHTML(
+ html_generation_controller.HTMLGenerationController())
+
+
+class _Tag(object):
+
+ def __init__(self, tag, attrs):
+ self.tag = tag
+ self.attrs = attrs
+
+ def __repr__(self):
+ attr_string = ' '.join('%s="%s"' % (x[0], x[1]) for x in self.attrs)
+ return '<%s %s>' % (self.tag, attr_string)
+
+
+class HTMLModuleParser():
+
+ def Parse(self, html):
+ if html is None:
+ html = ''
+ else:
+ if html.find('< /script>') != -1:
+ raise Exception('Escape script tags with <\/script>')
+
+ return HTMLModuleParserResults(html)