1 files changed, 56 insertions, 46 deletions
diff --git a/chromium/third_party/WebKit/Source/build/scripts/make_element_lookup_trie.py b/chromium/third_party/WebKit/Source/build/scripts/make_element_lookup_trie.py
index ec186fd3093..b01042dd21d 100755
--- a/chromium/third_party/WebKit/Source/build/scripts/make_element_lookup_trie.py
+++ b/chromium/third_party/WebKit/Source/build/scripts/make_element_lookup_trie.py
@@ -27,51 +27,63 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+from itertools import groupby, islice
 import sys
-import StringIO
-from collections import defaultdict
 
 import in_generator
 import template_expander
 
+PARAMETER_NAME = 'data'
+
+
+def _trie(tags, index):
+    """Make a trie from list of tags, starting at index.
+
+    Resulting trie is partly space-optimized (semi-radix tree): once have only
+    one string left, compact the entire branch to one leaf node.
+    However, does not compact branch nodes with a single child. (FIXME)
+
+    Returns:
+        (char, subtrie, tag, conditions): (char, trie, str, list)
+            code generation differs between branch nodes and leaf nodes,
+            hence need different data for each.
+
+    Arguments:
+        tags: sorted list
+            (sorted needed by groupby, list needed by len)
+        index: index at which to branch
+            (assumes prior to this index strings have a common prefix)
+    """
+    def trie_node(char, subtags_iter):
+        # Pass in |char| so we can include in same tuple without unpacking
+        subtags = list(subtags_iter)  # need list for len
+        if len(subtags) == 1:  # terminal node, no subtrie
+            subtrie = None
+            tag = subtags[0]
+            conditions = _conditions(tag, index + 1)
+        else:
+            subtrie = _trie(subtags, index + 1)
+            tag = None
+            conditions = None
+        return char, subtrie, tag, conditions
 
-def _char_dict(tags, index):
-    new_dict = defaultdict(list)
-    for tag in tags:
-        if index >= len(tag):
-            continue
-        new_dict[tag[index].lower()].append(tag)
-    return new_dict
+    # Group by char at index
+    def char_at_index(tag):
+        return tag[index].lower()
 
+    char_subtags = ((k, g) for k, g in groupby(tags, char_at_index))
 
-def _generate_if(name, index):
-    conditions = []
-    for i in range(index, len(name)):
-        conditions.append("data[%d] == '%c'" % (i, name[i].lower()))
-    return "if (%s)\n" % " && ".join(conditions)
+    # FIXME: if all subtags have a common prefix, merge with child
+    # and skip the switch in the generated code
 
+    return (trie_node(char, subtags) for char, subtags in char_subtags)
 
-def _print_switch(buf, tag_list, index):
-    indent = '    ' * (index + 2)
-    data = _char_dict(tag_list, index)
-    # FIXME: No need to switch if there's only a single item in the data dict
-    buf.write(indent + 'switch (data[%d]) {\n' % index)
-    for char, tags in data.iteritems():
-        buf.write(indent + "case '%c':\n" % char)
-        if len(tags) > 1:
-            _print_switch(buf, tags, index + 1)
-        else:
-            tag = tags[0]
-            retval = indent + "    return %sTag.localName().impl();\n" % tag
-            if len(tag) == index + 1:
-                buf.write(retval)
-            else:
-                buf.write(indent + '    ' + _generate_if(tag, index + 1))
-                buf.write('    ' + retval)
-                buf.write(indent + "    return 0;\n")
 
-    buf.write(indent + '}\n')
-    buf.write(indent + "return 0;\n")
+def _conditions(tag, index):
+    # boolean conditions to check suffix; corresponds to compacting branch
+    # with a single leaf
+    return ["%s[%d] == '%c'" % (PARAMETER_NAME, i, c.lower())
+            for i, c in islice(enumerate(tag), index, None)]
 
 
 class ElementLookupTrieWriter(in_generator.Writer):
@@ -99,8 +111,8 @@ class ElementLookupTrieWriter(in_generator.Writer):
         self._tags = [entry['name'] for entry in self.in_file.name_dictionaries]
         self._namespace = self.in_file.parameters['namespace'].strip('"')
         self._outputs = {
-            (self._namespace + "ElementLookupTrie.h"): self.generate_header,
-            (self._namespace + "ElementLookupTrie.cpp"): self.generate_implementation,
+            (self._namespace + 'ElementLookupTrie.h'): self.generate_header,
+            (self._namespace + 'ElementLookupTrie.cpp'): self.generate_implementation,
         }
 
     @template_expander.use_jinja('ElementLookupTrie.h.tmpl')
@@ -111,19 +123,17 @@ class ElementLookupTrieWriter(in_generator.Writer):
 
     @template_expander.use_jinja('ElementLookupTrie.cpp.tmpl')
     def generate_implementation(self):
-        size_dict = defaultdict(list)
-        for tag in self._tags:
-            size_dict[len(tag)].append(tag)
-
-        buf = StringIO.StringIO()
-        for length, tags in size_dict.iteritems():
-            buf.write("    case %d:\n" % length)
-            _print_switch(buf, tags, 0)
+        # First sort, so groupby works
+        self._tags.sort(key=lambda tag: (len(tag), tag))
+        # Group tags by length
+        length_tags = ((k, g) for k, g in groupby(self._tags, len))
+
         return {
             'namespace': self._namespace,
-            'body': buf.getvalue(),
+            'length_tries': ((length, _trie(tags, 0))
+                             for length, tags in length_tags),
         }
 
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     in_generator.Maker(ElementLookupTrieWriter).main(sys.argv)