Diffstat (limited to 'chromium/tools/grit/grit/format/data_pack.py')
-rwxr-xr-x | chromium/tools/grit/grit/format/data_pack.py | 117
1 file changed, 79 insertions, 38 deletions
diff --git a/chromium/tools/grit/grit/format/data_pack.py b/chromium/tools/grit/grit/format/data_pack.py
index 0cdbbd8d341..02616c39d40 100755
--- a/chromium/tools/grit/grit/format/data_pack.py
+++ b/chromium/tools/grit/grit/format/data_pack.py
@@ -3,9 +3,9 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
-'''Support for formatting a data pack file used for platform agnostic resource
+"""Support for formatting a data pack file used for platform agnostic resource
 files.
-'''
+"""
 
 import collections
 import exceptions
@@ -19,7 +19,6 @@ from grit import util
 from grit.node import include
 from grit.node import message
 from grit.node import structure
-from grit.node import misc
 
 
 PACK_FILE_VERSION = 4
@@ -37,7 +36,7 @@ DataPackContents = collections.namedtuple(
 
 
 def Format(root, lang='en', output_dir='.'):
-  '''Writes out the data pack file format (platform agnostic resource file).'''
+  """Writes out the data pack file format (platform agnostic resource file)."""
   data = {}
   for node in root.ActiveDescendants():
     with node:
@@ -55,9 +54,9 @@ def ReadDataPack(input_file):
   original_data = data
 
   # Read the header.
-  version, num_entries, encoding = struct.unpack("<IIB", data[:HEADER_LENGTH])
+  version, num_entries, encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
   if version != PACK_FILE_VERSION:
-    print "Wrong file version in ", input_file
+    print 'Wrong file version in ', input_file
     raise WrongFileVersion
 
   resources = {}
@@ -68,22 +67,21 @@ def ReadDataPack(input_file):
   data = data[HEADER_LENGTH:]
   kIndexEntrySize = 2 + 4  # Each entry is a uint16 and a uint32.
   for _ in range(num_entries):
-    id, offset = struct.unpack("<HI", data[:kIndexEntrySize])
+    id, offset = struct.unpack('<HI', data[:kIndexEntrySize])
     data = data[kIndexEntrySize:]
-    next_id, next_offset = struct.unpack("<HI", data[:kIndexEntrySize])
+    next_id, next_offset = struct.unpack('<HI', data[:kIndexEntrySize])
     resources[id] = original_data[offset:next_offset]
 
   return DataPackContents(resources, encoding)
 
 
 def WriteDataPackToString(resources, encoding):
-  """Write a map of id=>data into a string in the data pack format and return
-  it."""
+  """Returns a string with a map of id=>data in the data pack format."""
   ids = sorted(resources.keys())
   ret = []
 
   # Write file header.
-  ret.append(struct.pack("<IIB", PACK_FILE_VERSION, len(ids), encoding))
+  ret.append(struct.pack('<IIB', PACK_FILE_VERSION, len(ids), encoding))
   HEADER_LENGTH = 2 * 4 + 1  # Two uint32s and one uint8.
 
   # Each entry is a uint16 + a uint32s. We have one extra entry for the last
@@ -93,10 +91,10 @@ def WriteDataPackToString(resources, encoding):
   # Write index.
   data_offset = HEADER_LENGTH + index_length
   for id in ids:
-    ret.append(struct.pack("<HI", id, data_offset))
+    ret.append(struct.pack('<HI', id, data_offset))
     data_offset += len(resources[id])
 
-  ret.append(struct.pack("<HI", 0, data_offset))
+  ret.append(struct.pack('<HI', 0, data_offset))
 
   # Write data.
   for id in ids:
@@ -105,39 +103,82 @@ def WriteDataPackToString(resources, encoding):
 
 
 def WriteDataPack(resources, output_file, encoding):
-  """Write a map of id=>data into output_file as a data pack."""
+  """Writes a map of id=>data into output_file as a data pack."""
   content = WriteDataPackToString(resources, encoding)
-  with open(output_file, "wb") as file:
+  with open(output_file, 'wb') as file:
     file.write(content)
 
 
-def RePack(output_file, input_files):
-  """Write a new data pack to |output_file| based on a list of filenames
-  (|input_files|)"""
+def RePack(output_file, input_files, whitelist_file=None):
+  """Write a new data pack file by combining input pack files.
+
+  Args:
+      output_file: path to the new data pack file.
+      input_files: a list of paths to the data pack files to combine.
+      whitelist_file: path to the file that contains the list of resource IDs
+                      that should be kept in the output file or None to include
+                      all resources.
+
+  Raises:
+      KeyError: if there are duplicate keys or resource encoding is
+      inconsistent.
+  """
+  input_data_packs = [ReadDataPack(filename) for filename in input_files]
+  whitelist = None
+  if whitelist_file:
+    whitelist = util.ReadFile(whitelist_file, util.RAW_TEXT).strip().split('\n')
+    whitelist = set(map(int, whitelist))
+  resources, encoding = RePackFromDataPackStrings(input_data_packs, whitelist)
+  WriteDataPack(resources, output_file, encoding)
+
+
+def RePackFromDataPackStrings(inputs, whitelist):
+  """Returns a data pack string that combines the resources from inputs.
+
+  Args:
+      inputs: a list of data pack strings that need to be combined.
+      whitelist: a list of resource IDs that should be kept in the output string
+                 or None to include all resources.
+
+  Returns:
+      DataPackContents: a tuple containing the new combined data pack and its
+                        encoding.
+
+  Raises:
+      KeyError: if there are duplicate keys or resource encoding is
+      inconsistent.
+  """
   resources = {}
   encoding = None
-  for filename in input_files:
-    new_content = ReadDataPack(filename)
-
+  for content in inputs:
     # Make sure we have no dups.
-    duplicate_keys = set(new_content.resources.keys()) & set(resources.keys())
-    if len(duplicate_keys) != 0:
-      raise exceptions.KeyError("Duplicate keys: " + str(list(duplicate_keys)))
+    duplicate_keys = set(content.resources.keys()) & set(resources.keys())
+    if duplicate_keys:
+      raise exceptions.KeyError('Duplicate keys: ' + str(list(duplicate_keys)))
 
     # Make sure encoding is consistent.
     if encoding in (None, BINARY):
-      encoding = new_content.encoding
-    elif new_content.encoding not in (BINARY, encoding):
-      raise exceptions.KeyError("Inconsistent encodings: " +
-                                str(encoding) + " vs " +
-                                str(new_content.encoding))
-
-    resources.update(new_content.resources)
+      encoding = content.encoding
+    elif content.encoding not in (BINARY, encoding):
+      raise exceptions.KeyError('Inconsistent encodings: ' + str(encoding) +
+                                ' vs ' + str(content.encoding))
+
+    if whitelist:
+      whitelisted_resources = dict([(key, content.resources[key])
+                                    for key in content.resources.keys()
+                                    if key in whitelist])
+      resources.update(whitelisted_resources)
+      removed_keys = [key for key in content.resources.keys()
                      if key not in whitelist]
+      for key in removed_keys:
+        print 'RePackFromDataPackStrings Removed Key:', key
+    else:
+      resources.update(content.resources)
 
   # Encoding is 0 for BINARY, 1 for UTF8 and 2 for UTF16
   if encoding is None:
     encoding = BINARY
-  WriteDataPack(resources, output_file, encoding)
+  return DataPackContents(resources, encoding)
 
 
 # Temporary hack for external programs that import data_pack.
@@ -157,14 +198,14 @@ def main():
     data = ReadDataPack(sys.argv[1])
     print data.encoding
     for (resource_id, text) in data.resources.iteritems():
-      print "%s: %s" % (resource_id, text)
+      print '%s: %s' % (resource_id, text)
   else:
     # Just write a simple file.
-    data = { 1: "", 4: "this is id 4", 6: "this is id 6", 10: "" }
-    WriteDataPack(data, "datapack1.pak", UTF8)
-    data2 = { 1000: "test", 5: "five" }
-    WriteDataPack(data2, "datapack2.pak", UTF8)
-    print "wrote datapack1 and datapack2 to current directory."
+    data = {1: '', 4: 'this is id 4', 6: 'this is id 6', 10: ''}
+    WriteDataPack(data, 'datapack1.pak', UTF8)
+    data2 = {1000: 'test', 5: 'five'}
+    WriteDataPack(data2, 'datapack2.pak', UTF8)
+    print 'wrote datapack1 and datapack2 to current directory.'
 
 
 if __name__ == '__main__':
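
As a rough illustration of the whitelist path added above (a sketch only, not part of the change: the pack names, resource IDs, and whitelist contents are made up, and it assumes the module is importable as grit.format.data_pack):

  from grit.format import data_pack

  # Build two small packs, mirroring the self-test at the bottom of the module.
  data_pack.WriteDataPack({1: 'one', 4: 'four'}, 'a.pak', data_pack.UTF8)
  data_pack.WriteDataPack({1000: 'test', 5: 'five'}, 'b.pak', data_pack.UTF8)

  # RePack reads the whitelist as newline-separated integer resource IDs.
  with open('whitelist.txt', 'w') as f:
    f.write('4\n1000\n')

  # Combine the packs, keeping only IDs 4 and 1000; the dropped keys (1 and 5)
  # are reported on stdout by RePackFromDataPackStrings.
  data_pack.RePack('combined.pak', ['a.pak', 'b.pak'],
                   whitelist_file='whitelist.txt')

  combined = data_pack.ReadDataPack('combined.pak')
  print sorted(combined.resources.keys())  # Expected: [4, 1000]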