Reapply ~"Bitcode: Collect all MDString records into a single blob"

Spiritually reapply commit r264409 (reverted in r264410), albeit with a bit of a redesign. Firstly, avoid splitting the big blob into multiple chunks of strings. r264409 imposed an arbitrary limit to avoid a massive allocation on the shared 'Record' SmallVector. The bug with that commit only reproduced when there were more than "chunk-size" strings. A test for this would have been useless long-term, since we're liable to adjust the chunk-size in the future. Thus, eliminate the motivation for chunk-ing by storing the string sizes in the blob. Here's the layout: vbr6: # of strings vbr6: offset-to-blob blob: [vbr6]: string lengths [char]: concatenated strings Secondly, make the output of llvm-bcanalyzer readable. I noticed when debugging r264409 that llvm-bcanalyzer was outputting a massive blob all in one line. Past a small number, the strings were impossible to split in my head, and the lines were way too long. This version adds support in llvm-bcanalyzer for pretty-printing. <STRINGS abbrevid=4 op0=3 op1=9/> num-strings = 3 { 'abc' 'def' 'ghi' } From the original commit: Inspired by Mehdi's similar patch, http://reviews.llvm.org/D18342, this should (a) slightly reduce bitcode size, since there is less record overhead, and (b) greatly improve reading speed, since blobs are super cheap to deserialize. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@264551 91177308-0d34-0410-b5e6-96231b3b80d8
author: Duncan P. N. Exon Smith <dexonsmith@apple.com> 2016-03-27 23:17:54 +0000
committer: Duncan P. N. Exon Smith <dexonsmith@apple.com> 2016-03-27 23:17:54 +0000
commit: a73e509bf8d4be01f6b78d350a652b634aa31455 (patch)
tree: 21eae48701c9a21522f7a777b719597562a37d68 /tools/llvm-bcanalyzer
parent: 15b99e03690cee6cbd2b415ee4033799f753353f (diff)
1 files changed, 55 insertions, 2 deletions
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 32179c168dea..6645045db8fa 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -312,7 +312,8 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
   case bitc::METADATA_BLOCK_ID:
     switch(CodeID) {
     default:return nullptr;
-      STRINGIFY_CODE(METADATA, STRING)
+      STRINGIFY_CODE(METADATA, STRING_OLD)
+      STRINGIFY_CODE(METADATA, STRINGS)
       STRINGIFY_CODE(METADATA, NAME)
       STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK
       STRINGIFY_CODE(METADATA, NODE)
@@ -404,6 +405,57 @@ static bool Error(const Twine &Err) {
   return true;
 }
 
+static bool decodeMetadataStringsBlob(BitstreamReader &Reader, StringRef Indent,
+                                      ArrayRef<uint64_t> Record,
+                                      StringRef Blob) {
+  if (Blob.empty())
+    return true;
+
+  if (Record.size() != 2)
+    return true;
+
+  unsigned NumStrings = Record[0];
+  unsigned StringsOffset = Record[1];
+  outs() << " num-strings = " << NumStrings << " {\n";
+
+  StringRef Lengths = Blob.slice(0, StringsOffset);
+  SimpleBitstreamCursor R(Reader);
+  R.jumpToPointer(Lengths.begin());
+
+  // Ensure that Blob doesn't get invalidated, even if this is reading from a
+  // StreamingMemoryObject with corrupt data.
+  R.setArtificialByteLimit(R.getCurrentByteNo() + StringsOffset);
+
+  StringRef Strings = Blob.drop_front(StringsOffset);
+  do {
+    if (R.AtEndOfStream())
+      return Error("bad length");
+
+    unsigned Size = R.ReadVBR(6);
+    if (Strings.size() < Size)
+      return Error("truncated chars");
+
+    outs() << Indent << "    '";
+    outs().write_escaped(Strings.slice(0, Size), /*hex=*/true);
+    outs() << "'\n";
+    Strings = Strings.drop_front(Size);
+  } while (--NumStrings);
+
+  outs() << Indent << "  }";
+  return false;
+}
+
+static bool decodeBlob(unsigned Code, unsigned BlockID, BitstreamReader &Reader,
+                       StringRef Indent, ArrayRef<uint64_t> Record,
+                       StringRef Blob) {
+  if (BlockID != bitc::METADATA_BLOCK_ID)
+    return true;
+  if (Code != bitc::METADATA_STRINGS)
+    return true;
+
+  return decodeMetadataStringsBlob(Reader, Indent, Record, Blob);
+}
+
 /// ParseBlock - Read a block, updating statistics, etc.
 static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
                        unsigned IndentLevel, CurStreamTypeType CurStreamType) {
@@ -557,7 +609,8 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
         }
       }
 
-      if (Blob.data()) {
+      if (Blob.data() && decodeBlob(Code, BlockID, *Stream.getBitStreamReader(),
+                                    Indent, Record, Blob)) {
         outs() << " blob data = ";
         if (ShowBinaryBlobs) {
           outs() << "'";
author	Duncan P. N. Exon Smith <dexonsmith@apple.com>	2016-03-27 23:17:54 +0000
committer	Duncan P. N. Exon Smith <dexonsmith@apple.com>	2016-03-27 23:17:54 +0000
commit	a73e509bf8d4be01f6b78d350a652b634aa31455 (patch)
tree	21eae48701c9a21522f7a777b719597562a37d68 /tools/llvm-bcanalyzer
parent	15b99e03690cee6cbd2b415ee4033799f753353f (diff)