[analyzer] Faster hashing of subsequences in CompoundStmts.

Summary: This patches improves the hashing subsequences in CompoundStmts by incrementally hashing all subsequences with the same starting position. This results in a reduction of the time for this constraint while running over SQLite from 1.10 seconds to 0.55 seconds (-50%). Reviewers: NoQ Reviewed By: NoQ Subscribers: cfe-commits, xazax.hun, v.g.vassilev Differential Revision: https://reviews.llvm.org/D34364 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@307509 91177308-0d34-0410-b5e6-96231b3b80d8
author: Raphael Isemann <teemperor@gmail.com> 2017-07-09 21:14:36 +0000
committer: Raphael Isemann <teemperor@gmail.com> 2017-07-09 21:14:36 +0000
commit: 972c81b9ec5e4b5a19c18dc9a45bf95d5ba6c857 (patch)
tree: 5df8c1049ba6ed677d7a3ffc6dd03e62db2b09e3 /lib/Analysis/CloneDetection.cpp
parent: df9cbcd63ddad47689a52b003a7ef66791326b99 (diff)
1 files changed, 20 insertions, 9 deletions
diff --git a/lib/Analysis/CloneDetection.cpp b/lib/Analysis/CloneDetection.cpp
index e698d3e5c5..5ea74989a7 100644
--- a/lib/Analysis/CloneDetection.cpp
+++ b/lib/Analysis/CloneDetection.cpp
@@ -239,16 +239,27 @@ size_t RecursiveCloneTypeIIConstraint::saveHash(
   }
 
   if (CS) {
-    for (unsigned Length = 2; Length <= CS->size(); ++Length) {
-      for (unsigned Pos = 0; Pos <= CS->size() - Length; ++Pos) {
-        llvm::MD5 Hash;
-        for (unsigned i = Pos; i < Pos + Length; ++i) {
-          size_t ChildHash = ChildHashes[i];
-          Hash.update(StringRef(reinterpret_cast<char *>(&ChildHash),
-                                sizeof(ChildHash)));
+    // If we're in a CompoundStmt, we hash all possible combinations of child
+    // statements to find clones in those subsequences.
+    // We first go through every possible starting position of a subsequence.
+    for (unsigned Pos = 0; Pos < CS->size(); ++Pos) {
+      // Then we try all possible lengths this subsequence could have and
+      // reuse the same hash object to make sure we only hash every child
+      // hash exactly once.
+      llvm::MD5 Hash;
+      for (unsigned Length = 1; Length <= CS->size() - Pos; ++Length) {
+        // Grab the current child hash and put it into our hash. We do
+        // -1 on the index because we start counting the length at 1.
+        size_t ChildHash = ChildHashes[Pos + Length - 1];
+        Hash.update(
+            StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash)));
+        // If we have at least two elements in our subsequence, we can start
+        // saving it.
+        if (Length > 1) {
+          llvm::MD5 SubHash = Hash;
+          StmtsByHash.push_back(std::make_pair(
+              createHash(SubHash), StmtSequence(CS, D, Pos, Pos + Length)));
         }
-        StmtsByHash.push_back(std::make_pair(
-            createHash(Hash), StmtSequence(CS, D, Pos, Pos + Length)));
       }
     }
   }
author	Raphael Isemann <teemperor@gmail.com>	2017-07-09 21:14:36 +0000
committer	Raphael Isemann <teemperor@gmail.com>	2017-07-09 21:14:36 +0000
commit	972c81b9ec5e4b5a19c18dc9a45bf95d5ba6c857 (patch)
tree	5df8c1049ba6ed677d7a3ffc6dd03e62db2b09e3 /lib/Analysis/CloneDetection.cpp
parent	df9cbcd63ddad47689a52b003a7ef66791326b99 (diff)