1 files changed, 181 insertions, 0 deletions
diff --git a/scripts/gettopchanges.py b/scripts/gettopchanges.py
new file mode 100644
index 0000000..1b7282a
--- /dev/null
+++ b/scripts/gettopchanges.py
@@ -0,0 +1,181 @@
+import sys, json, calendar, time
+from dbaccess import execQuery, database
+from misc import (
+    textToId, idToText, getContext, getTimestampFromContext, getSnapshots,
+    benchmarkToComponents, printJSONHeader)
+
+
+
+# Gets the top changes for a specific context by considering only the
+# last change in each time series (benchmarkId/metricId pair).
+#
+# The context is (host_id, platform_id, branch_id); regressions is matched
+# against change.regression. If premature is true, changes are ranked by
+# greatest(score, premature_score) instead of score. A non-empty
+# test_case_ids restricts the changes to those test cases. Returns the
+# execQuery() result: up to limit rows of (benchmarkId, metricId, sha1Id,
+# last_timestamp, final_score), highest score (then latest) first.
+#
+# NOTE: The result may contain at most one change from a given time series.
+def getTopChangesForContext_last(
+    host_id, platform_id, branch_id, regressions, premature, limit,
+    test_case_ids):
+    context_args = [host_id, platform_id, branch_id, regressions]
+    score_expr = "greatest(score, premature_score)" if premature else "score"
+    query = (
+        "SELECT change.benchmarkId, change.metricId, sha1Id, last_timestamp, "
+        + score_expr + " AS final_score"
+        "  FROM"
+        "   (SELECT benchmarkId, metricId, max(timestamp) AS last_timestamp"
+        "      FROM change"
+        "     WHERE hostId = %s AND platformId = %s"
+        "       AND branchId = %s AND regression = %s")
+    args = list(context_args)
+    if len(test_case_ids) > 0:
+        query += (
+            "   AND testCaseId IN (%s" + ", %s"*(len(test_case_ids) - 1) + ")")
+        args += test_case_ids
+    # Repeat the context filter on the outer rows: matching benchmark,
+    # metric and timestamp alone would also pick up other contexts' rows.
+    query += (
+        "     GROUP BY benchmarkId, metricId) AS last_change"
+        " , change"
+        " WHERE last_change.benchmarkId = change.benchmarkId"
+        "   AND last_change.metricId = change.metricId"
+        "   AND change.timestamp = last_timestamp"
+        "   AND change.hostId = %s AND change.platformId = %s"
+        "   AND change.branchId = %s AND change.regression = %s"
+        " ORDER BY final_score DESC, last_timestamp DESC"
+        " LIMIT %s")
+    args += context_args + [limit]
+    return execQuery(query, args)
+
+
+# Gets the top changes for a specific context by considering all
+# changes in the given time scope, i.e. with timestamp >= lo_timestamp.
+#
+# Only changes of the context (host_id, platform_id, branch_id) whose
+# regression column equals regressions are considered and, if
+# test_case_ids is non-empty, whose testCaseId is among test_case_ids.
+# If premature is true the ranking uses greatest(score, premature_score).
+# Returns the execQuery() result, limited to at most limit rows.
+#
+# NOTE: The result may contain any number of changes from a given time
+# series.
+def getTopChangesForContext_timeScope(
+    host_id, platform_id, branch_id, regressions, premature, limit,
+    test_case_ids, lo_timestamp):
+    score_expr = "greatest(score, premature_score)" if premature else "score"
+    params = [host_id, platform_id, branch_id, regressions, lo_timestamp]
+    sql = [
+        "SELECT benchmarkId, metricId, sha1Id, timestamp, ",
+        score_expr,
+        "    AS final_score"
+        "  FROM change"
+        " WHERE hostId = %s"
+        "   AND platformId = %s"
+        "   AND branchId = %s"
+        "   AND regression = %s"
+        "   AND timestamp >= %s",
+        ]
+    if len(test_case_ids) > 0:
+        # One placeholder per test case ID.
+        placeholders = ", ".join(["%s"] * len(test_case_ids))
+        sql.append("   AND testCaseId IN (" + placeholders + ")")
+        params += test_case_ids
+    sql.append(
+        " ORDER BY final_score DESC, timestamp DESC"
+        " LIMIT %s")
+    params.append(limit)
+    return execQuery("".join(sql), params)
+
+
+# Returns test case IDs corresponding to the names in test_case_filter,
+# or () -- without querying -- if test_case_filter is None or empty.
+def getTestCaseIdsFromFilter(test_case_filter):
+    if not test_case_filter:
+        return ()
+    return execQuery(
+        "SELECT id FROM testCase WHERE value IN (%s"
+        + ", %s"*(len(test_case_filter) - 1) + ")", tuple(test_case_filter))
+
+
+class GetTopChanges:
+    # Computes the top changes of every context and writes them out;
+    # the writing is left to writeOutput(), which subclasses must provide.
+    #
+    # test_case_filter: names of the test cases to consider (None or empty:
+    #     all); regressions: required value of a change's regression column;
+    #     last: if true, consider only the last change of each time series,
+    #     else all changes in the time scope; timescope: time scope in days
+    #     back from now (negative: lower bound -1); premature: if true, rank
+    #     by greatest(score, premature_score); limit: max rows per context.
+    def __init__(
+        self, test_case_filter, regressions, last, timescope, premature, limit):
+        self.test_case_filter = test_case_filter
+        self.regressions = regressions
+        self.last = last
+        self.timescope = timescope
+        self.premature = premature
+        self.limit = limit
+
+    # Gets the top changes for a specific context. Callers that already
+    # resolved the test case filter may pass the IDs as test_case_ids to
+    # save a database lookup; if None, the filter is resolved here.
+    def getTopChangesForContext(
+        self, host_id, platform_id, branch_id, lo_timestamp,
+        test_case_ids=None):
+        if test_case_ids is None:
+            test_case_ids = getTestCaseIdsFromFilter(self.test_case_filter)
+        if self.last:
+            return getTopChangesForContext_last(
+                host_id, platform_id, branch_id, self.regressions,
+                self.premature, self.limit, test_case_ids)
+        return getTopChangesForContext_timeScope(
+            host_id, platform_id, branch_id, self.regressions,
+            self.premature, self.limit, test_case_ids, lo_timestamp)
+
+    # Gets the top changes for all contexts: a list with one dict (keys
+    # hostId, platformId, branchId, topchanges) per row of table context.
+    def getTopChangesForAllContexts(self):
+        # Compute lowest timestamp (secs since 1970) in time scope (days ago):
+        curr_timestamp = calendar.timegm(time.gmtime())
+        secs_in_day = 24 * 60 * 60
+        lo_timestamp = (
+            -1 if (self.timescope < 0) else
+            curr_timestamp - self.timescope * secs_in_day)
+        context_ids = execQuery(
+            "SELECT DISTINCT hostId, platformId, branchId FROM context "
+            "ORDER BY hostId, platformId, branchId", ())
+        # The filter is the same for every context, so resolve it only once.
+        test_case_ids = getTestCaseIdsFromFilter(self.test_case_filter)
+        return [{
+                "hostId": host_id,
+                "platformId": platform_id,
+                "branchId": branch_id,
+                "topchanges": self.getTopChangesForContext(
+                    host_id, platform_id, branch_id, lo_timestamp,
+                    test_case_ids)
+                } for host_id, platform_id, branch_id in context_ids]
+
+    # Computes the top changes for all contexts and writes them out.
+    def execute(self):
+        self.contexts = self.getTopChangesForAllContexts()
+        self.writeOutput()
+
+    # Writes the parameters and self.contexts to stdout as a JSON object.
+    def writeOutputAsJSON(self):
+        printJSONHeader()
+        json.dump({
+                'database': database(),
+                'regressions': self.regressions,
+                'last': self.last,
+                'timescope': self.timescope,
+                'premature': self.premature,
+                'limit': self.limit,
+                'contexts': self.contexts
+                }, sys.stdout)
+
+class GetTopChangesAsJSON(GetTopChanges):  # Writes its result as JSON.
+    def writeOutput(self):  # Invoked by execute() of the base class.
+        self.writeOutputAsJSON()