diff options
author | João Abecasis <joao.abecasis@nokia.com> | 2011-01-31 19:30:42 +0100 |
---|---|---|
committer | João Abecasis <joao.abecasis@nokia.com> | 2011-02-02 14:30:33 +0100 |
commit | f1258bdebe13df730931b59d2bcbc09e00ba6ae1 (patch) | |
tree | ef98a5c37f1871a2e9e70f77280885062ae4270a | |
parent | ff258029e0b803096f2a63d1a91b55a69503500e (diff) |
Adapted database queries to new database structure
In the process, also preferred string formatting operations and
string.join(list) over manual formatting + concatenation + loops.
Reviewed-By: jasplin
-rw-r--r-- | scripts/getresultdetails2.py | 11 | ||||
-rw-r--r-- | scripts/getstats1.py | 14 | ||||
-rw-r--r-- | scripts/getstats2.py | 24 | ||||
-rw-r--r-- | scripts/listcontexts.py | 48 | ||||
-rw-r--r-- | scripts/misc.py | 134 | ||||
-rw-r--r-- | scripts/singlecontextbmstats.py | 12 | ||||
-rwxr-xr-x | scripts/uploadresults.py | 28 |
7 files changed, 140 insertions, 131 deletions
diff --git a/scripts/getresultdetails2.py b/scripts/getresultdetails2.py index 8678b52..c80c15f 100644 --- a/scripts/getresultdetails2.py +++ b/scripts/getresultdetails2.py @@ -2,7 +2,7 @@ import sys import json from dbaccess import execQuery -from misc import textToId, printJSONHeader +from misc import textToId, printJSONHeader, getContext class GetResultDetails2: @@ -43,26 +43,23 @@ class GetResultDetails2: # Fetches the sample (i.e. individual observations) for both contexts. @classmethod def fetchSample(cls, host, platform, branch, sha1, benchmark, metric): + context = getContext(host, platform, branch, sha1) sample = [] for value, valid, timestamp in execQuery( "SELECT value, valid, EXTRACT(EPOCH FROM startTime)::INT" " FROM result, upload" " WHERE uploadId = upload.id" - " AND hostId = %d" - " AND platformId = %d" - " AND branchId = %d" - " AND sha1Id = %d" + " AND contextId = %d" " AND benchmarkId = %d" " AND metricId = %d" " ORDER BY value DESC;" - % (host, platform, branch, sha1, benchmark, metric)): + % (context, benchmark, metric)): sample.append({ 'value' : value, 'valid' : valid, 'timestamp' : timestamp }) - return sample def writeOutputAsJSON(self): diff --git a/scripts/getstats1.py b/scripts/getstats1.py index b6e6b93..8ed1cbc 100644 --- a/scripts/getstats1.py +++ b/scripts/getstats1.py @@ -1,7 +1,8 @@ from dbaccess import execQuery, database from singlecontextbmstats import ( SingleContextBMStats, extractSingleContextBMStats) -from misc import idToText, textToId, benchmarkToComponents, printJSONHeader +from misc import idToText, textToId, benchmarkToComponents, printJSONHeader, + getContext class GetStats1: @@ -64,12 +65,13 @@ class GetStats1: # SingleContextBMStats objects. 
def computeBMStatsList(self): + # Get the context + context = getContext(self.host_id, self.platform_id, + self.branch_id, self.sha1_id) + # Get all distinct benchmark/metric combinations matching the context: - bmark_metrics = execQuery( - "SELECT DISTINCT benchmarkId, metricId " + - "FROM result WHERE hostId = " + str(self.host_id) + - " AND platformId = " + str(self.platform_id) + " AND branchId = " + - str(self.branch_id) + " AND sha1Id = " + str(self.sha1_id) + ";") + bmark_metrics = execQuery("SELECT DISTINCT benchmarkId, metricId" + " FROM result WHERE contextId = %d;" % context) bmstats_list = [] diff --git a/scripts/getstats2.py b/scripts/getstats2.py index 5c43430..2352e5a 100644 --- a/scripts/getstats2.py +++ b/scripts/getstats2.py @@ -72,20 +72,22 @@ class GetStats2: # context 1 and 2 respectively. def computeBMStatsList(self): + context1 = getContext(self.host1_id, self.platform1_id, self.branch1_id, self.sha1_id) + context2 = getContext(self.host2_id, self.platform2_id, self.branch2_id, self.sha2_id) + # Get all distinct benchmark/metric combinations matching both contexts # into a 'benchmarks' list: bmark_metrics = execQuery( - "SELECT DISTINCT benchmarkId, metricId " + - "FROM result WHERE hostId = " + str(self.host1_id) + - " AND platformId = " + str(self.platform1_id) + " AND branchId = " + - str(self.branch1_id) + " AND sha1Id = " + str(self.sha11_id) + - " INTERSECT " + - "SELECT DISTINCT benchmarkId, metricId " + - "FROM result WHERE hostId = " + str(self.host2_id) + - " AND platformId = " + str(self.platform2_id) + " AND branchId = " + - str(self.branch2_id) + " AND sha1Id = " + str(self.sha12_id) + - #" LIMIT 10;" - ";" + "SELECT DISTINCT benchmarkId, metricId" + " FROM result" + " WHERE contextId = %d" + " INTERSECT " + "SELECT DISTINCT benchmarkId, metricId" + " FROM result" + " WHERE contextId = %d" + #" LIMIT 10" + ";" + % (context1, context2) ) bmstats_list = [] diff --git a/scripts/listcontexts.py b/scripts/listcontexts.py index 
f6b1291..a72ae4a 100644 --- a/scripts/listcontexts.py +++ b/scripts/listcontexts.py @@ -2,25 +2,47 @@ import sys import json from dbaccess import execQuery -from misc import idToText, getAllSnapshots, printJSONHeader +from misc import idToText, printJSONHeader class ListContexts: def execute(self): - # Get all host/platform/branch combinations: - hpbs = execQuery( - "SELECT DISTINCT hostId, platformId, branchId FROM result " + - "ORDER BY hostId, platformId, branchId;") + # Get all host/platform/branch/sha1 combinations: + contexts = execQuery( + "SELECT id, hostId, platformId, branchId, sha1Id, EXTRACT(EPOCH FROM timestamp)::INT" + " FROM context" + " ORDER BY hostId, platformId, branchId, timestamp") self.contexts = [] - for host, platform, branch in hpbs: - snapshots = getAllSnapshots(host, platform, branch, True) - self.contexts.append({ - 'host' : idToText('host', host), - 'platform' : idToText('platform', platform), - 'branch' : idToText('branch', branch), - 'snapshots' : curSnapshots - }) + + if not len(contexts): + return + + id, host, platform, branch, sha1, timestamp = contexts[0] + + curHost, curPlatform, curBranch = host, platform, branch + curSnapshots = [(idToText('sha1', sha1), timestamp, id)] + + for (id, host, platform, branch, sha1, timestamp) in contexts[1:]: + if (host, platform, branch) != (curHost, curPlatform, curBranch): + self.contexts.append({ + 'host' : idToText('host', curHost), + 'platform' : idToText('platform', curPlatform), + 'branch' : idToText('branch', curBranch), + 'snapshots' : curSnapshots + }) + + curHost, curPlatform, curBranch = host, platform, branch + curSnapshots = [] + + curSnapshots.append((idToText('sha1', sha1), timestamp, id)) + + self.contexts.append({ + 'host' : idToText('host', curHost), + 'platform' : idToText('platform', curPlatform), + 'branch' : idToText('branch', curBranch), + 'snapshots' : curSnapshots + }) self.writeOutput() diff --git a/scripts/misc.py b/scripts/misc.py index 3fed597..660887f 100644 --- 
a/scripts/misc.py +++ b/scripts/misc.py @@ -84,20 +84,23 @@ def benchmarkToComponents(benchmark): # Returns a chronologically order n-tuple of 2-tuples: # (sha1, first upload timestamp). def getSnapshots(host_id, platform_id, branch_id, sha11_id, sha12_id): - minuid1 = execQuery( - "SELECT min(uploadId) FROM result WHERE hostId = " + - str(host_id) + " AND platformId = " + str(platform_id) + - " AND branchId = " + str(branch_id) + " AND sha1Id = " + - str(sha11_id) + ";")[0][0] - minuid2 = execQuery( - "SELECT min(uploadId) FROM result WHERE hostId = " + - str(host_id) + " AND platformId = " + str(platform_id) + - " AND branchId = " + str(branch_id) + " AND sha1Id = " + - str(sha12_id) + ";")[0][0] + timestamp1 = execQuery( + "SELECT timestamp FROM context " + + "WHERE hostId = " + str(host_id) + + " AND platformId = " + str(platform_id) + + " AND branchId = " + str(branch_id) + + " AND sha1Id = " + str(sha11_id) + ";")[0][0] + + timestamp2 = execQuery( + "SELECT timestamp FROM context " + + "WHERE hostId = " + str(host_id) + + " AND platformId = " + str(platform_id) + + " AND branchId = " + str(branch_id) + + " AND sha1Id = " + str(sha12_id) + ";")[0][0] # Ensure chronological order: - if minuid1 > minuid2: - minuid1, minuid2 = minuid2, minuid1 + if timestamp1 > timestamp2: + timestamp1, timestamp2 = timestamp2, timestamp1 # Each distinct SHA-1 that occurs for this host/platform/branch # combination may occur multiple times with different upload times. 
@@ -106,16 +109,14 @@ def getSnapshots(host_id, platform_id, branch_id, sha11_id, sha12_id): # order of the uploadId attributes is consistent with the order # of their corresponding startTime attributes): snapshots = execQuery( - "SELECT sha1Id, extract(epoch FROM startTime)::int AS " + - "firstUploadTime FROM " + - "(SELECT sha1Id, min(uploadId) AS minuid FROM result " + - "WHERE hostId = " + str(host_id) + - " AND platformId = " + str(platform_id) + - " AND branchId = " + str(branch_id) + - " AND uploadId between " + str(minuid1) + " AND " + str(minuid2) + - " GROUP BY sha1Id) " + - "AS dummy, upload WHERE upload.id = minuid " + - "ORDER BY minuid ASC;") + "SELECT sha1Id, EXTRACT(EPOCH FROM timestamp)::INT AS " + "firstUploadTime FROM context " + "WHERE hostId = %d" + " AND platformId = %d" + " AND branchId = %d" + " AND timestamp BETWEEN %d AND %d" + " ORDER BY timestamp ASC;" + % (host_id, platform_id, branch_id, timestamp1, timestamp2)) return tuple(snapshots) @@ -133,15 +134,13 @@ def getAllSnapshots(host_id, platform_id, branch_id, reverse = False): # order of the uploadId attributes is consistent with the order # of their corresponding startTime attributes): snapshots = execQuery( - "SELECT sha1Id, extract(epoch FROM startTime)::int AS " + - "firstUploadTime FROM " + - "(SELECT sha1Id, min(uploadId) AS minuid FROM result " + - "WHERE hostId = " + str(host_id) + - " AND platformId = " + str(platform_id) + - " AND branchId = " + str(branch_id) + - " GROUP BY sha1Id) " + - "AS dummy, upload WHERE upload.id = minuid " + - "ORDER BY minuid " + ("DESC;" if reverse else "ASC;")) + "SELECT sha1Id, EXTRACT(EPOCH FROM timestamp)::INT AS firstUploadTime" + " FROM context" + " WHERE hostId = %d" + " AND platformId = %d" + " AND branchId = %d" + % (host_id, platform_id, branch_id) + + " ORDER BY timestamp " + ("DESC;" if reverse else "ASC;")) return tuple(snapshots) @@ -205,26 +204,22 @@ def getLastRankingSnapshot(host_id, platform_id, branch_id, snapshots): def 
getTimeSeries( host_id, platform_id, branch_id, snapshots, benchmark_id, metric_id): - # Fetch raw values: - query = ( - "SELECT value, valid, sha1id FROM result WHERE " + - " hostId = " + str(host_id) + " AND platformId = " + - str(platform_id) + " AND branchId = " + str(branch_id) + - " AND benchmarkId = " + str(benchmark_id) + " AND metricId = " + - str(metric_id) + " AND sha1id IN (") - first = True - for s in snapshots: - if not first: - query += ", " - else: - first = False - query += str(s[0]) - query += ") ORDER BY sha1Id;" + contexts = [] + for snapshot in snapshots: + contexts.append(getContext(host_id, platform_id, branch_id, snapshot)) - raw_values = tuple(execQuery(query) + [(-1, -1, -1)]); # Note sentinel item + # Fetch raw values: + raw_values = execQuery( + "SELECT value, valid, contextId FROM result" + " WHERE contextId IN (%s)" + " AND benchmarkId = %d" + " AND metricId = %d" + " ORDER BY contextId;" + % (", ".join(map(str, contexts)), benchmark_id, metric_id)) + + [(-1, -1, -1)] # Note sentinel item # Compute per-sample stats: - curr_sha1_id = -1 + curr_context_id = -1 sample = [] valid_and_positive_sample = [] valid_median_obs = [] @@ -235,9 +230,9 @@ def getTimeSeries( rses = [] tsitem_map = {} # Loop over all observations (which are grouped on sample; - # note the 1-1 correspondence between samples and SHA-1s): - for obs, valid, sha1_id in raw_values: - if sha1_id != curr_sha1_id: + # note the 1-1 correspondence between samples and contexts): + for obs, valid, context_id in raw_values: + if context_id != curr_context_id: # A new sample has been collected, so register it and # prepare for the next one: sample_size = len(sample) @@ -256,7 +251,7 @@ def getTimeSeries( except ZeroDivisionError: pass - tsitem_map[curr_sha1_id] = ( + tsitem_map[curr_context_id] = ( median_obs, sample_size, nrse, ninvalid, nzeros) sample = [] valid_and_positive_sample = [] @@ -267,7 +262,7 @@ def getTimeSeries( tot_nzeros = tot_nzeros + nzeros nzeros = 0 - 
curr_sha1_id = sha1_id + curr_context_id = context_id # Append observation to current sample: sample.append(obs) @@ -284,10 +279,9 @@ def getTimeSeries( # Order chronologically: ts = [] index = 0 - for snapshot in snapshots: - sha1_id = snapshot[0] - if sha1_id in tsitem_map: - tsitem = tsitem_map[sha1_id] + for context in contexts: + if context in tsitem_map: + tsitem = tsitem_map[context] ts.append( (index, tsitem[0], tsitem[1], tsitem[2], tsitem[3], tsitem[4])) index = index + 1 @@ -309,10 +303,10 @@ def getTimeSeries( else: rse_of_medians = -1 - ms = len(snapshots) - len(ts) + ms = len(contexts) - len(ts) if len(ts) > 0: - lsd = (len(snapshots) - 1) - ts[-1][0] + lsd = (len(contexts) - 1) - ts[-1][0] else: lsd = -1 @@ -544,24 +538,16 @@ def getBMTimeSeriesStatsList( if progress_func != None: progress_func(0.0, progress_arg) + contexts = [] + for snapshot in snapshots: + contexts.append(getContext(host_id, platform_id, branch_id, snapshot)) + # Get all distinct benchmark/metric combinations that match the # host/platform/branch context and are within the selected snapshot # interval. Each such combination corresponds to a time series. 
- query = ( - "SELECT DISTINCT benchmarkId, metricId FROM result WHERE " + - " hostId = " + str(host_id) + " AND platformId = " + - str(platform_id) + " AND branchId = " + str(branch_id) + - " AND sha1id IN (") - first = True - for s in snapshots: - if not first: - query += ", " - else: - first = False - query += str(s[0]) - query += ");" - - bmark_metrics = execQuery(query); + bmark_metrics = execQuery("SELECT DISTINCT benchmarkId, metricId" + " FROM result WHERE contextId IN (%s);" + % ", ".join(map(str, contexts))) bmstats_list = [] diff --git a/scripts/singlecontextbmstats.py b/scripts/singlecontextbmstats.py index fff0bcc..004c3b9 100644 --- a/scripts/singlecontextbmstats.py +++ b/scripts/singlecontextbmstats.py @@ -64,12 +64,12 @@ def extractSingleContextBMStats( values = [] - query_result = execQuery( - "SELECT value, valid FROM result WHERE hostId = " + str(host_id) + - " AND platformId = " + str(platform_id) + " AND branchId = " + - str(branch_id) + " AND sha1Id = " + str(sha1_id) + - " AND benchmarkId = " + str(benchmark_id) + " AND metricId = " + - str(metric_id) + ";") + context = getContext(host_id, platform_id, branch_id) + query_result = execQuery("SELECT value, valid FROM result" + " WHERE contextId = %d" + " AND benchmarkId = %d" + " AND metricId = %d;" + % (context, benchmark_id, metric_id)) for value, valid in query_result: if valid: diff --git a/scripts/uploadresults.py b/scripts/uploadresults.py index 2d7791b..cf929b5 100755 --- a/scripts/uploadresults.py +++ b/scripts/uploadresults.py @@ -3,7 +3,7 @@ import sys from xml.dom.minidom import parse from dbaccess import setDatabase, execQuery, commit -from misc import isValidSHA1 +from misc import isValidSHA1, getContext # === Global functions =============================================== @@ -238,6 +238,14 @@ platformId = findOrInsertId("platform", platform) branchId = findOrInsertId("branch", branch) sha1Id = findOrInsertId("sha1", sha1) +context = getContext(hostId, platformId, branchId, 
sha1Id) +if context == -1: + context = execQuery("INSERT INTO context" + " (hostId, platformId, branchId, sha1Id)" + " VALUES (%d, %d, %d, %d)" + " RETURNING id;" + % (hostId, platformId, branchId, sha1Id)) + # Append rows to the 'result' table ... for result in results: benchmark = ( @@ -250,19 +258,11 @@ for result in results: result['lowerIsBetter']) query = ( - "INSERT INTO result (hostId, platformId, branchId, sha1Id, " + - "benchmarkId, value, valid, metricId, uploadId) VALUES (" - + str(hostId) - + ", " + str(platformId) - + ", " + str(branchId) - + ", " + str(sha1Id) - + ", " + str(benchmarkId) - + ", " + str(result['value']) - + ", " + str(result['valid']) - + ", " + str(metricId) - + ", " + str(uploadId) - + ");" - ) + "INSERT INTO result" + " (contextId, benchmarkId, value, valid, metricId, uploadId)" + " VALUES (%d, %d, %f, %s, %d, %d);" + % (contextId, benchmarkId, result['value'], result['valid'], + metricId, uploadId)) execQuery(query, False) |