diff options
author | João Abecasis <joao.abecasis@nokia.com> | 2011-01-31 19:30:42 +0100 |
---|---|---|
committer | João Abecasis <joao.abecasis@nokia.com> | 2011-02-02 14:30:33 +0100 |
commit | f1258bdebe13df730931b59d2bcbc09e00ba6ae1 (patch) | |
tree | ef98a5c37f1871a2e9e70f77280885062ae4270a | |
parent | ff258029e0b803096f2a63d1a91b55a69503500e (diff) |
Adapted database queries to new database structure
In the process, also preferred string formatting operations and
string.join(list) over manual formatting + concatenation + loops.
Reviewed-By: jasplin
-rw-r--r-- | scripts/getresultdetails2.py | 11 | ||||
-rw-r--r-- | scripts/getstats1.py | 14 | ||||
-rw-r--r-- | scripts/getstats2.py | 24 | ||||
-rw-r--r-- | scripts/listcontexts.py | 48 | ||||
-rw-r--r-- | scripts/misc.py | 134 | ||||
-rw-r--r-- | scripts/singlecontextbmstats.py | 12 | ||||
-rwxr-xr-x | scripts/uploadresults.py | 28 |
7 files changed, 140 insertions, 131 deletions
diff --git a/scripts/getresultdetails2.py b/scripts/getresultdetails2.py index 8678b52..c80c15f 100644 --- a/scripts/getresultdetails2.py +++ b/scripts/getresultdetails2.py @@ -2,7 +2,7 @@ import sys import json from dbaccess import execQuery -from misc import textToId, printJSONHeader +from misc import textToId, printJSONHeader, getContext class GetResultDetails2: @@ -43,26 +43,23 @@ class GetResultDetails2: # Fetches the sample (i.e. individual observations) for both contexts. @classmethod def fetchSample(cls, host, platform, branch, sha1, benchmark, metric): + context = getContext(host, platform, branch, sha1) sample = [] for value, valid, timestamp in execQuery( "SELECT value, valid, EXTRACT(EPOCH FROM startTime)::INT" " FROM result, upload" " WHERE uploadId = upload.id" - " AND hostId = %d" - " AND platformId = %d" - " AND branchId = %d" - " AND sha1Id = %d" + " AND contextId = %d" " AND benchmarkId = %d" " AND metricId = %d" " ORDER BY value DESC;" - % (host, platform, branch, sha1, benchmark, metric)): + % (context, benchmark, metric)): sample.append({ 'value' : value, 'valid' : valid, 'timestamp' : timestamp }) - return sample def writeOutputAsJSON(self): diff --git a/scripts/getstats1.py b/scripts/getstats1.py index b6e6b93..8ed1cbc 100644 --- a/scripts/getstats1.py +++ b/scripts/getstats1.py @@ -1,7 +1,8 @@ from dbaccess import execQuery, database from singlecontextbmstats import ( SingleContextBMStats, extractSingleContextBMStats) -from misc import idToText, textToId, benchmarkToComponents, printJSONHeader +from misc import idToText, textToId, benchmarkToComponents, printJSONHeader, + getContext class GetStats1: @@ -64,12 +65,13 @@ class GetStats1: # SingleContextBMStats objects. 
def computeBMStatsList(self): + # Get the context + context = getContext(self.host_id, self.platform_id, + self.branch_id, self.sha1_id) + # Get all distinct benchmark/metric combinations matching the context: - bmark_metrics = execQuery( - "SELECT DISTINCT benchmarkId, metricId " + - "FROM result WHERE hostId = " + str(self.host_id) + - " AND platformId = " + str(self.platform_id) + " AND branchId = " + - str(self.branch_id) + " AND sha1Id = " + str(self.sha1_id) + ";") + bmark_metrics = execQuery("SELECT DISTINCT benchmarkId, metricId" + " FROM result WHERE contextId = %d;" % context) bmstats_list = [] diff --git a/scripts/getstats2.py b/scripts/getstats2.py index 5c43430..2352e5a 100644 --- a/scripts/getstats2.py +++ b/scripts/getstats2.py @@ -72,20 +72,22 @@ class GetStats2: # context 1 and 2 respectively. def computeBMStatsList(self): + context1 = getContext(self.host1_id, self.platform1_id, self.branch1_id, self.sha1_id) + context2 = getContext(self.host2_id, self.platform2_id, self.branch2_id, self.sha2_id) + # Get all distinct benchmark/metric combinations matching both contexts # into a 'benchmarks' list: bmark_metrics = execQuery( - "SELECT DISTINCT benchmarkId, metricId " + - "FROM result WHERE hostId = " + str(self.host1_id) + - " AND platformId = " + str(self.platform1_id) + " AND branchId = " + - str(self.branch1_id) + " AND sha1Id = " + str(self.sha11_id) + - " INTERSECT " + - "SELECT DISTINCT benchmarkId, metricId " + - "FROM result WHERE hostId = " + str(self.host2_id) + - " AND platformId = " + str(self.platform2_id) + " AND branchId = " + - str(self.branch2_id) + " AND sha1Id = " + str(self.sha12_id) + - #" LIMIT 10;" - ";" + "SELECT DISTINCT benchmarkId, metricId" + " FROM result" + " WHERE contextId = %d" + " INTERSECT " + "SELECT DISTINCT benchmarkId, metricId" + " FROM result" + " WHERE contextId = %d" + #" LIMIT 10" + ";" + % (context1, context2) ) bmstats_list = [] diff --git a/scripts/listcontexts.py b/scripts/listcontexts.py index 
f6b1291..a72ae4a 100644 --- a/scripts/listcontexts.py +++ b/scripts/listcontexts.py @@ -2,25 +2,47 @@ import sys import json from dbaccess import execQuery -from misc import idToText, getAllSnapshots, printJSONHeader +from misc import idToText, printJSONHeader class ListContexts: def execute(self): - # Get all host/platform/branch combinations: - hpbs = execQuery( - "SELECT DISTINCT hostId, platformId, branchId FROM result " + - "ORDER BY hostId, platformId, branchId;") + # Get all host/platform/branch/sha1 combinations: + contexts = execQuery( + "SELECT id, hostId, platformId, branchId, sha1Id, EXTRACT(EPOCH FROM timestamp)::INT" + " FROM context" + " ORDER BY hostId, platformId, branchId, timestamp") self.contexts = [] - for host, platform, branch in hpbs: - snapshots = getAllSnapshots(host, platform, branch, True) - self.contexts.append({ - 'host' : idToText('host', host), - 'platform' : idToText('platform', platform), - 'branch' : idToText('branch', branch), - 'snapshots' : curSnapshots - }) + + if not len(contexts): + return + + id, host, platform, branch, sha1, timestamp = contexts[0] + + curHost, curPlatform, curBranch = host, platform, branch + curSnapshots = [(idToText('sha1', sha1), timestamp, id)] + + for (id, host, platform, branch, sha1, timestamp) in contexts[1:]: + if (host, platform, branch) != (curHost, curPlatform, curBranch): + self.contexts.append({ + 'host' : idToText('host', curHost), + 'platform' : idToText('platform', curPlatform), + 'branch' : idToText('branch', curBranch), + 'snapshots' : curSnapshots + }) + + curHost, curPlatform, curBranch = host, platform, branch + curSnapshots = [] + + curSnapshots.append((idToText('sha1', sha1), timestamp, id)) + + self.contexts.append({ + 'host' : idToText('host', curHost), + 'platform' : idToText('platform', curPlatform), + 'branch' : idToText('branch', curBranch), + 'snapshots' : curSnapshots + }) self.writeOutput() diff --git a/scripts/misc.py b/scripts/misc.py index 3fed597..660887f 100644 --- 
a/scripts/misc.py +++ b/scripts/misc.py @@ -84,20 +84,23 @@ def benchmarkToComponents(benchmark): # Returns a chronologically order n-tuple of 2-tuples: # (sha1, first upload timestamp). def getSnapshots(host_id, platform_id, branch_id, sha11_id, sha12_id): - minuid1 = execQuery( - "SELECT min(uploadId) FROM result WHERE hostId = " + - str(host_id) + " AND platformId = " + str(platform_id) + - " AND branchId = " + str(branch_id) + " AND sha1Id = " + - str(sha11_id) + ";")[0][0] - minuid2 = execQuery( - "SELECT min(uploadId) FROM result WHERE hostId = " + - str(host_id) + " AND platformId = " + str(platform_id) + - " AND branchId = " + str(branch_id) + " AND sha1Id = " + - str(sha12_id) + ";")[0][0] + timestamp1 = execQuery( + "SELECT timestamp FROM context " + + "WHERE hostId = " + str(host_id) + + " AND platformId = " + str(platform_id) + + " AND branchId = " + str(branch_id) + + " AND sha1Id = " + str(sha11_id) + ";")[0][0] + + timestamp2 = execQuery( + "SELECT timestamp FROM context " + + "WHERE hostId = " + str(host_id) + + " AND platformId = " + str(platform_id) + + " AND branchId = " + str(branch_id) + + " AND sha1Id = " + str(sha12_id) + ";")[0][0] # Ensure chronological order: - if minuid1 > minuid2: - minuid1, minuid2 = minuid2, minuid1 + if timestamp1 > timestamp2: + timestamp1, timestamp2 = timestamp2, timestamp1 # Each distinct SHA-1 that occurs for this host/platform/branch # combination may occur multiple times with different upload times. 
@@ -106,16 +109,14 @@ def getSnapshots(host_id, platform_id, branch_id, sha11_id, sha12_id): # order of the uploadId attributes is consistent with the order # of their corresponding startTime attributes): snapshots = execQuery( - "SELECT sha1Id, extract(epoch FROM startTime)::int AS " + - "firstUploadTime FROM " + - "(SELECT sha1Id, min(uploadId) AS minuid FROM result " + - "WHERE hostId = " + str(host_id) + - " AND platformId = " + str(platform_id) + - " AND branchId = " + str(branch_id) + - " AND uploadId between " + str(minuid1) + " AND " + str(minuid2) + - " GROUP BY sha1Id) " + - "AS dummy, upload WHERE upload.id = minuid " + - "ORDER BY minuid ASC;") + "SELECT sha1Id, EXTRACT(EPOCH FROM timestamp)::INT AS " + "firstUploadTime FROM context " + "WHERE hostId = %d" + " AND platformId = %d" + " AND branchId = %d" + " AND timestamp BETWEEN %d AND %d" + " ORDER BY timestamp ASC;" + % (host_id, platform_id, branch_id, timestamp1, timestamp2)) return tuple(snapshots) @@ -133,15 +134,13 @@ def getAllSnapshots(host_id, platform_id, branch_id, reverse = False): # order of the uploadId attributes is consistent with the order # of their corresponding startTime attributes): snapshots = execQuery( - "SELECT sha1Id, extract(epoch FROM startTime)::int AS " + - "firstUploadTime FROM " + - "(SELECT sha1Id, min(uploadId) AS minuid FROM result " + - "WHERE hostId = " + str(host_id) + - " AND platformId = " + str(platform_id) + - " AND branchId = " + str(branch_id) + - " GROUP BY sha1Id) " + - "AS dummy, upload WHERE upload.id = minuid " + - "ORDER BY minuid " + ("DESC;" if reverse else "ASC;")) + "SELECT sha1Id, EXTRACT(EPOCH FROM timestamp)::INT AS firstUploadTime" + " FROM context" + " WHERE hostId = %d" + " AND platformId = %d" + " AND branchId = %d" + % (host_id, platform_id, branch_id) + + " ORDER BY timestamp " + ("DESC;" if reverse else "ASC;")) return tuple(snapshots) @@ -205,26 +204,22 @@ def getLastRankingSnapshot(host_id, platform_id, branch_id, snapshots): def 
getTimeSeries( host_id, platform_id, branch_id, snapshots, benchmark_id, metric_id): - # Fetch raw values: - query = ( - "SELECT value, valid, sha1id FROM result WHERE " + - " hostId = " + str(host_id) + " AND platformId = " + - str(platform_id) + " AND branchId = " + str(branch_id) + - " AND benchmarkId = " + str(benchmark_id) + " AND metricId = " + - str(metric_id) + " AND sha1id IN (") - first = True - for s in snapshots: - if not first: - query += ", " - else: - first = False - query += str(s[0]) - query += ") ORDER BY sha1Id;" + contexts = [] + for snapshot in snapshots: + contexts.append(getContext(host_id, platform_id, branch_id, snapshot)) - raw_values = tuple(execQuery(query) + [(-1, -1, -1)]); # Note sentinel item + # Fetch raw values: + raw_values = execQuery( + "SELECT value, valid, contextId FROM result" + " WHERE contextId IN (%s)" + " AND benchmarkId = %d" + " AND metricId = %d" + " ORDER BY contextId;" + % (", ".join(map(str, contexts)), benchmark_id, metric_id)) + + [(-1, -1, -1)] # Note sentinel item # Compute per-sample stats: - curr_sha1_id = -1 + curr_context_id = -1 sample = [] valid_and_positive_sample = [] valid_median_obs = [] @@ -235,9 +230,9 @@ def getTimeSeries( rses = [] tsitem_map = {} # Loop over all observations (which are grouped on sample; - # note the 1-1 correspondence between samples and SHA-1s): - for obs, valid, sha1_id in raw_values: - if sha1_id != curr_sha1_id: + # note the 1-1 correspondence between samples and contexts): + for obs, valid, context_id in raw_values: + if context_id != curr_context_id: # A new sample has been collected, so register it and # prepare for the next one: sample_size = len(sample) @@ -256,7 +251,7 @@ def getTimeSeries( except ZeroDivisionError: pass - tsitem_map[curr_sha1_id] = ( + tsitem_map[curr_context_id] = ( median_obs, sample_size, nrse, ninvalid, nzeros) sample = [] valid_and_positive_sample = [] @@ -267,7 +262,7 @@ def getTimeSeries( tot_nzeros = tot_nzeros + nzeros nzeros = 0 - 
curr_sha1_id = sha1_id + curr_context_id = context_id # Append observation to current sample: sample.append(obs) @@ -284,10 +279,9 @@ def getTimeSeries( # Order chronologically: ts = [] index = 0 - for snapshot in snapshots: - sha1_id = snapshot[0] - if sha1_id in tsitem_map: - tsitem = tsitem_map[sha1_id] + for context in contexts: + if context in tsitem_map: + tsitem = tsitem_map[context] ts.append( (index, tsitem[0], tsitem[1], tsitem[2], tsitem[3], tsitem[4])) index = index + 1 @@ -309,10 +303,10 @@ def getTimeSeries( else: rse_of_medians = -1 - ms = len(snapshots) - len(ts) + ms = len(contexts) - len(ts) if len(ts) > 0: - lsd = (len(snapshots) - 1) - ts[-1][0] + lsd = (len(contexts) - 1) - ts[-1][0] else: lsd = -1 @@ -544,24 +538,16 @@ def getBMTimeSeriesStatsList( if progress_func != None: progress_func(0.0, progress_arg) + contexts = [] + for snapshot in snapshots: + contexts.append(getContext(host_id, platform_id, branch_id, snapshot)) + # Get all distinct benchmark/metric combinations that match the # host/platform/branch context and are within the selected snapshot # interval. Each such combination corresponds to a time series. 
- query = ( - "SELECT DISTINCT benchmarkId, metricId FROM result WHERE " + - " hostId = " + str(host_id) + " AND platformId = " + - str(platform_id) + " AND branchId = " + str(branch_id) + - " AND sha1id IN (") - first = True - for s in snapshots: - if not first: - query += ", " - else: - first = False - query += str(s[0]) - query += ");" - - bmark_metrics = execQuery(query); + bmark_metrics = execQuery("SELECT DISTINCT benchmarkId, metricId" + " FROM result WHERE contextId IN (%s);" + % ", ".join(map(str, contexts))) bmstats_list = [] diff --git a/scripts/singlecontextbmstats.py b/scripts/singlecontextbmstats.py index fff0bcc..004c3b9 100644 --- a/scripts/singlecontextbmstats.py +++ b/scripts/singlecontextbmstats.py @@ -64,12 +64,12 @@ def extractSingleContextBMStats( values = [] - query_result = execQuery( - "SELECT value, valid FROM result WHERE hostId = " + str(host_id) + - " AND platformId = " + str(platform_id) + " AND branchId = " + - str(branch_id) + " AND sha1Id = " + str(sha1_id) + - " AND benchmarkId = " + str(benchmark_id) + " AND metricId = " + - str(metric_id) + ";") + context = getContext(host_id, platform_id, branch_id) + query_result = execQuery("SELECT value, valid FROM result" + " WHERE contextId = %d" + " AND benchmarkId = %d" + " AND metricId = %d;" + % (context, benchmark_id, metric_id)) for value, valid in query_result: if valid: diff --git a/scripts/uploadresults.py b/scripts/uploadresults.py index 2d7791b..cf929b5 100755 --- a/scripts/uploadresults.py +++ b/scripts/uploadresults.py @@ -3,7 +3,7 @@ import sys from xml.dom.minidom import parse from dbaccess import setDatabase, execQuery, commit -from misc import isValidSHA1 +from misc import isValidSHA1, getContext # === Global functions =============================================== @@ -238,6 +238,14 @@ platformId = findOrInsertId("platform", platform) branchId = findOrInsertId("branch", branch) sha1Id = findOrInsertId("sha1", sha1) +context = getContext(hostId, platformId, branchId, 
sha1Id) +if context == -1: + context = execQuery("INSERT INTO context" + " (hostId, platformId, branchId, sha1Id)" + " VALUES (%d, %d, %d, %d)" + " RETURNING id;" + % (hostId, platformId, branchId, sha1Id)) + # Append rows to the 'result' table ... for result in results: benchmark = ( @@ -250,19 +258,11 @@ for result in results: result['lowerIsBetter']) query = ( - "INSERT INTO result (hostId, platformId, branchId, sha1Id, " + - "benchmarkId, value, valid, metricId, uploadId) VALUES (" - + str(hostId) - + ", " + str(platformId) - + ", " + str(branchId) - + ", " + str(sha1Id) - + ", " + str(benchmarkId) - + ", " + str(result['value']) - + ", " + str(result['valid']) - + ", " + str(metricId) - + ", " + str(uploadId) - + ");" - ) + "INSERT INTO result" + " (contextId, benchmarkId, value, valid, metricId, uploadId)" + " VALUES (%d, %d, %f, %s, %d, %d);" + % (contextId, benchmarkId, result['value'], result['valid'], + metricId, uploadId)) execQuery(query, False) |