author     João Abecasis <joao.abecasis@nokia.com>   2011-01-31 19:30:42 +0100
committer  João Abecasis <joao.abecasis@nokia.com>   2011-02-02 14:30:33 +0100
commit     f1258bdebe13df730931b59d2bcbc09e00ba6ae1 (patch)
tree       ef98a5c37f1871a2e9e70f77280885062ae4270a
parent     ff258029e0b803096f2a63d1a91b55a69503500e (diff)
Adapted database queries to new database structure
In the process, also preferred string formatting operations and
string.join(list) over manual formatting, concatenation, and loops.

Reviewed-By: jasplin
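
Every script touched below resolves a (host, platform, branch, sha1) combination to a row in the new context table through a getContext() helper in misc.py, but the helper itself is not visible in the hunks on this page. A minimal sketch of what it presumably does, assuming the execQuery() helper from dbaccess and the column names used by the queries below (the -1 return for a missing combination is what uploadresults.py checks for):

    from dbaccess import execQuery

    # Sketch only: the real getContext() lives in misc.py and is not part of
    # the hunks shown on this page.
    def getContext(host_id, platform_id, branch_id, sha1_id):
        result = execQuery(
            "SELECT id FROM context"
            " WHERE hostId = %d"
            " AND platformId = %d"
            " AND branchId = %d"
            " AND sha1Id = %d;"
            % (host_id, platform_id, branch_id, sha1_id))
        return result[0][0] if result else -1   # -1: no such context row

It also happens to show the %-formatting style the commit message refers to.
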
-rw-r--r--  scripts/getresultdetails2.py     |  11
-rw-r--r--  scripts/getstats1.py             |  14
-rw-r--r--  scripts/getstats2.py             |  24
-rw-r--r--  scripts/listcontexts.py          |  48
-rw-r--r--  scripts/misc.py                  | 134
-rw-r--r--  scripts/singlecontextbmstats.py  |  12
-rwxr-xr-x  scripts/uploadresults.py         |  28
7 files changed, 140 insertions, 131 deletions
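
The schema change that introduces the context table is not part of this commit. Judging from the columns the queries below touch (id, hostId, platformId, branchId, sha1Id, timestamp) and the INSERT ... RETURNING id in uploadresults.py, it plausibly looks like the following; the exact types, constraints and the timestamp default are assumptions:

    from dbaccess import execQuery

    # Assumed shape of the new 'context' table; the migration itself is not
    # in this commit, so treat this as a reading aid only.
    execQuery(
        "CREATE TABLE context ("
        "  id SERIAL PRIMARY KEY,"              # INSERT ... RETURNING id
        "  hostId INTEGER NOT NULL,"
        "  platformId INTEGER NOT NULL,"
        "  branchId INTEGER NOT NULL,"
        "  sha1Id INTEGER NOT NULL,"
        "  timestamp TIMESTAMP DEFAULT now()"   # read, but never written below
        ");")
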
diff --git a/scripts/getresultdetails2.py b/scripts/getresultdetails2.py
index 8678b52..c80c15f 100644
--- a/scripts/getresultdetails2.py
+++ b/scripts/getresultdetails2.py
@@ -2,7 +2,7 @@ import sys
import json
from dbaccess import execQuery
-from misc import textToId, printJSONHeader
+from misc import textToId, printJSONHeader, getContext
class GetResultDetails2:
@@ -43,26 +43,23 @@ class GetResultDetails2:
# Fetches the sample (i.e. individual observations) for both contexts.
@classmethod
def fetchSample(cls, host, platform, branch, sha1, benchmark, metric):
+ context = getContext(host, platform, branch, sha1)
sample = []
for value, valid, timestamp in execQuery(
"SELECT value, valid, EXTRACT(EPOCH FROM startTime)::INT"
" FROM result, upload"
" WHERE uploadId = upload.id"
- " AND hostId = %d"
- " AND platformId = %d"
- " AND branchId = %d"
- " AND sha1Id = %d"
+ " AND contextId = %d"
" AND benchmarkId = %d"
" AND metricId = %d"
" ORDER BY value DESC;"
- % (host, platform, branch, sha1, benchmark, metric)):
+ % (context, benchmark, metric)):
sample.append({
'value' : value,
'valid' : valid,
'timestamp' : timestamp
})
-
return sample
def writeOutputAsJSON(self):
diff --git a/scripts/getstats1.py b/scripts/getstats1.py
index b6e6b93..8ed1cbc 100644
--- a/scripts/getstats1.py
+++ b/scripts/getstats1.py
@@ -1,7 +1,8 @@
from dbaccess import execQuery, database
from singlecontextbmstats import (
SingleContextBMStats, extractSingleContextBMStats)
-from misc import idToText, textToId, benchmarkToComponents, printJSONHeader
+from misc import (idToText, textToId, benchmarkToComponents, printJSONHeader,
+    getContext)
class GetStats1:
@@ -64,12 +65,13 @@ class GetStats1:
# SingleContextBMStats objects.
def computeBMStatsList(self):
+ # Get the context
+ context = getContext(self.host_id, self.platform_id,
+ self.branch_id, self.sha1_id)
+
# Get all distinct benchmark/metric combinations matching the context:
- bmark_metrics = execQuery(
- "SELECT DISTINCT benchmarkId, metricId " +
- "FROM result WHERE hostId = " + str(self.host_id) +
- " AND platformId = " + str(self.platform_id) + " AND branchId = " +
- str(self.branch_id) + " AND sha1Id = " + str(self.sha1_id) + ";")
+ bmark_metrics = execQuery("SELECT DISTINCT benchmarkId, metricId"
+ " FROM result WHERE contextId = %d;" % context)
bmstats_list = []
diff --git a/scripts/getstats2.py b/scripts/getstats2.py
index 5c43430..2352e5a 100644
--- a/scripts/getstats2.py
+++ b/scripts/getstats2.py
@@ -72,20 +72,22 @@ class GetStats2:
# context 1 and 2 respectively.
def computeBMStatsList(self):
+        context1 = getContext(self.host1_id, self.platform1_id, self.branch1_id, self.sha11_id)
+        context2 = getContext(self.host2_id, self.platform2_id, self.branch2_id, self.sha12_id)
+
# Get all distinct benchmark/metric combinations matching both contexts
# into a 'benchmarks' list:
bmark_metrics = execQuery(
- "SELECT DISTINCT benchmarkId, metricId " +
- "FROM result WHERE hostId = " + str(self.host1_id) +
- " AND platformId = " + str(self.platform1_id) + " AND branchId = " +
- str(self.branch1_id) + " AND sha1Id = " + str(self.sha11_id) +
- " INTERSECT " +
- "SELECT DISTINCT benchmarkId, metricId " +
- "FROM result WHERE hostId = " + str(self.host2_id) +
- " AND platformId = " + str(self.platform2_id) + " AND branchId = " +
- str(self.branch2_id) + " AND sha1Id = " + str(self.sha12_id) +
- #" LIMIT 10;"
- ";"
+ "SELECT DISTINCT benchmarkId, metricId"
+ " FROM result"
+ " WHERE contextId = %d"
+ " INTERSECT "
+ "SELECT DISTINCT benchmarkId, metricId"
+ " FROM result"
+ " WHERE contextId = %d"
+ #" LIMIT 10"
+ ";"
+ % (context1, context2)
)
bmstats_list = []
diff --git a/scripts/listcontexts.py b/scripts/listcontexts.py
index f6b1291..a72ae4a 100644
--- a/scripts/listcontexts.py
+++ b/scripts/listcontexts.py
@@ -2,25 +2,47 @@ import sys
import json
from dbaccess import execQuery
-from misc import idToText, getAllSnapshots, printJSONHeader
+from misc import idToText, printJSONHeader
class ListContexts:
def execute(self):
- # Get all host/platform/branch combinations:
- hpbs = execQuery(
- "SELECT DISTINCT hostId, platformId, branchId FROM result " +
- "ORDER BY hostId, platformId, branchId;")
+ # Get all host/platform/branch/sha1 combinations:
+ contexts = execQuery(
+ "SELECT id, hostId, platformId, branchId, sha1Id, EXTRACT(EPOCH FROM timestamp)::INT"
+ " FROM context"
+ " ORDER BY hostId, platformId, branchId, timestamp")
self.contexts = []
- for host, platform, branch in hpbs:
- snapshots = getAllSnapshots(host, platform, branch, True)
- self.contexts.append({
- 'host' : idToText('host', host),
- 'platform' : idToText('platform', platform),
- 'branch' : idToText('branch', branch),
- 'snapshots' : curSnapshots
- })
+
+ if not len(contexts):
+ return
+
+ id, host, platform, branch, sha1, timestamp = contexts[0]
+
+ curHost, curPlatform, curBranch = host, platform, branch
+ curSnapshots = [(idToText('sha1', sha1), timestamp, id)]
+
+ for (id, host, platform, branch, sha1, timestamp) in contexts[1:]:
+ if (host, platform, branch) != (curHost, curPlatform, curBranch):
+ self.contexts.append({
+ 'host' : idToText('host', curHost),
+ 'platform' : idToText('platform', curPlatform),
+ 'branch' : idToText('branch', curBranch),
+ 'snapshots' : curSnapshots
+ })
+
+ curHost, curPlatform, curBranch = host, platform, branch
+ curSnapshots = []
+
+ curSnapshots.append((idToText('sha1', sha1), timestamp, id))
+
+ self.contexts.append({
+ 'host' : idToText('host', curHost),
+ 'platform' : idToText('platform', curPlatform),
+ 'branch' : idToText('branch', curBranch),
+ 'snapshots' : curSnapshots
+ })
self.writeOutput()
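
The run-detection loop added to listcontexts.py relies entirely on the ORDER BY hostId, platformId, branchId, timestamp clause keeping equal host/platform/branch rows adjacent. An equivalent formulation with itertools.groupby over the same ordered rows (a sketch, not what the script does; contexts and idToText() are the ones used above):

    from itertools import groupby

    # Rows are (id, hostId, platformId, branchId, sha1Id, timestamp), already
    # ordered so that equal host/platform/branch runs are adjacent.
    grouped = []
    for (host, platform, branch), rows in groupby(
            contexts, key=lambda row: (row[1], row[2], row[3])):
        grouped.append({
            'host'      : idToText('host', host),
            'platform'  : idToText('platform', platform),
            'branch'    : idToText('branch', branch),
            'snapshots' : [(idToText('sha1', sha1), timestamp, id)
                           for id, _, _, _, sha1, timestamp in rows]
        })

Like the hand-written version, this only merges adjacent runs, so it is correct only because of the ORDER BY; it also handles an empty result without the early return.
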
diff --git a/scripts/misc.py b/scripts/misc.py
index 3fed597..660887f 100644
--- a/scripts/misc.py
+++ b/scripts/misc.py
@@ -84,20 +84,23 @@ def benchmarkToComponents(benchmark):
# Returns a chronologically ordered n-tuple of 2-tuples:
# (sha1, first upload timestamp).
def getSnapshots(host_id, platform_id, branch_id, sha11_id, sha12_id):
- minuid1 = execQuery(
- "SELECT min(uploadId) FROM result WHERE hostId = " +
- str(host_id) + " AND platformId = " + str(platform_id) +
- " AND branchId = " + str(branch_id) + " AND sha1Id = " +
- str(sha11_id) + ";")[0][0]
- minuid2 = execQuery(
- "SELECT min(uploadId) FROM result WHERE hostId = " +
- str(host_id) + " AND platformId = " + str(platform_id) +
- " AND branchId = " + str(branch_id) + " AND sha1Id = " +
- str(sha12_id) + ";")[0][0]
+ timestamp1 = execQuery(
+ "SELECT timestamp FROM context " +
+ "WHERE hostId = " + str(host_id) +
+ " AND platformId = " + str(platform_id) +
+ " AND branchId = " + str(branch_id) +
+ " AND sha1Id = " + str(sha11_id) + ";")[0][0]
+
+ timestamp2 = execQuery(
+ "SELECT timestamp FROM context " +
+ "WHERE hostId = " + str(host_id) +
+ " AND platformId = " + str(platform_id) +
+ " AND branchId = " + str(branch_id) +
+ " AND sha1Id = " + str(sha12_id) + ";")[0][0]
# Ensure chronological order:
- if minuid1 > minuid2:
- minuid1, minuid2 = minuid2, minuid1
+ if timestamp1 > timestamp2:
+ timestamp1, timestamp2 = timestamp2, timestamp1
# Each distinct SHA-1 that occurs for this host/platform/branch
# combination may occur multiple times with different upload times.
@@ -106,16 +109,14 @@ def getSnapshots(host_id, platform_id, branch_id, sha11_id, sha12_id):
# order of the uploadId attributes is consistent with the order
# of their corresponding startTime attributes):
snapshots = execQuery(
- "SELECT sha1Id, extract(epoch FROM startTime)::int AS " +
- "firstUploadTime FROM " +
- "(SELECT sha1Id, min(uploadId) AS minuid FROM result " +
- "WHERE hostId = " + str(host_id) +
- " AND platformId = " + str(platform_id) +
- " AND branchId = " + str(branch_id) +
- " AND uploadId between " + str(minuid1) + " AND " + str(minuid2) +
- " GROUP BY sha1Id) " +
- "AS dummy, upload WHERE upload.id = minuid " +
- "ORDER BY minuid ASC;")
+ "SELECT sha1Id, EXTRACT(EPOCH FROM timestamp)::INT AS "
+ "firstUploadTime FROM context "
+ "WHERE hostId = %d"
+ " AND platformId = %d"
+ " AND branchId = %d"
+ " AND timestamp BETWEEN %d AND %d"
+ " ORDER BY timestamp ASC;"
+ % (host_id, platform_id, branch_id, timestamp1, timestamp2))
return tuple(snapshots)
@@ -133,15 +134,13 @@ def getAllSnapshots(host_id, platform_id, branch_id, reverse = False):
# order of the uploadId attributes is consistent with the order
# of their corresponding startTime attributes):
snapshots = execQuery(
- "SELECT sha1Id, extract(epoch FROM startTime)::int AS " +
- "firstUploadTime FROM " +
- "(SELECT sha1Id, min(uploadId) AS minuid FROM result " +
- "WHERE hostId = " + str(host_id) +
- " AND platformId = " + str(platform_id) +
- " AND branchId = " + str(branch_id) +
- " GROUP BY sha1Id) " +
- "AS dummy, upload WHERE upload.id = minuid " +
- "ORDER BY minuid " + ("DESC;" if reverse else "ASC;"))
+ "SELECT sha1Id, EXTRACT(EPOCH FROM timestamp)::INT AS firstUploadTime"
+ " FROM context"
+ " WHERE hostId = %d"
+ " AND platformId = %d"
+ " AND branchId = %d"
+ % (host_id, platform_id, branch_id) +
+ " ORDER BY timestamp " + ("DESC;" if reverse else "ASC;"))
return tuple(snapshots)
@@ -205,26 +204,22 @@ def getLastRankingSnapshot(host_id, platform_id, branch_id, snapshots):
def getTimeSeries(
host_id, platform_id, branch_id, snapshots, benchmark_id, metric_id):
- # Fetch raw values:
- query = (
- "SELECT value, valid, sha1id FROM result WHERE " +
- " hostId = " + str(host_id) + " AND platformId = " +
- str(platform_id) + " AND branchId = " + str(branch_id) +
- " AND benchmarkId = " + str(benchmark_id) + " AND metricId = " +
- str(metric_id) + " AND sha1id IN (")
- first = True
- for s in snapshots:
- if not first:
- query += ", "
- else:
- first = False
- query += str(s[0])
- query += ") ORDER BY sha1Id;"
+ contexts = []
+ for snapshot in snapshots:
+        contexts.append(getContext(host_id, platform_id, branch_id, snapshot[0]))
- raw_values = tuple(execQuery(query) + [(-1, -1, -1)]); # Note sentinel item
+ # Fetch raw values:
+ raw_values = execQuery(
+ "SELECT value, valid, contextId FROM result"
+ " WHERE contextId IN (%s)"
+ " AND benchmarkId = %d"
+ " AND metricId = %d"
+ " ORDER BY contextId;"
+ % (", ".join(map(str, contexts)), benchmark_id, metric_id)) +
+ [(-1, -1, -1)] # Note sentinel item
# Compute per-sample stats:
- curr_sha1_id = -1
+ curr_context_id = -1
sample = []
valid_and_positive_sample = []
valid_median_obs = []
@@ -235,9 +230,9 @@ def getTimeSeries(
rses = []
tsitem_map = {}
# Loop over all observations (which are grouped on sample;
- # note the 1-1 correspondence between samples and SHA-1s):
- for obs, valid, sha1_id in raw_values:
- if sha1_id != curr_sha1_id:
+ # note the 1-1 correspondence between samples and contexts):
+ for obs, valid, context_id in raw_values:
+ if context_id != curr_context_id:
# A new sample has been collected, so register it and
# prepare for the next one:
sample_size = len(sample)
@@ -256,7 +251,7 @@ def getTimeSeries(
except ZeroDivisionError:
pass
- tsitem_map[curr_sha1_id] = (
+ tsitem_map[curr_context_id] = (
median_obs, sample_size, nrse, ninvalid, nzeros)
sample = []
valid_and_positive_sample = []
@@ -267,7 +262,7 @@ def getTimeSeries(
tot_nzeros = tot_nzeros + nzeros
nzeros = 0
- curr_sha1_id = sha1_id
+ curr_context_id = context_id
# Append observation to current sample:
sample.append(obs)
@@ -284,10 +279,9 @@ def getTimeSeries(
# Order chronologically:
ts = []
index = 0
- for snapshot in snapshots:
- sha1_id = snapshot[0]
- if sha1_id in tsitem_map:
- tsitem = tsitem_map[sha1_id]
+ for context in contexts:
+ if context in tsitem_map:
+ tsitem = tsitem_map[context]
ts.append(
(index, tsitem[0], tsitem[1], tsitem[2], tsitem[3], tsitem[4]))
index = index + 1
@@ -309,10 +303,10 @@ def getTimeSeries(
else:
rse_of_medians = -1
- ms = len(snapshots) - len(ts)
+ ms = len(contexts) - len(ts)
if len(ts) > 0:
- lsd = (len(snapshots) - 1) - ts[-1][0]
+ lsd = (len(contexts) - 1) - ts[-1][0]
else:
lsd = -1
@@ -544,24 +538,16 @@ def getBMTimeSeriesStatsList(
if progress_func != None:
progress_func(0.0, progress_arg)
+ contexts = []
+ for snapshot in snapshots:
+        contexts.append(getContext(host_id, platform_id, branch_id, snapshot[0]))
+
# Get all distinct benchmark/metric combinations that match the
# host/platform/branch context and are within the selected snapshot
# interval. Each such combination corresponds to a time series.
- query = (
- "SELECT DISTINCT benchmarkId, metricId FROM result WHERE " +
- " hostId = " + str(host_id) + " AND platformId = " +
- str(platform_id) + " AND branchId = " + str(branch_id) +
- " AND sha1id IN (")
- first = True
- for s in snapshots:
- if not first:
- query += ", "
- else:
- first = False
- query += str(s[0])
- query += ");"
-
- bmark_metrics = execQuery(query);
+ bmark_metrics = execQuery("SELECT DISTINCT benchmarkId, metricId"
+ " FROM result WHERE contextId IN (%s);"
+ % ", ".join(map(str, contexts)))
bmstats_list = []
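
getTimeSeries() and getBMTimeSeriesStatsList() now translate the snapshot list into context ids with one getContext() call, and therefore one query, per snapshot. Should that ever become a bottleneck, the lookup could be batched; a sketch under the same schema assumptions as above, with a hypothetical helper name (getContexts) that this patch does not add:

    from dbaccess import execQuery

    def getContexts(host_id, platform_id, branch_id, sha1_ids):
        # Hypothetical batched variant of getContext(): one query for the
        # whole snapshot interval instead of one query per SHA-1.
        if not sha1_ids:
            return []
        rows = execQuery(
            "SELECT sha1Id, id FROM context"
            " WHERE hostId = %d"
            " AND platformId = %d"
            " AND branchId = %d"
            " AND sha1Id IN (%s);"
            % (host_id, platform_id, branch_id, ", ".join(map(str, sha1_ids))))
        by_sha1 = dict(rows)
        # Keep the caller's (chronological) order; -1 marks combinations
        # that have no context row.
        return [by_sha1.get(sha1_id, -1) for sha1_id in sha1_ids]
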
diff --git a/scripts/singlecontextbmstats.py b/scripts/singlecontextbmstats.py
index fff0bcc..004c3b9 100644
--- a/scripts/singlecontextbmstats.py
+++ b/scripts/singlecontextbmstats.py
@@ -64,12 +64,12 @@ def extractSingleContextBMStats(
values = []
- query_result = execQuery(
- "SELECT value, valid FROM result WHERE hostId = " + str(host_id) +
- " AND platformId = " + str(platform_id) + " AND branchId = " +
- str(branch_id) + " AND sha1Id = " + str(sha1_id) +
- " AND benchmarkId = " + str(benchmark_id) + " AND metricId = " +
- str(metric_id) + ";")
+    context = getContext(host_id, platform_id, branch_id, sha1_id)
+ query_result = execQuery("SELECT value, valid FROM result"
+ " WHERE contextId = %d"
+ " AND benchmarkId = %d"
+ " AND metricId = %d;"
+ % (context, benchmark_id, metric_id))
for value, valid in query_result:
if valid:
diff --git a/scripts/uploadresults.py b/scripts/uploadresults.py
index 2d7791b..cf929b5 100755
--- a/scripts/uploadresults.py
+++ b/scripts/uploadresults.py
@@ -3,7 +3,7 @@
import sys
from xml.dom.minidom import parse
from dbaccess import setDatabase, execQuery, commit
-from misc import isValidSHA1
+from misc import isValidSHA1, getContext
# === Global functions ===============================================
@@ -238,6 +238,14 @@ platformId = findOrInsertId("platform", platform)
branchId = findOrInsertId("branch", branch)
sha1Id = findOrInsertId("sha1", sha1)
+contextId = getContext(hostId, platformId, branchId, sha1Id)
+if contextId == -1:
+    contextId = execQuery("INSERT INTO context"
+        " (hostId, platformId, branchId, sha1Id)"
+        " VALUES (%d, %d, %d, %d)"
+        " RETURNING id;"
+        % (hostId, platformId, branchId, sha1Id))[0][0]
+
# Append rows to the 'result' table ...
for result in results:
benchmark = (
@@ -250,19 +258,11 @@ for result in results:
result['lowerIsBetter'])
query = (
- "INSERT INTO result (hostId, platformId, branchId, sha1Id, " +
- "benchmarkId, value, valid, metricId, uploadId) VALUES ("
- + str(hostId)
- + ", " + str(platformId)
- + ", " + str(branchId)
- + ", " + str(sha1Id)
- + ", " + str(benchmarkId)
- + ", " + str(result['value'])
- + ", " + str(result['valid'])
- + ", " + str(metricId)
- + ", " + str(uploadId)
- + ");"
- )
+ "INSERT INTO result"
+ " (contextId, benchmarkId, value, valid, metricId, uploadId)"
+ " VALUES (%d, %d, %f, %s, %d, %d);"
+ % (contextId, benchmarkId, result['value'], result['valid'],
+ metricId, uploadId))
execQuery(query, False)
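
With this change, uploadresults.py is the only writer that creates context rows: it looks the combination up with getContext() and falls back to INSERT ... RETURNING id. If more writers ever appear, that find-or-insert step could move next to getContext() in misc.py; a sketch with a hypothetical name (getOrCreateContext), under the same assumptions as the earlier sketches:

    from dbaccess import execQuery

    def getOrCreateContext(host_id, platform_id, branch_id, sha1_id):
        # Hypothetical wrapper around the find-or-insert done inline above;
        # getContext() is the misc.py helper sketched near the top of the page.
        context_id = getContext(host_id, platform_id, branch_id, sha1_id)
        if context_id == -1:
            context_id = execQuery(
                "INSERT INTO context (hostId, platformId, branchId, sha1Id)"
                " VALUES (%d, %d, %d, %d)"
                " RETURNING id;"
                % (host_id, platform_id, branch_id, sha1_id))[0][0]
        return context_id

uploadresults.py would then reduce to contextId = getOrCreateContext(hostId, platformId, branchId, sha1Id). Note that, like the inline version, this is not safe against two concurrent uploads racing to create the same context row.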