author     jasplin <qt-info@nokia.com>    2011-02-04 12:56:31 +0100
committer  jasplin <qt-info@nokia.com>    2011-02-04 12:56:31 +0100
commit     d7b560485dd0f943c3aee093b2708c546dc67804 (patch)
tree       5a6622036e116c64104e4a19c12a4ab3154c1e1d
parent     c06cb08a12f676be66b38a2b3c28a7f8ce105b57 (diff)
Improved ranking table + merged DB scripts.
The hostId, platformId, branchId, and sha1Id columns in the 'ranking' table are now replaced with a single contextId. The *-ranking* DB scripts are now merged into the main scripts.
-rw-r--r--   database/scripts/plpgsqlfuncs.sql         17
-rw-r--r--   database/scripts/privileges-ranking.sql    8
-rw-r--r--   database/scripts/privileges.sql            7
-rwxr-xr-x   database/scripts/reset-ranking.py         54
-rw-r--r--   database/scripts/tabledefs-ranking.sql    59
-rw-r--r--   database/scripts/tabledefs.sql           131
-rwxr-xr-x   scripts/finalizeresults.py                41
-rw-r--r--   scripts/getstats1.py                       6
-rw-r--r--   scripts/misc.py                           43
9 files changed, 132 insertions(+), 234 deletions(-)
diff --git a/database/scripts/plpgsqlfuncs.sql b/database/scripts/plpgsqlfuncs.sql
index 7a50c95..b5c5538 100644
--- a/database/scripts/plpgsqlfuncs.sql
+++ b/database/scripts/plpgsqlfuncs.sql
@@ -1,25 +1,18 @@
-- Inserts or updates a row in the 'ranking' table.
CREATE OR REPLACE FUNCTION merge_ranking(
- hostId_ BIGINT, platformId_ BIGINT, branchId_ BIGINT, sha1Id_ BIGINT,
- benchmarkId_ BIGINT, metricId_ BIGINT, statId_ BIGINT, value_ REAL,
- pos_ BIGINT) RETURNS VOID AS
+ contextId_ BIGINT, benchmarkId_ BIGINT, metricId_ BIGINT, statId_ BIGINT,
+ value_ REAL, pos_ BIGINT) RETURNS VOID AS
$$
BEGIN
BEGIN
INSERT INTO ranking(
- hostId, platformId, branchId, sha1Id, benchmarkId, metricId,
- statId, value, pos)
- VALUES (
- hostId_, platformId_, branchId_, sha1Id_, benchmarkId_, metricId_,
- statId_, value_, pos_);
+ contextId, benchmarkId, metricId, statId, value, pos)
+ VALUES (contextId_, benchmarkId_, metricId_, statId_, value_, pos_);
RETURN;
EXCEPTION WHEN unique_violation THEN
UPDATE ranking
SET value = value_, pos = pos_
- WHERE hostId = hostId_
- AND platformId = platformId_
- AND branchId = branchId_
- AND sha1Id = sha1Id_
+ WHERE contextId = contextId_
AND benchmarkId = benchmarkId_
AND metricId = metricId_
AND statId = statId_;
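With the merged function, callers pass a single context ID in place of the four environment/revision IDs. A minimal sketch of a call against the new signature (the literal IDs and values below are placeholders, not data from this repository):

    -- Hypothetical invocation: 42 stands for an existing context row, 7/3/1 for
    -- existing benchmark/metric/rankingStat rows, followed by the statistic
    -- value and the ranking position.
    SELECT merge_ranking(42, 7, 3, 1, 0.913, 5);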
diff --git a/database/scripts/privileges-ranking.sql b/database/scripts/privileges-ranking.sql
deleted file mode 100644
index 630baac..0000000
--- a/database/scripts/privileges-ranking.sql
+++ /dev/null
@@ -1,8 +0,0 @@
--- Define privileges for the 'bmuser':
-
-GRANT SELECT ON rankingStat TO bmuser;
-
-GRANT SELECT ON ranking TO bmuser;
-GRANT INSERT ON ranking TO bmuser;
-GRANT UPDATE ON ranking TO bmuser;
-GRANT UPDATE ON ranking_id_seq TO bmuser;
diff --git a/database/scripts/privileges.sql b/database/scripts/privileges.sql
index dba42f5..a88f1f7 100644
--- a/database/scripts/privileges.sql
+++ b/database/scripts/privileges.sql
@@ -45,3 +45,10 @@ GRANT UPDATE ON result_id_seq TO bmuser;
GRANT SELECT ON context TO bmuser;
GRANT INSERT ON context TO bmuser;
GRANT UPDATE ON context_id_seq TO bmuser;
+
+GRANT SELECT ON rankingStat TO bmuser;
+
+GRANT SELECT ON ranking TO bmuser;
+GRANT INSERT ON ranking TO bmuser;
+GRANT UPDATE ON ranking TO bmuser;
+GRANT UPDATE ON ranking_id_seq TO bmuser;
diff --git a/database/scripts/reset-ranking.py b/database/scripts/reset-ranking.py
deleted file mode 100755
index 5f7afde..0000000
--- a/database/scripts/reset-ranking.py
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-from subprocess import Popen, PIPE
-import string
-
-"""
-NOTE: This script should be run as the 'postgres' role.
-
-NOTE: Eventually, once the ranking feature is mature, this script should be removed
- and the effect it has should be implemented in reset.py instead.
-"""
-
-# --- Global functions -------------------------------------------------
-
-def runCommand(cmd, exitOnFail = True):
- print "running command >%s< ..." % string.join(cmd, " "),
- p = Popen(cmd, stdout = PIPE, stderr = PIPE)
- stdout, stderr = p.communicate()
- if (p.returncode != 0):
- print "FAIL:"
- print " return code:", p.returncode
- print " stdout: >%s<" % stdout.strip()
- print " stderr: >%s<" % stderr.strip()
- if (exitOnFail):
- print "exiting ..."
- sys.exit(1)
- else:
- print "CONTINUING ..."
- else:
- print "PASS"
-
-
-# --- Main program -------------------------------------------------
-
-# Ensure the intention of running this script.
-reply = raw_input(
- "WARNING: The 'ranking' and 'rankingStat' tables will be deleted!"
- " Continue [y/N]?").strip().lower()
-if (reply != "y" and reply != "yes"):
- sys.exit(1)
-
-database = "bm"
-user = "bmuser"
-owner = "postgres"
-tabledefs = "tabledefs-ranking.sql"
-privileges = "privileges-ranking.sql"
-
-# Load table definitions (deleting existing tables).
-runCommand(["psql", "-U", owner, "-d", database, "-f", tabledefs])
-
-# Define user privileges (if necessary).
-# (Note: remote client access is defined separately in pg_hba.conf)
-runCommand(["psql", "-U", owner, "-d", database, "-f", privileges])
diff --git a/database/scripts/tabledefs-ranking.sql b/database/scripts/tabledefs-ranking.sql
deleted file mode 100644
index 468f6fc..0000000
--- a/database/scripts/tabledefs-ranking.sql
+++ /dev/null
@@ -1,59 +0,0 @@
--- Table to keep available ranking statistic types:
-DROP TABLE IF EXISTS rankingStat;
-CREATE TABLE rankingStat (
- id BIGSERIAL PRIMARY KEY, value TEXT UNIQUE NOT NULL) WITH (OIDS=FALSE);
-ALTER TABLE rankingStat OWNER TO postgres;
--- Pre-populate:
-INSERT INTO rankingStat (value) VALUES
- ('QS'), -- Quality score
- ('LCSSR'), -- Last chance stability score (last change regressing)
- ('LCSSI'), -- Last change stability score (last change improving)
- ('LCSS1R'), -- Last change stability score not considering history after
- -- last change (last change regressing)
- ('LCSS1I'); -- Last change stability score not considering history after
- -- last change (last change improving)
-
-
--- Table to store rankings:
-DROP TABLE IF EXISTS ranking;
-CREATE TABLE ranking
-(
- id BIGSERIAL PRIMARY KEY,
-
- -- Environment:
- hostId BIGINT NOT NULL REFERENCES host ON DELETE CASCADE,
- platformId BIGINT NOT NULL REFERENCES platform ON DELETE CASCADE,
-
- -- Revision:
- branchId BIGINT NOT NULL REFERENCES branch ON DELETE CASCADE,
- sha1Id BIGINT NOT NULL REFERENCES sha1 ON DELETE CASCADE,
-
- -- Benchmark:
- benchmarkId BIGINT NOT NULL REFERENCES benchmark ON DELETE CASCADE,
-
- -- Metric:
- metricId BIGINT NOT NULL REFERENCES metric ON DELETE CASCADE,
-
- -- Ranking statistic type:
- statId BIGINT NOT NULL REFERENCES rankingStat ON DELETE CASCADE,
-
- -- Ranking statistic value:
- value REAL NOT NULL,
-
- -- Ranking position:
- pos INTEGER NOT NULL,
-
- UNIQUE (hostId, platformId, branchId, sha1Id, benchmarkId, metricId, statId)
-
-) WITH (OIDS=FALSE);
-ALTER TABLE ranking OWNER TO postgres;
-
--- Indexes:
-CREATE INDEX ranking_host_idx ON ranking (hostId);
-CREATE INDEX ranking_platform_idx ON ranking (platformId);
-CREATE INDEX ranking_branch_idx ON ranking (branchId);
-CREATE INDEX ranking_sha1_idx ON ranking (sha1Id);
-CREATE INDEX ranking_benchmark_idx ON ranking (benchmarkId);
-CREATE INDEX ranking_metric_idx ON ranking (metricId);
-CREATE INDEX ranking_stat_idx ON ranking (statId);
-CREATE INDEX ranking_pos_idx ON ranking (pos);
diff --git a/database/scripts/tabledefs.sql b/database/scripts/tabledefs.sql
index 1964011..c6f94a2 100644
--- a/database/scripts/tabledefs.sql
+++ b/database/scripts/tabledefs.sql
@@ -1,24 +1,4 @@
--- A context stores common data for a set of results.
-CREATE TABLE context
-(
- id BIGSERIAL PRIMARY KEY,
-
- -- Environment:
- hostId BIGINT NOT NULL REFERENCES host ON DELETE CASCADE,
- platformId BIGINT NOT NULL REFERENCES platform ON DELETE CASCADE,
-
- -- Revision:
- branchId BIGINT NOT NULL REFERENCES branch ON DELETE CASCADE,
- sha1Id BIGINT NOT NULL REFERENCES sha1 ON DELETE CASCADE,
-
- -- Timestamp: (approximated with earliest known startTime)
- timestamp TIMESTAMP NOT NULL DEFAULT now(),
-
- UNIQUE(hostId, platformId, branchId, sha1Id)
-) WITH (OIDS=FALSE);
-ALTER TABLE context OWNER TO postgres;
-
--- Table to record upload events:
+-- Upload events:
CREATE TABLE upload
(
id BIGSERIAL PRIMARY KEY,
@@ -26,68 +6,119 @@ CREATE TABLE upload
clientIPAddr TEXT NOT NULL DEFAULT inet_client_addr()
) WITH (OIDS=FALSE);
ALTER TABLE upload OWNER TO postgres;
+CREATE INDEX upload_startTime_idx ON upload (startTime);
+CREATE INDEX upload_clientIPAddr_idx ON upload (clientIPAddr);
+
--- Tables to store texts referred to by the main result table:
+-- Texts referred to by other tables:
CREATE TABLE host (
id BIGSERIAL PRIMARY KEY, value TEXT UNIQUE NOT NULL) WITH (OIDS=FALSE);
ALTER TABLE host OWNER TO postgres;
-
+CREATE INDEX host_value_idx ON host (value);
+--
CREATE TABLE platform (
id BIGSERIAL PRIMARY KEY, value TEXT UNIQUE NOT NULL) WITH (OIDS=FALSE);
ALTER TABLE platform OWNER TO postgres;
-
+CREATE INDEX platform_value_idx ON platform (value);
+--
CREATE TABLE branch (
id BIGSERIAL PRIMARY KEY, value TEXT UNIQUE NOT NULL) WITH (OIDS=FALSE);
ALTER TABLE branch OWNER TO postgres;
-
+CREATE INDEX branch_value_idx ON branch (value);
+--
CREATE TABLE sha1 (
id BIGSERIAL PRIMARY KEY, value TEXT UNIQUE NOT NULL) WITH (OIDS=FALSE);
ALTER TABLE sha1 OWNER TO postgres;
-
+CREATE INDEX sha1_value_idx ON sha1 (value);
+--
CREATE TABLE benchmark (
id BIGSERIAL PRIMARY KEY, value TEXT UNIQUE NOT NULL) WITH (OIDS=FALSE);
ALTER TABLE benchmark OWNER TO postgres;
-
+CREATE INDEX benchmark_value_idx ON benchmark (value);
+--
CREATE TABLE metric (
id BIGSERIAL PRIMARY KEY, value TEXT UNIQUE NOT NULL,
lowerIsBetter BOOLEAN NOT NULL) WITH (OIDS=FALSE);
ALTER TABLE metric OWNER TO postgres;
+CREATE INDEX metric_value_idx ON metric (value);
--- Table to store results:
-CREATE TABLE result
+
+-- Common data for a set of results.
+CREATE TABLE context
(
id BIGSERIAL PRIMARY KEY,
+ hostId BIGINT NOT NULL REFERENCES host ON DELETE CASCADE,
+ platformId BIGINT NOT NULL REFERENCES platform ON DELETE CASCADE,
+ branchId BIGINT NOT NULL REFERENCES branch ON DELETE CASCADE,
+ sha1Id BIGINT NOT NULL REFERENCES sha1 ON DELETE CASCADE,
+
+ -- The timestamp is approximated with earliest known startTime:
+ timestamp TIMESTAMP NOT NULL DEFAULT now(),
+
+ UNIQUE(hostId, platformId, branchId, sha1Id)
+) WITH (OIDS=FALSE);
+ALTER TABLE context OWNER TO postgres;
+CREATE INDEX context_host_idx ON context (hostId);
+CREATE INDEX context_platform_idx ON context (platformId);
+CREATE INDEX context_branch_idx ON context (branchId);
+CREATE INDEX context_sha1_idx ON context (sha1Id);
+CREATE INDEX context_timestamp_idx ON context (timestamp);
+
+-- Benchmark results (i.e. raw values produced from executing benchmarks):
+CREATE TABLE result
+(
+ id BIGSERIAL PRIMARY KEY,
contextId BIGINT NOT NULL REFERENCES context ON DELETE CASCADE,
benchmarkId BIGINT NOT NULL REFERENCES benchmark ON DELETE CASCADE,
-
- -- Value:
value REAL NOT NULL,
valid BOOLEAN NOT NULL,
metricId BIGINT NOT NULL REFERENCES metric ON DELETE CASCADE,
-
uploadId BIGINT NOT NULL REFERENCES upload ON DELETE CASCADE
-
) WITH (OIDS=FALSE);
ALTER TABLE result OWNER TO postgres;
-
--- Indexes:
-CREATE INDEX upload_startTime_idx ON upload (startTime);
-CREATE INDEX upload_clientIPAddr_idx ON upload (clientIPAddr);
---
-CREATE INDEX host_value_idx ON host (value);
-CREATE INDEX platform_value_idx ON platform (value);
-CREATE INDEX branch_value_idx ON branch (value);
-CREATE INDEX sha1_value_idx ON sha1 (value);
-CREATE INDEX benchmark_value_idx ON benchmark (value);
-CREATE INDEX metric_value_idx ON metric (value);
---
CREATE INDEX result_context_idx ON result (contextId);
CREATE INDEX result_benchmark_idx ON result (benchmarkId);
CREATE INDEX result_metric_idx ON result (metricId);
CREATE INDEX result_valid_idx ON result (valid);
---
-CREATE INDEX context_host_idx ON result (hostId);
-CREATE INDEX context_platform_idx ON result (platformId);
-CREATE INDEX context_branch_idx ON result (branchId);
-CREATE INDEX context_sha1_idx ON result (sha1Id);
+
+
+-- Ranking statistic types:
+CREATE TABLE rankingStat (
+ id BIGSERIAL PRIMARY KEY, value TEXT UNIQUE NOT NULL) WITH (OIDS=FALSE);
+ALTER TABLE rankingStat OWNER TO postgres;
+INSERT INTO rankingStat (value) VALUES
+ ('QS'), -- Quality score
+ ('LCSSR'), -- Last change stability score (last change regressing)
+ ('LCSSI'), -- Last change stability score (last change improving)
+ ('LCSS1R'), -- Last change stability score not considering history after
+ -- last change (last change regressing)
+ ('LCSS1I'); -- Last change stability score not considering history after
+ -- last change (last change improving)
+
+
+-- Rankings:
+CREATE TABLE ranking
+(
+ id BIGSERIAL PRIMARY KEY,
+ contextId BIGINT NOT NULL REFERENCES context ON DELETE CASCADE,
+ benchmarkId BIGINT NOT NULL REFERENCES benchmark ON DELETE CASCADE,
+ metricId BIGINT NOT NULL REFERENCES metric ON DELETE CASCADE,
+
+ -- Ranking statistic type:
+ statId BIGINT NOT NULL REFERENCES rankingStat ON DELETE CASCADE,
+
+ -- Ranking statistic value:
+ value REAL NOT NULL,
+
+ -- Ranking position:
+ pos INTEGER NOT NULL,
+
+ UNIQUE (contextId, benchmarkId, metricId, statId)
+) WITH (OIDS=FALSE);
+ALTER TABLE ranking OWNER TO postgres;
+CREATE INDEX ranking_context_idx ON ranking (contextId);
+CREATE INDEX ranking_benchmark_idx ON ranking (benchmarkId);
+CREATE INDEX ranking_metric_idx ON ranking (metricId);
+CREATE INDEX ranking_stat_idx ON ranking (statId);
+CREATE INDEX ranking_pos_idx ON ranking (pos);
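Since 'ranking' now stores only a contextId, the host/platform/branch/SHA-1 behind a ranking is recovered by joining through 'context' and the text tables. A sketch of such a lookup against the schema above (the string literals are placeholders):

    -- Hypothetical query: list rankings for one host/platform/branch/SHA-1 combination.
    SELECT r.benchmarkId, r.metricId, r.statId, r.value, r.pos
    FROM ranking r
    JOIN context c ON r.contextId = c.id
    JOIN host h ON c.hostId = h.id
    JOIN platform p ON c.platformId = p.id
    JOIN branch b ON c.branchId = b.id
    JOIN sha1 s ON c.sha1Id = s.id
    WHERE h.value = 'some-host'
      AND p.value = 'some-platform'
      AND b.value = 'some-branch'
      AND s.value = 'some-sha1'
    ORDER BY r.pos;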
diff --git a/scripts/finalizeresults.py b/scripts/finalizeresults.py
index 05f7949..60c41f0 100755
--- a/scripts/finalizeresults.py
+++ b/scripts/finalizeresults.py
@@ -3,7 +3,7 @@
import sys
from dbaccess import setDatabase, execQuery, commit
from misc import (
- textToId, getAllSnapshots, getLastRankingSnapshot, isValidSHA1,
+ textToId, getAllSnapshots, getLastRankingSnapshot, getContext, isValidSHA1,
getBMTimeSeriesStatsList)
@@ -67,8 +67,7 @@ def qualityScore(lsd, ni, nz, nc, mdrse):
# Assumptions:
# - A high value should be ranked above a small one.
# - A negative value is undefined and gets an invalid ranking position, i.e. -1.
-def registerRanking(
- table, stat_index, stat_name, host_id, platform_id, branch_id, sha1_id):
+def registerRanking(table, stat_index, stat_name, context_id):
table.sort(key=lambda x: x[stat_index], reverse=True)
@@ -101,10 +100,7 @@ def registerRanking(
# Insert or update the corresponding row in the 'ranking' table:
query = (
"SELECT merge_ranking("
- + str(host_id)
- + ", " + str(platform_id)
- + ", " + str(branch_id)
- + ", " + str(sha1_id)
+ + str(context_id)
+ ", " + str(benchmark_id)
+ ", " + str(metric_id)
+ ", " + str(stat_id)
@@ -112,7 +108,6 @@ def registerRanking(
+ ", " + str(ranking_pos)
+ ");"
)
-
execQuery(query, False)
@@ -174,7 +169,7 @@ def getFirstUploadTimestamp(snapshots, sha1_id):
# ### 2 B DOCUMENTED!
-def updateRankings(host_id, platform_id, branch_id, sha1_id):
+def updateRankings(host_id, platform_id, branch_id, sha1_id, context_id):
# Get all snapshots matching the host/platform/branch combination:
sys.stdout.write("getting snapshots ... ")
@@ -185,8 +180,8 @@ def updateRankings(host_id, platform_id, branch_id, sha1_id):
# Rankings will normally be computed once a week for each
- # host/context/branch combination (note the tradeoff between update
- # frequency and database space consumption):
+ # host/platform/branch combination (note the tradeoff between update
+ # frequency and database size):
ranking_interval = 3600 * 24 * 7 # secs in a week
# Rankings will be updated if at least one of the following
@@ -198,17 +193,12 @@ def updateRankings(host_id, platform_id, branch_id, sha1_id):
force_cond = force_ranking
if not force_cond:
- last_ranking_sha1_id = getLastRankingSnapshot(
- host_id, platform_id, branch_id, snapshots)[0]
- print "last_ranking_sha1_id:", last_ranking_sha1_id
+ last_ranking_sha1_id, last_ranking_timestamp = getLastRankingSnapshot(
+ host_id, platform_id, branch_id)
empty_cond = last_ranking_sha1_id < 0
if not empty_cond:
- last_ranking_timestamp = getFirstUploadTimestamp(
- snapshots, last_ranking_sha1_id)
- if last_ranking_timestamp < 0:
- print (
- "failed to extract last_ranking_timestamp (programming error?)")
- sys.exit(1)
+ assert last_ranking_timestamp >= 0
+
target_timestamp = getFirstUploadTimestamp(snapshots, sha1_id)
if target_timestamp < 0:
print (
@@ -273,9 +263,7 @@ def updateRankings(host_id, platform_id, branch_id, sha1_id):
for name in nameToIndex:
sys.stdout.write("registering ranking for " + name + " ...\r")
sys.stdout.flush()
- registerRanking(
- table, nameToIndex[name], name, host_id, platform_id, branch_id,
- sha1_id)
+ registerRanking(table, nameToIndex[name], name, context_id)
sys.stdout.write("\n")
@@ -320,8 +308,13 @@ if sha1_id == -1:
print "no such SHA-1:", sha1
sys.exit(1)
+context_id = getContext(host_id, platform_id, branch_id, sha1_id)
+if context_id == -1:
+ print "no results found for this host/platform/branch/SHA-1 combination"
+ sys.exit(1)
+
-updateRankings(host_id, platform_id, branch_id, sha1_id)
+updateRankings(host_id, platform_id, branch_id, sha1_id, context_id)
# Make sure everything is written to the database:
diff --git a/scripts/getstats1.py b/scripts/getstats1.py
index 37445ff..47798eb 100644
--- a/scripts/getstats1.py
+++ b/scripts/getstats1.py
@@ -84,9 +84,9 @@ class GetStats1:
# SingleContextBMStats objects.
def computeBMStatsList(self):
- # Get the context
- context = getContext(self.host_id, self.platform_id,
- self.branch_id, self.sha1_id)
+ # Get the context:
+ context = getContext(
+ self.host_id, self.platform_id, self.branch_id, self.sha1_id)
# Get all distinct benchmark/metric combinations matching the context:
bmark_metrics = execQuery("SELECT DISTINCT benchmarkId, metricId"
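getContext() itself (imported from misc.py) is not part of this diff; presumably it resolves a host/platform/branch/SHA-1 combination to the id of the matching 'context' row, roughly along the lines of the query sketched below (an assumption, not code from the repository; the numeric IDs are placeholders):

    -- Assumed shape of the lookup behind getContext(); yields the context id,
    -- or no row when the combination has no results yet.
    SELECT id FROM context
    WHERE hostId = 11 AND platformId = 22 AND branchId = 33 AND sha1Id = 44;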
diff --git a/scripts/misc.py b/scripts/misc.py
index 216998c..564d8d3 100644
--- a/scripts/misc.py
+++ b/scripts/misc.py
@@ -140,35 +140,30 @@ def getAllSnapshots(host_id, platform_id, branch_id, reverse = False):
" WHERE hostId = %d"
" AND platformId = %d"
" AND branchId = %d"
- % (host_id, platform_id, branch_id) +
+ % (host_id, platform_id, branch_id) +
" ORDER BY timestamp " + ("DESC;" if reverse else "ASC;"))
return tuple(snapshots)
-# Gets the (SHA-1 ID, timestamp) pair associated with the most recent rankings
-# computed for the given host/platform/branch combination.
-# The 'snapshots' argument is a chronologically ordered n-tuple of
-# (SHA-1 ID, first upload timestamp) pairs. Returns (-1, -1) if no match
-# is found.
-def getLastRankingSnapshot(host_id, platform_id, branch_id, snapshots):
-
- # Get all SHA-1 IDs in 'ranking' for this host/platform/branch
- # combination:
- ranking_sha1_ids = zip(*execQuery(
- "SELECT DISTINCT sha1Id FROM ranking " +
- "WHERE hostId = " + str(host_id) +
- " AND platformId = " + str(platform_id) +
- " AND branchId = " + str(branch_id) +
- ";"))[0]
-
- # Find the most recent one among the ranking SHA-1 IDs:
- for sha1_id, timestamp in reversed(snapshots):
- print "sha1_id:", sha1_id
- if sha1_id in ranking_sha1_ids:
- return (sha1_id, timestamp)
-
- return (-1, -1) # not found
+# Returns the (SHA-1 ID, timestamp) pair associated with the most recent
+# rankings computed for the given host/platform/branch combination, or
+# (-1, -1) if no match is found.
+def getLastRankingSnapshot(host_id, platform_id, branch_id):
+ result = execQuery(
+ "SELECT matchingcontext.sha1id, timestamp"
+ " FROM ranking,"
+ " (SELECT id, sha1Id, timestamp"
+ " FROM context"
+ " WHERE hostId = %d"
+ " AND platformId = %d"
+ " AND branchId = %d) AS matchingContext"
+ " WHERE contextId = matchingContext.id"
+ " ORDER BY timestamp DESC LIMIT 1;"
+ % (host_id, platform_id, branch_id))
+ if len(result):
+ return result[0]
+ return -1, -1
# Retrieves the time series of valid median results for the given