diff options
-rw-r--r-- | database/scripts/plpgsqlfuncs.sql | 17 | ||||
-rw-r--r-- | database/scripts/privileges-ranking.sql | 8 | ||||
-rw-r--r-- | database/scripts/privileges.sql | 7 | ||||
-rwxr-xr-x | database/scripts/reset-ranking.py | 54 | ||||
-rw-r--r-- | database/scripts/tabledefs-ranking.sql | 59 | ||||
-rw-r--r-- | database/scripts/tabledefs.sql | 131 | ||||
-rwxr-xr-x | scripts/finalizeresults.py | 41 | ||||
-rw-r--r-- | scripts/getstats1.py | 6 | ||||
-rw-r--r-- | scripts/misc.py | 43 |
9 files changed, 132 insertions, 234 deletions
diff --git a/database/scripts/plpgsqlfuncs.sql b/database/scripts/plpgsqlfuncs.sql index 7a50c95..b5c5538 100644 --- a/database/scripts/plpgsqlfuncs.sql +++ b/database/scripts/plpgsqlfuncs.sql @@ -1,25 +1,18 @@ -- Inserts or updates a row in the 'ranking' table. CREATE OR REPLACE FUNCTION merge_ranking( - hostId_ BIGINT, platformId_ BIGINT, branchId_ BIGINT, sha1Id_ BIGINT, - benchmarkId_ BIGINT, metricId_ BIGINT, statId_ BIGINT, value_ REAL, - pos_ BIGINT) RETURNS VOID AS + contextId_ BIGINT, benchmarkId_ BIGINT, metricId_ BIGINT, statId_ BIGINT, + value_ REAL, pos_ BIGINT) RETURNS VOID AS $$ BEGIN BEGIN INSERT INTO ranking( - hostId, platformId, branchId, sha1Id, benchmarkId, metricId, - statId, value, pos) - VALUES ( - hostId_, platformId_, branchId_, sha1Id_, benchmarkId_, metricId_, - statId_, value_, pos_); + contextId, benchmarkId, metricId, statId, value, pos) + VALUES (contextId_, benchmarkId_, metricId_, statId_, value_, pos_); RETURN; EXCEPTION WHEN unique_violation THEN UPDATE ranking SET value = value_, pos = pos_ - WHERE hostId = hostId_ - AND platformId = platformId_ - AND branchId = branchId_ - AND sha1Id = sha1Id_ + WHERE contextId = contextId_ AND benchmarkId = benchmarkId_ AND metricId = metricId_ AND statId = statId_; diff --git a/database/scripts/privileges-ranking.sql b/database/scripts/privileges-ranking.sql deleted file mode 100644 index 630baac..0000000 --- a/database/scripts/privileges-ranking.sql +++ /dev/null @@ -1,8 +0,0 @@ --- Define privileges for the 'bmuser': - -GRANT SELECT ON rankingStat TO bmuser; - -GRANT SELECT ON ranking TO bmuser; -GRANT INSERT ON ranking TO bmuser; -GRANT UPDATE ON ranking TO bmuser; -GRANT UPDATE ON ranking_id_seq TO bmuser; diff --git a/database/scripts/privileges.sql b/database/scripts/privileges.sql index dba42f5..a88f1f7 100644 --- a/database/scripts/privileges.sql +++ b/database/scripts/privileges.sql @@ -45,3 +45,10 @@ GRANT UPDATE ON result_id_seq TO bmuser; GRANT SELECT ON context TO bmuser; GRANT INSERT ON context TO bmuser; GRANT UPDATE ON context_id_seq TO bmuser; + +GRANT SELECT ON rankingStat TO bmuser; + +GRANT SELECT ON ranking TO bmuser; +GRANT INSERT ON ranking TO bmuser; +GRANT UPDATE ON ranking TO bmuser; +GRANT UPDATE ON ranking_id_seq TO bmuser; diff --git a/database/scripts/reset-ranking.py b/database/scripts/reset-ranking.py deleted file mode 100755 index 5f7afde..0000000 --- a/database/scripts/reset-ranking.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python - -import sys -from subprocess import Popen, PIPE -import string - -""" -NOTE: This script should be run as the 'postgres' role. - -NOTE: Eventually, once the ranking feature is mature, this script should be removed - and the effect it has should be implemented in reset.py instead. -""" - -# --- Global functions ------------------------------------------------- - -def runCommand(cmd, exitOnFail = True): - print "running command >%s< ..." % string.join(cmd, " "), - p = Popen(cmd, stdout = PIPE, stderr = PIPE) - stdout, stderr = p.communicate() - if (p.returncode != 0): - print "FAIL:" - print " return code:", p.returncode - print " stdout: >%s<" % stdout.strip() - print " stderr: >%s<" % stderr.strip() - if (exitOnFail): - print "exiting ..." - sys.exit(1) - else: - print "CONTINUING ..." - else: - print "PASS" - - -# --- Main program ------------------------------------------------- - -# Ensure the intention of running this script. -reply = raw_input( - "WARNING: The 'ranking' and 'rankingStat' tables will be deleted!" - " Continue [y/N]?").strip().lower() -if (reply != "y" and reply != "yes"): - sys.exit(1) - -database = "bm" -user = "bmuser" -owner = "postgres" -tabledefs = "tabledefs-ranking.sql" -privileges = "privileges-ranking.sql" - -# Load table definitions (deleting existing tables). -runCommand(["psql", "-U", owner, "-d", database, "-f", tabledefs]) - -# Define user privileges (if necessary). -# (Note: remote client access is defined separately in pg_hba.conf) -runCommand(["psql", "-U", owner, "-d", database, "-f", privileges]) diff --git a/database/scripts/tabledefs-ranking.sql b/database/scripts/tabledefs-ranking.sql deleted file mode 100644 index 468f6fc..0000000 --- a/database/scripts/tabledefs-ranking.sql +++ /dev/null @@ -1,59 +0,0 @@ --- Table to keep available ranking statistic types: -DROP TABLE IF EXISTS rankingStat; -CREATE TABLE rankingStat ( - id BIGSERIAL PRIMARY KEY, value TEXT UNIQUE NOT NULL) WITH (OIDS=FALSE); -ALTER TABLE rankingStat OWNER TO postgres; --- Pre-populate: -INSERT INTO rankingStat (value) VALUES - ('QS'), -- Quality score - ('LCSSR'), -- Last chance stability score (last change regressing) - ('LCSSI'), -- Last change stability score (last change improving) - ('LCSS1R'), -- Last change stability score not considering history after - -- last change (last change regressing) - ('LCSS1I'); -- Last change stability score not considering history after - -- last change (last change improving) - - --- Table to store rankings: -DROP TABLE IF EXISTS ranking; -CREATE TABLE ranking -( - id BIGSERIAL PRIMARY KEY, - - -- Environment: - hostId BIGINT NOT NULL REFERENCES host ON DELETE CASCADE, - platformId BIGINT NOT NULL REFERENCES platform ON DELETE CASCADE, - - -- Revision: - branchId BIGINT NOT NULL REFERENCES branch ON DELETE CASCADE, - sha1Id BIGINT NOT NULL REFERENCES sha1 ON DELETE CASCADE, - - -- Benchmark: - benchmarkId BIGINT NOT NULL REFERENCES benchmark ON DELETE CASCADE, - - -- Metric: - metricId BIGINT NOT NULL REFERENCES metric ON DELETE CASCADE, - - -- Ranking statistic type: - statId BIGINT NOT NULL REFERENCES rankingStat ON DELETE CASCADE, - - -- Ranking statistic value: - value REAL NOT NULL, - - -- Ranking position: - pos INTEGER NOT NULL, - - UNIQUE (hostId, platformId, branchId, sha1Id, benchmarkId, metricId, statId) - -) WITH (OIDS=FALSE); -ALTER TABLE ranking OWNER TO postgres; - --- Indexes: -CREATE INDEX ranking_host_idx ON ranking (hostId); -CREATE INDEX ranking_platform_idx ON ranking (platformId); -CREATE INDEX ranking_branch_idx ON ranking (branchId); -CREATE INDEX ranking_sha1_idx ON ranking (sha1Id); -CREATE INDEX ranking_benchmark_idx ON ranking (benchmarkId); -CREATE INDEX ranking_metric_idx ON ranking (metricId); -CREATE INDEX ranking_stat_idx ON ranking (statId); -CREATE INDEX ranking_pos_idx ON ranking (pos); diff --git a/database/scripts/tabledefs.sql b/database/scripts/tabledefs.sql index 1964011..c6f94a2 100644 --- a/database/scripts/tabledefs.sql +++ b/database/scripts/tabledefs.sql @@ -1,24 +1,4 @@ --- A context stores common data for a set of results. -CREATE TABLE context -( - id BIGSERIAL PRIMARY KEY, - - -- Environment: - hostId BIGINT NOT NULL REFERENCES host ON DELETE CASCADE, - platformId BIGINT NOT NULL REFERENCES platform ON DELETE CASCADE, - - -- Revision: - branchId BIGINT NOT NULL REFERENCES branch ON DELETE CASCADE, - sha1Id BIGINT NOT NULL REFERENCES sha1 ON DELETE CASCADE, - - -- Timestamp: (approximated with earliest known startTime) - timestamp TIMESTAMP NOT NULL DEFAULT now(), - - UNIQUE(hostId, platformId, branchId, sha1Id) -) WITH (OIDS=FALSE); -ALTER TABLE context OWNER TO postgres; - --- Table to record upload events: +-- Upload events: CREATE TABLE upload ( id BIGSERIAL PRIMARY KEY, @@ -26,68 +6,119 @@ CREATE TABLE upload clientIPAddr TEXT NOT NULL DEFAULT inet_client_addr() ) WITH (OIDS=FALSE); ALTER TABLE upload OWNER TO postgres; +CREATE INDEX upload_startTime_idx ON upload (startTime); +CREATE INDEX upload_clientIPAddr_idx ON upload (clientIPAddr); + --- Tables to store texts referred to by the main result table: +-- Texts referred to by other tables: CREATE TABLE host ( id BIGSERIAL PRIMARY KEY, value TEXT UNIQUE NOT NULL) WITH (OIDS=FALSE); ALTER TABLE host OWNER TO postgres; - +CREATE INDEX host_value_idx ON host (value); +-- CREATE TABLE platform ( id BIGSERIAL PRIMARY KEY, value TEXT UNIQUE NOT NULL) WITH (OIDS=FALSE); ALTER TABLE platform OWNER TO postgres; - +CREATE INDEX platform_value_idx ON platform (value); +-- CREATE TABLE branch ( id BIGSERIAL PRIMARY KEY, value TEXT UNIQUE NOT NULL) WITH (OIDS=FALSE); ALTER TABLE branch OWNER TO postgres; - +CREATE INDEX branch_value_idx ON branch (value); +-- CREATE TABLE sha1 ( id BIGSERIAL PRIMARY KEY, value TEXT UNIQUE NOT NULL) WITH (OIDS=FALSE); ALTER TABLE sha1 OWNER TO postgres; - +CREATE INDEX sha1_value_idx ON sha1 (value); +-- CREATE TABLE benchmark ( id BIGSERIAL PRIMARY KEY, value TEXT UNIQUE NOT NULL) WITH (OIDS=FALSE); ALTER TABLE benchmark OWNER TO postgres; - +CREATE INDEX benchmark_value_idx ON benchmark (value); +-- CREATE TABLE metric ( id BIGSERIAL PRIMARY KEY, value TEXT UNIQUE NOT NULL, lowerIsBetter BOOLEAN NOT NULL) WITH (OIDS=FALSE); ALTER TABLE metric OWNER TO postgres; +CREATE INDEX metric_value_idx ON metric (value); --- Table to store results: -CREATE TABLE result + +-- Common data for a set of results. +CREATE TABLE context ( id BIGSERIAL PRIMARY KEY, + hostId BIGINT NOT NULL REFERENCES host ON DELETE CASCADE, + platformId BIGINT NOT NULL REFERENCES platform ON DELETE CASCADE, + branchId BIGINT NOT NULL REFERENCES branch ON DELETE CASCADE, + sha1Id BIGINT NOT NULL REFERENCES sha1 ON DELETE CASCADE, + + -- The timestamp is approximated with earliest known startTime: + timestamp TIMESTAMP NOT NULL DEFAULT now(), + + UNIQUE(hostId, platformId, branchId, sha1Id) +) WITH (OIDS=FALSE); +ALTER TABLE context OWNER TO postgres; +CREATE INDEX context_host_idx ON context (hostId); +CREATE INDEX context_platform_idx ON context (platformId); +CREATE INDEX context_branch_idx ON context (branchId); +CREATE INDEX context_sha1_idx ON context (sha1Id); +CREATE INDEX context_timestamp_idx ON context (timestamp); + +--Benchmark results (i.e. raw values produced from executing benchmarks): +CREATE TABLE result +( + id BIGSERIAL PRIMARY KEY, contextId BIGINT NOT NULL REFERENCES context ON DELETE CASCADE, benchmarkId BIGINT NOT NULL REFERENCES benchmark ON DELETE CASCADE, - - -- Value: value REAL NOT NULL, valid BOOLEAN NOT NULL, metricId BIGINT NOT NULL REFERENCES metric ON DELETE CASCADE, - uploadId BIGINT NOT NULL REFERENCES upload ON DELETE CASCADE - ) WITH (OIDS=FALSE); ALTER TABLE result OWNER TO postgres; - --- Indexes: -CREATE INDEX upload_startTime_idx ON upload (startTime); -CREATE INDEX upload_clientIPAddr_idx ON upload (clientIPAddr); --- -CREATE INDEX host_value_idx ON host (value); -CREATE INDEX platform_value_idx ON platform (value); -CREATE INDEX branch_value_idx ON branch (value); -CREATE INDEX sha1_value_idx ON sha1 (value); -CREATE INDEX benchmark_value_idx ON benchmark (value); -CREATE INDEX metric_value_idx ON metric (value); --- CREATE INDEX result_context_idx ON result (contextId); CREATE INDEX result_benchmark_idx ON result (benchmarkId); CREATE INDEX result_metric_idx ON result (metricId); CREATE INDEX result_valid_idx ON result (valid); --- -CREATE INDEX context_host_idx ON result (hostId); -CREATE INDEX context_platform_idx ON result (platformId); -CREATE INDEX context_branch_idx ON result (branchId); -CREATE INDEX context_sha1_idx ON result (sha1Id); + + +-- Ranking statistic types: +CREATE TABLE rankingStat ( + id BIGSERIAL PRIMARY KEY, value TEXT UNIQUE NOT NULL) WITH (OIDS=FALSE); +ALTER TABLE rankingStat OWNER TO postgres; +INSERT INTO rankingStat (value) VALUES + ('QS'), -- Quality score + ('LCSSR'), -- Last chance stability score (last change regressing) + ('LCSSI'), -- Last change stability score (last change improving) + ('LCSS1R'), -- Last change stability score not considering history after + -- last change (last change regressing) + ('LCSS1I'); -- Last change stability score not considering history after + -- last change (last change improving) + + +-- Rankings: +CREATE TABLE ranking +( + id BIGSERIAL PRIMARY KEY, + contextId BIGINT NOT NULL REFERENCES context ON DELETE CASCADE, + benchmarkId BIGINT NOT NULL REFERENCES benchmark ON DELETE CASCADE, + metricId BIGINT NOT NULL REFERENCES metric ON DELETE CASCADE, + + -- Ranking statistic type: + statId BIGINT NOT NULL REFERENCES rankingStat ON DELETE CASCADE, + + -- Ranking statistic value: + value REAL NOT NULL, + + -- Ranking position: + pos INTEGER NOT NULL, + + UNIQUE (contextId, benchmarkId, metricId, statId) +) WITH (OIDS=FALSE); +ALTER TABLE ranking OWNER TO postgres; +CREATE INDEX ranking_context_idx ON ranking (contextId); +CREATE INDEX ranking_benchmark_idx ON ranking (benchmarkId); +CREATE INDEX ranking_metric_idx ON ranking (metricId); +CREATE INDEX ranking_stat_idx ON ranking (statId); +CREATE INDEX ranking_pos_idx ON ranking (pos); diff --git a/scripts/finalizeresults.py b/scripts/finalizeresults.py index 05f7949..60c41f0 100755 --- a/scripts/finalizeresults.py +++ b/scripts/finalizeresults.py @@ -3,7 +3,7 @@ import sys from dbaccess import setDatabase, execQuery, commit from misc import ( - textToId, getAllSnapshots, getLastRankingSnapshot, isValidSHA1, + textToId, getAllSnapshots, getLastRankingSnapshot, getContext, isValidSHA1, getBMTimeSeriesStatsList) @@ -67,8 +67,7 @@ def qualityScore(lsd, ni, nz, nc, mdrse): # Assumptions: # - A high value should be ranked above a small one. # - A negative value is undefined and gets an invalid ranking position, i.e. -1. -def registerRanking( - table, stat_index, stat_name, host_id, platform_id, branch_id, sha1_id): +def registerRanking(table, stat_index, stat_name, context_id): table.sort(key=lambda x: x[stat_index], reverse=True) @@ -101,10 +100,7 @@ def registerRanking( # Insert or update the corresponding row in the 'ranking' table: query = ( "SELECT merge_ranking(" - + str(host_id) - + ", " + str(platform_id) - + ", " + str(branch_id) - + ", " + str(sha1_id) + + str(context_id) + ", " + str(benchmark_id) + ", " + str(metric_id) + ", " + str(stat_id) @@ -112,7 +108,6 @@ def registerRanking( + ", " + str(ranking_pos) + ");" ) - execQuery(query, False) @@ -174,7 +169,7 @@ def getFirstUploadTimestamp(snapshots, sha1_id): # ### 2 B DOCUMENTED! -def updateRankings(host_id, platform_id, branch_id, sha1_id): +def updateRankings(host_id, platform_id, branch_id, sha1_id, context_id): # Get all snapshots matching the host/platform/branch combination: sys.stdout.write("getting snapshots ... ") @@ -185,8 +180,8 @@ def updateRankings(host_id, platform_id, branch_id, sha1_id): # Rankings will normally be computed once a week for each - # host/context/branch combination (note the tradeoff between update - # frequency and database space consumption): + # host/platform/branch combination (note the tradeoff between update + # frequency and database size): ranking_interval = 3600 * 24 * 7 # secs in a week # Rankings will be updated if at least one of the following @@ -198,17 +193,12 @@ def updateRankings(host_id, platform_id, branch_id, sha1_id): force_cond = force_ranking if not force_cond: - last_ranking_sha1_id = getLastRankingSnapshot( - host_id, platform_id, branch_id, snapshots)[0] - print "last_ranking_sha1_id:", last_ranking_sha1_id + last_ranking_sha1_id, last_ranking_timestamp = getLastRankingSnapshot( + host_id, platform_id, branch_id) empty_cond = last_ranking_sha1_id < 0 if not empty_cond: - last_ranking_timestamp = getFirstUploadTimestamp( - snapshots, last_ranking_sha1_id) - if last_ranking_timestamp < 0: - print ( - "failed to extract last_ranking_timestamp (programming error?)") - sys.exit(1) + assert last_ranking_timestamp >= 0 + target_timestamp = getFirstUploadTimestamp(snapshots, sha1_id) if target_timestamp < 0: print ( @@ -273,9 +263,7 @@ def updateRankings(host_id, platform_id, branch_id, sha1_id): for name in nameToIndex: sys.stdout.write("registering ranking for " + name + " ...\r") sys.stdout.flush() - registerRanking( - table, nameToIndex[name], name, host_id, platform_id, branch_id, - sha1_id) + registerRanking(table, nameToIndex[name], name, context_id) sys.stdout.write("\n") @@ -320,8 +308,13 @@ if sha1_id == -1: print "no such SHA-1:", sha1 sys.exit(1) +context_id = getContext(host_id, platform_id, branch_id, sha1_id) +if context_id == -1: + print "no results found for this host/platform/branch/SHA-1 combination" + sys.exit(1) + -updateRankings(host_id, platform_id, branch_id, sha1_id) +updateRankings(host_id, platform_id, branch_id, sha1_id, context_id) # Make sure everything is written to the database: diff --git a/scripts/getstats1.py b/scripts/getstats1.py index 37445ff..47798eb 100644 --- a/scripts/getstats1.py +++ b/scripts/getstats1.py @@ -84,9 +84,9 @@ class GetStats1: # SingleContextBMStats objects. def computeBMStatsList(self): - # Get the context - context = getContext(self.host_id, self.platform_id, - self.branch_id, self.sha1_id) + # Get the context: + context = getContext( + self.host_id, self.platform_id, self.branch_id, self.sha1_id) # Get all distinct benchmark/metric combinations matching the context: bmark_metrics = execQuery("SELECT DISTINCT benchmarkId, metricId" diff --git a/scripts/misc.py b/scripts/misc.py index 216998c..564d8d3 100644 --- a/scripts/misc.py +++ b/scripts/misc.py @@ -140,35 +140,30 @@ def getAllSnapshots(host_id, platform_id, branch_id, reverse = False): " WHERE hostId = %d" " AND platformId = %d" " AND branchId = %d" - % (host_id, platform_id, branch_id) + + % (host_id, platform_id, branch_id) + " ORDER BY timestamp " + ("DESC;" if reverse else "ASC;")) return tuple(snapshots) -# Gets the (SHA-1 ID, timestamp) pair associated with the most recent rankings -# computed for the given host/platform/branch combination. -# The 'snapshots' argument is a chronologically ordered n-tuple of -# (SHA-1 ID, first upload timestamp) pairs. Returns (-1, -1) if no match -# is found. -def getLastRankingSnapshot(host_id, platform_id, branch_id, snapshots): - - # Get all SHA-1 IDs in 'ranking' for this host/platform/branch - # combination: - ranking_sha1_ids = zip(*execQuery( - "SELECT DISTINCT sha1Id FROM ranking " + - "WHERE hostId = " + str(host_id) + - " AND platformId = " + str(platform_id) + - " AND branchId = " + str(branch_id) + - ";"))[0] - - # Find the most recent one among the ranking SHA-1 IDs: - for sha1_id, timestamp in reversed(snapshots): - print "sha1_id:", sha1_id - if sha1_id in ranking_sha1_ids: - return (sha1_id, timestamp) - - return (-1, -1) # not found +# Returns the (SHA-1 ID, timestamp) pair associated with the most recent +# rankings computed for the given host/platform/branch combination, or +# (-1, -1) if no match is found. +def getLastRankingSnapshot(host_id, platform_id, branch_id): + result = execQuery( + "SELECT matchingcontext.sha1id, timestamp" + " FROM ranking," + " (SELECT id, sha1Id, timestamp" + " FROM context" + " WHERE hostId = %d" + " AND platformId = %d" + " AND branchId = %d) AS matchingContext" + " WHERE contextId = matchingContext.id" + " ORDER BY timestamp DESC LIMIT 1;" + % (host_id, platform_id, branch_id)) + if len(result): + return query_result[0] + return -1, -1 # Retrieves the time series of valid median results for the given |