summaryrefslogtreecommitdiffstats
path: root/scripts/misc.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/misc.py')
-rw-r--r--scripts/misc.py429
1 files changed, 367 insertions, 62 deletions
diff --git a/scripts/misc.py b/scripts/misc.py
index b3a2274..8bd8516 100644
--- a/scripts/misc.py
+++ b/scripts/misc.py
@@ -45,6 +45,33 @@ def textToId(table, text):
# ### 2 B DOCUMENTED!
+def findOrInsertId(table, value, *args):
+
+ query_result = execQuery(
+ "SELECT id FROM " + table + " WHERE value = %s", (value,))
+ if len(query_result) == 1:
+ # Found, so return ID:
+ return query_result[0][0]
+
+ # Not found, so insert:
+ query = "INSERT INTO " + table + " (value"
+ for i in range(0, len(args), 2):
+ query += ", " + args[i]
+ query += ") VALUES (%s"
+ values = [value]
+ for i in range(0, len(args), 2):
+ query += ", %s"
+ values.append(args[i + 1])
+
+ # ... and retrieve ID:
+ query += ") RETURNING id"
+ query_result = execQuery(query, values)
+
+ assert len(query_result) == 1
+ return query_result[0][0]
+
+
+# ### 2 B DOCUMENTED!
# Maybe also rename to lowerIsBetter() ? (but note that a global function with
# that name already exists in uploadresults.py)
def metricIdToLowerIsBetter(metric_id):
@@ -64,6 +91,13 @@ def metricIdToLowerIsBetter(metric_id):
# Returns the non-negative ID of the given context, or -1 if not found.
def getContext(host_id, platform_id, branch_id, sha1_id):
+ global contextIdCache
+ if not 'contextIdCache' in globals():
+ contextIdCache = {}
+
+ if (host_id, platform_id, branch_id, sha1_id) in contextIdCache:
+ return contextIdCache[host_id, platform_id, branch_id, sha1_id]
+
result = execQuery(
"SELECT id FROM context"
" WHERE hostId = %s"
@@ -72,9 +106,10 @@ def getContext(host_id, platform_id, branch_id, sha1_id):
" AND sha1Id = %s"
"LIMIT 1",
(host_id, platform_id, branch_id, sha1_id))
- if len(result):
- return result[0][0]
- return -1
+ result = result[0][0] if len(result) else -1
+ contextIdCache[host_id, platform_id, branch_id, sha1_id] = result
+
+ return result
# Returns the test case, test function, and data tag components of
@@ -100,32 +135,46 @@ def getTimestampFromContext(context_id):
# Finds snapshots that match a host/platform/branch combination and that
# lie within the range
-# [sha11, sha12] if sha12_id >= 0, or
-# [sha11, +inf) if sha12_ is < 0.
+# [sha11, sha12] if both sha11_id and sha12_id are >= 0, or
+# ( -inf, sha12] if only sha11_id is < 0, or
+# [sha11, +inf) if only sha12_id is < 0, or
+# ( -inf, +inf) if both sha11_id and sha12_id are < 0
+#
+# Returns a chronologically ordered n-tuple of 2-tuples:
# (sha1, first upload timestamp).
def getSnapshots(host_id, platform_id, branch_id, sha11_id, sha12_id):
- timestamp1 = execQuery(
+
+ timestamp1 = (execQuery(
"SELECT EXTRACT(EPOCH FROM timestamp)::INT FROM context"
" WHERE hostId = %s"
" AND platformId = %s"
" AND branchId = %s"
" AND sha1Id = %s",
(host_id, platform_id, branch_id, sha11_id))[0][0]
- if sha12_id >= 0:
- timestamp2 = execQuery(
- "SELECT EXTRACT(EPOCH FROM timestamp)::INT FROM context"
- " WHERE hostId = %s"
- " AND platformId = %s"
- " AND branchId = %s"
- " AND sha1Id = %s",
- (host_id, platform_id, branch_id, sha12_id))[0][0]
+ if (sha11_id >= 0) else -1)
+
+ timestamp2 = (execQuery(
+ "SELECT EXTRACT(EPOCH FROM timestamp)::INT FROM context"
+ " WHERE hostId = %s"
+ " AND platformId = %s"
+ " AND branchId = %s"
+ " AND sha1Id = %s",
+ (host_id, platform_id, branch_id, sha12_id))[0][0]
+ if (sha12_id >= 0) else -1)
+
+ prefix = "AND EXTRACT(EPOCH FROM timestamp)::INT"
+
+ if (timestamp1 == -1) and (timestamp2 == -1):
+ range_expr = ""
+ elif (timestamp1 >= 0) and (timestamp2 < 0):
+ range_expr = ("%s >= %d" % (prefix, timestamp1))
+ elif (timestamp1 < 0) and (timestamp2 >= 0):
+ range_expr = ("%s <= %d" % (prefix, timestamp2))
+ else:
# Ensure chronological order:
if timestamp1 > timestamp2:
timestamp1, timestamp2 = timestamp2, timestamp1
- range_expr = "BETWEEN %d AND %d" % (timestamp1, timestamp2)
- else:
- range_expr = ">= %d" % timestamp1
+ range_expr = ("%s BETWEEN %d AND %d" % (prefix, timestamp1, timestamp2))
# Each distinct SHA-1 that occurs for this host/platform/branch
# combination may occur multiple times with different upload times.
@@ -138,8 +187,7 @@ def getSnapshots(host_id, platform_id, branch_id, sha11_id, sha12_id):
" FROM context"
" WHERE hostId = %s"
" AND platformId = %s"
- " AND branchId = %s"
- " AND EXTRACT(EPOCH FROM timestamp)::INT " + range_expr +
+ " AND branchId = %s " + range_expr +
" ORDER BY timestamp ASC",
(host_id, platform_id, branch_id))
@@ -169,48 +217,8 @@ def getAllSnapshots(host_id, platform_id, branch_id, reverse = False):
return tuple(snapshots)
-# Returns the (SHA-1 ID, timestamp) pair associated with the most recent
-# rankings computed for the given host/platform/branch combination, or
-# (-1, -1) if no match is found.
-def getLastRankingSnapshot(host_id, platform_id, branch_id):
- result = execQuery(
- "SELECT matchingcontext.sha1id, EXTRACT(EPOCH FROM timestamp)::INT"
- " FROM ranking,"
- " (SELECT id, sha1Id, timestamp"
- " FROM context"
- " WHERE hostId = %s"
- " AND platformId = %s"
- " AND branchId = %s) AS matchingContext"
- " WHERE context2Id = matchingContext.id"
- " ORDER BY timestamp DESC LIMIT 1",
- (host_id, platform_id, branch_id))
- if len(result):
- return result[0]
- return -1, -1
-
-
-# For the given host/platform/branch combination, this function returns
-# all contexts for which rankings exist. The return value is a list of
-# (context ID, timestamp) pairs sorted in descending order on timestamp
-# (latest timestamp first).
-def getRankingContexts(host_id, platform_id, branch_id):
- result = execQuery(
- "SELECT DISTINCT matchingcontext.id,"
- " EXTRACT(EPOCH FROM timestamp)::INT AS etimestamp"
- " FROM ranking,"
- " (SELECT id, sha1Id, timestamp"
- " FROM context"
- " WHERE hostId = %s"
- " AND platformId = %s"
- " AND branchId = %s) AS matchingContext"
- " WHERE context2Id = matchingContext.id"
- " ORDER BY etimestamp DESC",
- (host_id, platform_id, branch_id))
- return result
-
-
-# Retrieves the time series of valid median results for the given
-# benchmark/metric combination. Only the part of the time series that
+# Retrieves the time series + additional stats of valid median results for
+# the given benchmark/metric combination. Only the part of the time series that
# is within the selected snapshot interval is considered.
#
# Returns a 7-tuple:
@@ -355,6 +363,61 @@ def getTimeSeries(
ms, lsd)
+# Retrieves the time series of valid median results for the given
+# benchmark/metric combination within the given contexts.
+# Only the part of the time series that is within the selected snapshot
+# interval is considered.
+#
+# Returns an n-tuple of 2-tuples:
+#
+# (
+# <corresponding index in the 'contexts' list>,
+# <median of valid observations or -1 if all obs. are invalid>
+# )
+#
+def getBasicTimeSeries(contexts, benchmark_id, metric_id):
+
+ # Fetch raw values:
+ assert len(contexts) > 0
+ raw_values = (execQuery(
+ "SELECT value, valid, contextId FROM result"
+ " WHERE contextId IN (%s" + ", %s"*(len(contexts) - 1) + ")" +
+ " AND benchmarkId = %s"
+ " AND metricId = %s"
+ " ORDER BY contextId",
+ tuple(contexts) + (benchmark_id, metric_id)) +
+ [(-1, -1, -1)]) # Note sentinel item
+
+ # Compute per-sample stats:
+ curr_context_id = -1
+ valid_and_positive_sample = []
+ median_obs_map = {}
+ # Loop over all observations (which are grouped on sample;
+ # note the 1-1 correspondence between samples and contexts):
+ for obs, valid, context_id in raw_values:
+ if context_id != curr_context_id:
+ # A new sample has been collected, so register it and
+ # prepare for the next one:
+ median_obs = stats.medianscore(valid_and_positive_sample) if (
+ len(valid_and_positive_sample) > 0) else -1
+ median_obs_map[curr_context_id] = median_obs
+ valid_and_positive_sample = []
+ curr_context_id = context_id
+ # Append a valid and positive observation to the current sample:
+ if valid and (obs > 0):
+ valid_and_positive_sample.append(obs)
+
+ # Order chronologically:
+ ts = []
+ index = 0
+ for context in contexts:
+ if context in median_obs_map:
+ ts.append((index, median_obs_map[context]))
+ index = index + 1
+
+ return ts
+
+
# Returns the factor by which val improves over base_val by taking the
# lower_is_better property into consideration.
# Example: base_val = 10 and val = 20 results in 0.5 if lower_is_better is true,
@@ -369,7 +432,7 @@ def metricAdjustedRatio(base_val, val, lower_is_better):
# Whether a change is significant depends on the difftol argument.
# Only positive values are considered.
#
-# The output is an n-tuple of 7-tuples, one per change:
+# The output is a list of 7-tuples, one per change:
#
# 1: Base index, i.e. the index in the time series that contains the base
# value used to compute the change.
@@ -545,6 +608,84 @@ def getChanges(time_series, lower_is_better, difftol, durtolmin, durtolmax):
return tuple(changes)
+# Extracts the (significant) changes of all time series in a given
+# host/platform/branch combination.
+#
+# The output is a list of 3-tuples, one per time series having at least one
+# change:
+#
+# 1: Benchmark ID.
+# 2: Metric ID.
+# 3: The changes as a list of 9-tuples. The first 7 elements correspond to
+# the output from getChanges() (documented elsewhere). Element 8
+# and 9 are the SHA1 ID and timestamp corresponding to the change.
+#
+def getAllChanges(
+ host_id, platform_id, branch_id, difftol, durtolmin, durtolmax,
+ progress_func = None, progress_arg = None):
+
+ if progress_func != None:
+ progress_func(0.0, progress_arg)
+
+ # Get all snapshots matching the host/platform/branch combination:
+ snapshots = getAllSnapshots(host_id, platform_id, branch_id)
+
+ # Get the list of contexts that corresponds to these snapshots in this
+ # host/platform/branch combination:
+ contexts = []
+ for sha1_id, timestamp in snapshots:
+ contexts.append(getContext(host_id, platform_id, branch_id, sha1_id))
+
+ if len(contexts) == 0:
+ return []
+
+ # Get all distinct benchmark/metric combinations that match the
+ # host/platform/branch context and are within the selected snapshot
+ # interval. Each such combination corresponds to a time series.
+ tseries_list = execQuery(
+ "SELECT DISTINCT benchmarkId, metricId FROM result"
+ " WHERE contextId IN (%s" + ", %s"*(len(contexts) - 1) + ")",
+# " WHERE contextId IN (%s" + ", %s"*(len(contexts) - 1) + ") LIMIT 10",
+ contexts)
+
+ changes = []
+
+ # Loop over time series:
+ if progress_func != None:
+ i = 0
+ for benchmark_id, metric_id in tseries_list:
+
+ # Get the time series (without extra stats):
+ time_series = getBasicTimeSeries(contexts, benchmark_id, metric_id)
+
+ # Extract the significant changes:
+ basic_tschanges = getChanges(
+ time_series, metricIdToLowerIsBetter(metric_id), difftol,
+ durtolmin, durtolmax)
+ # ... add the SHA-1 and timestamp to each item:
+ tschanges = []
+ for change in basic_tschanges:
+ index = time_series[change[1]][0]
+ sha1_id = snapshots[index][0]
+ timestamp = snapshots[index][1]
+ tschanges.append(change + (sha1_id, timestamp))
+
+ if len(tschanges) > 0:
+ changes.append((benchmark_id, metric_id, tschanges))
+
+ if progress_func != None:
+ i = i + 1
+ divisor = len(tseries_list) // 100 # report at most 100 times
+ if (divisor > 0) and ((i % divisor) == 0):
+ perc_done = (i / float(len(tseries_list))) * 100.0
+ progress_func(perc_done, progress_arg)
+
+ if progress_func != None:
+ progress_func(100.0, progress_arg)
+
+ return changes
+
+
# ### 2 B DOCUMENTED!
def getTimeSeriesMiscStats(time_series, changes, snapshots, stats):
if len(changes) > 0:
@@ -654,6 +795,170 @@ def getBMTimeSeriesStatsList(
return tuple(bmstats_list)
+# Returns the score for the most recent change in 'changes'.
+# If 'regression' is true, only regressions are considered, and otherwise
+# only improvements.
+# If 'premature' is true, the premature score (not considering post-change
+# durability) is returned instead of the regular score.
+#
+# Returns -1 if no score is found.
+#
+
+
+# OBSOLETE ???
+
+
+def getLastChangeScore(changes, regression, premature):
+ pass
+
+
+# Returns the highest score for any change in a certain time interval.
+#
+# The time interval is defined like this: <2 B DONE!>
+#
+# If 'regression' is true, only regressions are considered, and otherwise
+# only improvements.
+# If 'premature' is true, the premature score (not considering post-change
+# durability) is returned instead of the regular score.
+#
+# Returns -1 if no score is found.
+#
+
+
+#
+# OBSOLETE ???
+
+
+def getHighestChangeScore(changes, regression, premature, days):
+ pass
+
+
+# Computes change scores for each time series (benchmark/metric combination)
+# of the given host/platform/branch combination.
+#
+# Change types:
+# - most recent change (i.e. significant regression or improvement)
+# - strongest change last n days, for n in {7, 30, 180, and -1 (infinite)}
+# ... and all of these with and without inclusion of premature changes.
+#
+# ADD MORE DOCS HERE ... 2 B DONE!
+#
+
+
+# OBSOLETE ???
+
+
+# def getChangeScores(
+# host_id, platform_id, branch_id, snapshots, test_case_filter,
+# difftol, durtolmin, durtolmax, progress_func = None, progress_arg = None):
+
+# if progress_func != None:
+# progress_func(0.0, progress_arg)
+
+# contexts = []
+# for sha1_id, timestamp in snapshots:
+# contexts.append(getContext(host_id, platform_id, branch_id, sha1_id))
+
+# # Get all distinct benchmark/metric combinations that match the
+# # host/platform/branch context and are within the selected snapshot
+# # interval. Each such combination corresponds to a time series.
+# assert len(contexts) > 0
+# tseries = execQuery(
+# "SELECT DISTINCT benchmarkId, metricId FROM result"
+# " WHERE contextId IN (%s" + ", %s"*(len(contexts) - 1) + ")",
+# contexts)
+
+# scores = []
+
+# # Loop over time series:
+# if progress_func != None:
+# i = 0
+# #for benchmark_id, metric_id in tseries[800:810]:
+# for benchmark_id, metric_id in tseries:
+
+# benchmark = idToText("benchmark", benchmark_id)
+# #if benchmark != "tst_qmetaobject:indexOfMethod(_q_columnsAboutToBeRemoved(QModelIndex,int,int))":
+# # continue
+
+# test_case, test_function, data_tag = (
+# benchmarkToComponents(benchmark))
+
+# # Skip this time series if it doesn't match the test case filter:
+# if ((test_case_filter != None)
+# and (not test_case in test_case_filter)):
+# continue
+
+# # Get the time series (without extra stats):
+# time_series = getBasicTimeSeries(
+# host_id, platform_id, branch_id, snapshots, benchmark_id, metric_id)
+
+# # Extract the significant changes:
+# changes = getChanges(
+# time_series, metricIdToLowerIsBetter(metric_id), difftol,
+# durtolmin, durtolmax)
+
+# tsscores = {}
+
+# tsscores["benchmark_id"] = benchmark_id
+# tsscores["metric_id"] = metric_id
+
+# tsscores["regr_last"] = getLastChangeScore(changes, True, False)
+# tsscores["regr_last_pmt"] = getLastChangeScore(changes, True, True)
+# tsscores["impr_last"] = getLastChangeScore(changes, False, False)
+# tsscores["impr_last_pmt"] = getLastChangeScore(changes, False, True)
+
+# tsscores["regr_7"] = getHighestChangeScore(
+# changes, True, False, 7)
+# tsscores["regr_7_pmt"] = getHighestChangeScore(
+# changes, True, True, 7)
+# tsscores["impr_7"] = getHighestChangeScore(
+# changes, False, False, 7)
+# tsscores["impr_7_pmt"] = getHighestChangeScore(
+# changes, False, True, 7)
+
+# tsscores["regr_30"] = getHighestChangeScore(
+# changes, True, False, 30)
+# tsscores["regr_30_pmt"] = getHighestChangeScore(
+# changes, True, True, 30)
+# tsscores["impr_30"] = getHighestChangeScore(
+# changes, False, False, 30)
+# tsscores["impr_30_pmt"] = getHighestChangeScore(
+# changes, False, True, 30)
+
+# tsscores["regr_180"] = getHighestChangeScore(
+# changes, True, False, 180)
+# tsscores["regr_180_pmt"] = getHighestChangeScore(
+# changes, True, True, 180)
+# tsscores["impr_180"] = getHighestChangeScore(
+# changes, False, False, 180)
+# tsscores["impr_180_pmt"] = getHighestChangeScore(
+# changes, False, True, 180)
+
+# tsscores["regr_all"] = getHighestChangeScore(
+# changes, True, False, -1)
+# tsscores["regr_all_pmt"] = getHighestChangeScore(
+# changes, True, True, -1)
+# tsscores["impr_all"] = getHighestChangeScore(
+# changes, False, False, -1)
+# tsscores["impr_all_pmt"] = getHighestChangeScore(
+# changes, False, True, -1)
+
+# scores.append(tsscores)
+
+# if progress_func != None:
+# i = i + 1
+# divisor = len(tseries) // 100 # report at most 100 times
+# if (divisor > 0) and ((i % divisor) == 0):
+# perc_done = (i / float(len(tseries))) * 100.0
+# progress_func(perc_done, progress_arg)
+
+# if progress_func != None:
+# progress_func(100.0, progress_arg)
+
+# return scores
+
+
+
# Returns True iff s is a valid SHA-1 string.
def isValidSHA1(s):
def containsOnlyHexDigits(s):