Added Top Changes page to replace ranking feature.

This commit introduces the Top Changes page, which retrieves the 'top 10' changes for every host/platform/branch combination from a new 'change' database table. The script that updates this table for a given host/platform/branch combination is run automatically after a new set of results has been uploaded, since that is when new changes may arise. This commit also removes the ranking feature, which the new page makes (more or less) obsolete.
author: jasplin <qt-info@nokia.com> 2011-05-26 15:07:14 +0200
committer: jasplin <qt-info@nokia.com> 2011-05-26 15:07:14 +0200
commit: cac600fc0a2069e34036c5112ba4cfb8483bb559 (patch)
tree: 8fba1a4e47113b902582cc6c5f03af909b6e9e73 /scripts/computerankings.py
parent: eae8ff839296559a637ff100d7585562d68b5cb1 (diff)
1 files changed, 0 insertions, 359 deletions
diff --git a/scripts/computerankings.py b/scripts/computerankings.py
deleted file mode 100755
index 8ad1fd3..0000000
--- a/scripts/computerankings.py
+++ /dev/null
@@ -1,359 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-from dbaccess import setDatabase, execQuery, commit
-from misc import (
-    getOptions, textToId, getAllSnapshots, getLastRankingSnapshot, getContext,
-    isValidSHA1, getBMTimeSeriesStatsList)
-
-
-# --- BEGIN Global functions ----------------------------------------------
-
-def printUsage():
-    sys.stderr.write(
-        "usage: " + sys.argv[0] +
-        " --help | [--dbhost H] [--dbport P] --db D --host H --platform P "
-        "--branch B --sha1 S [--noprogress NP]\n")
-
-def printVerboseUsage():
-    printUsage()
-    sys.stderr.write("\noptions:\n")
-    sys.stderr.write(
-        "    --help: This help.\n")
-    sys.stderr.write(
-        "  --dbhost: The database server host (overriding the default).\n")
-    sys.stderr.write(
-        "  --dbport: The database server port (overriding the default).\n")
-    sys.stderr.write(
-        "      --db: The database. One of 'bm' or 'bm-dev' (the latter "
-        "intended for experimentation).\n")
-    sys.stderr.write(
-        "    --host: The physical machine on which the results were "
-        "produced (e.g. barbarella or 172.24.90.79).\n")
-    sys.stderr.write(
-        "--platform: The OS/compiler/architecture combination "
-        "(e.g. linux-g++-32).\n")
-    sys.stderr.write(
-        "  --branch: The product branch (e.g. 'qt 4.6', 'qt 4.7', or "
-        "'qt master').\n")
-    sys.stderr.write(
-        "    --sha1: The tested revision within the branch. Can be "
-        "extracted using 'git log -1 --pretty=format:%H' (assuming the "
-        "tested revision is the current head revision).\n")
-    sys.stderr.write(
-        "  --noprogress: Specify \'true\' to disable progress indicator.\n")
-
-
-# ### 2 B DOCUMENTED!
-def printProgress(p, lead):
-    sys.stdout.write(lead + " ... (" + "{0:.2f}".format(p) + " %)\r")
-    sys.stdout.flush()
-
-
-# ### 2 B DOCUMENTED!
-# NOTE: This function is currently duplicated elsewhere in JavaScipt!
-def changeMagnitudeScore(change):
-    max_change = 2.0
-    abs_change = (1.0 / change) if change < 1 else change
-    return (min(abs_change, max_change) - 1.0) / (max_change - 1.0)
-
-
-# ### 2 B DOCUMENTED!
-# NOTE: This function is currently duplicated elsewhere in JavaScript!
-def qualityScore(lsd, ni, nz, nc, mdrse):
-    max_bad_snapshots = 10 # experimental; maybe use max durability score?
-    max_sample_size = 5;
-    max_LSD = max_bad_snapshots;
-    max_NI = max_bad_snapshots * max_sample_size;
-    max_NZ = max_bad_snapshots * max_sample_size;
-    max_NC = max_bad_snapshots;
-
-    lsd_score = 0 if (lsd == -1) else min(1, lsd / float(max_LSD));
-    ni_score = min(1, ni / float(max_NI));
-    nz_score = min(1, nz / float(max_NZ));
-    nc_score = min(1, nc / float(max_NC));
-    mdrse_score = 0 if (mdrse == -1) else (mdrse / 100.0);
-
-    return (lsd_score + ni_score + nz_score + nc_score + mdrse_score) / 5.0;
-
-
-# Registers the ranking for a given statistic. context1_id and context2_id
-# refer to the first and last snapshot respectively in the interval used for
-# computing the rankings.
-# Assumptions:
-# - A high value should be ranked above a small one.
-# - A negative value is undefined and gets an invalid ranking position, i.e. -1.
-def registerRanking(table, stat_index, stat_name, context1_id, context2_id):
-
-    table.sort(key=lambda x: x[stat_index], reverse=True)
-
-    stat_id = textToId("rankingStat", stat_name)
-    assert stat_id >= 0
-
-    row_pos = 0
-    ranking_pos = 0
-    for row in table:
-        benchmark_id = row[0]
-        metric_id = row[1]
-        lc_timestamp = row[2]
-        stat_value = row[stat_index]
-
-        # The following statement ensures the following conditions:
-        # - A negative value gets an invalid ranking position, i.e. -1
-        # - Equal values get the same ranking position.
-        # - The ranking position of benchmark B indicates the number of
-        #   benchmarks ranked higher than B (i.e. having a smaller ranking
-        #   position).
-        if stat_value < 0:
-            ranking_pos = -1
-            # Note that the remaining values will now be negative, so updating
-            # row_pos and prev_stat_value is no longer necessary!
-        else:
-            if (row_pos > 0) and (stat_value != prev_stat_value):
-                ranking_pos = row_pos
-            row_pos = row_pos + 1
-            prev_stat_value = stat_value
-
-        # Insert or update the corresponding row in the 'ranking' table:
-        execQuery(
-            "SELECT merge_ranking(%s, %s, %s, %s, %s, %s, %s, %s)",
-            (context1_id, context2_id, benchmark_id, metric_id,
-             lc_timestamp, stat_id, stat_value, ranking_pos),
-            False)
-
-
-# ### 2 B DOCUMENTED!
-def getAllRankingStats(bmstats_list):
-    table = []
-    for stats in bmstats_list:
-
-        # NOTE:
-        # - All of the ranking statistics are of type "higher is better"
-        #   (a high value is ranked better than a low value).
-        # - Moreover, all present/defined values are non-negative.
-        # - This means that representing absent/undefined values as -1 is ok,
-        #   since this ensures lowest ranking.
-
-        benchmark_id = stats["benchmark_id"]
-        metric_id = stats["metric_id"]
-        lc_timestamp = stats["lc_timestamp"]
-        lsd = stats["lsd"]
-        ni = stats["ni"]
-        nz = stats["nz"]
-        nc = stats["nc"]
-        mdrse = stats["med_of_rses"]
-        rsemd = stats["rse_of_meds"]
-
-        qs = qualityScore(lsd, ni, nz, nc, mdrse)
-
-        lc = stats["lc"]
-        if lc >= 0.0:
-            lcgss = stats["lc_gsep_score"]
-            lclss = stats["lc_lsep_score"]
-            lcds1 = stats["lc_dur1_score"]
-            lcds2 = stats["lc_dur2_score"]
-            lcms = changeMagnitudeScore(lc)
-            lcss1 = lcms * lcgss * lclss * lcds1
-            lcss  = lcss1 * lcds2
-            if lc < 1.0:
-                lcssr = lcss
-                lcss1r = lcss1
-                lcssi = lcss1i = -1
-            else:
-                lcssi = lcss
-                lcss1i = lcss1
-                lcssr = lcss1r = -1
-        else:
-            lcssr = lcssi = lcss1r = lcss1i = -1
-
-        table.append(
-            (benchmark_id, metric_id, lc_timestamp, qs, lcssr, lcssi, lcss1r,
-             lcss1i))
-
-    return table
-
-
-# ### 2 B DOCUMENTED!
-def getFirstUploadTimestamp(snapshots, sha1_id):
-    try:
-        return snapshots[zip(*snapshots)[0].index(sha1_id)][1]
-    except ValueError:
-        return -1
-
-
-# ### 2 B DOCUMENTED!
-def updateRankings(
-    host_id, platform_id, branch_id, sha12_id, context2_id, no_progress):
-
-    # Get all snapshots matching the host/platform/branch combination:
-    sys.stdout.write("getting snapshots ... ")
-    sys.stdout.flush()
-    snapshots = getAllSnapshots(host_id, platform_id, branch_id)
-    sys.stdout.write("done\n")
-    sys.stdout.flush()
-
-
-    # Rankings will normally be computed once a day for each
-    # host/platform/branch combination (note the tradeoff between update
-    # frequency and database size):
-    ranking_interval = 3600 * 24 # secs in a day
-
-    # Rankings will be updated if at least one of the following
-    # conditions eventually becomes True:
-    force_cond = empty_cond = interval_cond = False
-
-    force_ranking = False
-    #force_ranking = True # Uncomment for testing
-
-    force_cond = force_ranking
-
-    if not force_cond:
-        last_ranking_sha1_id, last_ranking_timestamp = getLastRankingSnapshot(
-            host_id, platform_id, branch_id)
-        empty_cond = last_ranking_sha1_id < 0
-        if not empty_cond:
-            assert last_ranking_timestamp >= 0
-
-            target_timestamp = getFirstUploadTimestamp(snapshots, sha12_id)
-            if target_timestamp < 0:
-                sys.stderr.write(
-                "error: failed to extract target_timestamp "
-                "(error in command-line args?)\n")
-                sys.exit(1)
-
-            interval_cond = (
-                (target_timestamp - last_ranking_timestamp) > ranking_interval)
-
-    if not (force_cond or empty_cond or interval_cond):
-        sys.stdout.write(
-            "not updating rankings ('force', 'empty', and 'interval' "
-            "conditions all failed)\n")
-        return
-
-    sys.stdout.write(
-        "updating rankings ('force' cond.: " + str(force_cond) +
-        "; 'empty' cond.: " + str(empty_cond) +
-        "; 'interval' cond.: " + str(interval_cond) + ") ...\n")
-
-    # For simplicity we hardcode the tolerances for now:
-    difftol = 1.1
-    durtolmin = 3
-    durtolmax = 10
-
-    # Determine the target snapshot range:
-    # (The range should end at the snapshot given on the command-line and begin
-    # at the snapshot that is 2 * durtolmax snapshots back in time, or, if no
-    # such snapshot exists, the first available snapshot.)
-    try:
-        sha12_pos = zip(*snapshots)[0].index(sha12_id)
-    except ValueError:
-        sys.stderr.write(
-            "no observations found for SHA-1 ID: " + str(sha12_id) + "\n")
-        sys.exit(1)
-    sha11_pos = max(0, (sha12_pos - 2 * durtolmax) + 1)
-    snapshots = snapshots[sha11_pos:(sha12_pos + 1)]
-    if len(snapshots) < 2:
-        sys.stderr.write(
-            "no observations found before SHA-1 ID: " + str(sha12_id) +
-            " (computing rankings makes no sense)\n")
-        sys.exit(1)
-
-    # Get time series statistics for all benchmarks:
-    if no_progress:
-        sys.stdout.write("getting time series statistics ... ")
-    bmstats_list = getBMTimeSeriesStatsList(
-        host_id, platform_id, branch_id, snapshots, None, difftol, durtolmin,
-        durtolmax, None if no_progress else printProgress,
-        "getting time series statistics")
-
-    if no_progress:
-        sys.stdout.write("done\n")
-    else:
-        sys.stdout.write("\n")
-
-
-    # *** Compute rankings **************************************************
-
-    # Step 1: Create a table containing all ranking statistics (one row per
-    #         benchmark/metric):
-    sys.stdout.write("creating table for all ranking stats ... ")
-    sys.stdout.flush()
-    table = getAllRankingStats(bmstats_list)
-    sys.stdout.write("done\n")
-    sys.stdout.flush()
-
-    # Step 2: Sort the table individually for each ranking statistic and
-    # register the ranking positions in the database:
-    context1_id = getContext(host_id, platform_id, branch_id, snapshots[0][0])
-    if context1_id == -1:
-        sys.stderr.write("error: failed to find context for start snapshot\n")
-        sys.exit(1)
-    nameToIndex = { "QS": 3, "LCSSR": 4, "LCSSI": 5, "LCSS1R": 6, "LCSS1I": 7 }
-    for name in nameToIndex:
-        sys.stdout.write("registering ranking for " + name + " ... ")
-        sys.stdout.flush()
-        registerRanking(
-            table, nameToIndex[name], name, context1_id, context2_id)
-        sys.stdout.write("done\n")
-        sys.stdout.flush()
-
-# --- END Global functions ----------------------------------------------
-
-
-# --- BEGIN Main program ----------------------------------------------
-
-options, http_get = getOptions()
-
-if "help" in options:
-    printVerboseUsage()
-    sys.exit(1)
-
-if (not ("db" in options and "host" in options and "platform" in options and
-    "branch" in options and "sha1" in options)):
-    printUsage()
-    sys.exit(1)
-
-if not isValidSHA1(options["sha1"]):
-    sys.stderr.write("error: invalid SHA-1: " + options["sha1"] + "\n")
-    sys.exit(1)
-
-setDatabase(
-    options["dbhost"] if "dbhost" in options else None,
-    options["dbport"] if "dbport" in options else None,
-    options["db"])
-
-host_id = textToId("host", options["host"])
-if host_id == -1:
-    sys.stderr.write("error: no such host: " + options["host"] + "\n")
-    sys.exit(1)
-platform_id = textToId("platform", options["platform"])
-if platform_id == -1:
-    sys.stderr.write("error: no such platform: " + options["platform"] + "\n")
-    sys.exit(1)
-branch_id = textToId("branch", options["branch"])
-if branch_id == -1:
-    sys.stderr.write("error: no such branch:" + options["branch"] + "\n")
-    sys.exit(1)
-sha12_id = textToId("sha1", options["sha1"])
-if sha12_id == -1:
-    sys.stderr.write("error: no such SHA-1:" + options["sha1"] + "\n")
-    sys.exit(1)
-
-context2_id = getContext(host_id, platform_id, branch_id, sha12_id)
-if context2_id == -1:
-    sys.stderr.write("error: no results found for this context\n")
-    sys.exit(1)
-
-updateRankings(
-    host_id, platform_id, branch_id, sha12_id, context2_id,
-    ("noprogress" in options) and (
-        (options["noprogress"] == "1")
-        or (options["noprogress"].lower() == "true")))
-
-# Write to database:
-commit()
-
-sys.stdout.write("rankings computation done\n")
-sys.exit(0)
-
-# --- END Main program ----------------------------------------------
author	jasplin <qt-info@nokia.com>	2011-05-26 15:07:14 +0200
committer	jasplin <qt-info@nokia.com>	2011-05-26 15:07:14 +0200
commit	cac600fc0a2069e34036c5112ba4cfb8483bb559 (patch)
tree	8fba1a4e47113b902582cc6c5f03af909b6e9e73 /scripts/computerankings.py
parent	eae8ff839296559a637ff100d7585562d68b5cb1 (diff)