diff options
Diffstat (limited to 'chromium/third_party/skia/tools/compare_codereview.py')
-rwxr-xr-x | chromium/third_party/skia/tools/compare_codereview.py | 414 |
1 files changed, 414 insertions, 0 deletions
diff --git a/chromium/third_party/skia/tools/compare_codereview.py b/chromium/third_party/skia/tools/compare_codereview.py new file mode 100755 index 00000000000..a58b3c697bf --- /dev/null +++ b/chromium/third_party/skia/tools/compare_codereview.py @@ -0,0 +1,414 @@ +#!/usr/bin/python2 + +# Copyright 2014 Google Inc. +# +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Skia's Chromium Codereview Comparison Script. + +This script takes two Codereview URLs, looks at the trybot results for +the two codereviews and compares the results. + +Usage: + compare_codereview.py CONTROL_URL ROLL_URL +""" + +import collections +import os +import re +import sys +import urllib2 +import HTMLParser + + +class CodeReviewHTMLParser(HTMLParser.HTMLParser): + """Parses CodeReview web page. + + Use the CodeReviewHTMLParser.parse static function to make use of + this class. + + This uses the HTMLParser class because it's the best thing in + Python's standard library. We need a little more power than a + regex. [Search for "You can't parse [X]HTML with regex." for more + information. + """ + # pylint: disable=I0011,R0904 + @staticmethod + def parse(url): + """Parses a CodeReview web pages. + + Args: + url (string), a codereview URL like this: + 'https://codereview.chromium.org/?????????'. + + Returns: + A dictionary; the keys are bot_name strings, the values + are CodeReviewHTMLParser.Status objects + """ + parser = CodeReviewHTMLParser() + try: + parser.feed(urllib2.urlopen(url).read()) + except (urllib2.URLError,): + print >> sys.stderr, 'Error getting', url + return None + parser.close() + return parser.statuses + + # namedtuples are like lightweight structs in Python. The low + # overhead of a tuple, but the ease of use of an object. + Status = collections.namedtuple('Status', ['status', 'url']) + + def __init__(self): + HTMLParser.HTMLParser.__init__(self) + self._id = None + self._status = None + self._href = None + self._anchor_data = '' + self._currently_parsing_trybotdiv = False + # statuses is a dictionary of CodeReviewHTMLParser.Status + self.statuses = {} + + def handle_starttag(self, tag, attrs): + """Overrides the HTMLParser method to implement functionality. + + [[begin standard library documentation]] + This method is called to handle the start of a tag + (e.g. <div id="main">). + + The tag argument is the name of the tag converted to lower + case. The attrs argument is a list of (name, value) pairs + containing the attributes found inside the tag's <> + brackets. The name will be translated to lower case, and + quotes in the value have been removed, and character and + entity references have been replaced. + + For instance, for the tag <A HREF="http://www.cwi.nl/">, this + method would be called as handle_starttag('a', [('href', + 'http://www.cwi.nl/')]). + [[end standard library documentation]] + """ + attrs = dict(attrs) + if tag == 'div': + # We are looking for <div id="tryjobdiv*">. + id_attr = attrs.get('id','') + if id_attr.startswith('tryjobdiv'): + self._id = id_attr + if (self._id and tag == 'a' + and 'build-result' in attrs.get('class', '').split()): + # If we are already inside a <div id="tryjobdiv*">, we + # look for a link if the form + # <a class="build-result" href="*">. Then we save the + # (non-standard) status attribute and the URL. + self._status = attrs.get('status') + self._href = attrs.get('href') + self._currently_parsing_trybotdiv = True + # Start saving anchor data. + + def handle_data(self, data): + """Overrides the HTMLParser method to implement functionality. + + [[begin standard library documentation]] + This method is called to process arbitrary data (e.g. text + nodes and the content of <script>...</script> and + <style>...</style>). + [[end standard library documentation]] + """ + # Save the text inside the <a></a> tags. Assume <a> tags + # aren't nested. + if self._currently_parsing_trybotdiv: + self._anchor_data += data + + def handle_endtag(self, tag): + """Overrides the HTMLParser method to implement functionality. + + [[begin standard library documentation]] + This method is called to handle the end tag of an element + (e.g. </div>). The tag argument is the name of the tag + converted to lower case. + [[end standard library documentation]] + """ + if tag == 'a' and self._status: + # We take the accumulated self._anchor_data and save it as + # the bot name. + bot = self._anchor_data.strip() + stat = CodeReviewHTMLParser.Status(status=self._status, + url=self._href) + if bot: + # Add to accumulating dictionary. + self.statuses[bot] = stat + # Reset state to search for the next bot. + self._currently_parsing_trybotdiv = False + self._anchor_data = '' + self._status = None + self._href = None + + +class BuilderHTMLParser(HTMLParser.HTMLParser): + """parses Trybot web pages. + + Use the BuilderHTMLParser.parse static function to make use of + this class. + + This uses the HTMLParser class because it's the best thing in + Python's standard library. We need a little more power than a + regex. [Search for "You can't parse [X]HTML with regex." for more + information. + """ + # pylint: disable=I0011,R0904 + @staticmethod + def parse(url): + """Parses a Trybot web page. + + Args: + url (string), a trybot result URL. + + Returns: + An array of BuilderHTMLParser.Results, each a description + of failure results, along with an optional url + """ + parser = BuilderHTMLParser() + try: + parser.feed(urllib2.urlopen(url).read()) + except (urllib2.URLError,): + print >> sys.stderr, 'Error getting', url + return [] + parser.close() + return parser.failure_results + + Result = collections.namedtuple('Result', ['text', 'url']) + + def __init__(self): + HTMLParser.HTMLParser.__init__(self) + self.failure_results = [] + self._current_failure_result = None + self._divlevel = None + self._li_level = 0 + self._li_data = '' + self._current_failure = False + self._failure_results_url = '' + + def handle_starttag(self, tag, attrs): + """Overrides the HTMLParser method to implement functionality. + + [[begin standard library documentation]] + This method is called to handle the start of a tag + (e.g. <div id="main">). + + The tag argument is the name of the tag converted to lower + case. The attrs argument is a list of (name, value) pairs + containing the attributes found inside the tag's <> + brackets. The name will be translated to lower case, and + quotes in the value have been removed, and character and + entity references have been replaced. + + For instance, for the tag <A HREF="http://www.cwi.nl/">, this + method would be called as handle_starttag('a', [('href', + 'http://www.cwi.nl/')]). + [[end standard library documentation]] + """ + attrs = dict(attrs) + if tag == 'li': + # <li> tags can be nested. So we have to count the + # nest-level for backing out. + self._li_level += 1 + return + if tag == 'div' and attrs.get('class') == 'failure result': + # We care about this sort of thing: + # <li> + # <li> + # <li> + # <div class="failure result">...</div> + # </li> + # </li> + # We want this text here. + # </li> + if self._li_level > 0: + self._current_failure = True # Tells us to keep text. + return + + if tag == 'a' and self._current_failure: + href = attrs.get('href') + # Sometimes we want to keep the stdio url. We always + # return it, just in case. + if href.endswith('/logs/stdio'): + self._failure_results_url = href + + def handle_data(self, data): + """Overrides the HTMLParser method to implement functionality. + + [[begin standard library documentation]] + This method is called to process arbitrary data (e.g. text + nodes and the content of <script>...</script> and + <style>...</style>). + [[end standard library documentation]] + """ + if self._current_failure: + self._li_data += data + + def handle_endtag(self, tag): + """Overrides the HTMLParser method to implement functionality. + + [[begin standard library documentation]] + This method is called to handle the end tag of an element + (e.g. </div>). The tag argument is the name of the tag + converted to lower case. + [[end standard library documentation]] + """ + if tag == 'li': + self._li_level -= 1 + if 0 == self._li_level: + if self._current_failure: + result = self._li_data.strip() + first = result.split()[0] + if first: + result = re.sub( + r'^%s(\s+%s)+' % (first, first), first, result) + # Sometimes, it repeats the same thing + # multiple times. + result = re.sub(r'unexpected flaky.*', '', result) + # Remove some extra unnecessary text. + result = re.sub(r'\bpreamble\b', '', result) + result = re.sub(r'\bstdio\b', '', result) + url = self._failure_results_url + self.failure_results.append( + BuilderHTMLParser.Result(result, url)) + self._current_failure_result = None + # Reset the state. + self._current_failure = False + self._li_data = '' + self._failure_results_url = '' + + +def printer(indent, string): + """Print indented, wrapped text. + """ + def wrap_to(line, columns): + """Wrap a line to the given number of columns, return a list + of strings. + """ + ret = [] + nextline = '' + for word in line.split(): + if nextline: + if len(nextline) + 1 + len(word) > columns: + ret.append(nextline) + nextline = word + else: + nextline += (' ' + word) + else: + nextline = word + if nextline: + ret.append(nextline) + return ret + out = sys.stdout + spacer = ' ' + for line in string.split('\n'): + for i, wrapped_line in enumerate(wrap_to(line, 68 - (2 * indent))): + out.write(spacer * indent) + if i > 0: + out.write(spacer) + out.write(wrapped_line) + out.write('\n') + out.flush() + + +def main(control_url, roll_url, verbosity=1): + """Compare two Codereview URLs + + Args: + control_url, roll_url: (strings) URL of the format + https://codereview.chromium.org/????????? + + verbosity: (int) verbose level. 0, 1, or 2. + """ + # pylint: disable=I0011,R0914,R0912 + control = CodeReviewHTMLParser.parse(control_url) + roll = CodeReviewHTMLParser.parse(roll_url) + all_bots = set(control) & set(roll) # Set intersection. + if not all_bots: + print >> sys.stderr, ( + 'Error: control %s and roll %s have no common trybots.' + % (list(control), list(roll))) + return + + control_name = '[control %s]' % control_url.split('/')[-1] + roll_name = '[roll %s]' % roll_url.split('/')[-1] + + out = sys.stdout + + for bot in sorted(all_bots): + if (roll[bot].status == 'success'): + if verbosity > 1: + printer(0, '==%s==' % bot) + printer(1, 'OK') + continue + + if control[bot].status != 'failure' and roll[bot].status != 'failure': + continue + printer(0, '==%s==' % bot) + + formatted_results = [] + for (status, name, url) in [ + (control[bot].status, control_name, control[bot].url), + ( roll[bot].status, roll_name, roll[bot].url)]: + lines = [] + if status == 'failure': + results = BuilderHTMLParser.parse(url) + for result in results: + formatted_result = re.sub(r'(\S*\.html) ', '\n__\g<1>\n', result.text) + # Strip runtimes. + formatted_result = re.sub(r'\(.*\)', '', formatted_result) + lines.append((2, formatted_result)) + if ('compile' in result.text or '...and more' in result.text): + lines.append((3, re.sub('/[^/]*$', '/', url) + result.url)) + formatted_results.append(lines) + + identical = formatted_results[0] == formatted_results[1] + + + for (formatted_result, (status, name, url)) in zip( + formatted_results, + [(control[bot].status, control_name, control[bot].url), + (roll[bot].status, roll_name, roll[bot].url)]): + if status != 'failure' and not identical: + printer(1, name) + printer(2, status) + elif status == 'failure': + if identical: + printer(1, control_name + ' and ' + roll_name + ' failed identically') + else: + printer(1, name) + for (indent, line) in formatted_result: + printer(indent, line) + if identical: + break + out.write('\n') + + if verbosity > 0: + # Print out summary of all of the bots. + out.write('%11s %11s %4s %s\n\n' % + ('CONTROL', 'ROLL', 'DIFF', 'BOT')) + for bot in sorted(all_bots): + if roll[bot].status == 'success': + diff = '' + elif (control[bot].status == 'success' and + roll[bot].status == 'failure'): + diff = '!!!!' + elif ('pending' in control[bot].status or + 'pending' in roll[bot].status): + diff = '....' + else: + diff = '****' + out.write('%11s %11s %4s %s\n' % ( + control[bot].status, roll[bot].status, diff, bot)) + out.write('\n') + out.flush() + +if __name__ == '__main__': + if len(sys.argv) < 3: + print >> sys.stderr, __doc__ + exit(1) + main(sys.argv[1], sys.argv[2], + int(os.environ.get('COMPARE_CODEREVIEW_VERBOSITY', 1))) + |