# Copyright (C) 2017 The Qt Company Ltd. # SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 package QtQA::App::TestRunner::Plugin::flaky; use strict; use warnings; use Carp; use Getopt::Long qw(GetOptionsFromArray); use List::Util qw(max); use Readonly; # different flaky modes. # WORST: always take the worst result # (i.e. fail if an autotest fails at least once) Readonly my $WORST => 'worst'; # BEST: always take the best result # (i.e. pass if an autotest passes at least once) Readonly my $BEST => 'best'; # IGNORE: ignore result # (i.e. pass if the autotest is unstable, regardless of # the pass/fail result of the autotest) Readonly my $IGNORE => 'ignore'; Readonly my %FLAKY_MODES => ( $WORST => 1, $BEST => 1, $IGNORE => 1, ); sub new { my ($class, %args) = @_; $args{ attempt } = 1; # `WORST' is essentially equal to operating only in an advisory mode, # so it is the safest default. my $mode = $WORST; GetOptionsFromArray( $args{ argv }, 'flaky-mode=s' => \$mode, ) || pod2usage(1); if (!$FLAKY_MODES{ $mode }) { die "`$mode' is not a valid --flaky-mode; try one of ".join(q{,}, keys %FLAKY_MODES); } $args{ mode } = $mode; return bless \%args, $class; } sub run_completed { my ($self) = @_; my $testrunner = $self->{ testrunner }; my $proc = $testrunner->proc( ); my $status = $proc->status( ); if ($self->{ attempt } == 1) { # First try, test has failed ... if ($status) { ++$self->{ attempt }; $self->{ first_attempt_status } = $status; $testrunner->print_info( "test failed, running again to see if it is flaky...\n" ); return { retry => 1 }; } # First try, test has succeeded ... return; } # Second try, test gave same results both times... if ($status == $self->{ first_attempt_status }) { $testrunner->print_info( "test failure could be reproduced twice consecutively\n" ); return; } # Second try, test gave different results each time. return $self->handle_flaky_test( $self->{ first_attempt_status }, $status ); } sub about_to_run { my ($self, $args_ref) = @_; # on attempt other than the first, omit '-silent' argument, so we get all # details about the failure. if ($self->{ attempt } > 1) { @{ $args_ref } = grep { $_ ne '-silent' } @{ $args_ref }; } return; } # Once a test has been determined as definitely being flaky, # this function will do something based on the current flaky mode. sub handle_flaky_test { my ($self, $first_status, $second_status) = @_; if ($first_status == 0) { confess 'internal error: should not be called if test passed on first attempt'; } my $testrunner = $self->{ testrunner }; if ($second_status == 0) { $testrunner->print_info( "test failed on first attempt and passed on second attempt!\n" .' first attempt: exited with '.$self->format_status( $first_status )."\n" ); } else { $testrunner->print_info( "test failed on first and second attempts, but with different behavior each time:\n" .' first attempt: exited with '.$self->format_status( $first_status )."\n" .' second attempt: exited with '.$self->format_status( $second_status )."\n" ); } $testrunner->print_info( "the test seems to be flaky, please fix this\n" ); if ($self->{ mode } eq $IGNORE) { $testrunner->print_info( "this flaky test is being ignored\n" ); $testrunner->proc( )->{ status } = 0; } elsif ($self->{ mode } eq $BEST && $second_status == 0) { $testrunner->print_info( "this flaky test is being treated as a PASS\n" ); } else { $testrunner->print_info( "this flaky test is being treated as a FAIL\n" ); # We need to tell the caller to force a failure, otherwise it will # consider that the test has passed. Take the "worst" exit code # (well, a higher exit code doesn't necessarily imply worse results, # but testlib uses the number of failures as an exitcode, and also high # exitcodes stand out more easily in test logs). my $exitcode = max( $first_status >> 8, $second_status >> 8 ) || 1; return { force_failure_exitcode => $exitcode }; } return; } # Given an exit status, return a human-readable string for the corresponding # exit code or signal. e.g., transform 139 into "signal 11". sub format_status { my ($self, $status) = @_; if ($status == -1) { return "status $status"; } my $signal = ($status & 127); if ($signal) { return "signal $signal" } return "exit code ".($status >> 8); } # Compares two status values and returns 1 if they should be considered equal for the purpose # of determining test stability. sub status_eq { my ($self, $a, $b) = @_; # We don't care if the process dumped core, since this is done by the OS _after_ the # process already crashed. It has no bearing on the stability of a failure. $a |= 128; $b |= 128; return ($a == $b); } =head1 NAME QtQA::App::TestRunner::Plugin::flaky - try to handle unstable autotests =head1 SYNOPSIS # default: advisory mode only $ testrunner --plugin flaky --capture-logs $HOME/test-logs -- tst_flaky; echo $? ********* Start testing of tst_Flaky ********* Config: Using QTest library 5.0.0, Qt 5.0.0 PASS : tst_Flaky::initTestCase() FAIL! : tst_Flaky::some_function() (The quux was not bar) PASS : tst_Flaky::cleanupTestCase() Totals: 1 passed, 1 failed, 0 skipped ********* Finished testing of tst_Flaky ********* QtQA::App::TestRunner: test failed, running again to see if it is flaky... ********* Start testing of tst_Flaky ********* Config: Using QTest library 5.0.0, Qt 5.0.0 PASS : tst_Flaky::initTestCase() PASS : tst_Flaky::some_function() PASS : tst_Flaky::cleanupTestCase() Totals: 3 passed, 0 failed, 0 skipped ********* Finished testing of tst_Flaky ********* QtQA::App::TestRunner: test failed on first attempt and passed on second attempt! QtQA::App::TestRunner: the test seems to be flaky, please fix this QtQA::App::TestRunner: this flaky test is being treated as a FAIL 1 # can also permit or ignore flaky tests ... $ testrunner --plugin flaky --flaky-mode best -- tst_flaky; echo $? ********* Start testing of tst_Flaky ********* Config: Using QTest library 5.0.0, Qt 5.0.0 PASS : tst_Flaky::initTestCase() FAIL! : tst_Flaky::some_function() (The quux was not bar) PASS : tst_Flaky::cleanupTestCase() Totals: 1 passed, 1 failed, 0 skipped ********* Finished testing of tst_Flaky ********* QtQA::App::TestRunner: test failed, running again to see if it is flaky... ********* Start testing of tst_Flaky ********* Config: Using QTest library 5.0.0, Qt 5.0.0 PASS : tst_Flaky::initTestCase() PASS : tst_Flaky::some_function() PASS : tst_Flaky::cleanupTestCase() Totals: 3 passed, 0 failed, 0 skipped ********* Finished testing of tst_Flaky ********* QtQA::App::TestRunner: test failed on first attempt and passed on second attempt! QtQA::App::TestRunner: the test seems to be flaky, please fix this QtQA::App::TestRunner: this flaky test is being treated as a PASS 0 =head1 DESCRIPTION This plugin provides a simple mechanism to help determine if an autotest failure is stable. When active, any failing autotest will be re-run at least once to check if the failure can be reproduced. If the failing autotest was initially run with the '-silent' argument, this argument will be omitted on the second run. An autotest which fails twice in a row, but with a different exit status each time, is also considered unstable (example: a test which fails "normally" once, but segfaults at the second run). By default, the plugin does not override the pass/fail state of the test. This can be configured by the B<--flaky-mode> argument, which accepts the values: =over =item B Always take the worst result of an autotest as the canonical result (default). =item B Always take the best result of an autotest as the canonical result. =item B Ignore any autotest which gives unstable results. =back To aid in the understanding of the difference between these values, the following table is provided which enumerates all possible cases: +======================================================================================+ | flaky-mode | pass | stable fail | fail then pass | fail then fail differently | +============+========+==============+==================+==============================+ | worst | PASS | FAIL | FAIL | FAIL | | best | PASS | FAIL | PASS | FAIL | | ignore | PASS | FAIL | PASS | PASS | +======================================================================================+ =head1 CAVEATS Note that this can only prove when a test is I. Even running a test successfully one trillion times wouldn't prove that it's stable. Use of any flaky-mode other than B may lead to genuine issues being hidden indefinitely. =cut 1;