summaryrefslogtreecommitdiffstats
path: root/src/qdoc/catch_generators/tests/generators/combinators/catch_oneof_generator.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/qdoc/catch_generators/tests/generators/combinators/catch_oneof_generator.cpp')
-rw-r--r--src/qdoc/catch_generators/tests/generators/combinators/catch_oneof_generator.cpp362
1 files changed, 362 insertions, 0 deletions
diff --git a/src/qdoc/catch_generators/tests/generators/combinators/catch_oneof_generator.cpp b/src/qdoc/catch_generators/tests/generators/combinators/catch_oneof_generator.cpp
new file mode 100644
index 000000000..4d5666213
--- /dev/null
+++ b/src/qdoc/catch_generators/tests/generators/combinators/catch_oneof_generator.cpp
@@ -0,0 +1,362 @@
+// Copyright (C) 2022 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
+
+#include <catch_conversions/std_catch_conversions.h>
+
+#include <catch_generators/namespaces.h>
+#include <catch_generators/generators/k_partition_of_r_generator.h>
+#include <catch_generators/generators/combinators/oneof_generator.h>
+#include <catch_generators/generators/combinators/cycle_generator.h>
+#include <catch_generators/utilities/statistics/percentages.h>
+#include <catch_generators/utilities/statistics/distribution.h>
+#include <catch_generators/utilities/semantics/copy_value.h>
+
+#include <catch/catch.hpp>
+
+#include <cmath>
+#include <iterator>
+#include <vector>
+#include <algorithm>
+#include <unordered_map>
+
+using namespace QDOC_CATCH_GENERATORS_ROOT_NAMESPACE;
+using namespace QDOC_CATCH_GENERATORS_UTILITIES_ABSOLUTE_NAMESPACE;
+
+SCENARIO("Choosing between one of many generators", "[OneOf][Combinators]") {
+ GIVEN("Some generators producing values of the same type") {
+ auto generators_amount = GENERATE(take(10, random(1, 10)));
+ auto generators_values = GENERATE_COPY(take(10, chunk(generators_amount, random(0, 100000))));
+
+ std::vector<Catch::Generators::GeneratorWrapper<int>> generators;
+ generators.reserve(generators_amount);
+ std::transform(
+ generators_values.begin(), generators_values.end(), std::back_inserter(generators),
+ [](auto& value){ return Catch::Generators::value(copy_value(value)); }
+ );
+
+ AND_GIVEN("A generator choosing between them based on some distribution") {
+ std::vector<double> weights = GENERATE_COPY(take(10, k_partition_of_r(100.0, generators_amount)));
+ auto choosing_generator = oneof(std::move(generators), std::move(weights));
+
+ WHEN("A value is extracted from the choosing generator") {
+ auto generated_value = GENERATE_REF(take(100, std::move(choosing_generator)));
+
+ THEN("The generated value is a member of one of the original generators") {
+ REQUIRE(std::find(generators_values.cbegin(), generators_values.cend(), generated_value) != generators_values.cend());
+ }
+ }
+ }
+
+ AND_GIVEN("A generator choosing between them with the same probability") {
+ auto choosing_generator = uniform_oneof(std::move(generators));
+
+ WHEN("A value is extracted from the choosing generator") {
+ auto generated_value = GENERATE_REF(take(100, std::move(choosing_generator)));
+
+ THEN("The generated value is a member of one of the original generators") {
+ REQUIRE(std::find(generators_values.cbegin(), generators_values.cend(), generated_value) != generators_values.cend());
+ }
+ }
+ }
+
+ AND_GIVEN("A generator choosing between them such that each possible value has the same probability of being chosen") {
+ auto choosing_generator = uniformly_valued_oneof(std::move(generators), std::vector(generators_amount, std::size_t{1}));
+
+ WHEN("A value is extracted from the choosing generator") {
+ auto generated_value = GENERATE_REF(take(100, std::move(choosing_generator)));
+
+ THEN("The generated value is a member of one of the original generators") {
+ REQUIRE(std::find(generators_values.cbegin(), generators_values.cend(), generated_value) != generators_values.cend());
+ }
+ }
+ }
+ }
+}
+
+// TODO: The following is a generally complex test. Nonetheless, we
+// can probably ease some of the complexity by moving it out into some
+// generators or by abstracting it a little to remove the need to know
+// some of the implementation details.
+// Check if this is possible.
+
+// REMARK: [mayfail][distribution]
+// This tests cannot be precise as it depends on randomized output.
+// For this reason, we mark it as !mayfail.
+// This allows us to see cases where it fails without having the
+// test-run itself fail.
+// We generally expect this test to not fail, but it may fail randomly
+// every now and then simply because of how a correctly randomized
+// distribution may behave.
+// As long as this test doesn't fail consistently, with values that
+// shows an unsustainable deviation, it should be considered to be
+// working.
+SCENARIO("Observing the distribution of generators that are chosen from", "[OneOf][Combinators][Statistics][!mayfail]") {
+ GIVEN("Some generators producing values of the same type") {
+ std::size_t generators_amount = GENERATE(take(10, random(1, 10)));
+
+ // REMARK: To test the distribution, we want to have some
+ // amount of generators to choose from whose generated values
+ // can be uniquely reconducted to the generating generator so
+ // that we may count how many times a specific generator was
+ // chosen.
+ // The easiest way would be to have generators that produce a
+ // single value.
+ // Nonetheless, to test the version that provides an
+ // approximate uniform distribution over the values themselves
+ // correctly, we need to have generators that can produce a
+ // different amount of elements.
+ // When that is not the case, indeed, a generator that
+ // approximately distributes uniformly over values is
+ // equivalent to one that approximately distributes uniformely
+ // over the generators themselves.
+ // As such, we use ranges of positive integers, as they are
+ // the simplest multi-valued finite generator that can be dinamically
+ // construted, while still providing an easy way to infer the
+ // amount of values it contains so that we can derive the
+ // cardinality of our domain.
+ // We produce those ranges as disjoint subsequent ranges
+ // starting from 0 upward.
+ // We require the ranges to be disjoint so that we do not lose
+ // the ability of uniquely identifying a generator that
+ // produced the value.
+ //
+ // To do so, we generate a series of disjoint least upper
+ // bounds for the ranges.
+ // Then, we produce the ith range by using the successor of
+ // the (i - 1)th upper bound as its lower bound and the ith
+ // upper bound as its upper bound.
+ //
+ // We take further care to ensure that the collection of upper
+ // bounds is sorted, as this simplifies to a linear search our
+ // need to index the collection of generators to find the
+ // identifying generator and its associated probability.
+ std::vector<std::size_t> generators_bounds(generators_amount, 0);
+ std::vector<Catch::Generators::GeneratorWrapper<std::size_t>> generators;
+ generators.reserve(generators_amount);
+
+ std::size_t lowest_bound{0};
+ std::size_t generators_step{1000};
+ std::size_t lower_bound_offset{1};
+
+ generators_bounds[0] = Catch::Generators::random(lowest_bound, generators_step).get();
+ generators.push_back(Catch::Generators::random(lowest_bound, generators_bounds[0]));
+
+ // We use this one to group together values that are generated
+ // from the same generator and to provide an index for that
+ // generator to use for finding its associated probability.
+ // Since our generators are defined by their upper bounds and
+ // the collection of upper bounds is sorted, the first
+ // encountered upper bound that is not less than the value
+ // itself must be the least upper bound of the generator that
+ // produced the value.
+ // Then, the index of that upper bound must be the same as the
+ // index of the producing generator and its associated
+ // probability.
+ auto find_index_of_producing_generator = [&generators_bounds](auto value) {
+ return static_cast<std::size_t>(std::distance(
+ generators_bounds.begin(),
+ std::find_if(generators_bounds.begin(), generators_bounds.end(), [&value](auto element){ return value <= element; })
+ ));
+ };
+
+ for (std::size_t index{1}; index < generators_amount; ++index) {
+ generators_bounds[index] = Catch::Generators::random(generators_bounds[index - 1] + lower_bound_offset + 1, generators_bounds[index - 1] + lower_bound_offset + 1 + generators_step).get();
+ generators.push_back(Catch::Generators::random(generators_bounds[index - 1] + lower_bound_offset, generators_bounds[index]));
+ }
+
+ AND_GIVEN("A probability of being chosen, in percentage, for each of the generators, such that the sum of the percentages is one hundred") {
+ std::vector<double> probabilities = GENERATE_COPY(take(10, k_partition_of_r(100.0, generators_amount)));
+
+ AND_GIVEN("A choosing generator for those generators based on the given probabilities") {
+ auto choosing_generator = oneof(std::move(generators), probabilities);
+
+ WHEN("A certain amount of values are generated from the choosing generator") {
+ auto values = GENERATE_REF(take(1, chunk(10000, std::move(choosing_generator))));
+
+ THEN("The distribution of elements for each generator approximately respects the weight that was given to it") {
+ auto maybe_distribution_error{respects_distribution(
+ std::move(values),
+ find_index_of_producing_generator,
+ [&probabilities](auto key){ return probabilities[key]; }
+ )};
+
+ REQUIRE_FALSE(maybe_distribution_error);
+ }
+ }
+ }
+ }
+
+ AND_GIVEN("A choosing generator for those generators that will choose each generator with the same probability") {
+ auto choosing_generator = uniform_oneof(std::move(generators));
+
+ WHEN("A certain amount of values are generated from the choosing generator") {
+ auto values = GENERATE_REF(take(1, chunk(10000, std::move(choosing_generator))));
+
+ THEN("The distribution of elements approximates uniformity over the generators") {
+ double probability{uniform_probability(generators_amount)};
+
+ auto maybe_distribution_error{respects_distribution(
+ std::move(values),
+ find_index_of_producing_generator,
+ [&probability](auto _){ (void)(_); return probability; }
+ )};
+
+ REQUIRE_FALSE(maybe_distribution_error);
+ }
+ }
+ }
+
+ AND_GIVEN("A choosing generator for those generators that will choose each generator such that each possible value has the same probability of being chosen") {
+ // REMARK: We need to know the total amount of
+ // unique values that can be generated by our
+ // generators, so that we can construct an
+ // appropriate distribution.
+ // Since our generators are ranges defined by the
+ // collection of upper bounds we can find their
+ // length by finding the difference between
+ // adjacent elements of the collection.
+ //
+ // Some more care must be taken to ensure tha the
+ // correct amount is produced.
+ // Since we need our ranges to be disjoint, we
+ // apply a small offset from the element of the
+ // upper bounds that is used as a lower bound,
+ // since that upper bound is inclusive for the
+ // range that precedes the one we are making the
+ // calculation for.
+ //
+ // Furthermore, the first range is treated
+ // specially.
+ // As no range precedes it, it doesn't need any
+ // offset to be applied.
+ // Additionally, we implicitly use 0 as the first
+ // lower bound, such that the length of the first
+ // range is indeed equal to its upper bound.
+ //
+ // To account for this, we remove that offset from
+ // the total amount for each range after the first
+ // one and use the first upper bound as a seeding
+ // value to account for the length of the first
+ // range.
+ std::vector<std::size_t> generators_cardinality(generators_amount, generators_bounds[0]);
+
+ std::adjacent_difference(generators_bounds.begin(), generators_bounds.end(), generators_bounds.begin());
+ std::transform(std::next(generators_cardinality.begin()), generators_cardinality.end(), std::next(generators_cardinality.begin()), [](auto element){ return element - 1; });
+
+ std::size_t output_cardinality{std::accumulate(generators_cardinality.begin(), generators_cardinality.end(), std::size_t{0})};
+
+ auto choosing_generator = uniformly_valued_oneof(std::move(generators), std::move(generators_cardinality));
+
+ WHEN("A certain amount of values are generated from the choosing generator") {
+ auto values = GENERATE_REF(take(1, chunk(10000, std::move(choosing_generator))));
+
+ THEN("The distribution of elements approximates uniformity for each value") {
+ double probability{uniform_probability(output_cardinality)};
+
+ auto maybe_distribution_error{respects_distribution(
+ std::move(values),
+ [](auto value){ return value; },
+ [&probability](auto _){ (void)(_); return probability; }
+ )};
+
+ REQUIRE_FALSE(maybe_distribution_error);
+ }
+ }
+ }
+ }
+}
+
+TEST_CASE("A generator with a weight of zero is never chosen when choosing between many generators", "[OneOf][Combinators][SpecialCase]") {
+ auto excluded_value = GENERATE(take(100, random(0, 10000)));
+
+ std::vector<Catch::Generators::GeneratorWrapper<int>> generators;
+ generators.reserve(2);
+ generators.emplace_back(Catch::Generators::random(excluded_value + 1, std::numeric_limits<int>::max()));
+ generators.emplace_back(Catch::Generators::value(copy_value(excluded_value)));
+
+ auto generated_value = GENERATE_REF(take(100, oneof(std::move(generators), std::vector{100.0, 0.0})));
+
+ REQUIRE(generated_value != excluded_value);
+}
+
+TEST_CASE("The first element of the passed in generators are not lost", "[OneOf][Combinators][GeneratorFirstElement][SpecialCase]") {
+ // REMARK: We want to test that, for each generator, the first
+ // time it is chosen the first value is produced.
+ // This is complicated because of the fact that OneOf chooses
+ // random generators in a random order.
+ // This means that some generators may never be chosen, never be
+ // chosen more than once and so on.
+ // Furthermore, this specific test is particularly important only
+ // for finite generators or non-completely random, ordered,
+ // infinite generators.
+ // Additionally, we need to ensure that we test with multiple
+ // generators, as this test is a consequence of a first bugged
+ // implementation where only the first chosen generator respected
+ // the first value, which would pass a test where a single
+ // generator is used.
+ //
+ // This is non-trivial due to the randomized nature of OneOf.
+ // It can be simplified if we express it in a non-deterministic
+ // way and mark it as mayfail, where we can recognize with a good
+ // certainty that the test is actually passing.
+ //
+ // To avoid having this flaky test, we approach it as follows:
+ //
+ // We provide some amount of infinite generators. Those generators
+ // are ensured to produce one specific value as their first value
+ // and then infinitely produce a different value.
+ // We ensure that each generator that is provided produces unique
+ // values, that is, no two generators produce a first value or 1 <
+ // nth value that is equal to the one produced by another
+ // generator.
+ //
+ // Then we pass those generators to oneof and generate enough
+ // values such that at least one of the generators must have been
+ // chosen twice or more, at random.
+ //
+ // We count the appearances of each value in the produced set.
+ // Then, if a value that is generated by the 1 < nth choice of a
+ // specific generator is encountered, we check that the first
+ // value that the specific generator would produce is in the set
+ // of values that were generated.
+ // That is, if a generator has produced his non-first value, it
+ // must have been chosen twice or more.
+ // This in turn implies that the first time that the generator was
+ // chosen, its first value was actually produced.
+
+ struct IncreaseAfterFirst {
+ std::size_t increase;
+ bool first_application = true;
+
+ std::size_t operator()(std::size_t value) {
+ if (first_application) {
+ first_application = false;
+ return value;
+ }
+
+ return value + increase;
+ }
+ };
+
+ std::size_t maximum_generator_amount{100};
+ auto generators_amount = GENERATE_COPY(take(10, random(std::size_t{1}, maximum_generator_amount)));
+
+ std::vector<Catch::Generators::GeneratorWrapper<std::size_t>> generators;
+ generators.reserve(generators_amount);
+
+ for (std::size_t index{0}; index < generators_amount; ++index) {
+ generators.push_back(Catch::Generators::map(IncreaseAfterFirst{maximum_generator_amount}, cycle(Catch::Generators::value(copy_value(index)))));
+ }
+
+ auto values = GENERATE_REF(take(1, chunk(generators_amount + 1, uniform_oneof(std::move(generators)))));
+ auto histogram{make_histogram(values.begin(), values.end(), [](auto e){ return e; })};
+
+ for (std::size_t index{0}; index < generators_amount; ++index) {
+ std::size_t second_value{index + maximum_generator_amount};
+ histogram.try_emplace(second_value, 0);
+
+ if (histogram[second_value] > 0) {
+ REQUIRE(histogram.find(index) != histogram.end());
+ }
+ }
+}