summaryrefslogtreecommitdiffstats
path: root/src/qdoc/catch_generators/tests/generators/combinators/catch_oneof_generator.cpp
blob: 4d56662136e56aa36e18ca7337c0580d702ceeb6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
// Copyright (C) 2022 The Qt Company Ltd.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0

#include <catch_conversions/std_catch_conversions.h>

#include <catch_generators/namespaces.h>
#include <catch_generators/generators/k_partition_of_r_generator.h>
#include <catch_generators/generators/combinators/oneof_generator.h>
#include <catch_generators/generators/combinators/cycle_generator.h>
#include <catch_generators/utilities/statistics/percentages.h>
#include <catch_generators/utilities/statistics/distribution.h>
#include <catch_generators/utilities/semantics/copy_value.h>

#include <catch/catch.hpp>

#include <cmath>
#include <iterator>
#include <vector>
#include <algorithm>
#include <unordered_map>

using namespace QDOC_CATCH_GENERATORS_ROOT_NAMESPACE;
using namespace QDOC_CATCH_GENERATORS_UTILITIES_ABSOLUTE_NAMESPACE;

SCENARIO("Choosing between one of many generators", "[OneOf][Combinators]") {
    GIVEN("Some generators producing values of the same type") {
        auto generators_amount = GENERATE(take(10, random(1, 10)));
        auto generators_values = GENERATE_COPY(take(10, chunk(generators_amount, random(0, 100000))));

        std::vector<Catch::Generators::GeneratorWrapper<int>> generators;
        generators.reserve(generators_amount);
        std::transform(
            generators_values.begin(), generators_values.end(), std::back_inserter(generators),
            [](auto& value){ return Catch::Generators::value(copy_value(value)); }
        );

        AND_GIVEN("A generator choosing between them based on some distribution") {
            std::vector<double> weights = GENERATE_COPY(take(10, k_partition_of_r(100.0, generators_amount)));
            auto choosing_generator = oneof(std::move(generators), std::move(weights));

            WHEN("A value is extracted from the choosing generator") {
                auto generated_value = GENERATE_REF(take(100, std::move(choosing_generator)));

                THEN("The generated value is a member of one of the original generators") {
                    REQUIRE(std::find(generators_values.cbegin(), generators_values.cend(), generated_value) != generators_values.cend());
                }
            }
        }

        AND_GIVEN("A generator choosing between them with the same probability") {
            auto choosing_generator = uniform_oneof(std::move(generators));

            WHEN("A value is extracted from the choosing generator") {
                auto generated_value = GENERATE_REF(take(100, std::move(choosing_generator)));

                THEN("The generated value is a member of one of the original generators") {
                    REQUIRE(std::find(generators_values.cbegin(), generators_values.cend(), generated_value) != generators_values.cend());
                }
            }
        }

        AND_GIVEN("A generator choosing between them such that each possible value has the same probability of being chosen") {
            auto choosing_generator = uniformly_valued_oneof(std::move(generators), std::vector(generators_amount, std::size_t{1}));

            WHEN("A value is extracted from the choosing generator") {
                auto generated_value = GENERATE_REF(take(100, std::move(choosing_generator)));

                THEN("The generated value is a member of one of the original generators") {
                    REQUIRE(std::find(generators_values.cbegin(), generators_values.cend(), generated_value) != generators_values.cend());
                }
            }
        }
    }
}

// TODO: The following is a generally complex test. Nonetheless, we
// can probably ease some of the complexity by moving it out into some
// generators or by abstracting it a little to remove the need to know
// some of the implementation details.
// Check if this is possible.

// REMARK: [mayfail][distribution]
// This tests cannot be precise as it depends on randomized output.
// For this reason, we mark it as !mayfail.
// This allows us to see cases where it fails without having the
// test-run itself fail.
// We generally expect this test to not fail, but it may fail randomly
// every now and then simply because of how a correctly randomized
// distribution may behave.
// As long as this test doesn't fail consistently, with values that
// shows an unsustainable deviation, it should be considered to be
// working.
SCENARIO("Observing the distribution of generators that are chosen from", "[OneOf][Combinators][Statistics][!mayfail]") {
    GIVEN("Some generators producing values of the same type") {
        std::size_t generators_amount = GENERATE(take(10, random(1, 10)));

        // REMARK: To test the distribution, we want to have some
        // amount of generators to choose from whose generated values
        // can be uniquely reconducted to the generating generator so
        // that we may count how many times a specific generator was
        // chosen.
        // The easiest way would be to have generators that produce a
        // single value.
        // Nonetheless, to test the version that provides an
        // approximate uniform distribution over the values themselves
        // correctly, we need to have generators that can produce a
        // different amount of elements.
        // When that is not the case, indeed, a generator that
        // approximately distributes uniformly over values is
        // equivalent to one that approximately distributes uniformely
        // over the generators themselves.
        // As such, we use ranges of positive integers, as they are
        // the simplest multi-valued finite generator that can be dinamically
        // construted, while still providing an easy way to infer the
        // amount of values it contains so that we can derive the
        // cardinality of our domain.
        // We produce those ranges as disjoint subsequent ranges
        // starting from 0 upward.
        // We require the ranges to be disjoint so that we do not lose
        // the ability of uniquely identifying a generator that
        // produced the value.
        //
        // To do so, we generate a series of disjoint least upper
        // bounds for the ranges.
        // Then, we produce the ith range by using the successor of
        // the (i - 1)th upper bound as its lower bound and the ith
        // upper bound as its upper bound.
        //
        // We take further care to ensure that the collection of upper
        // bounds is sorted, as this simplifies to a linear search our
        // need to index the collection of generators to find the
        // identifying generator and its associated probability.
        std::vector<std::size_t> generators_bounds(generators_amount, 0);
        std::vector<Catch::Generators::GeneratorWrapper<std::size_t>> generators;
        generators.reserve(generators_amount);

        std::size_t lowest_bound{0};
        std::size_t generators_step{1000};
        std::size_t lower_bound_offset{1};

        generators_bounds[0] = Catch::Generators::random(lowest_bound, generators_step).get();
        generators.push_back(Catch::Generators::random(lowest_bound, generators_bounds[0]));

        // We use this one to group together values that are generated
        // from the same generator and to provide an index for that
        // generator to use for finding its associated probability.
        // Since our generators are defined by their upper bounds and
        // the collection of upper bounds is sorted, the first
        // encountered upper bound that is not less than the value
        // itself must be the least upper bound of the generator that
        // produced the value.
        // Then, the index of that upper bound must be the same as the
        // index of the producing generator and its associated
        // probability.
        auto find_index_of_producing_generator = [&generators_bounds](auto value) {
            return static_cast<std::size_t>(std::distance(
                generators_bounds.begin(),
                std::find_if(generators_bounds.begin(), generators_bounds.end(), [&value](auto element){ return value <= element; })
            ));
        };

        for (std::size_t index{1}; index < generators_amount; ++index) {
            generators_bounds[index] = Catch::Generators::random(generators_bounds[index - 1] + lower_bound_offset + 1, generators_bounds[index - 1] + lower_bound_offset + 1 + generators_step).get();
            generators.push_back(Catch::Generators::random(generators_bounds[index - 1] + lower_bound_offset, generators_bounds[index]));
        }

        AND_GIVEN("A probability of being chosen, in percentage, for each of the generators, such that the sum of the percentages is one hundred") {
            std::vector<double> probabilities = GENERATE_COPY(take(10, k_partition_of_r(100.0, generators_amount)));

            AND_GIVEN("A choosing generator for those generators based on the given probabilities") {
                auto choosing_generator = oneof(std::move(generators), probabilities);

                WHEN("A certain amount of values are generated from the choosing generator") {
                    auto values = GENERATE_REF(take(1, chunk(10000, std::move(choosing_generator))));

                    THEN("The distribution of elements for each generator approximately respects the weight that was given to it") {
                        auto maybe_distribution_error{respects_distribution(
                            std::move(values),
                            find_index_of_producing_generator,
                            [&probabilities](auto key){ return probabilities[key]; }
                        )};

                        REQUIRE_FALSE(maybe_distribution_error);
                    }
                }
            }
        }

        AND_GIVEN("A choosing generator for those generators that will choose each generator with the same probability") {
            auto choosing_generator = uniform_oneof(std::move(generators));

            WHEN("A certain amount of values are generated from the choosing generator") {
                auto values = GENERATE_REF(take(1, chunk(10000, std::move(choosing_generator))));

                THEN("The distribution of elements approximates uniformity over the generators") {
                    double probability{uniform_probability(generators_amount)};

                    auto maybe_distribution_error{respects_distribution(
                        std::move(values),
                        find_index_of_producing_generator,
                        [&probability](auto _){ (void)(_); return probability; }
                    )};

                    REQUIRE_FALSE(maybe_distribution_error);
                }
            }
        }

        AND_GIVEN("A choosing generator for those generators that will choose each generator such that each possible value has the same probability of being chosen") {
            // REMARK: We need to know the total amount of
            // unique values that can be generated by our
            // generators, so that we can construct an
            // appropriate distribution.
            // Since our generators are ranges defined by the
            // collection of upper bounds we can find their
            // length by finding the difference between
            // adjacent elements of the collection.
            //
            // Some more care must be taken to ensure tha the
            // correct amount is produced.
            // Since we need our ranges to be disjoint, we
            // apply a small offset from the element of the
            // upper bounds that is used as a lower bound,
            // since that upper bound is inclusive for the
            // range that precedes the one we are making the
            // calculation for.
            //
            // Furthermore, the first range is treated
            // specially.
            // As no range precedes it, it doesn't need any
            // offset to be applied.
            // Additionally, we implicitly use 0 as the first
            // lower bound, such that the length of the first
            // range is indeed equal to its upper bound.
            //
            // To account for this, we remove that offset from
            // the total amount for each range after the first
            // one and use the first upper bound as a seeding
            // value to account for the length of the first
            // range.
            std::vector<std::size_t> generators_cardinality(generators_amount, generators_bounds[0]);

            std::adjacent_difference(generators_bounds.begin(), generators_bounds.end(), generators_bounds.begin());
            std::transform(std::next(generators_cardinality.begin()), generators_cardinality.end(), std::next(generators_cardinality.begin()), [](auto element){ return element - 1; });

            std::size_t output_cardinality{std::accumulate(generators_cardinality.begin(), generators_cardinality.end(), std::size_t{0})};

            auto choosing_generator = uniformly_valued_oneof(std::move(generators), std::move(generators_cardinality));

            WHEN("A certain amount of values are generated from the choosing generator") {
                auto values = GENERATE_REF(take(1, chunk(10000, std::move(choosing_generator))));

                THEN("The distribution of elements approximates uniformity for each value") {
                    double probability{uniform_probability(output_cardinality)};

                    auto maybe_distribution_error{respects_distribution(
                        std::move(values),
                        [](auto value){ return value; },
                        [&probability](auto _){ (void)(_); return probability; }
                    )};

                    REQUIRE_FALSE(maybe_distribution_error);
                }
            }
        }
    }
}

TEST_CASE("A generator with a weight of zero is never chosen when choosing between many generators", "[OneOf][Combinators][SpecialCase]") {
    auto excluded_value = GENERATE(take(100, random(0, 10000)));

    std::vector<Catch::Generators::GeneratorWrapper<int>> generators;
    generators.reserve(2);
    generators.emplace_back(Catch::Generators::random(excluded_value + 1, std::numeric_limits<int>::max()));
    generators.emplace_back(Catch::Generators::value(copy_value(excluded_value)));

    auto generated_value = GENERATE_REF(take(100, oneof(std::move(generators), std::vector{100.0, 0.0})));

    REQUIRE(generated_value != excluded_value);
}

TEST_CASE("The first element of the passed in generators are not lost", "[OneOf][Combinators][GeneratorFirstElement][SpecialCase]") {
    // REMARK: We want to test that, for each generator, the first
    // time it is chosen the first value is produced.
    // This is complicated because of the fact that OneOf chooses
    // random generators in a random order.
    // This means that some generators may never be chosen, never be
    // chosen more than once and so on.
    // Furthermore, this specific test is particularly important only
    // for finite generators or non-completely random, ordered,
    // infinite generators.
    // Additionally, we need to ensure that we test with multiple
    // generators, as this test is a consequence of a first bugged
    // implementation where only the first chosen generator respected
    // the first value, which would pass a test where a single
    // generator is used.
    //
    // This is non-trivial due to the randomized nature of OneOf.
    // It can be simplified if we express it in a non-deterministic
    // way and mark it as mayfail, where we can recognize with a good
    // certainty that the test is actually passing.
    //
    // To avoid having this flaky test, we approach it as follows:
    //
    // We provide some amount of infinite generators. Those generators
    // are ensured to produce one specific value as their first value
    // and then infinitely produce a different value.
    // We ensure that each generator that is provided produces unique
    // values, that is, no two generators produce a first value or 1 <
    // nth value that is equal to the one produced by another
    // generator.
    //
    // Then we pass those generators to oneof and generate enough
    // values such that at least one of the generators must have been
    // chosen twice or more, at random.
    //
    // We count the appearances of each value in the produced set.
    // Then, if a value that is generated by the 1 < nth choice of a
    // specific generator is encountered, we check that the first
    // value that the specific generator would produce is in the set
    // of values that were generated.
    // That is, if a generator has produced his non-first value, it
    // must have been chosen twice or more.
    // This in turn implies that the first time that the generator was
    // chosen, its first value was actually produced.

    struct IncreaseAfterFirst {
        std::size_t increase;
        bool first_application = true;

        std::size_t operator()(std::size_t value) {
            if (first_application) {
                first_application = false;
                return value;
            }

            return value + increase;
        }
    };

    std::size_t maximum_generator_amount{100};
    auto generators_amount = GENERATE_COPY(take(10, random(std::size_t{1}, maximum_generator_amount)));

    std::vector<Catch::Generators::GeneratorWrapper<std::size_t>> generators;
    generators.reserve(generators_amount);

    for (std::size_t index{0}; index < generators_amount; ++index) {
        generators.push_back(Catch::Generators::map(IncreaseAfterFirst{maximum_generator_amount}, cycle(Catch::Generators::value(copy_value(index)))));
    }

    auto values = GENERATE_REF(take(1, chunk(generators_amount + 1, uniform_oneof(std::move(generators)))));
    auto histogram{make_histogram(values.begin(), values.end(), [](auto e){ return e; })};

    for (std::size_t index{0}; index < generators_amount; ++index) {
        std::size_t second_value{index + maximum_generator_amount};
        histogram.try_emplace(second_value, 0);

        if (histogram[second_value] > 0) {
            REQUIRE(histogram.find(index) != histogram.end());
        }
    }
}