Implement swarm testing for st.one_of #4322

base: master
Changes from 1 commit
```diff
@@ -0,0 +1,5 @@
+RELEASE_TYPE: patch
+
+|st.one_of| now chooses a subset of its strategies to disable each time it generates a value. For example, it was previously unlikely that ``st.lists(st.integers() | st.floats() | st.text())`` would generate a long list containing only string values. This is now more likely, along with other uncommon combinations.
+
+This technique is called `swarm testing <https://users.cs.utah.edu/~regehr/papers/swarm12.pdf>`__, and can considerably improve bug-finding power, for instance because some features actively prevent other interesting behavior from running. See :issue:`2643` for more details.
```
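As a quick illustration of the behavior this release note describes, the following sketch prints the set of element types in each generated list; with swarm testing, single-type outputs show up noticeably more often. The strategy, settings, and function name here are invented for illustration, not taken from the PR:

```python
from hypothesis import given, settings, strategies as st

@given(st.lists(st.integers() | st.floats() | st.text(), min_size=10))
@settings(max_examples=20)
def show_type_mix(xs):
    # With swarm testing, some test cases disable e.g. the integer and
    # float branches entirely, so homogeneous (single-type) lists
    # become much more likely.
    print({type(x).__name__ for x in xs})

show_type_mix()
```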
```diff
@@ -28,6 +28,7 @@
     overload,
 )

+from hypothesis import strategies as st
 from hypothesis._settings import HealthCheck, Phase, Verbosity, settings
 from hypothesis.control import _current_build_context, current_build_context
 from hypothesis.errors import (
```
```diff
@@ -689,10 +690,16 @@ class OneOfStrategy(SearchStrategy[Ex]):
     """

     def __init__(self, strategies: Sequence[SearchStrategy[Ex]]):
+        from hypothesis.strategies._internal.featureflags import FeatureStrategy
+
         super().__init__()
         self.original_strategies = tuple(strategies)
         self.__element_strategies: Optional[Sequence[SearchStrategy[Ex]]] = None
         self.__in_branches = False
+        self.enabled_branches_strategy = st.shared(
+            FeatureStrategy(self.original_strategies),
+            key=("one_of swarm testing", self.original_strategies),
+        )
```
Comment on lines +699 to 703

Member (Author):
I think that using st.shared for the FeatureStrategy means that any separately-defined strategies which happen to have the same key will have the same swarm behavior, i.e. two separate usages of st.integers() | st.floats() will use the same swarm decision of what to disable. But I believe they will still draw separate random choices below the swarm step, so the values won't be the same, just the disabling decision. I think diversity in whether to share swarm decisions here would be best for bug finding. What we might be able to do is draw a meta p which chooses, for each occurrence of the same-keyed strategy…

Member:
Sounds complicated; let's leave that for a follow-up PR.
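For context on the concern above: st.shared with equal keys yields the same drawn value everywhere it appears within a single test case, which is why two same-keyed one_of strategies would share one disabling decision. A minimal demonstration of that sharing semantics (key name and function name are invented here):

```python
from hypothesis import given, strategies as st

@given(
    st.shared(st.integers(), key="swarm-demo"),
    st.shared(st.integers(), key="swarm-demo"),
)
def shared_key_draws_once(a, b):
    # Two separately-written strategies with the same key receive the
    # same value within any single example.
    assert a == b

shared_key_draws_once()
```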
```diff
     def calc_is_empty(self, recur: RecurT) -> bool:
         return all(recur(e) for e in self.original_strategies)
```
```diff
@@ -739,9 +746,10 @@ def calc_label(self) -> int:
     )

     def do_draw(self, data: ConjectureData) -> Ex:
+        feature_flags = data.draw(self.enabled_branches_strategy)
         strategy = data.draw(
             SampledFromStrategy(self.element_strategies).filter(
-                lambda s: s.available(data)
+                lambda s: s.available(data) and feature_flags.is_enabled(s)
             )
         )
         return data.draw(strategy)
```
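As a standalone illustration of what this do_draw change accomplishes, here is the swarm-testing idea sketched in plain Python, independent of Hypothesis internals; all names and the 0.5 disabling probability are invented for the sketch:

```python
import random

def swarm_draw(generators, rng):
    # Swarm testing: each test case independently disables a random
    # subset of features (here, generators), then draws only from the
    # ones that remain enabled. At least one must survive.
    enabled = []
    while not enabled:
        enabled = [g for g in generators if rng.random() >= 0.5]
    return rng.choice(enabled)()

rng = random.Random(0)
generators = [
    lambda: rng.randint(0, 100),  # "integers" feature
    lambda: rng.random(),         # "floats" feature
    lambda: "text",               # "text" feature
]
print([swarm_draw(generators, rng) for _ in range(5)])
```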
Comment:
See comment; not too sure what to do here. Here's one example of this added redundancy:

…

which prints "A" once on master but ~8 times on this branch.
Comment:
If self.__p_disabled is only ever used as the p= argument to draw_boolean(), and is strictly between 0 and 1, do we need to choose it via the choice sequence at all? vs. data.random(), or 0.5 if that's not available?
Comment:
Sort of. If we don't draw this from the choice sequence, DataTree thinks the later is_disabled draw is flaky because the kwargs p passed to draw_boolean has changed. It's not actually flaky, because the values in the choice sequence don't depend on p (as long as we're careful about p=0.0 and p=1), but DataTree doesn't know that. We could imagine some kind of "ignore this param for flakiness" API, but I'm not sure how that would look.

Alternatively, hardcode that p in draw_boolean shouldn't be checked for flakiness? It's controlling the distribution, not the domain, so arguably consumers should be able to change it without flakiness...
Comment:
I think ignoring distribution-only arguments in flakiness checks makes sense; we can still replay perfectly.
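To spell out the replay argument: during replay the boolean value is read back from the recorded choice sequence, so p only shapes fresh generation. A toy sketch of that reasoning, not Hypothesis's actual draw_boolean implementation:

```python
import random

def draw_boolean(p, recorded=None, rng=None):
    # During generation, p controls the probability of True. During
    # replay, the recorded bit is returned unchanged, so any 0 < p < 1
    # reproduces the same value: p is distribution-only.
    if recorded is not None:
        return recorded
    return rng.random() < p

rng = random.Random(1)
bit = draw_boolean(0.25, rng=rng)               # generated under one p
assert draw_boolean(0.75, recorded=bit) == bit  # replayed under another p
```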