Skip to content

Commit 205aea9

Browse files
authored
Fix flakiness and improve cache test (#6387)
This fixes the flakiness caused by reuse of inode values when refreshing stale cache entries by keeping the relevant directory open even as it is unlinked from the filesystem. It does this in two places, as technically we had the same flakiness in two tests. However, the second test was broken and not testing what it intended to due to confusing off-by-one naming and a typo. I've tried to improve the naming, removed the typo, and added the parallel flakiness fix. This test was also egregiously slow because we ended up building too many runtimes and trying to prune stale runtimes while holding a file lock on _all_ runtimes -- a scenario that is not what the code was designed for in the first place. Fixing that makes the test go from 10s to 1s in runtime, and makes it much easier to test for flakiness. Now appears to pass 100% of the 10k runs I did. Closes #6168
1 parent b5bdfdd commit 205aea9

File tree

1 file changed

+40
-12
lines changed

1 file changed

+40
-12
lines changed

toolchain/driver/runtimes_cache_test.cpp

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -607,11 +607,19 @@ TEST_F(RuntimesCacheTest, LookupWithManyStaleRuntimes) {
607607
auto runtimes2 = *cache_.Lookup({.target = "aarch64-unknown-unknown-fresh2"});
608608

609609
// Get the Unix-like inode of the directories so we can check whether
610-
// subsequent lookups create a new directory.
610+
// subsequent lookups create a new directory. For the stale directory, we
611+
// don't just get the inode, we also create an open directory to it. This
612+
// should allow it to be replaced, but prevent the directory's inode from
613+
// being reused.
611614
auto runtimes1_inode = runtimes1.base_dir().Stat()->unix_inode();
615+
auto runtimes2_inode = runtimes2.base_dir().Stat()->unix_inode();
616+
Filesystem::Dir stale_dir =
617+
*Filesystem::Cwd().OpenDir(stale_runtimes[0].base_path());
612618
auto stale_runtimes_0_inode =
613619
stale_runtimes[0].base_dir().Stat()->unix_inode();
614-
auto runtimes2_inode = runtimes2.base_dir().Stat()->unix_inode();
620+
// Confirm that our extra open directory points to the some entity as the
621+
// runtimes has open.
622+
ASSERT_THAT(stale_dir.Stat()->unix_inode(), Eq(stale_runtimes_0_inode));
615623

616624
// Now adjust their age backwards in time by two years to make them very, very
617625
// stale.
@@ -639,15 +647,24 @@ TEST_F(RuntimesCacheTest, LookupWithManyStaleRuntimes) {
639647
EXPECT_THAT(runtimes1.base_dir().Stat()->unix_inode(), Eq(runtimes1_inode));
640648
EXPECT_THAT(runtimes2.base_dir().Stat()->unix_inode(), Eq(runtimes2_inode));
641649

642-
// One of the stale runtimes should be freshly created though.
650+
// One of the stale runtimes should be freshly created though. Note that this
651+
// is only reliable because `stale_runtimes_0_inode` remains in use with our
652+
// open `stale_dir` above. Without that, the inode could be reused and despite
653+
// being freshly created, the directory would have the same inode.
643654
EXPECT_THAT(stale_runtimes_0.base_dir().Stat()->unix_inode(),
644655
Ne(stale_runtimes_0_inode));
645656
}
646657

647658
TEST_F(RuntimesCacheTest, LookupWithTooManyRuntimes) {
648659
auto runtimes1 = *cache_.Lookup({.target = "aarch64-unknown-unknown-fresh1"});
649660
auto runtimes2 = *cache_.Lookup({.target = "aarch64-unknown-unknown-fresh2"});
650-
int n = RuntimesTestPeer::CacheMaxNumEntries();
661+
// Compute the number of runtimes to fill up the cache as the max, minus the
662+
// two created above. Note that it is important to not _overflow_ the cache
663+
// here: because we are holding all of these runtimes open, they all have
664+
// their lock files locked. This will result in an attempt to prune the cache
665+
// that tries (and fails) to acquire a lock on all N runtimes which can be
666+
// very slow.
667+
int n = RuntimesTestPeer::CacheMaxNumEntries() - 2;
651668
auto stale_runtimes = LookupNRuntimes(n);
652669

653670
// Compute stale target strings.
@@ -662,10 +679,20 @@ TEST_F(RuntimesCacheTest, LookupWithTooManyRuntimes) {
662679
auto runtimes2_inode = runtimes2.base_dir().Stat()->unix_inode();
663680
auto stale_runtimes_0_inode =
664681
stale_runtimes[0].base_dir().Stat()->unix_inode();
665-
auto stale_runtimes_n_inode =
666-
stale_runtimes.back().base_dir().Stat()->unix_inode();
667682
auto stale_runtimes_n_1_inode =
668-
std::prev(stale_runtimes.end(), 2)->base_dir().Stat()->unix_inode();
683+
stale_runtimes.back().base_dir().Stat()->unix_inode();
684+
685+
// For the n-2 stale runtime, get the inode but also open the underlying
686+
// directory so that the inode can't be reused even after pruning the runtime
687+
// below.
688+
Runtimes& stale_runtimes_n_2_orig = *std::prev(stale_runtimes.end(), 2);
689+
auto stale_runtimes_n_2_inode =
690+
stale_runtimes_n_2_orig.base_dir().Stat()->unix_inode();
691+
Filesystem::Dir stale_dir =
692+
*Filesystem::Cwd().OpenDir(stale_runtimes_n_2_orig.base_path());
693+
// Confirm that our extra open directory points to the some entity as the
694+
// runtimes has open.
695+
ASSERT_THAT(stale_dir.Stat()->unix_inode(), Eq(stale_runtimes_n_2_inode));
669696

670697
// Now manually set all the timestamps. We do this manually to avoid any
671698
// reliance on the clock behavior or the amount of time passing between lookup
@@ -695,8 +722,9 @@ TEST_F(RuntimesCacheTest, LookupWithTooManyRuntimes) {
695722
runtimes2 = *cache_.Lookup({.target = "aarch64-unknown-unknown-fresh2"});
696723
auto stale_runtimes_0 =
697724
*cache_.Lookup({.target = "aarch64-unknown-unknown0"});
698-
auto stale_runtimes_n = *cache_.Lookup({.target = stale_runtimes_n_1_target});
699725
auto stale_runtimes_n_1 =
726+
*cache_.Lookup({.target = stale_runtimes_n_1_target});
727+
auto stale_runtimes_n_2 =
700728
*cache_.Lookup({.target = stale_runtimes_n_2_target});
701729

702730
// The fresh runtimes should be preserved.
@@ -706,12 +734,12 @@ TEST_F(RuntimesCacheTest, LookupWithTooManyRuntimes) {
706734
Eq(stale_runtimes_0_inode));
707735

708736
// THe last stale runtime should have been locked and so should remain.
709-
EXPECT_THAT(stale_runtimes_n.base_dir().Stat()->unix_inode(),
710-
Eq(stale_runtimes_n_inode));
737+
EXPECT_THAT(stale_runtimes_n_1.base_dir().Stat()->unix_inode(),
738+
Eq(stale_runtimes_n_1_inode));
711739

712740
// The next to last should have been pruned and re-created though.
713-
EXPECT_THAT(stale_runtimes_n.base_dir().Stat()->unix_inode(),
714-
Ne(stale_runtimes_n_1_inode));
741+
EXPECT_THAT(stale_runtimes_n_2.base_dir().Stat()->unix_inode(),
742+
Ne(stale_runtimes_n_2_inode));
715743
}
716744

717745
} // namespace

0 commit comments

Comments
 (0)