Skip to content

Commit 42aabb9

Browse files
authored
fix: topological sort looses nodes when they are in independent clusters (#1913)
1 parent 5c300ba commit 42aabb9

File tree

1 file changed

+160
-28
lines changed

1 file changed

+160
-28
lines changed

crates/rattler_conda_types/src/repo_data/topological_sort.rs

Lines changed: 160 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -167,42 +167,63 @@ fn get_topological_order<T: AsRef<PackageRecord>>(
167167
let mut order = Vec::new();
168168
let mut visited_packages: ahash::HashSet<String> = ahash::HashSet::default();
169169
let mut stack: Vec<_> = roots.into_iter().map(Action::ResolveAndInstall).collect();
170-
while let Some(action) = stack.pop() {
171-
match action {
172-
Action::Install(package_name) => {
173-
order.push(package_name);
174-
}
175-
Action::ResolveAndInstall(package_name) => {
176-
let already_visited = !visited_packages.insert(package_name.clone());
177-
if already_visited {
178-
continue;
179-
}
180170

181-
let mut deps = match &packages.get(package_name.as_str()) {
182-
Some(p) => p
183-
.as_ref()
184-
.depends
185-
.iter()
186-
.map(|d| package_name_from_match_spec(d).to_string())
187-
.collect::<Vec<_>>(),
188-
None => {
189-
// This is a virtual package, so no real package was found for it
171+
loop {
172+
while let Some(action) = stack.pop() {
173+
match action {
174+
Action::Install(package_name) => {
175+
order.push(package_name);
176+
}
177+
Action::ResolveAndInstall(package_name) => {
178+
let already_visited = !visited_packages.insert(package_name.clone());
179+
if already_visited {
190180
continue;
191181
}
192-
};
193182

194-
// Remove the edges that form cycles
195-
deps.retain(|dep| !cycle_breaks.contains(&(package_name.clone(), dep.clone())));
183+
let mut deps = match &packages.get(package_name.as_str()) {
184+
Some(p) => p
185+
.as_ref()
186+
.depends
187+
.iter()
188+
.map(|d| package_name_from_match_spec(d).to_string())
189+
.collect::<Vec<_>>(),
190+
None => {
191+
// This is a virtual package, so no real package was found for it
192+
continue;
193+
}
194+
};
195+
196+
// Remove the edges that form cycles
197+
deps.retain(|dep| !cycle_breaks.contains(&(package_name.clone(), dep.clone())));
196198

197-
// Sorting makes this step deterministic (i.e. the same output is returned, regardless of the
198-
// original order of the input)
199-
deps.sort();
199+
// Sorting makes this step deterministic (i.e. the same output is returned, regardless of the
200+
// original order of the input)
201+
deps.sort();
200202

201-
// Install dependencies, then ourselves (the order is reversed because of the stack)
202-
stack.push(Action::Install(package_name));
203-
stack.extend(deps.into_iter().map(Action::ResolveAndInstall));
203+
// Install dependencies, then ourselves (the order is reversed because of the stack)
204+
stack.push(Action::Install(package_name));
205+
stack.extend(deps.into_iter().map(Action::ResolveAndInstall));
206+
}
204207
}
205208
}
209+
210+
// Check for any unvisited packages (disconnected components).
211+
// This can happen when a strongly connected component (cycle) has no incoming edges
212+
// from outside the component. After breaking cycle edges, no node in the component
213+
// becomes a root, so we need to explicitly add unvisited packages as new roots.
214+
let mut unvisited: Vec<_> = packages
215+
.keys()
216+
.filter(|k| !visited_packages.contains(*k))
217+
.cloned()
218+
.collect();
219+
220+
if unvisited.is_empty() {
221+
break;
222+
}
223+
224+
// Sort for determinism and add as new roots
225+
unvisited.sort();
226+
stack.extend(unvisited.into_iter().map(Action::ResolveAndInstall));
206227
}
207228

208229
// Apply the order we just obtained
@@ -1885,4 +1906,115 @@ mod tests {
18851906
python.extend(pip);
18861907
python
18871908
}
1909+
1910+
/// Test case for disconnected cycle bug
1911+
/// See: <https://github.com/prefix-dev/rattler-build/issues/1967>
1912+
///
1913+
/// The bug occurs when:
1914+
/// 1. There's a root package that doesn't connect to a cycle
1915+
/// 2. The cycle is a strongly connected component with no external incoming edges
1916+
/// 3. After breaking one cycle edge, no node in the cycle becomes a root
1917+
/// because they all still have incoming edges from within the cycle
1918+
fn get_packages_with_disconnected_cycle() -> Vec<RepoDataRecord> {
1919+
let repodata_json = r#"[
1920+
{
1921+
"name": "root-pkg",
1922+
"version": "1.0",
1923+
"build": "0",
1924+
"build_number": 0,
1925+
"subdir": "linux-64",
1926+
"depends": ["isolated-dep"],
1927+
"md5": "d7c89558ba9fa0495403155b64376d81",
1928+
"sha256": "fe51de6107f9edc7aa4f786a70f4a883943bc9d39b3bb7307c04c41410990726",
1929+
"size": 100,
1930+
"fn": "root-pkg-1.0-0.tar.bz2",
1931+
"url": "https://example.com/root-pkg-1.0-0.tar.bz2",
1932+
"channel": "https://example.com/"
1933+
},
1934+
{
1935+
"name": "isolated-dep",
1936+
"version": "1.0",
1937+
"build": "0",
1938+
"build_number": 0,
1939+
"subdir": "linux-64",
1940+
"depends": [],
1941+
"md5": "d7c89558ba9fa0495403155b64376d81",
1942+
"sha256": "fe51de6107f9edc7aa4f786a70f4a883943bc9d39b3bb7307c04c41410990726",
1943+
"size": 100,
1944+
"fn": "isolated-dep-1.0-0.tar.bz2",
1945+
"url": "https://example.com/isolated-dep-1.0-0.tar.bz2",
1946+
"channel": "https://example.com/"
1947+
},
1948+
{
1949+
"name": "cycle-a",
1950+
"version": "1.0",
1951+
"build": "0",
1952+
"build_number": 0,
1953+
"subdir": "linux-64",
1954+
"depends": ["cycle-b"],
1955+
"md5": "d7c89558ba9fa0495403155b64376d81",
1956+
"sha256": "fe51de6107f9edc7aa4f786a70f4a883943bc9d39b3bb7307c04c41410990726",
1957+
"size": 100,
1958+
"fn": "cycle-a-1.0-0.tar.bz2",
1959+
"url": "https://example.com/cycle-a-1.0-0.tar.bz2",
1960+
"channel": "https://example.com/"
1961+
},
1962+
{
1963+
"name": "cycle-b",
1964+
"version": "1.0",
1965+
"build": "0",
1966+
"build_number": 0,
1967+
"subdir": "linux-64",
1968+
"depends": ["cycle-c"],
1969+
"md5": "d7c89558ba9fa0495403155b64376d81",
1970+
"sha256": "fe51de6107f9edc7aa4f786a70f4a883943bc9d39b3bb7307c04c41410990726",
1971+
"size": 100,
1972+
"fn": "cycle-b-1.0-0.tar.bz2",
1973+
"url": "https://example.com/cycle-b-1.0-0.tar.bz2",
1974+
"channel": "https://example.com/"
1975+
},
1976+
{
1977+
"name": "cycle-c",
1978+
"version": "1.0",
1979+
"build": "0",
1980+
"build_number": 0,
1981+
"subdir": "linux-64",
1982+
"depends": ["cycle-a"],
1983+
"md5": "d7c89558ba9fa0495403155b64376d81",
1984+
"sha256": "fe51de6107f9edc7aa4f786a70f4a883943bc9d39b3bb7307c04c41410990726",
1985+
"size": 100,
1986+
"fn": "cycle-c-1.0-0.tar.bz2",
1987+
"url": "https://example.com/cycle-c-1.0-0.tar.bz2",
1988+
"channel": "https://example.com/"
1989+
}
1990+
]"#;
1991+
1992+
serde_json::from_str(repodata_json).unwrap()
1993+
}
1994+
1995+
#[test]
1996+
fn test_topological_sort_disconnected_cycle() {
1997+
// This test reproduces the bug where a disconnected cycle causes packages to be lost
1998+
// during topological sort.
1999+
//
2000+
// Structure:
2001+
// - root-pkg -> isolated-dep (connected component 1)
2002+
// - cycle-a -> cycle-b -> cycle-c -> cycle-a (connected component 2, a cycle)
2003+
//
2004+
// The cycle component has no incoming edges from outside, so after breaking
2005+
// one cycle edge, no node becomes a root and the entire cycle is lost.
2006+
let packages = get_packages_with_disconnected_cycle();
2007+
let sorted_packages = sort_topologically(packages.clone());
2008+
2009+
// The bug causes only 2 packages to be returned instead of 5
2010+
sanity_check_topological_sort(&sorted_packages, &packages);
2011+
2012+
// All 5 packages should be present
2013+
assert_eq!(
2014+
sorted_packages.len(),
2015+
5,
2016+
"Expected 5 packages, got {}. The disconnected cycle was lost!",
2017+
sorted_packages.len()
2018+
);
2019+
}
18882020
}

0 commit comments

Comments
 (0)