Skip to content

Commit 1f8119a

Browse files
committed
fix(deps): improve discrepancy detection accuracy
- Filter out base/runtime images (intentionally different per component)
- Preserve full Go module paths to avoid false positives (e.g., emperror.dev/errors vs github.com/pkg/errors)
- Skip Go indirect dependencies from discrepancy checks
- Add category parameter to normalize_dependency_name()

Reduces false positives from 18 to 6 real discrepancies.

Signed-off-by: Dan Gil <[email protected]>
1 parent bfed94c commit 1f8119a

File tree

1 file changed

+48
-14
lines changed

1 file changed

+48
-14
lines changed

.github/workflows/extract_dependency_versions.py

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -432,19 +432,12 @@ def _format_dependency_name(self, name: str, category: str, version: str) -> str
432432
formatted_base = self._format_package_name(base_name, category)
433433
return f"{self._strip_version_suffixes(formatted_base)} {extras}"
434434

435-
# Handle Go modules
435+
# Handle Go modules - keep full path for uniqueness
436436
if category == "Go Module":
437-
# Extract the last meaningful part of the module path
438-
parts = name.split("/")
439-
if len(parts) > 1:
440-
# Get the package name (last part)
441-
pkg_name = parts[-1]
442-
# If it's a versioned path, use the second-to-last
443-
if pkg_name.startswith("v") and pkg_name[1:].replace(".", "").isdigit():
444-
pkg_name = parts[-2] if len(parts) > 2 else pkg_name
445-
return self._strip_version_suffixes(
446-
self._format_package_name(pkg_name, category)
447-
)
437+
# For Go modules, we want to keep the full import path to avoid ambiguity
438+
# Different packages may have the same last component but different domains
439+
# e.g., "emperror.dev/errors" vs "github.com/pkg/errors"
440+
return name # Return as-is, no formatting needed
448441

449442
# Handle Docker base images
450443
if category == "Base Image":
@@ -1833,7 +1826,7 @@ def write_unversioned_report(self, output_path: Path) -> None:
18331826

18341827
print(f"✓ Written {len(unversioned)} unversioned dependencies to {output_path}")
18351828

1836-
def normalize_dependency_name(self, name: str) -> str:
1829+
def normalize_dependency_name(self, name: str, category: str = "") -> str:
18371830
"""
18381831
Normalize dependency names to detect the same dependency referred to differently.
18391832
@@ -1844,7 +1837,15 @@ def normalize_dependency_name(self, name: str) -> str:
18441837
18451838
Note: This is intentionally conservative to avoid false positives.
18461839
Only normalizes well-known dependencies with common naming variations.
1840+
1841+
For Go modules, we don't normalize at all since the full import path
1842+
is significant (e.g., github.com/pkg/errors vs k8s.io/errors are different).
18471843
"""
1844+
# For Go dependencies, use the full name without normalization
1845+
# Go module paths are unique identifiers and should not be normalized
1846+
if category == "Go Dependency" or category == "Go Module":
1847+
return name.strip()
1848+
18481849
# Convert to lowercase for comparison
18491850
name_lower = name.lower()
18501851

@@ -1879,16 +1880,49 @@ def detect_version_discrepancies(self) -> List[Dict[str, any]]:
18791880
List of dictionaries containing discrepancy information:
18801881
- dependency_name: The normalized dependency name
18811882
- instances: List of {version, source_file, component} for each occurrence
1883+
1884+
Note: This intentionally filters out some categories to reduce false positives:
1885+
- Base/Runtime Images (intentionally different per component)
1886+
- Go indirect dependencies (transitive, expected to vary)
18821887
"""
1888+
# Categories to skip (expected to vary by component)
1889+
skip_categories = {
1890+
"Base Image",
1891+
"Runtime Image",
1892+
"Docker Compose Service", # Services use different base images
1893+
}
1894+
1895+
# Dependency names to skip (even if they have different categories)
1896+
skip_names = {
1897+
"base image",
1898+
"runtime image",
1899+
"base", # Often refers to base images
1900+
}
1901+
18831902
# Group dependencies by normalized name
18841903
dependency_groups = {}
18851904

18861905
for dep in self.dependencies:
1887-
normalized_name = self.normalize_dependency_name(dep["Dependency Name"])
1906+
category = dep["Category"]
1907+
normalized_name = self.normalize_dependency_name(
1908+
dep["Dependency Name"], category
1909+
)
18881910

18891911
# Skip unversioned dependencies for discrepancy detection
18901912
if dep["Version"] in ["unspecified", "N/A", "", "latest"]:
18911913
continue
1914+
1915+
# Skip categories that are expected to vary
1916+
if category in skip_categories:
1917+
continue
1918+
1919+
# Skip dependency names that are expected to vary
1920+
if normalized_name in skip_names:
1921+
continue
1922+
1923+
# Skip Go indirect dependencies (transitive dependencies)
1924+
if category == "Go Dependency" and "indirect" in dep.get("Notes", "").lower():
1925+
continue
18921926

18931927
if normalized_name not in dependency_groups:
18941928
dependency_groups[normalized_name] = []

0 commit comments

Comments
 (0)