Skip to content

Commit ba316c7

Browse files
committed
fix(deps): skip sub-dependency ARGs, normalize pinning, and document known discrepancies
**Fixes for extraction accuracy:** - Skip ARGs for sub-dependencies (e.g., NIXL_UCX_REF is for UCX, not NIXL) - Fixes false positive showing NIXL v1.19.0 (was actually UCX) - Exclude PyTorch Triton from PyTorch normalization - PyTorch Triton (Triton compiler) is a separate package, not PyTorch - Was causing false positive showing 3 PyTorch versions instead of 2 **Version comparison improvements:** - Add version normalization to ignore pinning style differences - '0.6.0' vs '<=0.6.0' are now treated as the same - '==32.0.1' vs '>=32.0.1,<33.0.0' are now treated as the same - Only flag discrepancies when actual version numbers differ **Documentation:** - Add known_version_discrepancies section to config - Document intentional PyTorch version difference: - TensorRT-LLM: 2.8.0 (from NVIDIA container) - vLLM: 2.7.1+cu128 (ARM64 wheel compatibility) **Results:** - Reduced from 6 to 4 real discrepancies - Eliminated false positives from: - Sub-dependency ARGs (UCX) - Pinning style differences (NIXL, Kubernetes) - Package misidentification (PyTorch Triton) Signed-off-by: Dan Gil <[email protected]>
1 parent 1f8119a commit ba316c7

File tree

2 files changed

+66
-5
lines changed

2 files changed

+66
-5
lines changed

.github/workflows/extract_dependency_versions.py

Lines changed: 58 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -774,6 +774,15 @@ def extract_dockerfile_args(self, dockerfile_path: Path, component: str) -> None
774774
# Extract version-related ARGs
775775
version_keywords = ["VERSION", "REF", "TAG", "_VER"]
776776
if any(kw in key for kw in version_keywords):
777+
# Skip sub-dependency ARGs that are clearly for related projects
778+
# e.g., NIXL_UCX_REF is for UCX (a dependency of NIXL), not NIXL itself
779+
skip_subdeps = [
780+
"_UCX_", # UCX is a separate dependency
781+
"_NCCL_", # NCCL is a separate dependency
782+
]
783+
if any(subdep in key for subdep in skip_subdeps):
784+
continue
785+
777786
category = (
778787
"System"
779788
if key.startswith(
@@ -1849,6 +1858,12 @@ def normalize_dependency_name(self, name: str, category: str = "") -> str:
18491858
# Convert to lowercase for comparison
18501859
name_lower = name.lower()
18511860

1861+
# Special handling for PyTorch-related packages that should NOT be normalized to pytorch
1862+
# e.g., "pytorch triton" is the Triton compiler, not PyTorch itself
1863+
pytorch_exceptions = ["pytorch triton", "pytorch_triton", "triton"]
1864+
if any(exc in name_lower for exc in pytorch_exceptions):
1865+
return name_lower # Don't normalize these
1866+
18521867
# Common normalization rules (ordered by specificity to avoid false matches)
18531868
normalizations = {
18541869
"tensorrt-llm": "tensorrt-llm",
@@ -1872,6 +1887,35 @@ def normalize_dependency_name(self, name: str, category: str = "") -> str:
18721887
# This avoids false positives from overly broad matching
18731888
return name_lower.strip()
18741889

1890+
def _normalize_version_for_comparison(self, version: str) -> str:
1891+
"""
1892+
Normalize version string for comparison by removing pinning operators.
1893+
1894+
This allows us to detect true version differences while ignoring
1895+
differences in how versions are pinned.
1896+
1897+
Examples:
1898+
- "==0.115.12" -> "0.115.12"
1899+
- ">=0.115.0" -> "0.115.0"
1900+
- ">=32.0.1,<33.0.0" -> "32.0.1"
1901+
- "<=0.6.0" -> "0.6.0"
1902+
- "2.7.1+cu128" -> "2.7.1+cu128" (unchanged)
1903+
"""
1904+
import re
1905+
1906+
# Remove common Python version operators
1907+
# This regex captures: ==, >=, <=, ~=, !=, <, >, and extracts the version
1908+
version = version.strip()
1909+
1910+
# Handle compound version specs like ">=32.0.1,<33.0.0" - take the first version
1911+
if "," in version:
1912+
version = version.split(",")[0].strip()
1913+
1914+
# Remove operators
1915+
version = re.sub(r"^(==|>=|<=|~=|!=|<|>)\s*", "", version)
1916+
1917+
return version.strip()
1918+
18751919
def detect_version_discrepancies(self) -> List[Dict[str, any]]:
18761920
"""
18771921
Detect dependencies that appear multiple times with different versions.
@@ -1884,6 +1928,7 @@ def detect_version_discrepancies(self) -> List[Dict[str, any]]:
18841928
Note: This intentionally filters out some categories to reduce false positives:
18851929
- Base/Runtime Images (intentionally different per component)
18861930
- Go indirect dependencies (transitive, expected to vary)
1931+
- Pinning style differences (e.g., "0.6.0" vs "<=0.6.0" are considered the same)
18871932
"""
18881933
# Categories to skip (expected to vary by component)
18891934
skip_categories = {
@@ -1939,18 +1984,26 @@ def detect_version_discrepancies(self) -> List[Dict[str, any]]:
19391984
)
19401985

19411986
# Detect discrepancies: same normalized name with different versions
1987+
# Use normalized versions to ignore pinning style differences
19421988
discrepancies = []
19431989

19441990
for normalized_name, instances in dependency_groups.items():
1945-
# Get unique versions
1946-
versions = set(inst["version"] for inst in instances)
1991+
# Get unique normalized versions (ignoring pinning operators)
1992+
normalized_versions = set(
1993+
self._normalize_version_for_comparison(inst["version"])
1994+
for inst in instances
1995+
)
19471996

1948-
# If multiple versions exist, it's a discrepancy
1949-
if len(versions) > 1:
1997+
# If multiple normalized versions exist, it's a real discrepancy
1998+
if len(normalized_versions) > 1:
1999+
# Get the original versions for display
2000+
original_versions = sorted(set(inst["version"] for inst in instances))
2001+
19502002
discrepancies.append(
19512003
{
19522004
"normalized_name": normalized_name,
1953-
"versions": sorted(versions),
2005+
"versions": original_versions,
2006+
"normalized_versions": sorted(normalized_versions),
19542007
"instances": instances,
19552008
"is_critical": any(inst["critical"] for inst in instances),
19562009
}

.github/workflows/extract_dependency_versions_config.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,14 @@ baseline:
1010
# The script automatically uses the previous extraction's count as the baseline
1111
dependency_count: 251
1212

13+
known_version_discrepancies:
14+
# Document intentional version discrepancies to reduce noise
15+
# These will still be reported but marked as "known" with the provided reason
16+
- dependency: "PyTorch"
17+
reason: "TensorRT-LLM uses NVIDIA container (2.8.0), vLLM uses 2.7.1+cu128 (ARM64 wheel compatibility)"
18+
- dependency: "torchvision"
19+
reason: "Matches corresponding PyTorch versions across components"
20+
1321
critical_dependencies:
1422
# List of critical dependencies (case-insensitive matching)
1523
# Supports exact names or partial matches (e.g., "CUDA" matches "NVIDIA CUDA")

0 commit comments

Comments
 (0)