
Commit 2e7673b

fix: address CodeRabbit and reviewer feedback
- Remove JSON support, keep YAML only (per rmccorm4 review)
- Fix package source URL detection (check dep_name not source_file)
- Fix FROM parsing to capture single-stage and untagged images
- Fix unversioned report filename mismatch in workflow
- Fix artifact upload pattern to match actual filenames
- Fix docs link to point to existing README
- Fix duplicate H2 heading in README (rename to Links)
- Make extract_dependency_versions.py executable
- Fix all pre-commit issues (ruff, isort, black)

Signed-off-by: Dan Gil <[email protected]>
1 parent 2dad57c

3 files changed: +27 -46 lines


.github/reports/README.md

Lines changed: 1 addition & 1 deletion
@@ -122,7 +122,7 @@ python3 .github/workflows/extract_dependency_versions.py --validate
 python3 .github/workflows/extract_dependency_versions.py --help
 ```
 
-## Files
+## Links
 
 - 🤖 [Extraction Script](../workflows/extract_dependency_versions.py)
 - ⚙️ [Configuration](../workflows/extract_dependency_versions_config.yaml)

.github/workflows/dependency-extraction-nightly.yml

Lines changed: 5 additions & 5 deletions
@@ -57,9 +57,9 @@ jobs:
           mkdir -p .github/reports
           cp .github/reports/dependency_versions_${TIMESTAMP}.csv .github/reports/dependency_versions_latest.csv
 
-          # Copy unversioned report if it exists
-          if [ -f "unversioned_dependencies_${TIMESTAMP}.csv" ]; then
-            cp unversioned_dependencies_${TIMESTAMP}.csv .github/reports/unversioned_dependencies_latest.csv
+          # Copy unversioned report if it exists (matches extractor's naming)
+          if [ -f ".github/reports/dependency_versions_${TIMESTAMP}_unversioned.csv" ]; then
+            cp ".github/reports/dependency_versions_${TIMESTAMP}_unversioned.csv" .github/reports/unversioned_dependencies_latest.csv
           fi
 
           echo "TIMESTAMP=${TIMESTAMP}" >> $GITHUB_ENV
@@ -133,7 +133,7 @@ jobs:
 
           ---
 
-          🔗 **Documentation:** [Dependency Extraction Guide](../docs/dependency_extraction.md)
+          🔗 **Documentation:** [Dependency Reports README](../.github/reports/README.md)
           📦 **Artifacts:** Download timestamped CSVs from workflow run
 
           _Generated by nightly dependency extraction workflow_
@@ -152,7 +152,7 @@ jobs:
           name: dependency-extraction-${{ github.run_number }}
           path: |
             .github/reports/dependency_versions_*.csv
-            .github/reports/unversioned_dependencies_*.csv
+            .github/reports/dependency_versions_*_unversioned.csv
           retention-days: 90
 
       - name: Summary
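
The two workflow changes above are related: the extractor writes `dependency_versions_<TIMESTAMP>_unversioned.csv`, so both the copy step and the artifact glob had to follow that naming. A rough self-check, using Python's `fnmatch` as a stand-in for the upload action's glob matching and a made-up timestamp:

```python
from fnmatch import fnmatch

# Made-up timestamp; the workflow uses its own ${TIMESTAMP} value.
unversioned_report = "dependency_versions_20250101_020000_unversioned.csv"

# Old glob from the workflow: never matched the extractor's output.
print(fnmatch(unversioned_report, "unversioned_dependencies_*.csv"))  # False
# New glob from this commit: matches the extractor's naming.
print(fnmatch(unversioned_report, "dependency_versions_*_unversioned.csv"))  # True
```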

.github/workflows/extract_dependency_versions.py

Lines changed: 21 additions & 40 deletions
@@ -33,12 +33,7 @@
 from pathlib import Path
 from typing import Dict, List, Optional, Set
 
-try:
-    import yaml
-
-    HAS_YAML = True
-except ImportError:
-    HAS_YAML = False
+import yaml
 
 
 class DependencyExtractor:
@@ -97,9 +92,10 @@ def _get_package_source_url(
         """Generate source URL for package/dependency based on type and name."""
         dep_lower = dep_name.lower()
 
-        # Docker images from NVIDIA NGC Catalog
-        if category == "Base Image" or category == "Docker Compose Service":
-            if "nvcr.io" in source_file or "nvidia" in dep_lower:
+        # Docker images
+        if category in ("Base Image", "Docker Compose Service"):
+            dep_str = dep_name.lower()
+            if "nvcr.io" in dep_str or "nvidia" in dep_str:
                 # Extract image name for NGC
                 image_slug = dep_name.split("/")[-1].lower()
                 return f"https://catalog.ngc.nvidia.com/orgs/nvidia/containers/{image_slug}"
@@ -127,9 +123,8 @@ def _get_package_source_url(
         if category == "Rust Crate":
             return f"https://crates.io/crates/{dep_name}"
 
-        # Git dependencies already have repo URLs - extract repo URL
-        if "Git" in category and "github.com" in source_file:
-            # Try to extract from notes or return GitHub search
+        # Git dependencies: provide a GitHub search fallback
+        if "Git" in category:
             return f"https://github.com/search?q={dep_name}&type=repositories"
 
         # Framework/System packages
@@ -258,7 +253,7 @@ def load_previous_csv(self, csv_path: Path, csv_type: str = "latest") -> None:
             self.warnings.append(f"Error loading previous {csv_type} CSV: {e}")
 
     def load_config(self, config_path: Optional[Path] = None) -> dict:
-        """Load configuration from YAML or JSON file."""
+        """Load configuration from YAML file."""
         if config_path is None:
             # Default to extract_dependency_versions_config.yaml in same directory as script
             script_dir = Path(__file__).parent
@@ -272,10 +267,7 @@ def load_config(self, config_path: Optional[Path] = None) -> dict:
 
         try:
             with open(config_path) as f:
-                if HAS_YAML and (config_path.suffix in [".yaml", ".yml"]):
-                    config = yaml.safe_load(f)
-                else:
-                    config = json.load(f)
+                config = yaml.safe_load(f)
 
             # Update settings from config
             if "github" in config:
@@ -806,19 +798,20 @@ def extract_dockerfile_args(self, dockerfile_path: Path, component: str) -> None
                     f"ARG: {key}",
                )
 
-            # Extract base images with variable resolution
-            if line.startswith("FROM ") and "AS" in line:
+            # Extract base images (support with/without AS, and no explicit tag)
+            if line.startswith("FROM "):
                parts = line.split()
-                image = parts[1]
-                if ":" in image:
-                    img_name, tag = image.rsplit(":", 1)
-
+                if len(parts) >= 2:
+                    image = parts[1]
+                    if ":" in image:
+                        img_name, tag = image.rsplit(":", 1)
+                    else:
+                        img_name, tag = image, "latest"
                    # Resolve variables in image name and tag
                    img_name = self._resolve_dockerfile_vars(img_name, arg_values)
                    tag = self._resolve_dockerfile_vars(tag, arg_values)
-
-                    # Only add if not just variable names
-                    if not (img_name.startswith("${") or tag.startswith("${")):
+                    # Skip unresolved variable-only entries
+                    if "$" not in img_name and "$" not in tag:
                        self.add_dependency(
                            component,
                            "Base Image",
@@ -1124,14 +1117,7 @@ def extract_docker_compose(self, compose_path: Path, component: str) -> None:
         self.processed_files.add(str(compose_path.relative_to(self.repo_root)))
 
         with open(compose_path) as f:
-            if HAS_YAML:
-                compose_data = yaml.safe_load(f)
-            else:
-                # Skip if no YAML support
-                self.warnings.append(
-                    f"Skipping {compose_path}: PyYAML not available"
-                )
-                return
+            compose_data = yaml.safe_load(f)
 
         services = compose_data.get("services", {})
         for service_name, service_config in services.items():
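
With the `HAS_YAML` fallback gone, compose files are parsed directly with `yaml.safe_load` and PyYAML becomes a hard requirement. A minimal sketch of the services loop on a made-up compose snippet:

```python
import yaml

# Hypothetical docker-compose content for illustration only.
compose_text = """
services:
  api:
    image: nvcr.io/nvidia/tritonserver:24.05-py3
  cache:
    image: redis
  builder:
    build: .
"""

compose_data = yaml.safe_load(compose_text)
for service_name, service_config in compose_data.get("services", {}).items():
    image = service_config.get("image", "")
    print(service_name, "->", image or "(no image tag, built locally)")
```
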
@@ -1173,12 +1159,7 @@ def extract_helm_chart(self, chart_path: Path, component: str) -> None:
         self.processed_files.add(str(chart_path.relative_to(self.repo_root)))
 
         with open(chart_path) as f:
-            if HAS_YAML:
-                chart_data = yaml.safe_load(f)
-            else:
-                # Skip if no YAML support
-                self.warnings.append(f"Skipping {chart_path}: PyYAML not available")
-                return
+            chart_data = yaml.safe_load(f)
 
         # Extract chart version
         if "version" in chart_data:
