Skip to content

Commit 44c9d08

Browse files
authored
ci: add GitHub action for ovh image registry cleanup (#53)
1 parent f01e781 commit 44c9d08

File tree

2 files changed

+304
-0
lines changed

2 files changed

+304
-0
lines changed
Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Harbor Registry Cleanup Script
4+
5+
This script cleans up old container images from a Harbor registry based on tag patterns
6+
and retention policies.
7+
8+
Tag Patterns:
9+
- Version tags (v1.0.0, latest, main): Never deleted
10+
- SHA tags (sha-abc123): Deleted after SHA_RETENTION_DAYS
11+
- PR tags (pr-123): Deleted after PR_RETENTION_DAYS
12+
"""
13+
14+
import os
15+
import re
16+
import sys
17+
from datetime import UTC, datetime
18+
from typing import Any
19+
20+
import requests
21+
from dateutil import parser as date_parser # type: ignore[import-untyped]
22+
23+
# Tag patterns
# SHA tags: "sha-" followed by a lowercase abbreviated commit hash.
SHA_PATTERN = re.compile(r"^sha-[a-f0-9]+$")
# PR tags: "pr-" followed by the pull-request number.
PR_PATTERN = re.compile(r"^pr-\d+$")
# Protected tags: optional "v" + two- or three-part version number (with an
# optional "-suffix" pre-release part), or the literal "latest" / "main".
VERSION_PATTERN = re.compile(r"^v?\d+\.\d+(\.\d+)?(-.*)?$|^latest$|^main$")
27+
28+
29+
def get_api_url(harbor_url: str, path: str) -> str:
    """Construct a full Harbor v2.0 API URL.

    Args:
        harbor_url: Registry host, with or without an ``http(s)://`` scheme,
            with or without a trailing slash.
        path: API path starting with ``/`` (e.g. ``/projects``).

    Returns:
        Absolute URL of the form ``<scheme>://<host>/api/v2.0<path>``.
    """
    # Handle both with and without https://
    base = harbor_url.rstrip("/")
    # Check for a real scheme prefix: a bare startswith("http") would
    # wrongly match scheme-less hosts such as "httpd.example.com".
    if not base.startswith(("http://", "https://")):
        base = f"https://{base}"
    return f"{base}/api/v2.0{path}"
36+
37+
38+
def get_artifacts(
    harbor_url: str, username: str, password: str, project_name: str, repository_name: str
) -> list[Any]:
    """Return every artifact in the repository, following API pagination.

    Pages of up to 100 artifacts (tags included via ``with_tag``) are
    fetched until the API returns an empty page.
    """
    endpoint = get_api_url(
        harbor_url, f"/projects/{project_name}/repositories/{repository_name}/artifacts"
    )
    auth = (username, password)

    collected: list[Any] = []
    page = 1
    while True:
        query: dict[str, Any] = {"page_size": 100, "with_tag": "true", "page": page}
        resp = requests.get(endpoint, params=query, auth=auth, timeout=30)
        resp.raise_for_status()

        batch = resp.json()
        if not batch:
            return collected
        collected.extend(batch)
        page += 1
63+
64+
65+
def delete_artifact(
    harbor_url: str,
    username: str,
    password: str,
    project_name: str,
    repository_name: str,
    digest: str,
) -> None:
    """Delete an artifact (identified by digest) from the repository."""
    endpoint = get_api_url(
        harbor_url, f"/projects/{project_name}/repositories/{repository_name}/artifacts/{digest}"
    )
    resp = requests.delete(endpoint, auth=(username, password), timeout=30)
    resp.raise_for_status()
79+
80+
81+
def delete_tag(
    harbor_url: str,
    username: str,
    password: str,
    project_name: str,
    repository_name: str,
    reference: str,
    tag_name: str,
) -> None:
    """Remove a single tag from an artifact, leaving the artifact itself."""
    endpoint = get_api_url(
        harbor_url,
        f"/projects/{project_name}/repositories/{repository_name}/artifacts/{reference}/tags/{tag_name}",
    )
    resp = requests.delete(endpoint, auth=(username, password), timeout=30)
    resp.raise_for_status()
97+
98+
99+
def should_delete_tag(
    tag_name: str, push_time: str | datetime, sha_retention_days: int, pr_retention_days: int
) -> tuple[bool, str]:
    """Apply the retention policy to a single tag.

    Returns a ``(delete, reason)`` pair. Only SHA/PR tags older than their
    respective retention windows are marked for deletion; version tags and
    unrecognised tags are always kept.
    """
    current_time = datetime.now(UTC)

    # Accept either an ISO timestamp string or a ready-made datetime.
    if isinstance(push_time, str):
        pushed_at = date_parser.parse(push_time)
    else:
        pushed_at = push_time

    # Treat naive timestamps as UTC so the subtraction below is valid.
    if pushed_at.tzinfo is None:
        pushed_at = pushed_at.replace(tzinfo=UTC)

    age_days = (current_time - pushed_at).days

    # Semver-style tags plus "latest"/"main" are protected unconditionally.
    if VERSION_PATTERN.match(tag_name):
        return False, "version tag (protected)"

    # SHA tags expire after sha_retention_days.
    if SHA_PATTERN.match(tag_name):
        if age_days > sha_retention_days:
            return True, f"SHA tag older than {sha_retention_days} days ({age_days} days old)"
        return False, f"SHA tag within retention ({age_days} days old)"

    # PR tags expire after pr_retention_days.
    if PR_PATTERN.match(tag_name):
        if age_days > pr_retention_days:
            return True, f"PR tag older than {pr_retention_days} days ({age_days} days old)"
        return False, f"PR tag within retention ({age_days} days old)"

    # Anything unrecognised is kept — deletion is the dangerous direction.
    return False, "unknown pattern (keeping as precaution)"
132+
133+
134+
def main() -> int:
    """Run the Harbor cleanup end to end.

    Reads configuration from environment variables (HARBOR_URL,
    HARBOR_USERNAME, HARBOR_PASSWORD, PROJECT_NAME, REPOSITORY_NAME,
    SHA_RETENTION_DAYS, PR_RETENTION_DAYS, DRY_RUN), lists all artifacts,
    classifies every tag via should_delete_tag(), and deletes the stale
    tags unless DRY_RUN is enabled.

    Returns:
        0 on success or dry run, 1 if listing artifacts failed or any
        individual tag deletion raised a request error.
    """
    # Configuration from environment variables. The credentials and names
    # are required (KeyError if missing); DRY_RUN defaults to "true" so an
    # unconfigured run is non-destructive.
    harbor_url = os.environ["HARBOR_URL"]
    username = os.environ["HARBOR_USERNAME"]
    password = os.environ["HARBOR_PASSWORD"]
    project_name = os.environ["PROJECT_NAME"]
    repository_name = os.environ["REPOSITORY_NAME"]
    sha_retention_days = int(os.environ["SHA_RETENTION_DAYS"])
    pr_retention_days = int(os.environ["PR_RETENTION_DAYS"])
    dry_run = os.environ.get("DRY_RUN", "true").lower() == "true"

    print("=" * 60)
    print("Harbor Registry Cleanup")
    print("=" * 60)
    print(f"Harbor URL: {harbor_url}")
    print(f"Project: {project_name}")
    print(f"Repository: {repository_name}")
    print(f"SHA retention: {sha_retention_days} days")
    print(f"PR retention: {pr_retention_days} days")
    print(f"Dry run: {dry_run}")
    print("=" * 60)

    try:
        artifacts = get_artifacts(harbor_url, username, password, project_name, repository_name)
        print(f"\nFound {len(artifacts)} artifact(s)\n")
    except requests.exceptions.RequestException as e:
        # Without the artifact list nothing else can run; fail the job.
        print(f"Error fetching artifacts: {e}")
        return 1

    tags_to_delete = []  # (digest, tag_name) pairs scheduled for deletion
    tags_to_keep = []
    artifacts_to_check_deletion = []  # digests that would end up untagged

    for artifact in artifacts:
        digest = artifact.get("digest", "unknown")
        push_time = artifact.get("push_time")
        # "tags" can be null in the API response; normalise to a list.
        tags = artifact.get("tags") or []

        print(f"\nArtifact: {digest[:20]}...")
        print(f" Push time: {push_time}")
        print(f" Tags: {[t.get('name') for t in tags]}")

        artifact_tags_to_delete = []
        artifact_tags_to_keep = []

        for tag in tags:
            tag_name = tag.get("name")
            # Prefer the tag's own push time; fall back to the artifact's.
            tag_push_time = tag.get("push_time") or push_time

            delete, reason = should_delete_tag(
                tag_name, tag_push_time, sha_retention_days, pr_retention_days
            )

            if delete:
                print(f" ❌ {tag_name}: DELETE - {reason}")
                artifact_tags_to_delete.append(tag_name)
                tags_to_delete.append((digest, tag_name))
            else:
                print(f" ✅ {tag_name}: KEEP - {reason}")
                artifact_tags_to_keep.append(tag_name)
                tags_to_keep.append(tag_name)

        # If all tags are to be deleted, mark artifact for potential deletion
        if artifact_tags_to_delete and not artifact_tags_to_keep:
            artifacts_to_check_deletion.append(digest)

    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f"Tags to delete: {len(tags_to_delete)}")
    print(f"Tags to keep: {len(tags_to_keep)}")
    print(f"Artifacts that may become untagged: {len(artifacts_to_check_deletion)}")

    if not tags_to_delete:
        print("\nNo tags to delete. Exiting.")
        return 0

    if dry_run:
        # Dry run: report what would happen, change nothing.
        print("\n🔍 DRY RUN MODE - No changes made")
        print("\nTags that would be deleted:")
        for digest, tag_name in tags_to_delete:
            print(f" - {tag_name} (artifact: {digest[:20]}...)")
        return 0

    # Perform deletions
    print("\n🗑️ PERFORMING DELETIONS...")
    deleted_count = 0
    error_count = 0

    for digest, tag_name in tags_to_delete:
        try:
            print(f" Deleting tag: {tag_name}...", end=" ")
            delete_tag(
                harbor_url, username, password, project_name, repository_name, digest, tag_name
            )
            print("✓")
            deleted_count += 1
        except requests.exceptions.RequestException as e:
            # Keep going: one failed deletion should not abort the batch.
            print(f"✗ Error: {e}")
            error_count += 1

    print("\n" + "=" * 60)
    print(f"Deleted: {deleted_count} tags")
    print(f"Errors: {error_count}")
    print("=" * 60)

    # Note: Untagged artifacts can be cleaned up by Harbor's garbage collection
    if artifacts_to_check_deletion:
        print("\n⚠️ Some artifacts are now untagged.")
        print("Run Harbor garbage collection to reclaim storage space.")

    return 0 if error_count == 0 else 1
247+
248+
249+
if __name__ == "__main__":
    # Propagate main()'s exit code to the shell / CI runner.
    sys.exit(main())
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
name: Harbor Registry Cleanup

on:
  # schedule:
  #   # Run daily at 2:00 AM UTC
  #   - cron: '0 2 * * *'
  workflow_dispatch:
    inputs:
      dry_run:
        description: 'Dry run mode (no actual deletions)'
        required: false
        default: true
        type: boolean

env:
  HARBOR_URL: ${{ secrets.OVH_HARBOR_REGISTRY }}
  HARBOR_USERNAME: ${{ secrets.OVH_HARBOR_USERNAME }}
  HARBOR_PASSWORD: ${{ secrets.OVH_HARBOR_PASSWORD }}
  PROJECT_NAME: eopf-sentinel-zarr-explorer
  REPOSITORY_NAME: data-pipeline
  # Retention periods in days
  # NOTE(review): 0 and 1 look like temporary test values; the trailing
  # "#7" / "#90" appear to be the intended production settings — confirm
  # before enabling the schedule trigger above.
  SHA_RETENTION_DAYS: 0 #7
  PR_RETENTION_DAYS: 1 #90

jobs:
  cleanup:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v5

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install requests python-dateutil

      - name: Run cleanup script
        env:
          # NOTE(review): when inputs are absent (e.g. a schedule trigger),
          # this falls back to 'false', i.e. real deletions — confirm that
          # is intended given the dispatch input defaults to true.
          DRY_RUN: ${{ github.event.inputs.dry_run || 'false' }}
        run: |
          python3 .github/workflows/cleanup_harbor_registry.py

      - name: Summary
        run: |
          echo "### Harbor Cleanup Complete 🧹" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "**Configuration:**" >> $GITHUB_STEP_SUMMARY
          echo "- SHA tags retention: ${{ env.SHA_RETENTION_DAYS }} days" >> $GITHUB_STEP_SUMMARY
          echo "- PR tags retention: ${{ env.PR_RETENTION_DAYS }} days" >> $GITHUB_STEP_SUMMARY
          echo "- Version tags: Protected (never deleted)" >> $GITHUB_STEP_SUMMARY

0 commit comments

Comments
 (0)