Skip to content

Commit 092a89b

Browse files
authored
ci: fix the script so it deletes the entire artifact, not only the tag (#54)
1 parent 44c9d08 commit 092a89b

File tree

1 file changed

+92
-35
lines changed

1 file changed

+92
-35
lines changed

.github/workflows/cleanup_harbor_registry.py

Lines changed: 92 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
- Version tags (v1.0.0, latest, main): Never deleted
1010
- SHA tags (sha-abc123): Deleted after SHA_RETENTION_DAYS
1111
- PR tags (pr-123): Deleted after PR_RETENTION_DAYS
12+
- Untagged images: Deleted after SHA_RETENTION_DAYS
1213
"""
1314

1415
import os
@@ -23,7 +24,7 @@
2324
# Tag patterns
2425
SHA_PATTERN = re.compile(r"^sha-[a-f0-9]+$")
2526
PR_PATTERN = re.compile(r"^pr-\d+$")
26-
VERSION_PATTERN = re.compile(r"^v?\d+\.\d+(\.\d+)?(-.*)?$|^latest$|^main$")
27+
# Protected tags: semver-like versions (optional leading "v", optional patch
# component, optional "-suffix") plus the literal names "latest" and "main".
# No whitespace is allowed inside the pattern: a literal space before the
# "-suffix" group would stop plain versions such as "v1.0.0" from matching.
VERSION_PATTERN = re.compile(r"^v?\d+\.\d+(\.\d+)?(-.*)?$|^latest$|^main$")
2728

2829

2930
def get_api_url(harbor_url: str, path: str) -> str:
@@ -96,20 +97,19 @@ def delete_tag(
9697
response.raise_for_status()
9798

9899

100+
def parse_push_time(push_time: str | datetime) -> datetime:
101+
"""Parse and normalize push time to timezone-aware datetime."""
102+
pushed_at: datetime = date_parser.parse(push_time) if isinstance(push_time, str) else push_time
103+
if pushed_at.tzinfo is None:
104+
pushed_at = pushed_at.replace(tzinfo=UTC)
105+
return pushed_at
106+
107+
99108
def should_delete_tag(
100109
tag_name: str, push_time: str | datetime, sha_retention_days: int, pr_retention_days: int
101110
) -> tuple[bool, str]:
102111
"""Determine if a tag should be deleted based on retention policy."""
103-
now = datetime.now(UTC)
104-
105-
# Parse push time
106-
pushed_at = date_parser.parse(push_time) if isinstance(push_time, str) else push_time
107-
108-
# Ensure timezone aware
109-
if pushed_at.tzinfo is None:
110-
pushed_at = pushed_at.replace(tzinfo=UTC)
111-
112-
age_days = (now - pushed_at).days
112+
age_days = (datetime.now(UTC) - parse_push_time(push_time)).days
113113

114114
# Version tags (semver, latest, main) - NEVER delete
115115
if VERSION_PATTERN.match(tag_name):
@@ -131,6 +131,16 @@ def should_delete_tag(
131131
return False, "unknown pattern (keeping as precaution)"
132132

133133

134+
def should_delete_untagged_artifact(
    push_time: str | datetime, sha_retention_days: int
) -> tuple[bool, str]:
    """Determine if an untagged artifact should be deleted based on retention policy.

    Args:
        push_time: When the artifact was pushed, as a timestamp string or a
            ``datetime``; it is normalized with ``parse_push_time``.
        sha_retention_days: Retention window in days applied to untagged
            artifacts.

    Returns:
        A ``(delete, reason)`` tuple: ``delete`` is True when the artifact is
        older than the retention window, and ``reason`` is a human-readable
        explanation that includes the computed age in days.
    """
    # ``.days`` counts whole days, and the comparison is strict, so an
    # artifact that is exactly ``sha_retention_days`` days old is still kept.
    age_days = (datetime.now(UTC) - parse_push_time(push_time)).days
    if age_days > sha_retention_days:
        return True, f"untagged artifact older than {sha_retention_days} days ({age_days} days old)"
    return False, f"untagged artifact within retention ({age_days} days old)"
142+
143+
134144
def main() -> int:
135145
"""Main cleanup logic."""
136146
# Configuration from environment variables
@@ -151,6 +161,7 @@ def main() -> int:
151161
print(f"Repository: {repository_name}")
152162
print(f"SHA retention: {sha_retention_days} days")
153163
print(f"PR retention: {pr_retention_days} days")
164+
print(f"Untagged retention: {sha_retention_days} days (same as SHA)")
154165
print(f"Dry run: {dry_run}")
155166
print("=" * 60)
156167

@@ -161,9 +172,9 @@ def main() -> int:
161172
print(f"Error fetching artifacts: {e}")
162173
return 1
163174

164-
tags_to_delete = []
165-
tags_to_keep = []
166-
artifacts_to_check_deletion = []
175+
tags_to_delete: list[tuple[str, str]] = []
176+
tags_to_keep: list[str] = []
177+
artifacts_to_delete: list[tuple[str, list[str], str]] = [] # (digest, tag_names, reason)
167178

168179
for artifact in artifacts:
169180
digest = artifact.get("digest", "unknown")
@@ -172,7 +183,17 @@ def main() -> int:
172183

173184
print(f"\nArtifact: {digest[:20]}...")
174185
print(f" Push time: {push_time}")
175-
print(f" Tags: {[t.get('name') for t in tags]}")
186+
print(f" Tags: {[t.get('name') for t in tags] if tags else '(untagged)'}")
187+
188+
# Handle untagged artifacts
189+
if not tags:
190+
delete, reason = should_delete_untagged_artifact(push_time, sha_retention_days)
191+
if delete:
192+
print(f" ❌ UNTAGGED: DELETE - {reason}")
193+
artifacts_to_delete.append((digest, [], "untagged"))
194+
else:
195+
print(f" ✅ UNTAGGED: KEEP - {reason}")
196+
continue
176197

177198
artifact_tags_to_delete = []
178199
artifact_tags_to_keep = []
@@ -194,55 +215,91 @@ def main() -> int:
194215
artifact_tags_to_keep.append(tag_name)
195216
tags_to_keep.append(tag_name)
196217

197-
# If all tags are to be deleted, mark artifact for potential deletion
218+
# If all tags are to be deleted, mark the entire artifact for deletion
198219
if artifact_tags_to_delete and not artifact_tags_to_keep:
199-
artifacts_to_check_deletion.append(digest)
220+
print(" 🗑️ Entire artifact marked for deletion (all tags expired)")
221+
artifacts_to_delete.append((digest, artifact_tags_to_delete, "all_tags_expired"))
200222

201223
print("\n" + "=" * 60)
202224
print("SUMMARY")
203225
print("=" * 60)
204226
print(f"Tags to delete: {len(tags_to_delete)}")
205227
print(f"Tags to keep: {len(tags_to_keep)}")
206-
print(f"Artifacts that may become untagged: {len(artifacts_to_check_deletion)}")
228+
print(f"Artifacts to delete (entire image): {len(artifacts_to_delete)}")
229+
230+
# Count untagged vs tagged artifacts to delete
231+
untagged_count = sum(1 for _, _, reason in artifacts_to_delete if reason == "untagged")
232+
tagged_count = len(artifacts_to_delete) - untagged_count
233+
print(f" - Untagged artifacts: {untagged_count}")
234+
print(f" - Artifacts with all tags expired: {tagged_count}")
207235

208-
if not tags_to_delete:
209-
print("\nNo tags to delete. Exiting.")
236+
if not tags_to_delete and not artifacts_to_delete:
237+
print("\nNo tags or artifacts to delete. Exiting.")
210238
return 0
211239

212240
if dry_run:
213241
print("\n🔍 DRY RUN MODE - No changes made")
214-
print("\nTags that would be deleted:")
215-
for digest, tag_name in tags_to_delete:
216-
print(f" - {tag_name} (artifact: {digest[:20]}...)")
242+
if artifacts_to_delete:
243+
print("\nArtifacts (entire images) that would be deleted:")
244+
for digest, tag_names, reason in artifacts_to_delete:
245+
if reason == "untagged":
246+
print(f" - {digest[:20]}... (untagged)")
247+
else:
248+
print(f" - {digest[:20]}... (tags: {', '.join(tag_names)})")
249+
250+
# Show tags that would be deleted individually (where artifact has other tags to keep)
251+
artifact_digests_to_delete = {digest for digest, _, _ in artifacts_to_delete}
252+
individual_tags = [(d, t) for d, t in tags_to_delete if d not in artifact_digests_to_delete]
253+
if individual_tags:
254+
print("\nTags that would be deleted (artifact kept due to other tags):")
255+
for digest, tag_name in individual_tags:
256+
print(f" - {tag_name} (artifact: {digest[:20]}...)")
217257
return 0
218258

219259
# Perform deletions
220260
print("\n🗑️ PERFORMING DELETIONS...")
221-
deleted_count = 0
261+
deleted_artifacts_count = 0
262+
deleted_tags_count = 0
222263
error_count = 0
223264

224-
for digest, tag_name in tags_to_delete:
265+
# First, delete entire artifacts where all tags are expired or untagged
266+
artifact_digests_to_delete = {digest for digest, _, _ in artifacts_to_delete}
267+
268+
for digest, tag_names, reason in artifacts_to_delete:
225269
try:
226-
print(f" Deleting tag: {tag_name}...", end=" ")
227-
delete_tag(
228-
harbor_url, username, password, project_name, repository_name, digest, tag_name
270+
label = (
271+
"untagged artifact"
272+
if reason == "untagged"
273+
else f"artifact (tags: {', '.join(tag_names)})"
229274
)
275+
print(f" Deleting {label}: {digest[:20]}...", end=" ")
276+
delete_artifact(harbor_url, username, password, project_name, repository_name, digest)
230277
print("✓")
231-
deleted_count += 1
278+
deleted_artifacts_count += 1
232279
except requests.exceptions.RequestException as e:
233280
print(f"✗ Error: {e}")
234281
error_count += 1
235282

283+
# Then, delete individual tags where the artifact has other tags to keep
284+
for digest, tag_name in tags_to_delete:
285+
if digest not in artifact_digests_to_delete:
286+
try:
287+
print(f" Deleting tag: {tag_name}...", end=" ")
288+
delete_tag(
289+
harbor_url, username, password, project_name, repository_name, digest, tag_name
290+
)
291+
print("✓")
292+
deleted_tags_count += 1
293+
except requests.exceptions.RequestException as e:
294+
print(f"✗ Error: {e}")
295+
error_count += 1
296+
236297
print("\n" + "=" * 60)
237-
print(f"Deleted: {deleted_count} tags")
298+
print(f"Deleted: {deleted_artifacts_count} artifacts (entire images)")
299+
print(f"Deleted: {deleted_tags_count} individual tags")
238300
print(f"Errors: {error_count}")
239301
print("=" * 60)
240302

241-
# Note: Untagged artifacts can be cleaned up by Harbor's garbage collection
242-
if artifacts_to_check_deletion:
243-
print("\n⚠️ Some artifacts are now untagged.")
244-
print("Run Harbor garbage collection to reclaim storage space.")
245-
246303
return 0 if error_count == 0 else 1
247304

248305

0 commit comments

Comments
 (0)