- Version tags (v1.0.0, latest, main): Never deleted
- SHA tags (sha-abc123): Deleted after SHA_RETENTION_DAYS
- PR tags (pr-123): Deleted after PR_RETENTION_DAYS
- Untagged images: Deleted after SHA_RETENTION_DAYS
1213"""
1314
1415import os
# Tag patterns
# SHA tags: "sha-" followed by lowercase hex digits.
SHA_PATTERN = re.compile(r"^sha-[a-f0-9]+$")
# PR tags: "pr-" followed by one or more digits.
PR_PATTERN = re.compile(r"^pr-\d+$")
# Version tags: semver-like strings with an optional "v" prefix, an optional
# patch component and an optional "-suffix", plus the literal tags "latest"
# and "main". The optional groups must be adjacent: a literal space between
# them would prevent plain tags such as "v1.0.0" from matching.
VERSION_PATTERN = re.compile(r"^v?\d+\.\d+(\.\d+)?(-.*)?$|^latest$|^main$")
2728
2829
2930def get_api_url (harbor_url : str , path : str ) -> str :
@@ -96,20 +97,19 @@ def delete_tag(
9697 response .raise_for_status ()
9798
9899
100+ def parse_push_time (push_time : str | datetime ) -> datetime :
101+ """Parse and normalize push time to timezone-aware datetime."""
102+ pushed_at : datetime = date_parser .parse (push_time ) if isinstance (push_time , str ) else push_time
103+ if pushed_at .tzinfo is None :
104+ pushed_at = pushed_at .replace (tzinfo = UTC )
105+ return pushed_at
106+
107+
99108def should_delete_tag (
100109 tag_name : str , push_time : str | datetime , sha_retention_days : int , pr_retention_days : int
101110) -> tuple [bool , str ]:
102111 """Determine if a tag should be deleted based on retention policy."""
103- now = datetime .now (UTC )
104-
105- # Parse push time
106- pushed_at = date_parser .parse (push_time ) if isinstance (push_time , str ) else push_time
107-
108- # Ensure timezone aware
109- if pushed_at .tzinfo is None :
110- pushed_at = pushed_at .replace (tzinfo = UTC )
111-
112- age_days = (now - pushed_at ).days
112+ age_days = (datetime .now (UTC ) - parse_push_time (push_time )).days
113113
114114 # Version tags (semver, latest, main) - NEVER delete
115115 if VERSION_PATTERN .match (tag_name ):
@@ -131,6 +131,16 @@ def should_delete_tag(
131131 return False , "unknown pattern (keeping as precaution)"
132132
133133
def should_delete_untagged_artifact(
    push_time: str | datetime, sha_retention_days: int
) -> tuple[bool, str]:
    """Determine if an untagged artifact should be deleted based on retention policy.

    Args:
        push_time: When the artifact was pushed, as a string or ``datetime``;
            normalized through ``parse_push_time``.
        sha_retention_days: Retention window in days. The artifact is deleted
            only when its age in whole days is strictly greater than this.

    Returns:
        A ``(delete, reason)`` tuple: ``delete`` is True when the artifact is
        past the retention window, and ``reason`` is a human-readable
        explanation that includes the artifact's age in days.
    """
    # timedelta.days truncates to whole days, so the comparison is on full days elapsed.
    age_days = (datetime.now(UTC) - parse_push_time(push_time)).days
    if age_days > sha_retention_days:
        return True, f"untagged artifact older than {sha_retention_days} days ({age_days} days old)"
    return False, f"untagged artifact within retention ({age_days} days old)"
142+
143+
134144def main () -> int :
135145 """Main cleanup logic."""
136146 # Configuration from environment variables
@@ -151,6 +161,7 @@ def main() -> int:
151161 print (f"Repository: { repository_name } " )
152162 print (f"SHA retention: { sha_retention_days } days" )
153163 print (f"PR retention: { pr_retention_days } days" )
164+ print (f"Untagged retention: { sha_retention_days } days (same as SHA)" )
154165 print (f"Dry run: { dry_run } " )
155166 print ("=" * 60 )
156167
@@ -161,9 +172,9 @@ def main() -> int:
161172 print (f"Error fetching artifacts: { e } " )
162173 return 1
163174
164- tags_to_delete = []
165- tags_to_keep = []
166- artifacts_to_check_deletion = []
175+ tags_to_delete : list [ tuple [ str , str ]] = []
176+ tags_to_keep : list [ str ] = []
177+ artifacts_to_delete : list [ tuple [ str , list [ str ], str ]] = [] # (digest, tag_names, reason)
167178
168179 for artifact in artifacts :
169180 digest = artifact .get ("digest" , "unknown" )
@@ -172,7 +183,17 @@ def main() -> int:
172183
173184 print (f"\n Artifact: { digest [:20 ]} ..." )
174185 print (f" Push time: { push_time } " )
175- print (f" Tags: { [t .get ('name' ) for t in tags ]} " )
186+ print (f" Tags: { [t .get ('name' ) for t in tags ] if tags else '(untagged)' } " )
187+
188+ # Handle untagged artifacts
189+ if not tags :
190+ delete , reason = should_delete_untagged_artifact (push_time , sha_retention_days )
191+ if delete :
192+ print (f" ❌ UNTAGGED: DELETE - { reason } " )
193+ artifacts_to_delete .append ((digest , [], "untagged" ))
194+ else :
195+ print (f" ✅ UNTAGGED: KEEP - { reason } " )
196+ continue
176197
177198 artifact_tags_to_delete = []
178199 artifact_tags_to_keep = []
@@ -194,55 +215,91 @@ def main() -> int:
194215 artifact_tags_to_keep .append (tag_name )
195216 tags_to_keep .append (tag_name )
196217
197- # If all tags are to be deleted, mark artifact for potential deletion
218+ # If all tags are to be deleted, mark the entire artifact for deletion
198219 if artifact_tags_to_delete and not artifact_tags_to_keep :
199- artifacts_to_check_deletion .append (digest )
220+ print (" 🗑️ Entire artifact marked for deletion (all tags expired)" )
221+ artifacts_to_delete .append ((digest , artifact_tags_to_delete , "all_tags_expired" ))
200222
201223 print ("\n " + "=" * 60 )
202224 print ("SUMMARY" )
203225 print ("=" * 60 )
204226 print (f"Tags to delete: { len (tags_to_delete )} " )
205227 print (f"Tags to keep: { len (tags_to_keep )} " )
206- print (f"Artifacts that may become untagged: { len (artifacts_to_check_deletion )} " )
228+ print (f"Artifacts to delete (entire image): { len (artifacts_to_delete )} " )
229+
230+ # Count untagged vs tagged artifacts to delete
231+ untagged_count = sum (1 for _ , _ , reason in artifacts_to_delete if reason == "untagged" )
232+ tagged_count = len (artifacts_to_delete ) - untagged_count
233+ print (f" - Untagged artifacts: { untagged_count } " )
234+ print (f" - Artifacts with all tags expired: { tagged_count } " )
207235
208- if not tags_to_delete :
209- print ("\n No tags to delete. Exiting." )
236+ if not tags_to_delete and not artifacts_to_delete :
237+ print ("\n No tags or artifacts to delete. Exiting." )
210238 return 0
211239
212240 if dry_run :
213241 print ("\n 🔍 DRY RUN MODE - No changes made" )
214- print ("\n Tags that would be deleted:" )
215- for digest , tag_name in tags_to_delete :
216- print (f" - { tag_name } (artifact: { digest [:20 ]} ...)" )
242+ if artifacts_to_delete :
243+ print ("\n Artifacts (entire images) that would be deleted:" )
244+ for digest , tag_names , reason in artifacts_to_delete :
245+ if reason == "untagged" :
246+ print (f" - { digest [:20 ]} ... (untagged)" )
247+ else :
248+ print (f" - { digest [:20 ]} ... (tags: { ', ' .join (tag_names )} )" )
249+
250+ # Show tags that would be deleted individually (where artifact has other tags to keep)
251+ artifact_digests_to_delete = {digest for digest , _ , _ in artifacts_to_delete }
252+ individual_tags = [(d , t ) for d , t in tags_to_delete if d not in artifact_digests_to_delete ]
253+ if individual_tags :
254+ print ("\n Tags that would be deleted (artifact kept due to other tags):" )
255+ for digest , tag_name in individual_tags :
256+ print (f" - { tag_name } (artifact: { digest [:20 ]} ...)" )
217257 return 0
218258
219259 # Perform deletions
220260 print ("\n 🗑️ PERFORMING DELETIONS..." )
221- deleted_count = 0
261+ deleted_artifacts_count = 0
262+ deleted_tags_count = 0
222263 error_count = 0
223264
224- for digest , tag_name in tags_to_delete :
265+ # First, delete entire artifacts where all tags are expired or untagged
266+ artifact_digests_to_delete = {digest for digest , _ , _ in artifacts_to_delete }
267+
268+ for digest , tag_names , reason in artifacts_to_delete :
225269 try :
226- print (f" Deleting tag: { tag_name } ..." , end = " " )
227- delete_tag (
228- harbor_url , username , password , project_name , repository_name , digest , tag_name
270+ label = (
271+ "untagged artifact"
272+ if reason == "untagged"
273+ else f"artifact (tags: { ', ' .join (tag_names )} )"
229274 )
275+ print (f" Deleting { label } : { digest [:20 ]} ..." , end = " " )
276+ delete_artifact (harbor_url , username , password , project_name , repository_name , digest )
230277 print ("✓" )
231- deleted_count += 1
278+ deleted_artifacts_count += 1
232279 except requests .exceptions .RequestException as e :
233280 print (f"✗ Error: { e } " )
234281 error_count += 1
235282
283+ # Then, delete individual tags where the artifact has other tags to keep
284+ for digest , tag_name in tags_to_delete :
285+ if digest not in artifact_digests_to_delete :
286+ try :
287+ print (f" Deleting tag: { tag_name } ..." , end = " " )
288+ delete_tag (
289+ harbor_url , username , password , project_name , repository_name , digest , tag_name
290+ )
291+ print ("✓" )
292+ deleted_tags_count += 1
293+ except requests .exceptions .RequestException as e :
294+ print (f"✗ Error: { e } " )
295+ error_count += 1
296+
236297 print ("\n " + "=" * 60 )
237- print (f"Deleted: { deleted_count } tags" )
298+ print (f"Deleted: { deleted_artifacts_count } artifacts (entire images)" )
299+ print (f"Deleted: { deleted_tags_count } individual tags" )
238300 print (f"Errors: { error_count } " )
239301 print ("=" * 60 )
240302
241- # Note: Untagged artifacts can be cleaned up by Harbor's garbage collection
242- if artifacts_to_check_deletion :
243- print ("\n ⚠️ Some artifacts are now untagged." )
244- print ("Run Harbor garbage collection to reclaim storage space." )
245-
246303 return 0 if error_count == 0 else 1
247304
248305
0 commit comments