@@ -7,6 +7,12 @@ const REDIRECT_CODES = [301, 302, 304, 307, 308]
/**
 * Non-standard status codes that some sites answer with
 * (for example, LinkedIn responds with 999).
 */
const OTHER_CODES = [999]
99
/**
 * URL prefixes for which a 429 (Too Many Requests) response is tolerated.
 * Append further entries to this list as needed.
 */
const ACCEPTED_RATE_LIMITED_URLS = ['https://github.com/nitrictech/nitric']
15+
1016const IGNORED_URLS = [
1117 'googleads.g.doubleclick.net' ,
1218 'youtube.com/api' ,
@@ -45,7 +51,18 @@ const isExternalUrl = (url: string) => {
4551 return ! url . includes ( 'localhost' )
4652}
4753
48- const req = ( url : string , retryCount = 0 , followRedirect = false ) : any => {
/**
 * Reports whether `url` begins with any entry of ACCEPTED_RATE_LIMITED_URLS.
 *
 * @param url - The link being checked.
 * @returns `true` when at least one accepted entry is a prefix of `url`,
 *   otherwise `false`.
 */
const isAcceptedRateLimitedUrl = (url: string) => {
  for (const acceptedPrefix of ACCEPTED_RATE_LIMITED_URLS) {
    if (url.startsWith(acceptedPrefix)) {
      return true
    }
  }
  return false
}
59+
60+ const req = (
61+ url : string ,
62+ retryCount = 0 ,
63+ followRedirect = false ,
64+ visitedLinks : Record < string , boolean > = { } ,
65+ ) : any => {
4966 return cy
5067 . request ( {
5168 url,
@@ -54,11 +71,34 @@ const req = (url: string, retryCount = 0, followRedirect = false): any => {
5471 gzip : false ,
5572 } )
5673 . then ( ( resp ) => {
57- // retry on timeout and too many requests
58- if ( [ 408 , 429 ] . includes ( resp . status ) && retryCount < 3 ) {
59- cy . log ( `request ${ url } timed out, retrying again...` )
60- cy . wait ( 500 )
61- return req ( url , retryCount + 1 )
74+ // Handle rate limiting (429) with exponential backoff
75+ if ( resp . status === 429 && retryCount < 5 ) {
76+ const retryAfter = resp . headers [ 'retry-after' ]
77+ ? parseInt (
78+ Array . isArray ( resp . headers [ 'retry-after' ] )
79+ ? resp . headers [ 'retry-after' ] [ 0 ]
80+ : resp . headers [ 'retry-after' ] ,
81+ )
82+ : null
83+ const waitTime = retryAfter
84+ ? retryAfter * 1000
85+ : Math . min ( 500 * Math . pow ( 2 , retryCount ) , 5000 )
86+
87+ cy . log (
88+ `Rate limited for ${ url } , waiting ${ waitTime } ms before retry ${ retryCount + 1 } /5` ,
89+ )
90+ cy . wait ( waitTime )
91+ return req ( url , retryCount + 1 , followRedirect , visitedLinks )
92+ }
93+
94+ // Handle timeouts with exponential backoff
95+ if ( resp . status === 408 && retryCount < 3 ) {
96+ const waitTime = Math . min ( 200 * Math . pow ( 2 , retryCount ) , 2000 )
97+ cy . log (
98+ `Request timeout for ${ url } , waiting ${ waitTime } ms before retry ${ retryCount + 1 } /3` ,
99+ )
100+ cy . wait ( waitTime )
101+ return req ( url , retryCount + 1 , followRedirect , visitedLinks )
62102 }
63103
64104 return resp
@@ -67,6 +107,7 @@ const req = (url: string, retryCount = 0, followRedirect = false): any => {
67107
68108describe ( 'Broken links test suite' , ( ) => {
69109 const VISITED_SUCCESSFUL_LINKS = { }
110+ const BATCH_SIZE = 10 // Process links in batches of 10
70111
71112 pages . forEach ( ( page ) => {
72113 it ( `Should visit page ${ page } and check all links` , ( ) => {
@@ -84,61 +125,108 @@ describe('Broken links test suite', () => {
84125 ( l ) => href ?. includes ( l ) || src ?. includes ( l ) ,
85126 )
86127 } )
87- . each ( ( link ) => {
88- cy . log ( `link: ${ link [ 0 ] . textContent } ` )
89- const baseUrl = link . prop ( 'href' ) || link . prop ( 'src' )
90-
91- const url = baseUrl . split ( '#' ) [ 0 ]
92-
93- if ( VISITED_SUCCESSFUL_LINKS [ url ] ) {
94- cy . log ( `link already checked` )
95- expect ( VISITED_SUCCESSFUL_LINKS [ url ] ) . to . be . true
96- } else {
97- // if the link is internal then check the link against the pages fixture (sitemap)
98- if ( isInternalUrl ( url ) ) {
99- // clean the url by removing the base url and query params
100- const rootBaseUrlRegex = new RegExp ( `^${ rootBaseUrl } ` )
101- let cleanUrl = url . replace ( rootBaseUrlRegex , '' )
102- const queryIndex = cleanUrl . indexOf ( '?' )
103- cleanUrl =
104- queryIndex !== - 1 ? cleanUrl . slice ( 0 , queryIndex ) : cleanUrl
105-
106- cy . log ( `checking internal link: ${ cleanUrl } ` )
107- if ( ! pages . includes ( cleanUrl ) ) {
108- assert . fail ( `${ cleanUrl } is not part of the pages fixture` )
109- } else {
110- VISITED_SUCCESSFUL_LINKS [ url ] = true
111- }
128+ . then ( ( $links ) => {
129+ const linkPromises = [ ]
130+ const linksToCheck = [ ]
131+
132+ $links . each ( ( _i , link ) => {
133+ const baseUrl =
134+ link . getAttribute ( 'href' ) || link . getAttribute ( 'src' )
135+ if ( ! baseUrl ) {
136+ cy . log ( 'Skipping link with no href/src:' , link )
137+ return
138+ }
112139
140+ // Skip if the URL is just a hash fragment
141+ if ( baseUrl . startsWith ( '#' ) ) {
142+ cy . log ( 'Skipping hash fragment:' , baseUrl )
113143 return
114144 }
115145
116- cy . wait ( 25 )
117-
118- req ( url ) . then ( ( res : Cypress . Response < any > ) => {
119- let acceptableCodes = CORRECT_CODES
120- if ( REDIRECT_CODES . includes ( res . status ) && ! isExternalUrl ( url ) ) {
121- assert . fail (
122- `${ url } returned ${ res . status } to ${ res . headers [ 'location' ] } ` ,
123- )
124- } else {
125- acceptableCodes = [
126- ...CORRECT_CODES ,
127- ...REDIRECT_CODES ,
128- ...OTHER_CODES ,
129- ]
146+ const url = baseUrl . split ( '#' ) [ 0 ]
147+ if ( ! url ) {
148+ cy . log ( 'Skipping empty URL from:' , baseUrl )
149+ return
150+ }
151+
152+ if ( VISITED_SUCCESSFUL_LINKS [ url ] ) {
153+ cy . log ( `Skipping already checked link: ${ url } ` )
154+ return
155+ }
156+
157+ linksToCheck . push ( url )
158+ } )
159+
160+ // Process links in batches
161+ for ( let i = 0 ; i < linksToCheck . length ; i += BATCH_SIZE ) {
162+ const batch = linksToCheck . slice ( i , i + BATCH_SIZE )
163+ const batchPromises = batch . map ( ( url ) => {
164+ if ( ! url ) {
165+ cy . log ( 'Skipping empty URL in batch' )
166+ return Promise . resolve ( )
130167 }
131168
132- if ( acceptableCodes . includes ( res . status ) ) {
169+ if ( isInternalUrl ( url ) ) {
170+ const rootBaseUrlRegex = new RegExp ( `^${ rootBaseUrl } ` )
171+ let cleanUrl = url . replace ( rootBaseUrlRegex , '' )
172+ const queryIndex = cleanUrl . indexOf ( '?' )
173+ cleanUrl =
174+ queryIndex !== - 1 ? cleanUrl . slice ( 0 , queryIndex ) : cleanUrl
175+
176+ if ( ! pages . includes ( cleanUrl ) ) {
177+ assert . fail ( `${ cleanUrl } is not part of the pages fixture` )
178+ }
133179 VISITED_SUCCESSFUL_LINKS [ url ] = true
180+ return Promise . resolve ( )
134181 }
135182
136- expect ( res . status ) . oneOf (
137- acceptableCodes ,
138- `${ url } returned ${ res . status } ` ,
183+ return req ( url , 0 , false , VISITED_SUCCESSFUL_LINKS ) . then (
184+ ( res : Cypress . Response < any > ) => {
185+ let acceptableCodes = CORRECT_CODES
186+ if (
187+ REDIRECT_CODES . includes ( res . status ) &&
188+ ! isExternalUrl ( url )
189+ ) {
190+ assert . fail (
191+ `${ url } returned ${ res . status } to ${ res . headers [ 'location' ] } ` ,
192+ )
193+ } else if ( res . status === 429 ) {
194+ // After all retries, if we still get a 429, only mark as successful for accepted URLs
195+ if ( isAcceptedRateLimitedUrl ( url ) ) {
196+ cy . log (
197+ `Rate limited for accepted URL ${ url } after all retries, marking as successful` ,
198+ )
199+ VISITED_SUCCESSFUL_LINKS [ url ] = true
200+ return
201+ } else {
202+ assert . fail (
203+ `${ url } returned 429 (Rate Limited) and is not in the accepted list` ,
204+ )
205+ }
206+ } else {
207+ acceptableCodes = [
208+ ...CORRECT_CODES ,
209+ ...REDIRECT_CODES ,
210+ ...OTHER_CODES ,
211+ ]
212+ }
213+
214+ if ( acceptableCodes . includes ( res . status ) ) {
215+ VISITED_SUCCESSFUL_LINKS [ url ] = true
216+ }
217+
218+ expect ( res . status ) . oneOf (
219+ acceptableCodes ,
220+ `${ url } returned ${ res . status } ` ,
221+ )
222+ } ,
139223 )
140224 } )
225+
226+ linkPromises . push ( Promise . all ( batchPromises ) )
141227 }
228+
229+ return Promise . all ( linkPromises )
142230 } )
143231 } )
144232 } )
0 commit comments