@@ -312,41 +312,63 @@ browser.runtime.onMessage.addListener(
312312 }
313313 }
314314
315- // Parse with defuddle first (better GitHub support, more tokens)
316- console . log ( "[Content Script] Parsing article with defuddle..." )
315+ // Parse content based on user's scraper preference
316+ const scraper = effectiveConfig . contentScraper || "auto"
317+ console . log ( `[Content Script] Using scraper: ${ scraper } ` )
318+
317319 let readableText = ""
318320 let pageTitle = ""
319321 let defuddleResult : ReturnType < Defuddle [ "parse" ] > | null = null
320322
321- try {
322- const defuddle = new Defuddle ( document , {
323- markdown : true ,
324- separateMarkdown : false ,
325- removeExactSelectors : true // Remove ads and social buttons
326- } )
327- defuddleResult = defuddle . parse ( )
323+ // Try Defuddle if user selected "auto" or "defuddle"
324+ if ( scraper === "auto" || scraper === "defuddle" ) {
325+ console . log ( "[Content Script] Parsing article with Defuddle..." )
326+ try {
327+ const defuddle = new Defuddle ( document , {
328+ markdown : true ,
329+ separateMarkdown : false ,
330+ removeExactSelectors : true // Remove ads and social buttons
331+ } )
332+ defuddleResult = defuddle . parse ( )
328333
329- // Prefer markdown if available, otherwise use HTML content
330- readableText =
331- defuddleResult ?. contentMarkdown || defuddleResult ?. content || ""
332- readableText = normalizeWhitespaceForLLM ( readableText )
333- pageTitle = defuddleResult ?. title || ""
334+ // Prefer markdown if available, otherwise use HTML content
335+ readableText =
336+ defuddleResult ?. contentMarkdown || defuddleResult ?. content || ""
337+ readableText = normalizeWhitespaceForLLM ( readableText )
338+ pageTitle = defuddleResult ?. title || ""
334339
335- console . log (
336- `[Content Script] defuddle extracted ${ readableText . length } chars (${ defuddleResult ?. contentMarkdown ? "markdown" : "HTML" } )`
337- )
338- } catch ( error ) {
339- console . warn (
340- "[Content Script] defuddle failed, falling back to Readability:" ,
341- error
342- )
340+ console . log (
341+ `[Content Script] Defuddle extracted ${ readableText . length } chars (${ defuddleResult ?. contentMarkdown ? "markdown" : "HTML" } )`
342+ )
343+ } catch ( error ) {
344+ console . warn ( "[Content Script] Defuddle failed:" , error )
345+ // If user selected "defuddle" only, don't fall back
346+ if ( scraper === "defuddle" ) {
347+ console . error (
348+ "[Content Script] Defuddle-only mode failed, no fallback available"
349+ )
350+ }
351+ }
343352 }
344353
345- // Fallback to Readability if defuddle failed or returned minimal content
346- if ( ! readableText || readableText . trim ( ) . length < 100 ) {
347- console . log (
348- "[Content Script] defuddle returned minimal content, trying Readability..."
349- )
354+ // Try Readability if:
355+ // 1. User selected "readability" only
356+ // 2. User selected "auto" and defuddle failed or returned minimal content
357+ if (
358+ scraper === "readability" ||
359+ ( scraper === "auto" &&
360+ ( ! readableText || readableText . trim ( ) . length < 100 ) )
361+ ) {
362+ if ( scraper === "auto" ) {
363+ console . log (
364+ "[Content Script] Defuddle returned minimal content, trying Readability..."
365+ )
366+ } else {
367+ console . log (
368+ "[Content Script] Parsing article with Readability..."
369+ )
370+ }
371+
350372 try {
351373 const article = new Readability (
352374 document . cloneNode ( true ) as Document
@@ -356,8 +378,11 @@ browser.runtime.onMessage.addListener(
356378 const normalizedReadability =
357379 normalizeWhitespaceForLLM ( readabilityText )
358380
359- // Use Readability result if it's better than defuddle
381+ // Use Readability result if:
382+ // - User selected "readability" only, OR
383+ // - Auto mode and Readability is better than defuddle
360384 if (
385+ scraper === "readability" ||
361386 normalizedReadability . length > readableText . length ||
362387 readableText . trim ( ) . length < 50
363388 ) {
@@ -367,15 +392,12 @@ browser.runtime.onMessage.addListener(
367392 )
368393 }
369394
370- // Use Readability title if defuddle didn't provide one
395+ // Use Readability title if not already set
371396 if ( ! pageTitle && article ?. title ) {
372397 pageTitle = article . title
373398 }
374399 } catch ( error ) {
375- console . error (
376- "[Content Script] Readability fallback failed:" ,
377- error
378- )
400+ console . error ( "[Content Script] Readability failed:" , error )
379401 }
380402 }
381403
0 commit comments