From 1c3021a30a5e32f5f6f3ff257636e4bffea9906a Mon Sep 17 00:00:00 2001 From: Yilin Jing Date: Fri, 27 Feb 2026 17:40:57 +0800 Subject: [PATCH 1/2] fix(#15): ERROR log when crawl upserts fewer than 50 campaigns Add post-crawl sanity check: if total upserted < 50, emit an ERROR-level log line naming the likely cause (HTML selector change or ScrapingBee issue). Distinguishes silent parse failure from a legitimately quiet day. --- backend/internal/service/cron.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/backend/internal/service/cron.go b/backend/internal/service/cron.go index d113dcf..311d274 100644 --- a/backend/internal/service/cron.go +++ b/backend/internal/service/cron.go @@ -112,6 +112,16 @@ func (s *CronService) RunCrawlNow() error { } log.Printf("Cron: crawl done, upserted %d campaigns", upserted) + // Sanity check: a full crawl across all categories should always yield + // at least some campaigns. Zero almost certainly means a parse failure + // (e.g. Kickstarter changed their HTML structure), not a genuinely empty site. + const minExpectedCampaigns = 50 + if upserted < minExpectedCampaigns { + log.Printf("ERROR: crawl sanity check FAILED — only %d campaigns upserted (expected >=%d). "+ "Possible HTML structure change or ScrapingBee degradation. 
"+ + "Check kickstarter_parser.go [data-project] selector.", upserted, minExpectedCampaigns) + } + if len(allCampaigns) > 0 { s.storeSnapshots(allCampaigns) s.computeVelocity(allCampaigns) From 7721938c283915cf0a32ba198f5fd123718def88 Mon Sep 17 00:00:00 2001 From: Yilin Jing Date: Fri, 27 Feb 2026 18:21:30 +0800 Subject: [PATCH 2/2] fix(#22): use distinct PID count for crawl sanity check --- backend/internal/service/cron.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/backend/internal/service/cron.go b/backend/internal/service/cron.go index 311d274..2ec97d9 100644 --- a/backend/internal/service/cron.go +++ b/backend/internal/service/cron.go @@ -71,6 +71,7 @@ func (s *CronService) RunCrawlNow() error { s.syncCategories() upserted := 0 + seenPIDs := make(map[string]struct{}) var allCampaigns []model.Campaign for _, sortCfg := range crawlSorts { @@ -104,6 +105,9 @@ func (s *CronService) RunCrawlNow() error { log.Printf("Cron: upsert error: %v", result.Error) } else { upserted += len(campaigns) + for _, c := range campaigns { + seenPIDs[c.PID] = struct{}{} + } allCampaigns = append(allCampaigns, campaigns...) } time.Sleep(500 * time.Millisecond) @@ -116,10 +120,10 @@ func (s *CronService) RunCrawlNow() error { // at least some campaigns. Zero almost certainly means a parse failure // (e.g. Kickstarter changed their HTML structure), not a genuinely empty site. const minExpectedCampaigns = 50 - if upserted < minExpectedCampaigns { - log.Printf("ERROR: crawl sanity check FAILED — only %d campaigns upserted (expected >=%d). "+ + if len(seenPIDs) < minExpectedCampaigns { + log.Printf("ERROR: crawl sanity check FAILED — only %d distinct campaigns seen (expected >=%d). "+ "Possible HTML structure change or ScrapingBee degradation. "+ - "Check kickstarter_parser.go [data-project] selector.", upserted, minExpectedCampaigns) + "Check kickstarter_parser.go [data-project] selector.", len(seenPIDs), minExpectedCampaigns) } if len(allCampaigns) > 0 {