diff --git a/scripts/generate-news-enhanced.ts b/scripts/generate-news-enhanced.ts
index c99e7f96c..e1d44d0e4 100644
--- a/scripts/generate-news-enhanced.ts
+++ b/scripts/generate-news-enhanced.ts
@@ -244,6 +244,8 @@ const dryRunArg: boolean = args.includes('--dry-run');
const batchSizeArg: string | undefined = args.find(arg => arg.startsWith('--batch-size='));
const skipExistingArg: boolean = args.includes('--skip-existing');
const batchSize: number = batchSizeArg ? parseInt(batchSizeArg.split('=')[1] ?? '0', 10) : 0;
+// --quality-threshold=<n>: minimum composite quality score an article needs to count as passing (default 40).
+const qualityThresholdArg: string | undefined = args.find(arg => arg.startsWith('--quality-threshold='));
+const parsedQualityThreshold: number = parseInt(qualityThresholdArg?.split('=')[1] ?? '', 10);
+// A missing, empty or non-numeric value parses to NaN, and every comparison against NaN is false,
+// so fall back to the default instead of letting NaN flow into the score comparisons.
+const qualityThreshold: number = Number.isNaN(parsedQualityThreshold) ? 40 : parsedQualityThreshold;
// --require-mcp flag: when true (default), abort if MCP server is unreachable after all retries.
// Set --require-mcp=false for local development/testing without a live MCP server.
@@ -417,6 +419,124 @@ const stats: { generated: number; errors: number; articles: string[]; timestamp:
timestamp: new Date().toISOString()
};
+// Quality score of every article passed through writeSingleArticle in this run, in write order.
+// Read after generation finishes to detect the case where all articles scored below the threshold.
+const qualityScores: number[] = [];
+
+// ---------------------------------------------------------------------------
+// Article quality validation
+// ---------------------------------------------------------------------------
+
+/** Quality metrics and score for a single generated article. */
+export interface ArticleQualityReport {
+ /** Report label built as `${articleType}-${lang}`. */
+ readonly articleId: string;
+ /** Number of whitespace-separated tokens left after HTML tags are stripped. */
+ readonly wordCount: number;
+ /** Number of literal "Unknown (Unknown)" occurrences in the HTML. */
+ readonly unknownAuthorCount: number;
+ /** Entry total used as the unknown-author denominator: the larger of the list-item count and unknownAuthorCount. */
+ readonly totalEntryCount: number;
+ /** Number of `data-translate="true"` occurrences; always 0 when the language is 'sv'. */
+ readonly untranslatedSpanCount: number;
+ /** Number of h2 headings, used as a structural quality indicator. */
+ readonly analyticalSectionCount: number;
+ /** Sum of the four per-metric scores (each worth up to 25 points). */
+ readonly score: number;
+ /** True when score is at least the configured quality threshold. */
+ readonly passed: boolean;
+ /** Human-readable warnings recorded while scoring; empty when no metric was flagged. */
+ readonly issues: string[];
+}
+
+/**
+ * Validate the quality of a generated HTML article.
+ *
+ * Scoring (100 pts total, 25 pts per metric):
+ * - Word count: >= 500 words scores 25, 300-499 scores 15, < 300 scores 0 and records a
+ *   "REJECT" issue. The issue is advisory only: `passed` is decided by the total score.
+ * - Unknown authors: share of "Unknown (Unknown)" entries among all entries; 0% scores 25,
+ *   up to 50% scores 25 * (1 - ratio) rounded, above 50% scores 0.
+ * - Untranslated spans: 'sv' always scores 25; otherwise 0 spans scores 25, 1-10 spans scale
+ *   linearly down to 0 points at 10 spans, and more than 10 spans scores 0.
+ * - Analytical sections: >= 3 h2 headings scores 25, 1-2 score 25 * count / 3 rounded, 0 scores 0.
+ *
+ * @param html - Full HTML content of the article
+ * @param lang - Language code (e.g. 'en', 'sv')
+ * @param articleType - Article type label for reporting (e.g. 'motions')
+ * @returns Quality report with score and per-metric details; `passed` is true when the score
+ *   is at least the module-level `qualityThreshold`
+ */
+export function validateArticleQuality(html: string, lang: string, articleType: string): ArticleQualityReport {
+  const articleId = `${articleType}-${lang}`;
+  const countMatches = (pattern: RegExp): number => (html.match(pattern) ?? []).length;
+
+  // Word count: strip tags and count whitespace-separated tokens
+  const textContent = html.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
+  const wordCount = textContent.length === 0 ? 0 : textContent.split(' ').filter(w => w.length > 0).length;
+
+  // Count "Unknown (Unknown)" sentinel entries (used when author/party is missing)
+  const unknownAuthorCount = countMatches(/Unknown \(Unknown\)/g);
+
+  // Use list items as a proxy for total document entries. The word boundary keeps other
+  // tags that merely start with "li" (e.g. <link>) out of the count.
+  const listItemCount = countMatches(/<li\b[^>]*>/g);
+  const totalEntryCount = Math.max(listItemCount, unknownAuthorCount);
+
+  // Count untranslated spans — only relevant for non-Swedish content
+  const untranslatedSpanCount = lang !== 'sv' ? countMatches(/data-translate="true"/g) : 0;
+
+  // Count analytical h2 sections (structural quality indicator)
+  const analyticalSectionCount = countMatches(/<h2\b[^>]*>/g);
+
+  const issues: string[] = [];
+
+  // Word count score: 25 pts
+  let wordScore = 0;
+  if (wordCount >= 500) {
+    wordScore = 25;
+  } else if (wordCount >= 300) {
+    wordScore = 15;
+  } else {
+    issues.push(`Word count: ${wordCount} < 300 — REJECT`);
+  }
+
+  // Unknown authors score: 25 pts. Any unknown entry is reported; only the points differ.
+  const unknownRatio = totalEntryCount > 0 ? unknownAuthorCount / totalEntryCount : 0;
+  let unknownScore = 25;
+  if (unknownRatio > 0) {
+    unknownScore = unknownRatio <= 0.5 ? Math.round(25 * (1 - unknownRatio)) : 0;
+    issues.push(`Unknown authors: ${unknownAuthorCount}/${totalEntryCount} ⚠️`);
+  }
+
+  // Untranslated spans score: 25 pts
+  let untranslatedScore = 0;
+  if (lang === 'sv' || untranslatedSpanCount === 0) {
+    untranslatedScore = 25;
+  } else if (untranslatedSpanCount <= 10) {
+    untranslatedScore = Math.round(25 * (1 - untranslatedSpanCount / 10));
+    issues.push(`Untranslated spans: ${untranslatedSpanCount} ⚠️`);
+  } else {
+    issues.push(`Untranslated spans: ${untranslatedSpanCount} > 10 ⚠️`);
+  }
+
+  // Analytical sections score: 25 pts
+  let analyticalScore = 0;
+  if (analyticalSectionCount >= 3) {
+    analyticalScore = 25;
+  } else if (analyticalSectionCount >= 1) {
+    analyticalScore = Math.round(25 * analyticalSectionCount / 3);
+    issues.push(`Analytical sections: ${analyticalSectionCount}/3 ⚠️`);
+  } else {
+    issues.push(`Analytical sections: 0/3 ⚠️`);
+  }
+
+  const score = wordScore + unknownScore + untranslatedScore + analyticalScore;
+  const passed = score >= qualityThreshold;
+
+  return {
+    articleId,
+    wordCount,
+    unknownAuthorCount,
+    totalEntryCount,
+    untranslatedSpanCount,
+    analyticalSectionCount,
+    score,
+    passed,
+    issues
+  };
+}
+
// ---------------------------------------------------------------------------
// Helper functions
// ---------------------------------------------------------------------------
@@ -463,7 +583,26 @@ async function writeArticle(html: string, filename: string): Promise {
/**
* Write article in specified language
*/
-async function writeSingleArticle(html: string, slug: string, lang: Language): Promise {
+async function writeSingleArticle(html: string, slug: string, lang: Language, articleType?: string): Promise {
+ // Validate article quality before writing
+ const report = validateArticleQuality(html, lang, articleType ?? slug);
+ const unknownRatioStr = report.totalEntryCount > 0
+ ? `${report.unknownAuthorCount}/${report.totalEntryCount}`
+ : `${report.unknownAuthorCount}/0`;
+ const unknownIcon = report.unknownAuthorCount > 0 ? '⚠️' : '✅';
+ const untranslatedIcon = report.untranslatedSpanCount > 0 ? '⚠️' : '✅';
+ const analyticalIcon = report.analyticalSectionCount >= 3 ? '✅' : '⚠️';
+ console.log(`\n 📊 Article Quality Report: ${report.articleId}`);
+ console.log(` - Word count: ${report.wordCount} ${report.wordCount >= 300 ? '✅' : '❌'}`);
+ console.log(` - Unknown authors: ${unknownRatioStr} ${unknownIcon}`);
+ console.log(` - Untranslated spans: ${report.untranslatedSpanCount} ${untranslatedIcon}`);
+ console.log(` - Analytical sections: ${report.analyticalSectionCount}/3 ${analyticalIcon}`);
+ console.log(` - Quality Score: ${report.score}/100 — ${report.passed ? 'ABOVE THRESHOLD' : 'BELOW THRESHOLD'}`);
+ if (report.issues.length > 0) {
+ report.issues.forEach(issue => console.warn(` ⚠️ ${issue}`));
+ }
+ qualityScores.push(report.score);
+
const filename: string = `${slug}-${lang}.html`;
await writeArticle(html, filename);
stats.generated += 1;
@@ -582,7 +721,7 @@ async function generateWeekAhead(): Promise {
});
// Write article
- await writeSingleArticle(html, slug, lang);
+ await writeSingleArticle(html, slug, lang, 'week-ahead');
console.log(` ✅ ${lang.toUpperCase()} version generated`);
}
@@ -669,7 +808,7 @@ async function generateCommitteeReports(): Promise {
tags: metadata.tags
});
- await writeSingleArticle(html, slug, lang);
+ await writeSingleArticle(html, slug, lang, 'committee-reports');
}
return { success: true, files: languages.length, slug };
@@ -753,7 +892,7 @@ async function generatePropositions(): Promise {
tags: metadata.tags
});
- await writeSingleArticle(html, slug, lang);
+ await writeSingleArticle(html, slug, lang, 'propositions');
}
return { success: true, files: languages.length, slug };
@@ -837,7 +976,7 @@ async function generateMotions(): Promise {
tags: metadata.tags
});
- await writeSingleArticle(html, slug, lang);
+ await writeSingleArticle(html, slug, lang, 'motions');
}
return { success: true, files: languages.length, slug };
@@ -1019,7 +1158,16 @@ async function generateNews(): Promise {
if (import.meta.url === `file://${process.argv[1]}`) {
generateNews()
.then(result => {
- process.exit(result.errors > 0 ? 1 : 0);
+ if (result.errors > 0) {
+ process.exit(1);
+ }
+ // Soft failure: all articles in this run scored below the quality threshold
+ if (qualityScores.length > 0 && qualityScores.every(s => s < qualityThreshold)) {
+ console.warn(`\n⚠️ Quality Warning: ALL ${qualityScores.length} article(s) scored below threshold (${qualityThreshold})`);
+ console.warn(` Scores: ${qualityScores.join(', ')}`);
+ process.exit(2);
+ }
+ process.exit(0);
})
.catch((error: unknown) => {
console.error('❌ Fatal error:', error);
diff --git a/tests/article-quality.test.ts b/tests/article-quality.test.ts
new file mode 100644
index 000000000..e3fbe231f
--- /dev/null
+++ b/tests/article-quality.test.ts
@@ -0,0 +1,274 @@
+/**
+ * Unit Tests for Article Quality Validation
+ *
+ * Tests the validateArticleQuality function from generate-news-enhanced.ts:
+ * - Word count detection and scoring
+ * - Unknown (Unknown) author counting and scoring
+ * - Untranslated data-translate span detection (non-Swedish only)
+ * - Analytical section (h2 header) counting and scoring
+ * - Composite quality score calculation
+ * - Pass/fail against default threshold (40)
+ */
+
+import { describe, it, expect, vi, beforeAll, afterAll } from 'vitest';
+import fs from 'fs';
+import type { MCPClientConfig } from '../scripts/types/mcp.js';
+
+/**
+ * Shape of the quality report returned by validateArticleQuality.
+ * Declared locally with the same fields as the ArticleQualityReport interface exported by the script.
+ */
+interface ArticleQualityReport {
+ readonly articleId: string;
+ readonly wordCount: number;
+ readonly unknownAuthorCount: number;
+ readonly totalEntryCount: number;
+ readonly untranslatedSpanCount: number;
+ readonly analyticalSectionCount: number;
+ readonly score: number;
+ readonly passed: boolean;
+ readonly issues: string[];
+}
+
+/**
+ * Partial shape of the generate-news-enhanced module we need for these tests.
+ * Used to type the result of the dynamic import in beforeAll.
+ */
+interface GenerateNewsEnhancedModule {
+ readonly validateArticleQuality: (html: string, lang: string, articleType: string) => ArticleQualityReport;
+}
+
+// ---------------------------------------------------------------------------
+// Mock MCPClient to prevent real HTTP calls during module import
+// ---------------------------------------------------------------------------
+
+// vi.hoisted creates these values ahead of the hoisted vi.mock call below, so the mock
+// factory can reference them.
+const { MockMCPClient, mockClientInstance } = vi.hoisted(() => {
+ // Single shared client stub: every fetch/search/enrich method resolves to an empty array.
+ const mockClientInstance = {
+ fetchCalendarEvents: vi.fn().mockResolvedValue([]),
+ fetchCommitteeReports: vi.fn().mockResolvedValue([]),
+ fetchPropositions: vi.fn().mockResolvedValue([]),
+ fetchMotions: vi.fn().mockResolvedValue([]),
+ fetchVotingRecords: vi.fn().mockResolvedValue([]),
+ searchDocuments: vi.fn().mockResolvedValue([]),
+ enrichDocumentsWithContent: vi.fn().mockResolvedValue([]),
+ request: vi.fn().mockResolvedValue({ last_sync: '2026-02-23T00:00:00Z' }),
+ timeout: 30000,
+ baseURL: 'https://riksdag-regering-ai.onrender.com/mcp'
+ };
+ // Stand-in for the MCPClient constructor: ignores its config and always hands back the shared stub.
+ function MockMCPClient(_config?: MCPClientConfig) {
+ return mockClientInstance;
+ }
+ return { MockMCPClient, mockClientInstance };
+});
+
+// Replace the real client module so both construction and getDefaultClient() yield the shared stub.
+vi.mock('../scripts/mcp-client.js', () => ({
+ MCPClient: MockMCPClient,
+ getDefaultClient: () => mockClientInstance
+}));
+
+// ---------------------------------------------------------------------------
+// Module import (dynamic, to handle top-level side-effects safely)
+// ---------------------------------------------------------------------------
+
+// Holds the dynamically imported script module; stays null when the import fails.
+let mod: GenerateNewsEnhancedModule | null = null;
+
+beforeAll(async () => {
+ // Stub filesystem calls before importing the script.
+ // NOTE(review): presumably the script touches the filesystem at import time — confirm.
+ vi.spyOn(fs, 'writeFileSync').mockImplementation(() => {});
+ vi.spyOn(fs, 'mkdirSync').mockImplementation(() => undefined as unknown as string);
+ vi.spyOn(fs, 'existsSync').mockReturnValue(true);
+ vi.spyOn(fs, 'readdirSync').mockReturnValue([]);
+ try {
+ mod = await import('../scripts/generate-news-enhanced.js') as unknown as GenerateNewsEnhancedModule;
+ } catch (e) {
+ // Log and keep going so the failure is reported by the tests rather than by the hook.
+ console.error('Module import failed:', e);
+ mod = null;
+ }
+});
+
+// Undo the fs spies installed in beforeAll.
+afterAll(() => {
+ vi.restoreAllMocks();
+});
+
+// ---------------------------------------------------------------------------
+// Helpers to build test HTML snippets
+// ---------------------------------------------------------------------------
+
+/**
+ * Build a synthetic article whose quality metrics are controlled by the options.
+ *
+ * @param words - Number of filler words placed in the body paragraph
+ * @param unknownCount - Number of list items reading "Filed by: Unknown (Unknown)"
+ * @param listItemCount - Total number of list items; items beyond unknownCount name an author
+ * @param untranslatedSpans - Number of spans carrying data-translate="true"
+ * @param h2Count - Number of h2 section headings
+ * @returns HTML string containing exactly the requested markup
+ */
+function buildHtml({
+  words = 600,
+  unknownCount = 0,
+  listItemCount = 0,
+  untranslatedSpans = 0,
+  h2Count = 3
+}: {
+  words?: number;
+  unknownCount?: number;
+  listItemCount?: number;
+  untranslatedSpans?: number;
+  h2Count?: number;
+}): string {
+  const wordText = Array.from({ length: words }, (_, i) => `word${i}`).join(' ');
+  const unknownEntries = Array.from({ length: unknownCount }, () => '<li>Filed by: Unknown (Unknown)</li>').join('');
+  // Known-author items fill the list up to listItemCount; never negative when unknownCount is larger.
+  const normalEntries = Array.from({ length: Math.max(0, listItemCount - unknownCount) }, (_, i) => `<li>Author ${i}</li>`).join('');
+  const translationSpans = Array.from({ length: untranslatedSpans }, () => '<span data-translate="true">text</span>').join('');
+  const h2Tags = Array.from({ length: h2Count }, (_, i) => `<h2>Section ${i + 1}</h2>\n`).join('');
+  return `<article><p>${wordText}</p>\n<ul>${unknownEntries}${normalEntries}</ul>${translationSpans}${h2Tags}</article>`;
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+describe('Article Quality Validation', () => {
+  /**
+   * Run the validator from the imported module. Throws when the import failed, so each
+   * test fails loudly instead of passing without asserting anything.
+   */
+  function validate(html: string, lang: string, articleType: string): ArticleQualityReport {
+    if (!mod) {
+      throw new Error('generate-news-enhanced module failed to import');
+    }
+    return mod.validateArticleQuality(html, lang, articleType);
+  }
+
+  it('should be exported from the module', () => {
+    expect(mod).not.toBeNull();
+    expect(typeof mod?.validateArticleQuality).toBe('function');
+  });
+
+  describe('Word count scoring', () => {
+    it('scores 25 pts for articles with >= 500 words', () => {
+      const html = buildHtml({ words: 600, h2Count: 0, unknownCount: 0, untranslatedSpans: 0, listItemCount: 0 });
+      const report = validate(html, 'en', 'test');
+      expect(report.wordCount).toBeGreaterThanOrEqual(500);
+      // word: 25
+      // unknown: no entries, ratio 0 → 25
+      // untranslated: 0 spans → 25
+      // analytical: 0 h2 → 0 (recorded as an issue)
+      // total = 25 + 25 + 25 + 0 = 75
+      expect(report.score).toBe(75);
+    });
+
+    it('scores 15 pts for articles with 300-499 words', () => {
+      const html = buildHtml({ words: 350, h2Count: 0, unknownCount: 0, untranslatedSpans: 0, listItemCount: 0 });
+      const report = validate(html, 'en', 'test');
+      expect(report.wordCount).toBeGreaterThanOrEqual(300);
+      expect(report.wordCount).toBeLessThan(500);
+      // 15 (word) + 25 (unknown) + 25 (untranslated) + 0 (analytical) = 65
+      expect(report.score).toBe(65);
+    });
+
+    it('scores 0 pts and adds REJECT issue for articles with < 300 words', () => {
+      const html = buildHtml({ words: 50, h2Count: 3, unknownCount: 0, untranslatedSpans: 0, listItemCount: 0 });
+      const report = validate(html, 'en', 'test');
+      expect(report.wordCount).toBeLessThan(300);
+      expect(report.issues.some(i => i.includes('REJECT'))).toBe(true);
+      // 0 (word) + 25 (unknown) + 25 (untranslated) + 25 (analytical) = 75
+      expect(report.score).toBe(75);
+    });
+  });
+
+  describe('Unknown author detection', () => {
+    it('scores 25 pts when there are no Unknown (Unknown) entries', () => {
+      const html = buildHtml({ words: 600, unknownCount: 0, listItemCount: 5, h2Count: 3, untranslatedSpans: 0 });
+      const report = validate(html, 'en', 'test');
+      expect(report.unknownAuthorCount).toBe(0);
+      expect(report.score).toBe(100);
+    });
+
+    it('adds a warning issue when > 50% are Unknown (Unknown)', () => {
+      const html = buildHtml({ words: 600, unknownCount: 8, listItemCount: 10, h2Count: 3, untranslatedSpans: 0 });
+      const report = validate(html, 'en', 'test');
+      expect(report.unknownAuthorCount).toBe(8);
+      expect(report.totalEntryCount).toBe(10);
+      expect(report.issues.some(i => i.includes('Unknown authors'))).toBe(true);
+    });
+
+    it('counts Unknown (Unknown) occurrences correctly', () => {
+      // Three list items, two of them carrying the unknown-author sentinel.
+      const html = '<ul><li>Filed by: Unknown (Unknown)</li><li>Filed by: Unknown (Unknown)</li><li>Filed by: Real Author (M)</li></ul>';
+      const report = validate(html, 'en', 'test');
+      expect(report.unknownAuthorCount).toBe(2);
+      expect(report.totalEntryCount).toBe(3);
+    });
+  });
+
+  describe('Untranslated span detection', () => {
+    it('scores 25 pts for Swedish articles regardless of data-translate spans', () => {
+      const html = buildHtml({ words: 600, untranslatedSpans: 20, h2Count: 3, unknownCount: 0, listItemCount: 0 });
+      const report = validate(html, 'sv', 'test');
+      expect(report.untranslatedSpanCount).toBe(0); // sv gets 0 count
+      // No untranslated penalty for Swedish
+      expect(report.score).toBe(100);
+    });
+
+    it('scores 0 pts and warns when > 10 untranslated spans in non-Swedish', () => {
+      const html = buildHtml({ words: 600, untranslatedSpans: 21, h2Count: 3, unknownCount: 0, listItemCount: 0 });
+      const report = validate(html, 'en', 'test');
+      expect(report.untranslatedSpanCount).toBe(21);
+      expect(report.issues.some(i => i.includes('Untranslated spans') && i.includes('10'))).toBe(true);
+      // 25 (word) + 25 (unknown) + 0 (untranslated) + 25 (analytical) = 75
+      expect(report.score).toBe(75);
+    });
+
+    it('scores 25 pts when there are 0 untranslated spans in non-Swedish', () => {
+      const html = buildHtml({ words: 600, untranslatedSpans: 0, h2Count: 3, unknownCount: 0, listItemCount: 0 });
+      const report = validate(html, 'de', 'test');
+      expect(report.untranslatedSpanCount).toBe(0);
+      expect(report.score).toBe(100);
+    });
+  });
+
+  describe('Analytical section detection', () => {
+    it('scores 25 pts when there are >= 3 h2 sections', () => {
+      const html = buildHtml({ words: 600, h2Count: 3, unknownCount: 0, untranslatedSpans: 0, listItemCount: 0 });
+      const report = validate(html, 'en', 'test');
+      expect(report.analyticalSectionCount).toBe(3);
+      expect(report.score).toBe(100);
+    });
+
+    it('scores partial pts and warns when there is 1 h2 section', () => {
+      const html = buildHtml({ words: 600, h2Count: 1, unknownCount: 0, untranslatedSpans: 0, listItemCount: 0 });
+      const report = validate(html, 'en', 'test');
+      expect(report.analyticalSectionCount).toBe(1);
+      expect(report.issues.some(i => i.includes('Analytical sections'))).toBe(true);
+      // analyticalScore = round(25 * 1/3) = 8
+      expect(report.score).toBe(25 + 25 + 25 + 8);
+    });
+
+    it('scores 0 pts and warns when there are no h2 sections', () => {
+      const html = buildHtml({ words: 600, h2Count: 0, unknownCount: 0, untranslatedSpans: 0, listItemCount: 0 });
+      const report = validate(html, 'en', 'test');
+      expect(report.analyticalSectionCount).toBe(0);
+      expect(report.issues.some(i => i.includes('Analytical sections') && i.includes('0/3'))).toBe(true);
+      expect(report.score).toBe(75);
+    });
+  });
+
+  describe('Quality score and pass/fail', () => {
+    it('returns passed=true for a perfect article (score=100)', () => {
+      const html = buildHtml({ words: 600, h2Count: 3, unknownCount: 0, untranslatedSpans: 0, listItemCount: 5 });
+      const report = validate(html, 'en', 'test');
+      expect(report.score).toBe(100);
+      expect(report.passed).toBe(true);
+    });
+
+    it('returns passed=false for a low-quality article (score < 40)', () => {
+      // Very short, all unknowns, 21 untranslated spans, no sections
+      const html = buildHtml({ words: 10, h2Count: 0, unknownCount: 5, listItemCount: 5, untranslatedSpans: 21 });
+      const report = validate(html, 'en', 'test');
+      expect(report.score).toBeLessThan(40);
+      expect(report.passed).toBe(false);
+    });
+
+    it('sets articleId as "<articleType>-<lang>"', () => {
+      const html = buildHtml({ words: 600, h2Count: 3 });
+      const report = validate(html, 'fr', 'motions');
+      expect(report.articleId).toBe('motions-fr');
+    });
+
+    it('returns no issues for a high-quality article', () => {
+      const html = buildHtml({ words: 600, h2Count: 3, unknownCount: 0, untranslatedSpans: 0, listItemCount: 5 });
+      const report = validate(html, 'en', 'test');
+      expect(report.issues).toHaveLength(0);
+    });
+  });
+});
diff --git a/tests/generate-news-enhanced-mcp-abort.test.ts b/tests/generate-news-enhanced-mcp-abort.test.ts
index 6a1d43300..fe4f90c3b 100644
--- a/tests/generate-news-enhanced-mcp-abort.test.ts
+++ b/tests/generate-news-enhanced-mcp-abort.test.ts
@@ -11,7 +11,7 @@
* sharedClient=null, enabling warm-up failure to be triggered cleanly.
*/
-import { describe, it, expect, vi, beforeAll, afterAll } from 'vitest';
+import { describe, it, expect, vi, beforeAll, afterAll, beforeEach } from 'vitest';
import fs from 'fs';
import type { GenerationResult } from '../scripts/types/article.js';
import type { MCPClientConfig } from '../scripts/types/mcp.js';