Skip to content

Commit 7f5d574

Browse files
committed
Fix flakereport
1 parent 88ea88c commit 7f5d574

7 files changed

Lines changed: 122 additions & 25 deletions

File tree

.github/workflows/run-tests.yml

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -354,12 +354,19 @@ jobs:
354354
flags: unit-test
355355
report_type: test_results
356356

357+
- name: Get job ID
358+
id: get_job_id
359+
uses: ./.github/actions/get-job-id
360+
with:
361+
job_name: Unit test
362+
run_id: ${{ github.run_id }}
363+
357364
- name: Upload test results to GitHub
358365
# Can't pin to major because the action linter doesn't recognize the include-hidden-files flag.
359366
uses: actions/upload-artifact@v4.4.3
360367
if: ${{ !cancelled() }}
361368
with:
362-
name: junit-xml--${{github.run_id}}--${{github.run_attempt}}--unit-test
369+
name: junit-xml--${{ github.run_id }}--${{ steps.get_job_id.outputs.job_id }}--${{ github.run_attempt }}--unit-test
363370
path: ./.testoutput/junit.*.xml
364371
include-hidden-files: true
365372
retention-days: 28
@@ -447,12 +454,19 @@ jobs:
447454
flags: integration-test
448455
report_type: test_results
449456

457+
- name: Get job ID
458+
id: get_job_id
459+
uses: ./.github/actions/get-job-id
460+
with:
461+
job_name: Integration test
462+
run_id: ${{ github.run_id }}
463+
450464
- name: Upload test results to GitHub
451465
# Can't pin to major because the action linter doesn't recognize the include-hidden-files flag.
452466
uses: actions/upload-artifact@v4.4.3
453467
if: ${{ !cancelled() }}
454468
with:
455-
name: junit-xml--${{github.run_id}}--${{github.run_attempt}}--integration-test
469+
name: junit-xml--${{ github.run_id }}--${{ steps.get_job_id.outputs.job_id }}--${{ github.run_attempt }}--integration-test
456470
path: ./.testoutput/junit.*.xml
457471
include-hidden-files: true
458472
retention-days: 28

tools/flakereport/flakereport.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,8 +259,9 @@ func runGenerateCommand(c *cli.Context) (err error) {
259259
// Count test runs by name for failure rate calculation
260260
testRunCounts := countTestRuns(allTestRuns)
261261

262-
// Group failures by test name
262+
// Group failures by test name, then remove parent entries whose subtests were observed.
263263
grouped := groupFailuresByTest(allFailures)
264+
filterParentTests(grouped, testRunCounts)
264265
fmt.Printf("Unique tests with failures: %d\n", len(grouped))
265266

266267
// Classify failures

tools/flakereport/github.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,8 +191,8 @@ func extractArtifactZip(zipPath, outputDir string) ([]string, error) {
191191
return xmlFiles, nil
192192
}
193193

194-
// parseArtifactName extracts run_id and job_id from artifact name
195-
// Format: {prefix}--{run_id}--{job_id}--{suffix}
194+
// parseArtifactName extracts run_id and job_id from artifact name.
195+
// Format: junit-xml--{run_id}--{job_id}--{run_attempt}--...--{test-type}
196196
// Returns: runID, jobID (or "unknown" if not parseable)
197197
func parseArtifactName(artifactName string) (runID string, jobID string) {
198198
parts := strings.Split(artifactName, "--")

tools/flakereport/parallel.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,8 @@ func processArtifactJob(ctx context.Context, job ArtifactJob, totalArtifacts int
135135
result.Failures = append(result.Failures, failures...)
136136

137137
// Extract all test runs for failure rate calculation
138-
testRuns := extractAllTestRuns(suites, job.RunID)
138+
_, jobID := parseArtifactName(job.Artifact.Name)
139+
testRuns := extractAllTestRuns(suites, job.RunID, jobID)
139140
result.AllRuns = append(result.AllRuns, testRuns...)
140141
}
141142

tools/flakereport/parser.go

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ func extractFailures(suites *junit.Testsuites, artifactName string, runID int64,
9494

9595
// extractAllTestRuns extracts all test runs (including successes) from parsed JUnit data
9696
// Used for calculating failure rates
97-
func extractAllTestRuns(suites *junit.Testsuites, runID int64) []TestRun {
97+
func extractAllTestRuns(suites *junit.Testsuites, runID int64, jobID string) []TestRun {
9898
var runs []TestRun
9999

100100
for _, suite := range suites.Suites {
@@ -105,6 +105,7 @@ func extractAllTestRuns(suites *junit.Testsuites, runID int64) []TestRun {
105105
Failed: testcase.Failure != nil,
106106
Skipped: testcase.Skipped != nil,
107107
RunID: runID,
108+
JobID: jobID,
108109
}
109110
runs = append(runs, run)
110111
}
@@ -237,6 +238,25 @@ func convertToReports(grouped map[string][]TestFailure, testRunCounts map[string
237238
return reports
238239
}
239240

241+
// filterParentTests removes top-level test names from grouped when subtests of
242+
// that parent were observed in testRunCounts. A top-level failure whose subtests
243+
// ran in other CI jobs is already captured (with a correct denominator) in the
244+
// Flaky Suites section, so including it in the per-test table produces a
245+
// misleading 1/1 entry.
246+
func filterParentTests(grouped map[string][]TestFailure, testRunCounts map[string]int) {
247+
suitePrefix := make(map[string]bool, len(testRunCounts))
248+
for name := range testRunCounts {
249+
if idx := strings.IndexByte(name, '/'); idx >= 0 {
250+
suitePrefix[name[:idx]] = true
251+
}
252+
}
253+
for testName := range grouped {
254+
if !strings.Contains(testName, "/") && suitePrefix[testName] {
255+
delete(grouped, testName)
256+
}
257+
}
258+
}
259+
240260
// isFinalRetry returns true if the test name has the "(final)" suffix,
241261
// indicating the test runner exhausted all retries.
242262
func isFinalRetry(testName string) bool {
@@ -341,23 +361,36 @@ func identifyCIBreakers(failures []TestFailure) (map[string][]TestFailure, map[s
341361
return ciBreakers, ciBreakCount
342362
}
343363

364+
// jobKey returns a string that uniquely identifies a single job execution.
365+
// When a real JobID is available it is globally unique; otherwise we fall back
366+
// to the RunID so that the set still grows correctly across CI runs.
367+
func jobKey(runID int64, jobID string) string {
368+
if jobID != "" && jobID != "unknown" {
369+
return jobID
370+
}
371+
return fmt.Sprintf("%d", runID)
372+
}
373+
344374
// generateSuiteReports creates per-suite flake breakdown from all failures and test runs.
345-
// Suite flake rate = % of workflow runs where the suite had at least one non-retry failure.
375+
// Suite flake rate = % of job executions where the suite had at least one non-retry failure.
346376
func generateSuiteReports(allFailures []TestFailure, allTestRuns []TestRun) []SuiteReport {
347-
// Track unique workflow runs per suite (denominator)
348-
suiteRuns := make(map[string]map[int64]bool)
377+
// Track unique job executions per suite (denominator).
378+
// Each matrix shard / DB-config combination is a separate job execution even
379+
// though it shares the same workflow RunID, so we key by JobID (falling back
380+
// to RunID when JobID is unavailable).
381+
suiteRuns := make(map[string]map[string]bool)
349382
for _, run := range allTestRuns {
350383
if run.Skipped || !isGoTestSuite(run.SuiteName) {
351384
continue
352385
}
353386
if suiteRuns[run.SuiteName] == nil {
354-
suiteRuns[run.SuiteName] = make(map[int64]bool)
387+
suiteRuns[run.SuiteName] = make(map[string]bool)
355388
}
356-
suiteRuns[run.SuiteName][run.RunID] = true
389+
suiteRuns[run.SuiteName][jobKey(run.RunID, run.JobID)] = true
357390
}
358391

359-
// Track workflow runs with non-retry failures per suite (numerator)
360-
suiteFailedRuns := make(map[string]map[int64]bool)
392+
// Track job executions with non-retry failures per suite (numerator)
393+
suiteFailedRuns := make(map[string]map[string]bool)
361394
suiteLastFailure := make(map[string]time.Time)
362395
for _, failure := range allFailures {
363396
if !isGoTestSuite(failure.SuiteName) {
@@ -368,9 +401,9 @@ func generateSuiteReports(allFailures []TestFailure, allTestRuns []TestRun) []Su
368401
continue
369402
}
370403
if suiteFailedRuns[failure.SuiteName] == nil {
371-
suiteFailedRuns[failure.SuiteName] = make(map[int64]bool)
404+
suiteFailedRuns[failure.SuiteName] = make(map[string]bool)
372405
}
373-
suiteFailedRuns[failure.SuiteName][failure.RunID] = true
406+
suiteFailedRuns[failure.SuiteName][jobKey(failure.RunID, failure.JobID)] = true
374407
if failure.Timestamp.After(suiteLastFailure[failure.SuiteName]) {
375408
suiteLastFailure[failure.SuiteName] = failure.Timestamp
376409
}

tools/flakereport/parser_test.go

Lines changed: 53 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,27 @@ func TestParseArtifactName(t *testing.T) {
5252
expectedJobID string
5353
}{
5454
{
55-
name: "valid artifact name",
56-
artifactName: "test-results--12345678--87654321--junit",
57-
expectedRunID: "12345678",
58-
expectedJobID: "87654321",
55+
name: "functional test artifact",
56+
artifactName: "junit-xml--22373551837--64609560060--1--integration-0--Integration--functional-test",
57+
expectedRunID: "22373551837",
58+
expectedJobID: "64609560060",
59+
},
60+
{
61+
name: "unit test artifact",
62+
artifactName: "junit-xml--22373551837--64609560061--1--unit-test",
63+
expectedRunID: "22373551837",
64+
expectedJobID: "64609560061",
65+
},
66+
{
67+
name: "integration test artifact",
68+
artifactName: "junit-xml--22373551837--64609560062--1--integration-test",
69+
expectedRunID: "22373551837",
70+
expectedJobID: "64609560062",
5971
},
6072
{
6173
name: "artifact name with empty job id",
62-
artifactName: "test-results--12345678----junit",
63-
expectedRunID: "12345678",
74+
artifactName: "junit-xml--22373551837----1--unit-test",
75+
expectedRunID: "22373551837",
6476
expectedJobID: "unknown",
6577
},
6678
{
@@ -157,6 +169,41 @@ func TestClassifyFailures(t *testing.T) {
157169
assert.Len(t, flaky["TestNormal"], 2)
158170
}
159171

172+
func TestFilterParentTests(t *testing.T) {
173+
makeFailures := func(names ...string) map[string][]TestFailure {
174+
m := make(map[string][]TestFailure, len(names))
175+
for _, n := range names {
176+
m[n] = []TestFailure{{Name: n}}
177+
}
178+
return m
179+
}
180+
181+
t.Run("removes parent when subtests observed", func(t *testing.T) {
182+
grouped := makeFailures("TestFooSuite", "TestFooSuite/TestBar")
183+
counts := map[string]int{
184+
"TestFooSuite/TestBar": 10,
185+
"TestFooSuite/TestBaz": 20,
186+
}
187+
filterParentTests(grouped, counts)
188+
require.NotContains(t, grouped, "TestFooSuite")
189+
require.Contains(t, grouped, "TestFooSuite/TestBar")
190+
})
191+
192+
t.Run("keeps parent when no subtests observed", func(t *testing.T) {
193+
grouped := makeFailures("TestStandalone")
194+
counts := map[string]int{"TestStandalone": 5}
195+
filterParentTests(grouped, counts)
196+
require.Contains(t, grouped, "TestStandalone")
197+
})
198+
199+
t.Run("keeps subtest entry regardless", func(t *testing.T) {
200+
grouped := makeFailures("TestFooSuite/TestBar")
201+
counts := map[string]int{"TestFooSuite/TestBar": 10}
202+
filterParentTests(grouped, counts)
203+
require.Contains(t, grouped, "TestFooSuite/TestBar")
204+
})
205+
}
206+
160207
func TestGenerateSuiteReports(t *testing.T) {
161208
now := time.Now()
162209
twoDaysAgo := now.Add(-48 * time.Hour)

tools/flakereport/types.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ type TestRun struct {
2020
Failed bool // Whether the test failed
2121
Skipped bool // Whether the test was skipped
2222
RunID int64 // Workflow run ID
23+
JobID string // GitHub Actions job ID (unique per matrix job/shard)
2324
}
2425

2526
// TestReport represents aggregated failures for a single test
@@ -35,9 +36,9 @@ type TestReport struct {
3536
// SuiteReport represents aggregated flake data for a test suite
3637
type SuiteReport struct {
3738
SuiteName string // Test suite name from JUnit XML
38-
FlakeRate float64 // Percentage of runs with at least one non-retry failure
39-
FailedRuns int // Number of runs with at least one non-retry failure
40-
TotalRuns int // Total number of workflow runs where this suite appeared
39+
FlakeRate float64 // Percentage of job executions with at least one non-retry failure
40+
FailedRuns int // Number of job executions with at least one non-retry failure
41+
TotalRuns int // Total number of job executions where this suite appeared
4142
LastFailure time.Time // Timestamp of the most recent failure
4243
}
4344

0 commit comments

Comments
 (0)