Skip to content

Commit 312def6

Browse files
committed
Fix off-by-one in query/cost
1 parent 66ecf89 commit 312def6

File tree

4 files changed

+135
-4
lines changed

4 files changed

+135
-4
lines changed

docs/changelog.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# Changelog
22

3+
## v0.74.5
4+
5+
### Fixed
6+
7+
- Fixed incorrect incorrect suggestions generated by [query/cost](checks/query/cost.md) check.
8+
39
## v0.74.4
410

511
### Fixed

internal/checks/query_cost.go

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package checks
33
import (
44
"context"
55
"fmt"
6+
"log/slog"
67
"math"
78
"net/url"
89
"strings"
@@ -85,6 +86,7 @@ func (c CostCheck) Check(ctx context.Context, entry discovery.Entry, entries []d
8586
return problems
8687
}
8788

89+
slog.Debug("Calculating cost of the raw query", slog.String("expr", expr.Value.Value))
8890
qr, series, err := c.getQueryCost(ctx, expr.Value.Value)
8991
if err != nil {
9092
problems = append(problems, problemFromError(err, entry.Rule, c.Reporter(), c.prom.Name(), Bug))
@@ -266,6 +268,7 @@ func (c CostCheck) suggestRecordingRules(
266268

267269
sq := c.rewriteRuleFragment(expr.Value.Value, op.PositionRange(), other.Rule.RecordingRule.Record.Value+extra)
268270
var details strings.Builder
271+
slog.Debug("Calculating cost of the new query", slog.String("expr", sq))
269272
qr, afterSeries, err := c.getQueryCost(ctx, sq)
270273
if err == nil {
271274
if qr.Stats.Samples.TotalQueryableSamples >= beforeStats.Samples.TotalQueryableSamples &&
@@ -343,7 +346,7 @@ func (c CostCheck) rewriteRuleFragment(expr string, fragment posrange.PositionRa
343346
buf.WriteString(expr[:int(fragment.Start)])
344347
}
345348
buf.WriteString(replacement)
346-
if int(fragment.End)+1 < len(expr) {
349+
if int(fragment.End) < len(expr) {
347350
buf.WriteString(expr[int(fragment.End):])
348351
}
349352
return buf.String()
@@ -354,18 +357,25 @@ func (c CostCheck) diffStatsInt(a, b int) string {
354357
if delta == 0 || math.IsNaN(delta) {
355358
return fmt.Sprintf("%d (no change)", a)
356359
}
357-
return fmt.Sprintf("%d instead of %d (%0.2f%%)", b, a, delta)
360+
return fmt.Sprintf("%d instead of %d (%s%%)", b, a, formatDelta(delta))
358361
}
359362

360363
func (c CostCheck) diffStatsDuration(a, b float64) string {
361364
delta := ((b - a) / a) * 100
362365
if delta == 0 || math.IsNaN(delta) {
363366
return output.HumanizeDuration(c.statToDuration(a)) + " (no change)"
364367
}
365-
return fmt.Sprintf("%s instead of %s (%0.2f%%)",
368+
return fmt.Sprintf("%s instead of %s (%s%%)",
366369
output.HumanizeDuration(c.statToDuration(b)),
367370
output.HumanizeDuration(c.statToDuration(a)),
368-
delta)
371+
formatDelta(delta))
372+
}
373+
374+
func formatDelta(delta float64) string {
375+
if delta <= 0 {
376+
return fmt.Sprintf("%0.2f", delta)
377+
}
378+
return fmt.Sprintf("+%0.2f", delta)
369379
}
370380

371381
func (c CostCheck) isSuggestionFor(src, potential utils.Source, join *utils.Join, unless *utils.Unless) (promParser.Node, string, bool, bool) {

internal/checks/query_cost_test.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1375,6 +1375,75 @@ func TestCostCheck(t *testing.T) {
13751375
},
13761376
},
13771377
},
1378+
{
1379+
description: "suggest recording rule / complex",
1380+
content: `- record: instance_job:fl2_hmd_request_phase_latency_30ms_good:rate5m
1381+
expr: sum without (le) (histogram_fraction(0, 0.03, rate(fl2_request_phase_duration_seconds[5m])) * histogram_count(rate(fl2_request_phase_duration_seconds[5m])))
1382+
`,
1383+
checker: func(prom *promapi.FailoverGroup) checks.RuleChecker {
1384+
return checks.NewCostCheck(prom, 100, 100, 0, 0, "check comment", checks.Warning)
1385+
},
1386+
prometheus: newSimpleProm,
1387+
entries: mustParseContent(`
1388+
1389+
- record: instance_job:fl2_hmd_request_phase_latency_count:rate5m
1390+
expr: histogram_count(rate(fl2_request_phase_duration_seconds[5m]))
1391+
`),
1392+
problems: true,
1393+
mocks: []*prometheusMock{
1394+
{
1395+
conds: []requestCondition{
1396+
requireQueryPath,
1397+
formCond{key: "query", value: "count(sum without (le) (histogram_fraction(0, 0.03, rate(fl2_request_phase_duration_seconds[5m])) * histogram_count(rate(fl2_request_phase_duration_seconds[5m]))))"},
1398+
},
1399+
resp: vectorResponse{
1400+
samples: []*model.Sample{
1401+
generateSample(map[string]string{}),
1402+
},
1403+
stats: promapi.QueryStats{
1404+
Samples: promapi.QuerySamples{
1405+
TotalQueryableSamples: 50,
1406+
PeakSamples: 50,
1407+
},
1408+
Timings: promapi.QueryTimings{
1409+
EvalTotalTime: 10,
1410+
},
1411+
},
1412+
},
1413+
},
1414+
{
1415+
conds: []requestCondition{
1416+
requireQueryPath,
1417+
formCond{key: "query", value: "count(sum without (le) (histogram_fraction(0, 0.03, rate(fl2_request_phase_duration_seconds[5m])) * instance_job:fl2_hmd_request_phase_latency_count:rate5m))"},
1418+
},
1419+
resp: vectorResponse{
1420+
samples: []*model.Sample{
1421+
generateSample(map[string]string{}),
1422+
},
1423+
stats: promapi.QueryStats{
1424+
Samples: promapi.QuerySamples{
1425+
TotalQueryableSamples: 30,
1426+
PeakSamples: 30,
1427+
},
1428+
Timings: promapi.QueryTimings{
1429+
EvalTotalTime: 11,
1430+
},
1431+
},
1432+
},
1433+
},
1434+
{
1435+
conds: []requestCondition{
1436+
requireQueryPath,
1437+
formCond{key: "query", value: checks.BytesPerSampleQuery},
1438+
},
1439+
resp: vectorResponse{
1440+
samples: []*model.Sample{
1441+
generateSampleWithValue(map[string]string{}, 2048),
1442+
},
1443+
},
1444+
},
1445+
},
1446+
},
13781447
}
13791448

13801449
runTests(t, testCases)

internal/checks/query_cost_test.snap

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,52 @@
442442

443443
---
444444

445+
[TestCostCheck/suggest_recording_rule_/_complex - 1]
446+
- description: suggest recording rule / complex
447+
content: |
448+
- record: instance_job:fl2_hmd_request_phase_latency_30ms_good:rate5m
449+
expr: sum without (le) (histogram_fraction(0, 0.03, rate(fl2_request_phase_duration_seconds[5m])) * histogram_count(rate(fl2_request_phase_duration_seconds[5m])))
450+
output: |
451+
2 | expr: sum without (le) (histogram_fraction(0, 0.03, rate(fl2_request_phase_duration_seconds[5m])) * histogram_count(rate(fl2_request_phase_duration_seconds[5m])))
452+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Use `instance_job:fl2_hmd_request_phase_latency_count:rate5m` here instead to speed up the query.
453+
problem:
454+
reporter: query/cost
455+
summary: query could use a recording rule
456+
details: |
457+
There is a recording rule that already stores the result of this query, use it here to speed up this query.
458+
459+
```yaml
460+
- record: instance_job:fl2_hmd_request_phase_latency_count:rate5m
461+
expr: histogram_count(rate(fl2_request_phase_duration_seconds[5m]))
462+
```
463+
464+
Using `instance_job:fl2_hmd_request_phase_latency_count:rate5m` rule would speed up this query:
465+
466+
- Total queried samples would be 30 instead of 50 (-40.00%)
467+
- Peak queried samples would be 30 instead of 50 (-40.00%)
468+
- Query evaluation time would be 11s instead of 10s (+10.00%)
469+
470+
To get results for both original and suggested query click below:
471+
472+
- [Original query](https://simple.example.com/graph?g0.expr=sum+without+%28le%29+%28histogram_fraction%280%2C+0.03%2C+rate%28fl2_request_phase_duration_seconds%5B5m%5D%29%29+%2A+histogram_count%28rate%28fl2_request_phase_duration_seconds%5B5m%5D%29%29%29&g0.tab=table)
473+
- [Suggested query](https://simple.example.com/graph?g0.expr=sum+without+%28le%29+%28histogram_fraction%280%2C+0.03%2C+rate%28fl2_request_phase_duration_seconds%5B5m%5D%29%29+%2A+instance_job%3Afl2_hmd_request_phase_latency_count%3Arate5m%29&g0.tab=table)
474+
diagnostics:
475+
- message: Use `instance_job:fl2_hmd_request_phase_latency_count:rate5m` here instead to speed up the query.
476+
pos:
477+
- line: 2
478+
firstcolumn: 9
479+
lastcolumn: 164
480+
firstcolumn: 95
481+
lastcolumn: 155
482+
kind: 0
483+
lines:
484+
first: 2
485+
last: 2
486+
severity: 0
487+
anchor: 0
488+
489+
---
490+
445491
[TestCostCheck/suggest_recording_rule_/_ignore_multi-source - 1]
446492
[]
447493

0 commit comments

Comments
 (0)