Skip to content

Commit df9b9a9

Browse files
authored
Merge pull request #1018 from imjaroiswebdev/fix-ags-sudden-diff
feat: add retry mechanism and consistency checks for alert grouping settings
2 parents b246719 + 82fcd0f commit df9b9a9

File tree

1 file changed

+54
-11
lines changed

1 file changed

+54
-11
lines changed

pagerdutyplugin/resource_pagerduty_alert_grouping_setting.go

Lines changed: 54 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -245,26 +245,49 @@ func (r *resourceAlertGroupingSetting) Create(ctx context.Context, req resource.
245245

246246
func (r *resourceAlertGroupingSetting) Read(ctx context.Context, req resource.ReadRequest, resp *resource.ReadResponse) {
247247
var id types.String
248+
var expected resourceAlertGroupingSettingModel
248249

249-
resp.Diagnostics.Append(req.State.GetAttribute(ctx, path.Root("id"), &id)...)
250+
resp.Diagnostics.Append(req.State.Get(ctx, &expected)...) // Get expected state
251+
if resp.Diagnostics.HasError() {
252+
return
253+
}
254+
resp.Diagnostics.Append(req.State.GetAttribute(ctx, path.Root("id"), &id)...) // Get ID
250255
if resp.Diagnostics.HasError() {
251256
return
252257
}
253258
log.Printf("[INFO] Reading PagerDuty alert grouping setting %s", id)
254259

255-
state, err := requestGetAlertGroupingSetting(ctx, r.client, id.ValueString(), false)
256-
if err != nil {
257-
if util.IsNotFoundError(err) {
258-
resp.State.RemoveResource(ctx)
260+
const maxRetries = 6
261+
const retryInterval = 10 * time.Second
262+
var lastErr error
263+
264+
for i := 0; i < maxRetries; i++ {
265+
state, err := requestGetAlertGroupingSetting(ctx, r.client, id.ValueString(), false)
266+
if err != nil {
267+
if util.IsNotFoundError(err) {
268+
resp.State.RemoveResource(ctx)
269+
return
270+
}
271+
lastErr = err
272+
log.Printf("[WARN] Error reading alert grouping setting (attempt %d/%d): %v", i+1, maxRetries, err)
273+
time.Sleep(retryInterval)
274+
continue
275+
}
276+
277+
if isAlertGroupingConfigConsistent(ctx, &expected, &state) {
278+
resp.Diagnostics.Append(resp.State.Set(ctx, state)...) // Only update state if config is consistent
259279
return
260280
}
261-
resp.Diagnostics.AddError(
262-
fmt.Sprintf("Error reading PagerDuty alert grouping setting %s", id),
263-
err.Error(),
264-
)
265-
return
281+
log.Printf("[WARN] Inconsistent config from PagerDuty API for alert grouping setting %s (attempt %d/%d). Retrying...", id.ValueString(), i+1, maxRetries)
282+
time.Sleep(retryInterval)
283+
}
284+
285+
msg := "PagerDuty API returned inconsistent or incomplete alert grouping setting config after retries. Keeping last known good state. Manual intervention may be required."
286+
log.Printf("[ERROR] %s ID=%s", msg, id.ValueString())
287+
resp.Diagnostics.AddWarning("PagerDuty API inconsistency", msg)
288+
if lastErr != nil {
289+
resp.Diagnostics.AddError("Last error from PagerDuty API", lastErr.Error())
266290
}
267-
resp.Diagnostics.Append(resp.State.Set(ctx, state)...)
268291
}
269292

270293
func (r *resourceAlertGroupingSetting) Update(ctx context.Context, req resource.UpdateRequest, resp *resource.UpdateResponse) {
@@ -618,3 +641,23 @@ func (r *resourceAlertGroupingSetting) UsesTimeout(ctx context.Context, s Schema
618641
t := pagerduty.AlertGroupingSettingType(typeValue.ValueString())
619642
return t == pagerduty.AlertGroupingSettingTimeType
620643
}
644+
645+
// isAlertGroupingConfigConsistent compares the expected and actual alert grouping config.
646+
func isAlertGroupingConfigConsistent(ctx context.Context, expected, actual *resourceAlertGroupingSettingModel) bool {
647+
if expected == nil || actual == nil {
648+
return false
649+
}
650+
// Compare Type
651+
if !expected.Type.Equal(actual.Type) {
652+
return false
653+
}
654+
// Compare Config (object)
655+
if !expected.Config.Equal(actual.Config) {
656+
return false
657+
}
658+
// Compare Services
659+
if !expected.Services.Equal(actual.Services) {
660+
return false
661+
}
662+
return true
663+
}

0 commit comments

Comments
 (0)