diff --git a/src/RockBot.Agent/agent/contradiction-sweep.md b/src/RockBot.Agent/agent/contradiction-sweep.md new file mode 100644 index 0000000..efa102e --- /dev/null +++ b/src/RockBot.Agent/agent/contradiction-sweep.md @@ -0,0 +1,28 @@ +You are a memory contradiction reviewer. Inspect the listed claim/feedback memory +entries and identify pairs that contradict each other on the same subject — same +tool for capability claims, same rule subject for feedback memories. + +Rules for choosing the winner of a contradicting pair: +- If exactly one entry is marked (user-correction), it ALWAYS wins regardless of + recency. The other becomes the loser. +- Otherwise the more recent entry (later created date) wins. +- If you cannot decide unambiguously — for example, both entries make different but + not opposite claims — OMIT the pair. Do not guess. + +Be conservative. Phase 3 is intentionally narrow: this pass exists only to catch +contradictions the deterministic hot-path detector missed. False positives here +quietly evict valid memories, so when in doubt, skip. + +Return ONLY valid JSON in this shape and nothing else: + +{ + "pairs": [ + { + "winnerId": "", + "loserId": "", + "reason": "" + } + ] +} + +If you find no contradictions, return: {"pairs": []} diff --git a/src/RockBot.Host.Abstractions/ContradictionResolution.cs b/src/RockBot.Host.Abstractions/ContradictionResolution.cs new file mode 100644 index 0000000..0ae1052 --- /dev/null +++ b/src/RockBot.Host.Abstractions/ContradictionResolution.cs @@ -0,0 +1,48 @@ +namespace RockBot.Host; + +/// +/// Outcome of running the Phase 3 contradiction detector against an incoming +/// . Encodes which side of the contradiction wins: +/// the incoming entry (in which case +/// names the older entries to mark with ), +/// or an existing user-correction entry (in which case +/// is set so the caller saves the incoming +/// entry already marked as superseded). +/// +/// +/// Exactly one of or +/// carries content. 
Use +/// for "no contradiction detected". +/// +public sealed record ContradictionResolution +{ + /// "No contradiction detected" sentinel. + public static ContradictionResolution None { get; } = new(); + + private ContradictionResolution() { } + + /// + /// Older entries that the incoming entry contradicts and replaces. + /// Caller marks each one's with the incoming entry id. + /// + public IReadOnlyList ExistingIdsToSupersede { get; init; } = []; + + /// + /// Id of an existing user-correction entry that contradicts and supersedes the incoming + /// entry. When set, the caller persists the incoming entry with + /// equal to this id. + /// + public string? IncomingSupersededBy { get; init; } + + /// The incoming entry wins; caller marks the listed older entries as superseded. + public static ContradictionResolution NewerWins(IReadOnlyList existingIds) => + new() { ExistingIdsToSupersede = existingIds }; + + /// An existing user-correction wins; caller marks the incoming entry as superseded. + public static ContradictionResolution UserCorrectionWins(string existingId) => + new() { IncomingSupersededBy = existingId }; + + /// True when the resolution would change any state. + public bool HasContradiction => + ExistingIdsToSupersede.Count > 0 || IncomingSupersededBy is not null; +} diff --git a/src/RockBot.Host.Abstractions/DreamOptions.cs b/src/RockBot.Host.Abstractions/DreamOptions.cs index d2f7c25..b597bff 100644 --- a/src/RockBot.Host.Abstractions/DreamOptions.cs +++ b/src/RockBot.Host.Abstractions/DreamOptions.cs @@ -173,6 +173,19 @@ public sealed class DreamOptions /// public bool ObservationEnabled { get; set; } = true; + /// + /// Whether the Phase 3 self-repair contradiction sweep pass is enabled. + /// LLM-mediated backstop for claim/capability/* and feedback/* + /// contradictions the hot-path keyword detector missed. 
+ /// + public bool ContradictionSweepEnabled { get; set; } = true; + + /// + /// Path to the contradiction sweep directive file, relative to . + /// When the file does not exist, a built-in fallback directive is used. + /// + public string ContradictionSweepDirectivePath { get; set; } = "contradiction-sweep.md"; + /// /// Days of no reinforcement (measured against ) /// before importance decay begins. Entries younger than this are left alone regardless diff --git a/src/RockBot.Host.Abstractions/FeedbackMemoryCategories.cs b/src/RockBot.Host.Abstractions/FeedbackMemoryCategories.cs new file mode 100644 index 0000000..f981cfc --- /dev/null +++ b/src/RockBot.Host.Abstractions/FeedbackMemoryCategories.cs @@ -0,0 +1,54 @@ +namespace RockBot.Host; + +/// +/// Well-known long-term memory category names for feedback-shaped entries (rules, +/// directives, user reversals). Phase 3 self-repair contradiction detection scopes +/// to entries whose category sits under . +/// +/// +/// User-tagged corrections (entries under or carrying +/// the tag) are treated as authoritative: they always win +/// over agent-self entries when a contradiction is detected, regardless of recency. +/// +public static class FeedbackMemoryCategories +{ + /// Category prefix for all feedback entries. + public const string Prefix = "feedback"; + + /// Category prefix for user-issued corrections (always-wins). + public const string UserCorrectionPrefix = "feedback/from-user"; + + /// Tag value that marks an entry as a user correction (always-wins). + public const string UserCorrectionTag = "correction"; + + /// + /// Returns true when the given category names a feedback memory. + /// Accepts null and returns false. + /// + public static bool IsFeedbackMemory(string? 
category) => + category is not null + && (category.Equals(Prefix, StringComparison.Ordinal) + || category.StartsWith(Prefix + "/", StringComparison.Ordinal)); + + /// + /// Returns true when the entry should be treated as a user correction — + /// either by category prefix or by the well-known tag. + /// + public static bool IsUserCorrection(MemoryEntry entry) + { + if (entry.Category is not null + && (entry.Category.Equals(UserCorrectionPrefix, StringComparison.Ordinal) + || entry.Category.StartsWith(UserCorrectionPrefix + "/", StringComparison.Ordinal))) + { + return true; + } + + foreach (var tag in entry.Tags) + { + if (string.Equals(tag, UserCorrectionTag, StringComparison.OrdinalIgnoreCase)) + return true; + } + + return false; + } +} diff --git a/src/RockBot.Host.Abstractions/IMemoryContradictionDetector.cs b/src/RockBot.Host.Abstractions/IMemoryContradictionDetector.cs new file mode 100644 index 0000000..2a3f71b --- /dev/null +++ b/src/RockBot.Host.Abstractions/IMemoryContradictionDetector.cs @@ -0,0 +1,22 @@ +namespace RockBot.Host; + +/// +/// Hot-path contradiction detector for Phase 3 self-repair. Resolves conflicting beliefs +/// at memory-write time, narrowly scoped to capability claims (claim/capability/*) +/// and feedback memories (feedback/*). Saves outside those categories return +/// without scanning. +/// +/// +/// Detection is keyword-based and deterministic; the LLM-mediated dream contradiction +/// sweep is the backstop for cases this hot path misses. User-tagged corrections always +/// win over agent-self entries regardless of recency (see ). +/// +public interface IMemoryContradictionDetector +{ + /// + /// Scans existing entries in the same narrow category as and + /// returns a describing which entries (if any) + /// should be marked as superseded. 
+ /// + Task ResolveAsync(MemoryEntry incoming, CancellationToken cancellationToken = default); +} diff --git a/src/RockBot.Host.Abstractions/MemoryEntry.cs b/src/RockBot.Host.Abstractions/MemoryEntry.cs index e985e77..4781d63 100644 --- a/src/RockBot.Host.Abstractions/MemoryEntry.cs +++ b/src/RockBot.Host.Abstractions/MemoryEntry.cs @@ -44,4 +44,14 @@ public sealed record MemoryEntry( /// agent context builder evaluate this shape before injection. /// public VerifyShape? Verify { get; init; } + + /// + /// Id of the memory entry that contradicted and replaced this one. Set by the + /// Phase 3 contradiction detector (hot path on save) or the dream contradiction + /// sweep (LLM-mediated backstop). Superseded entries are excluded from + /// and from recall surfaces, but remain + /// retrievable by id via for audit. + /// Always null for live entries. + /// + public string? SupersededBy { get; init; } } diff --git a/src/RockBot.Host.Abstractions/MemorySearchCriteria.cs b/src/RockBot.Host.Abstractions/MemorySearchCriteria.cs index 9add1f0..17f85f7 100644 --- a/src/RockBot.Host.Abstractions/MemorySearchCriteria.cs +++ b/src/RockBot.Host.Abstractions/MemorySearchCriteria.cs @@ -25,6 +25,12 @@ namespace RockBot.Host; /// When is , controls case sensitivity of the regex. /// Default false mirrors Claude Code's Grep tool. Ignored in hybrid mode. /// +/// +/// When true, entries with set are included in +/// search results. Default false hides them from recall, mirroring Phase 3 self-repair +/// semantics. Used by audit tooling and the dream contradiction sweep that need to inspect +/// the full corpus. +/// public sealed record MemorySearchCriteria( string? Query = null, string? Category = null, @@ -34,4 +40,5 @@ public sealed record MemorySearchCriteria( int MaxResults = 20, float[]? 
QueryEmbedding = null, MemorySearchMode Mode = MemorySearchMode.Hybrid, - bool RegexCaseSensitive = false); + bool RegexCaseSensitive = false, + bool IncludeSuperseded = false); diff --git a/src/RockBot.Host/AgentMemoryExtensions.cs b/src/RockBot.Host/AgentMemoryExtensions.cs index 32d0f51..95494e7 100644 --- a/src/RockBot.Host/AgentMemoryExtensions.cs +++ b/src/RockBot.Host/AgentMemoryExtensions.cs @@ -60,6 +60,10 @@ public static AgentHostBuilder WithLongTermMemory( builder.Services.AddSingleton(); builder.Services.AddSingleton(); + // Phase 3 self-repair: hot-path contradiction detector. Narrowly scoped to + // claim/capability/* and feedback/* writes; other categories short-circuit. + builder.Services.AddSingleton(); + return builder; } diff --git a/src/RockBot.Host/CapabilityClaimWriter.cs b/src/RockBot.Host/CapabilityClaimWriter.cs index 00b1e38..1c239b0 100644 --- a/src/RockBot.Host/CapabilityClaimWriter.cs +++ b/src/RockBot.Host/CapabilityClaimWriter.cs @@ -1,6 +1,7 @@ using System.Globalization; using System.Security.Cryptography; using System.Text; +using Microsoft.Extensions.Logging; namespace RockBot.Host; @@ -12,13 +13,23 @@ namespace RockBot.Host; internal sealed class CapabilityClaimWriter : ICapabilityClaimWriter { private readonly ILongTermMemory _memory; + private readonly IMemoryContradictionDetector? _contradictionDetector; + private readonly ILogger? _logger; public CapabilityClaimWriter(ILongTermMemory memory) + : this(memory, contradictionDetector: null, logger: null) { } + + public CapabilityClaimWriter( + ILongTermMemory memory, + IMemoryContradictionDetector? contradictionDetector, + ILogger? logger) { _memory = memory ?? 
throw new ArgumentNullException(nameof(memory)); + _contradictionDetector = contradictionDetector; + _logger = logger; } - public Task SaveCapabilityClaimAsync(CapabilityClaim claim, CancellationToken cancellationToken = default) + public async Task SaveCapabilityClaimAsync(CapabilityClaim claim, CancellationToken cancellationToken = default) { ArgumentNullException.ThrowIfNull(claim); Validate(claim); @@ -34,7 +45,43 @@ public Task SaveCapabilityClaimAsync(CapabilityClaim claim, CancellationToken ca Verify = claim.Verify }; - return _memory.SaveAsync(entry, cancellationToken); + if (_contradictionDetector is not null) + { + var resolution = await _contradictionDetector.ResolveAsync(entry, cancellationToken); + if (resolution.IncomingSupersededBy is not null) + { + // An existing user-correction wins — the new claim lands on disk already marked + // as superseded so it is excluded from search/recall but preserved for audit. + entry = entry with { SupersededBy = resolution.IncomingSupersededBy }; + _logger?.LogInformation( + "CapabilityClaimWriter: incoming claim {Id} marked superseded by user-correction {ExistingId}", + entry.Id, resolution.IncomingSupersededBy); + } + else if (resolution.ExistingIdsToSupersede.Count > 0) + { + await ApplySupersessionAsync(resolution.ExistingIdsToSupersede, entry.Id, cancellationToken); // NOTE(review): the older entries are marked before the incoming entry is saved below; if that save throws or is cancelled they stay superseded by an id that was never persisted. + } + } + + await _memory.SaveAsync(entry, cancellationToken); + } + + private async Task ApplySupersessionAsync(IReadOnlyList ids, string winnerId, CancellationToken ct) + { + foreach (var id in ids) + { + var existing = await _memory.GetAsync(id, ct); + if (existing is null) continue; + if (existing.SupersededBy is not null) continue; + + await _memory.SaveAsync( + existing with { SupersededBy = winnerId, UpdatedAt = DateTimeOffset.UtcNow }, + ct); + + _logger?.LogInformation( + "CapabilityClaimWriter: marked {ExistingId} superseded by {WinnerId} ({Category})", + id, winnerId, existing.Category ?? 
"(none)"); + } } private static void Validate(CapabilityClaim claim) diff --git a/src/RockBot.Host/DreamService.cs b/src/RockBot.Host/DreamService.cs index c1bef84..4fb40e5 100644 --- a/src/RockBot.Host/DreamService.cs +++ b/src/RockBot.Host/DreamService.cs @@ -41,6 +41,7 @@ internal sealed class DreamService : IHostedService, IDisposable private readonly ILogger _logger; private readonly RockBot.Observation.IObservationPipelineCoordinator? _observationCoordinator; private readonly ConversationLogTranscriptAdapter? _observationTranscriptAdapter; + private readonly IMemoryContradictionDetector? _contradictionDetector; private Timer? _timer; private CronExpression? _cron; private string? _dreamDirective; @@ -58,6 +59,7 @@ internal sealed class DreamService : IHostedService, IDisposable private string? _identityDirective; private string? _wispFailureDirective; private string? _toolSuccessLearningDirective; + private string? _contradictionSweepDirective; public DreamService( ILongTermMemory memory, @@ -79,7 +81,8 @@ public DreamService( IWispExecutionLog? wispExecutionLog = null, IWorkingMemory? workingMemory = null, RockBot.Observation.IObservationPipelineCoordinator? observationCoordinator = null, - ConversationLogTranscriptAdapter? observationTranscriptAdapter = null) + ConversationLogTranscriptAdapter? observationTranscriptAdapter = null, + IMemoryContradictionDetector? 
contradictionDetector = null) { _memory = memory; _skillStore = skillStores.FirstOrDefault(); @@ -101,6 +104,7 @@ public DreamService( _logger = logger; _observationCoordinator = observationCoordinator; _observationTranscriptAdapter = observationTranscriptAdapter; + _contradictionDetector = contradictionDetector; } public Task StartAsync(CancellationToken cancellationToken) @@ -304,6 +308,19 @@ public Task StartAsync(CancellationToken cancellationToken) _logger.LogInformation("DreamService: loaded tool-success-learning directive from {Path}", path); } + if (_options.ContradictionSweepEnabled) + { + var path = ResolvePath(_options.ContradictionSweepDirectivePath, _profileOptions.BasePath); + _contradictionSweepDirective = File.Exists(path) + ? File.ReadAllText(path) + : null; // null → BuiltInContradictionSweepDirective used in RunContradictionSweepPassAsync + + if (!File.Exists(path)) + _logger.LogDebug("DreamService: contradiction sweep directive not found at {Path}; using built-in", path); + else + _logger.LogInformation("DreamService: loaded contradiction sweep directive from {Path}", path); + } + try { _cron = CronExpression.Parse(_options.CronSchedule, @@ -551,6 +568,8 @@ private async Task DreamAsync() ct.ThrowIfCancellationRequested(); await RunIdentityReflectionPassAsync(ct); + ct.ThrowIfCancellationRequested(); await RunContradictionSweepPassAsync(ct); + sw.Stop(); _logger.LogInformation( "DreamService: dream cycle complete — {Deleted} deleted, {Saved} saved, elapsed {Elapsed}", @@ -3056,6 +3075,140 @@ await RunPassAsync("identity reflection", async () => }); } + /// + /// Phase 3 self-repair contradiction sweep — LLM-mediated backstop for cases the + /// hot-path keyword detector missed. Narrowly scoped to claim/capability/* + /// and feedback/*; entries elsewhere are not loaded. Both searches leave + /// MemorySearchCriteria.IncludeSuperseded at its default (false), so already-superseded + /// entries are not requested for the corpus, and only live entries are marked as superseded. 
+ /// + private async Task RunContradictionSweepPassAsync(CancellationToken ct) + { + if (!_options.ContradictionSweepEnabled) return; + + await RunPassAsync("contradiction sweep", async () => + { + var claims = await _memory.SearchAsync( + new MemorySearchCriteria( + Category: CapabilityClaimCategories.Prefix, + MaxResults: 500), + ct); + var feedback = await _memory.SearchAsync( + new MemorySearchCriteria( + Category: FeedbackMemoryCategories.Prefix, + MaxResults: 500), + ct); + + var corpus = claims.Concat(feedback) + .GroupBy(e => e.Id, StringComparer.OrdinalIgnoreCase) + .Select(g => g.First()) + .ToList(); + + if (corpus.Count < 2) + { + _logger.LogInformation( + "DreamService: contradiction sweep — only {Count} claim/feedback entry/entries; skipping", + corpus.Count); + return; + } + + var directive = _contradictionSweepDirective ?? BuiltInContradictionSweepDirective; + + var userMessage = new StringBuilder(); + userMessage.AppendLine($"Review {corpus.Count} claim/feedback memory entries for contradictions:"); + userMessage.AppendLine(); + + for (var i = 0; i < corpus.Count; i++) + { + var e = corpus[i]; + var tags = e.Tags.Count > 0 ? string.Join(", ", e.Tags) : "(none)"; + var marker = FeedbackMemoryCategories.IsUserCorrection(e) ? " (user-correction)" : string.Empty; + userMessage.AppendLine( + $"{i + 1}. [ID:{e.Id}] category={e.Category ?? 
"uncategorized"}{marker} " + + $"created={e.CreatedAt:yyyy-MM-dd} tags=[{tags}]"); + userMessage.AppendLine($" {e.Content}"); + } + + var result = await InvokeDreamPassAsync( + "contradiction sweep", + directive, + userMessage.ToString(), + ct); + if (result is null) return; + + var supersededCount = await ApplyContradictionSweepResultAsync( + _memory, corpus, result.Pairs, _logger, ct); + + _logger.LogInformation( + "DreamService: contradiction sweep complete — {Count} entry/entries marked superseded out of {Corpus} reviewed", + supersededCount, corpus.Count); + }); + } + + /// + /// Applies LLM-proposed contradiction pairs to the long-term memory store and returns how many entries were marked superseded. A pair is skipped when either id is blank, both ids match, either id is absent from the corpus, or the loser is already superseded; + /// the user-correction protection invariant is enforced: a user correction may never be superseded + /// by a non-correction. NOTE(review): the already-superseded check reads the corpus snapshot, not saves made earlier in the same call, so reciprocal pairs (A beats B, then B beats A) would mark both entries superseded — confirm whether that needs guarding. Internal to enable direct unit testing without a real LLM call. + /// + internal static async Task ApplyContradictionSweepResultAsync( + ILongTermMemory memory, + IReadOnlyList corpus, + IReadOnlyList? pairs, + ILogger logger, + CancellationToken ct) + { + var byId = corpus.ToDictionary(e => e.Id, StringComparer.OrdinalIgnoreCase); + var supersededCount = 0; + + foreach (var pair in pairs ?? 
[]) + { + if (string.IsNullOrWhiteSpace(pair.WinnerId) || string.IsNullOrWhiteSpace(pair.LoserId)) + continue; + if (string.Equals(pair.WinnerId, pair.LoserId, StringComparison.OrdinalIgnoreCase)) + continue; + if (!byId.TryGetValue(pair.WinnerId, out var winner)) continue; + if (!byId.TryGetValue(pair.LoserId, out var loser)) continue; + if (loser.SupersededBy is not null) continue; + + if (FeedbackMemoryCategories.IsUserCorrection(loser) + && !FeedbackMemoryCategories.IsUserCorrection(winner)) + { + logger.LogInformation( + "DreamService: contradiction sweep ignored — sweep tried to supersede user-correction {LoserId} with non-correction {WinnerId}", + pair.LoserId, pair.WinnerId); + continue; + } + + await memory.SaveAsync( + loser with { SupersededBy = winner.Id, UpdatedAt = DateTimeOffset.UtcNow }, + ct); + supersededCount++; + logger.LogInformation( + "DreamService: contradiction sweep marked {LoserId} superseded by {WinnerId} (reason: {Reason})", + pair.LoserId, pair.WinnerId, pair.Reason ?? "(none)"); + } + + return supersededCount; + } + + internal sealed record ContradictionSweepResultDto(IReadOnlyList? Pairs); + internal sealed record ContradictionPairDto(string? WinnerId, string? LoserId, string? Reason); + + private const string BuiltInContradictionSweepDirective = """ + You are a memory contradiction reviewer. Inspect the listed claim/feedback memory entries + and identify pairs that contradict each other on the same subject (same tool, same rule). + + Rules for choosing the winner of a contradicting pair: + - If exactly one entry is marked (user-correction), it ALWAYS wins. + - Otherwise the more recent entry (later created date) wins. + - If you cannot decide unambiguously, omit the pair. + + Return ONLY valid JSON in this shape and nothing else: + { "pairs": [ { "winnerId": "...", "loserId": "...", "reason": "..." 
} ] } + + If you find no contradictions, return: { "pairs": [] } + """; + /// /// Wraps a single dream-pass body so unhandled exceptions become a per-pass /// error log without aborting the whole cycle. diff --git a/src/RockBot.Host/FileMemoryStore.cs b/src/RockBot.Host/FileMemoryStore.cs index 1fc4a58..f20cc53 100644 --- a/src/RockBot.Host/FileMemoryStore.cs +++ b/src/RockBot.Host/FileMemoryStore.cs @@ -293,6 +293,12 @@ internal static string BuildRegexSurface(MemoryEntry entry) private static bool PassesStructuralFilters(MemoryEntry entry, MemorySearchCriteria criteria) { + // Phase 3 self-repair: entries marked as superseded by a contradicting save are + // hidden from search/recall by default but remain on disk for audit. + // Direct GetAsync still returns them; supersession traversal needs the by-id path. + if (entry.SupersededBy is not null && !criteria.IncludeSuperseded) + return false; + if (criteria.Category is not null) { if (entry.Category is null) return false; diff --git a/src/RockBot.Host/MemoryContradictionDetector.cs b/src/RockBot.Host/MemoryContradictionDetector.cs new file mode 100644 index 0000000..4fea043 --- /dev/null +++ b/src/RockBot.Host/MemoryContradictionDetector.cs @@ -0,0 +1,265 @@ +using System.Text.RegularExpressions; +using Microsoft.Extensions.Logging; + +namespace RockBot.Host; + +/// +/// Default . Hot-path keyword detector for the +/// two narrow shapes Phase 3 covers: capability-claim valence inversion and feedback-rule +/// directive inversion. Outside claim/capability/* and feedback/* the +/// detector short-circuits to so general +/// memory writes pay zero detection cost. +/// +internal sealed partial class MemoryContradictionDetector : IMemoryContradictionDetector +{ + /// + /// Negation markers used to detect opposite valence on capability-claim statements. + /// Matches a known intent ("cannot", "blocked", etc.) regardless of placement. 
+ /// + [GeneratedRegex( + @"\b(cannot|can't|cant|does not|doesn't|isn't|is not|are not|aren't|won't|will not|" + + @"never|no longer|not supported|not exposed|does not expose|wrapper limitation|blocked|fails|broken|" + + @"unable to|cannot pass|can not)\b", + RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, + matchTimeoutMilliseconds: 200)] + private static partial Regex NegationMarkerPattern(); + + /// Affirmative-directive markers (feedback path). + [GeneratedRegex( + @"\b(always|prefer|use|do|please)\b", + RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, + matchTimeoutMilliseconds: 200)] + private static partial Regex AffirmativeDirectivePattern(); + + /// Negative-directive markers (feedback path). + [GeneratedRegex( + @"\b(never|avoid|stop|don't|do not|dont|skip|disable)\b", + RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, + matchTimeoutMilliseconds: 200)] + private static partial Regex NegativeDirectivePattern(); + + private static readonly HashSet Stopwords = new(StringComparer.OrdinalIgnoreCase) + { + "the", "a", "an", "and", "or", "but", "if", "then", "to", "of", "for", + "in", "on", "at", "by", "with", "is", "are", "was", "were", "be", "been", + "being", "do", "does", "did", "have", "has", "had", "i", "you", "we", "they", + "it", "this", "that", "these", "those", "as", "from", "into", "about", + "always", "never", "prefer", "avoid", "stop", "don't", "dont", "do", + "cannot", "can't", "cant", "not", "no", "use", "using", "skip", "disable", + "please", "should", "would", "could", "may", "might", "will", "shall" + }; + + private const float MinFeedbackOverlap = 0.3f; + + private readonly ILongTermMemory _memory; + private readonly ILogger _logger; + + public MemoryContradictionDetector( + ILongTermMemory memory, + ILogger logger) + { + _memory = memory ?? 
throw new ArgumentNullException(nameof(memory)); + _logger = logger; + } + + public async Task ResolveAsync( + MemoryEntry incoming, CancellationToken cancellationToken = default) + { + ArgumentNullException.ThrowIfNull(incoming); + + if (CapabilityClaimCategories.IsCapabilityClaim(incoming.Category)) + return await ResolveCapabilityClaimAsync(incoming, cancellationToken); + + if (FeedbackMemoryCategories.IsFeedbackMemory(incoming.Category)) + return await ResolveFeedbackAsync(incoming, cancellationToken); + + return ContradictionResolution.None; + } + + // ── Capability-claim path ───────────────────────────────────────────────── + + private async Task ResolveCapabilityClaimAsync( + MemoryEntry incoming, CancellationToken ct) + { + // Same (server, tool) is the join key. The capability-claim writer always builds + // the category as claim/capability/{server}/{tool}, so we match on the full prefix. + var category = incoming.Category!; + var existing = await _memory.SearchAsync( + new MemorySearchCriteria(Category: category, MaxResults: 200), + ct); + + var incomingHasNegation = NegationMarkerPattern().IsMatch(incoming.Content); + var candidates = new List(); + foreach (var e in existing) + { + if (string.Equals(e.Id, incoming.Id, StringComparison.OrdinalIgnoreCase)) + continue; + if (e.SupersededBy is not null) + continue; + var existingHasNegation = NegationMarkerPattern().IsMatch(e.Content); + if (existingHasNegation == incomingHasNegation) + continue; + candidates.Add(e); + } + + if (candidates.Count == 0) + return ContradictionResolution.None; + + // User-correction wins regardless of recency. 
+ var correction = candidates.FirstOrDefault(FeedbackMemoryCategories.IsUserCorrection); + if (correction is not null) + { + _logger.LogInformation( + "ContradictionDetector: incoming claim {IncomingId} superseded by user correction {ExistingId} ({Category})", + incoming.Id, correction.Id, incoming.Category); + return ContradictionResolution.UserCorrectionWins(correction.Id); + } + + var loserIds = candidates.Select(c => c.Id).ToList(); + _logger.LogInformation( + "ContradictionDetector: capability-claim {IncomingId} supersedes {Count} older entry/entries in {Category}: {Ids}", + incoming.Id, loserIds.Count, incoming.Category, string.Join(", ", loserIds)); + return ContradictionResolution.NewerWins(loserIds); + } + + // ── Feedback path ───────────────────────────────────────────────────────── + + private async Task ResolveFeedbackAsync( + MemoryEntry incoming, CancellationToken ct) + { + // Search by the shared feedback prefix (not the incoming entry's own category); the + // per-entry CategoriesShareSubtree check in the loop below then keeps only candidates with the + // same category or the same leaf segment — the design's "same rule subject" approximation. + var existing = await _memory.SearchAsync( + new MemorySearchCriteria( + Category: FeedbackMemoryCategories.Prefix, + MaxResults: 500), + ct); + + var incomingValence = ClassifyDirective(incoming.Content); + if (incomingValence == DirectiveValence.Ambiguous) + return ContradictionResolution.None; + + var incomingTokens = TokenizeNonStopwords(incoming.Content); + if (incomingTokens.Count < 2) + return ContradictionResolution.None; + + var contradicted = new List(); + foreach (var e in existing) + { + if (string.Equals(e.Id, incoming.Id, StringComparison.OrdinalIgnoreCase)) + continue; + if (e.SupersededBy is not null) + continue; + // Same subtree only — different categories under feedback/* are different rule subjects. 
+ if (!CategoriesShareSubtree(incoming.Category, e.Category)) + continue; + + var existingValence = ClassifyDirective(e.Content); + if (existingValence == DirectiveValence.Ambiguous || existingValence == incomingValence) + continue; + + var existingTokens = TokenizeNonStopwords(e.Content); + if (existingTokens.Count < 2) + continue; + + if (JaccardOverlap(incomingTokens, existingTokens) < MinFeedbackOverlap) + continue; + + contradicted.Add(e); + } + + if (contradicted.Count == 0) + return ContradictionResolution.None; + + var correction = contradicted.FirstOrDefault(FeedbackMemoryCategories.IsUserCorrection); + if (correction is not null) + { + _logger.LogInformation( + "ContradictionDetector: incoming feedback {IncomingId} superseded by user correction {ExistingId}", + incoming.Id, correction.Id); + return ContradictionResolution.UserCorrectionWins(correction.Id); + } + + // Multiple candidates here are not ambiguous: every entry in `contradicted` already + // shares the inverse valence of the incoming entry (filtered above) and clears the + // category + lexical-overlap gates. The original "skip when count>1" rule was overly + // defensive and produced false-negatives in real workloads where the same affirmative + // rule had been written multiple times before a single negative reversal. + var ids = contradicted.Select(c => c.Id).ToList(); + _logger.LogInformation( + "ContradictionDetector: feedback {IncomingId} supersedes {Count} older entry/entries: {Ids}", + incoming.Id, ids.Count, string.Join(", ", ids)); + return ContradictionResolution.NewerWins(ids); + } + + // ── Helpers ─────────────────────────────────────────────────────────────── + + private enum DirectiveValence { Affirmative, Negative, Ambiguous } + + private static DirectiveValence ClassifyDirective(string content) + { + // Negation dominates: "never use X" is a negative directive, even though "use" + // also matches the affirmative pattern (the action verb the directive operates on). 
+ // Affirmative is only chosen when no negative marker is present. + if (NegativeDirectivePattern().IsMatch(content)) + return DirectiveValence.Negative; + if (AffirmativeDirectivePattern().IsMatch(content)) + return DirectiveValence.Affirmative; + return DirectiveValence.Ambiguous; + } + + private static bool CategoriesShareSubtree(string? a, string? b) + { + if (a is null || b is null) return false; + if (string.Equals(a, b, StringComparison.Ordinal)) return true; + + // Allow feedback/from-user/style and feedback/from-agent/style to share the rule subject "style". + var aLeaf = LeafSegment(a); + var bLeaf = LeafSegment(b); + return aLeaf.Length > 0 && string.Equals(aLeaf, bLeaf, StringComparison.OrdinalIgnoreCase); + + static string LeafSegment(string category) + { + var slash = category.LastIndexOf('/'); + return slash < 0 ? category : category[(slash + 1)..]; + } + } + + private static HashSet TokenizeNonStopwords(string content) + { + var tokens = new HashSet(StringComparer.OrdinalIgnoreCase); + foreach (Match m in TokenPattern().Matches(content)) + { + var token = m.Value; + if (token.Length < 3) continue; + if (Stopwords.Contains(token)) continue; + tokens.Add(StripPluralS(token)); + } + return tokens; + } + + /// + /// Strips a trailing 's' from tokens of length 4+ so that singular/plural pairs + /// (report/reports, list/lists) collapse to the same key. Naive but adequate for the + /// rule-subject overlap heuristic — false-merges (e.g. "process" → "proces") don't + /// hurt because the comparison is symmetric, and over-merging only increases recall. + /// + private static string StripPluralS(string token) => + token.Length >= 4 && (token[^1] == 's' || token[^1] == 'S') + ? 
token[..^1] + : token; + + private static float JaccardOverlap(HashSet a, HashSet b) + { + if (a.Count == 0 || b.Count == 0) return 0f; + var intersection = 0; + foreach (var t in a) + if (b.Contains(t)) intersection++; + var union = a.Count + b.Count - intersection; + return union == 0 ? 0f : (float)intersection / union; + } + + [GeneratedRegex(@"[A-Za-z][A-Za-z0-9_'-]*", RegexOptions.CultureInvariant, matchTimeoutMilliseconds: 200)] + private static partial Regex TokenPattern(); +} diff --git a/src/RockBot.Memory/MemoryTools.cs b/src/RockBot.Memory/MemoryTools.cs index eed196f..80e2be9 100644 --- a/src/RockBot.Memory/MemoryTools.cs +++ b/src/RockBot.Memory/MemoryTools.cs @@ -62,6 +62,7 @@ when the thing the fact is about actually happened private readonly ILongTermMemory _memory; private readonly ILlmClient _llmClient; + private readonly IMemoryContradictionDetector? _contradictionDetector; private readonly ILogger _logger; private readonly IList _tools; private readonly string _extractionSystemPrompt; @@ -70,10 +71,12 @@ public MemoryTools( ILongTermMemory memory, ILlmClient llmClient, IOptions profileOptions, - ILogger logger) + ILogger logger, + IMemoryContradictionDetector? contradictionDetector = null) { _memory = memory; _llmClient = llmClient; + _contradictionDetector = contradictionDetector; _logger = logger; // Load shared memory rules and prepend to the extraction prompt @@ -111,7 +114,10 @@ private static string ResolvePath(string path, string basePath) [Description("Save an important fact, user preference, or learned pattern to long-term memory. " + "Returns immediately — the actual enrichment happens in the background. " + "The system will automatically expand the content into focused, keyword-rich entries. " + - "Pass a natural-language description; no pre-structuring needed.")] + "Pass a natural-language description; no pre-structuring needed. " + + "Reserved categories — 'feedback/...' 
(feedback rules and corrections) and " + + "'claim/capability/...' (capability claims) — bypass expansion and persist " + + "the content verbatim under the supplied category for the contradiction detector.")] public Task SaveMemory( [Description("The content to remember — can be a natural-language sentence or a compound fact")] string content, [Description("Optional category hint (e.g. 'user-preferences/pets')")] string? category = null, @@ -124,11 +130,53 @@ public Task SaveMemory( // (write proceeds; the appended observation tag rides through to the persisted entry). var (augmentedTags, hint) = ApplyObservationSoftGate(content, tags); + // Phase 3: scoped categories (feedback/* and claim/capability/*) are contracts, + // not hints. The default LLM extraction pass freely rewrites the category, which + // would defeat the contradiction detector's narrow-scope rule. Save scoped writes + // direct so the category and content survive verbatim. + if (IsScopedCategory(category)) + { + _ = Task.Run(() => SaveScopedDirectAsync(content, category!, augmentedTags)); + return Task.FromResult($"Memory save queued.{hint}"); + } + _ = Task.Run(() => SaveMemoryBackgroundAsync(content, category, augmentedTags)); return Task.FromResult($"Memory save queued.{hint}"); } + private static bool IsScopedCategory(string? category) => + FeedbackMemoryCategories.IsFeedbackMemory(category) + || CapabilityClaimCategories.IsCapabilityClaim(category); + + /// + /// Direct save path for scoped categories — bypasses LLM extraction so the caller's + /// category and tags are preserved exactly. The contradiction detector still runs. + /// + private async Task SaveScopedDirectAsync(string content, string category, string? tags) + { + try + { + var entry = new MemoryEntry( + Id: Guid.NewGuid().ToString("N")[..12], + Content: content, + Category: category, + Tags: ParseTagsList(tags) ?? 
[], + CreatedAt: DateTimeOffset.UtcNow); + + var resolved = await ApplyContradictionResolutionAsync(entry); + await _memory.SaveAsync(resolved); + + _logger.LogInformation( + "MemoryTools: scoped direct save {Id} ({Category}, superseded={Superseded}): {Content}", + resolved.Id, resolved.Category, resolved.SupersededBy ?? "no", resolved.Content); + } + catch (Exception ex) + { + _logger.LogError(ex, "MemoryTools: scoped direct save failed for content: {Content}", content); + } + } + private static (string? Tags, string Hint) ApplyObservationSoftGate(string content, string? tags) { var existing = ParseTagsList(tags); @@ -331,9 +379,10 @@ private async Task SaveMemoryBackgroundAsync(string content, string? category, s foreach (var entry in entries) { - await _memory.SaveAsync(entry); + var resolved = await ApplyContradictionResolutionAsync(entry); + await _memory.SaveAsync(resolved); _logger.LogInformation("Background save: {Id} ({Category}): {Content}", - entry.Id, entry.Category ?? "(none)", entry.Content); + resolved.Id, resolved.Category ?? "(none)", resolved.Content); } _logger.LogInformation("Background SaveMemory complete: {Count} new entries saved for content '{Content}'", @@ -345,6 +394,45 @@ private async Task SaveMemoryBackgroundAsync(string content, string? category, s } } + /// + /// Phase 3 self-repair: when the incoming entry sits under feedback/*, ask the + /// contradiction detector to resolve any conflicts with existing entries. Saves outside + /// that subtree skip the detector entirely, satisfying the design's narrow-scope rule + /// (no impact on saves outside claim/capability/* and feedback/*). 
+ /// + private async Task ApplyContradictionResolutionAsync(MemoryEntry entry) + { + if (_contradictionDetector is null) return entry; + if (!FeedbackMemoryCategories.IsFeedbackMemory(entry.Category)) return entry; + + var resolution = await _contradictionDetector.ResolveAsync(entry, CancellationToken.None); + if (!resolution.HasContradiction) return entry; + + if (resolution.IncomingSupersededBy is not null) + { + _logger.LogInformation( + "MemoryTools: incoming feedback {Id} marked superseded by user-correction {ExistingId}", + entry.Id, resolution.IncomingSupersededBy); + return entry with { SupersededBy = resolution.IncomingSupersededBy }; + } + + foreach (var loserId in resolution.ExistingIdsToSupersede) + { + var existing = await _memory.GetAsync(loserId); + if (existing is null || existing.SupersededBy is not null) continue; + await _memory.SaveAsync(existing with + { + SupersededBy = entry.Id, + UpdatedAt = DateTimeOffset.UtcNow + }); + _logger.LogInformation( + "MemoryTools: marked {ExistingId} superseded by {WinnerId}", + loserId, entry.Id); + } + + return entry; + } + /// /// Calls the LLM to expand into one or more focused, /// keyword-rich records. 
Falls back to a single direct diff --git a/tests/RockBot.Agent.Tests/MemoryToolsTests.cs b/tests/RockBot.Agent.Tests/MemoryToolsTests.cs index a59cebe..4f1cab5 100644 --- a/tests/RockBot.Agent.Tests/MemoryToolsTests.cs +++ b/tests/RockBot.Agent.Tests/MemoryToolsTests.cs @@ -324,6 +324,148 @@ public async Task SaveMemory_BenignContent_ReturnsBareQueuedMessage() Assert.IsFalse(result.Contains("capability claim"), "Benign content must not produce a soft-gate hint."); } + + // ------------------------------------------------------------------------- + // SaveMemory — Phase 3 scoped-category direct save + // ------------------------------------------------------------------------- + + [TestMethod] + public async Task SaveMemory_FeedbackCategory_BypassesLlmExtraction_AndPreservesCategoryVerbatim() + { + var memory = new StubLongTermMemory(); + var llm = new StubChatClient(); + var tools = MakeToolsWith(memory, llm); + + await tools.SaveMemory( + "Always include a TL;DR section at the top of status reports", + category: "feedback/from-agent/status-reports", + tags: "style,reports"); + + var saved = await WaitForSavedAsync(memory, expected: 1); + Assert.AreEqual(0, llm.CallCount, + "Scoped feedback saves must bypass the LLM extraction pass entirely."); + Assert.AreEqual("feedback/from-agent/status-reports", saved[0].Category, + "Caller-supplied scoped category must be preserved verbatim."); + Assert.AreEqual( + "Always include a TL;DR section at the top of status reports", + saved[0].Content, + "Scoped saves must persist content verbatim, no LLM rewriting."); + CollectionAssert.AreEquivalent(new[] { "style", "reports" }, saved[0].Tags.ToArray()); + } + + [TestMethod] + public async Task SaveMemory_CapabilityClaimCategory_BypassesLlmExtraction() + { + var memory = new StubLongTermMemory(); + var llm = new StubChatClient(); + var tools = MakeToolsWith(memory, llm); + + await tools.SaveMemory( + "wrapper cannot pass arguments", + category: 
"claim/capability/calendar-mcp/get_calendar_events"); + + var saved = await WaitForSavedAsync(memory, expected: 1); + Assert.AreEqual(0, llm.CallCount); + Assert.AreEqual("claim/capability/calendar-mcp/get_calendar_events", saved[0].Category); + } + + [TestMethod] + public async Task SaveMemory_FeedbackCategory_OppositeDirective_SupersedesEarlierEntry() + { + var memory = new StubLongTermMemory(); + // Test the wiring contract: a detector returning NewerWins drives the supersession path. + // The real keyword detector behaviour is covered by MemoryContradictionDetectorTests. + var detector = new FakeContradictionDetector(memory); + var tools = MakeToolsWith(memory, new StubChatClient(), detector); + + await tools.SaveMemory( + "Always include a TL;DR section at the top of status reports", + category: "feedback/from-agent/status-reports"); + await WaitForSavedAsync(memory, expected: 1); + + await tools.SaveMemory( + "Never include a TL;DR section in status reports — they should be concise without one", + category: "feedback/from-agent/status-reports"); + await WaitForSavedAsync(memory, expected: 2); + + // Both entries land on disk; the older one carries SupersededBy after the + // contradiction detector runs. + var entries = memory.SnapshotAll(); + Assert.AreEqual(2, entries.Count); + var loser = entries.Single(e => e.Content.Contains("Always")); + var winner = entries.Single(e => e.Content.Contains("Never")); + Assert.AreEqual(winner.Id, loser.SupersededBy, + "Older entry in the same scoped category should be marked superseded."); + Assert.IsNull(winner.SupersededBy); + } + + /// + /// Test detector that supersedes any prior live entry sharing the incoming entry's + /// category. Validates the MemoryTools wiring without coupling to the real keyword + /// detector's heuristics. 
+ /// + private sealed class FakeContradictionDetector : IMemoryContradictionDetector + { + private readonly StubLongTermMemory _memory; + public FakeContradictionDetector(StubLongTermMemory memory) => _memory = memory; + + public Task ResolveAsync(MemoryEntry incoming, CancellationToken cancellationToken = default) + { + var existing = _memory.SnapshotAll() + .Where(e => e.Id != incoming.Id + && e.SupersededBy is null + && string.Equals(e.Category, incoming.Category, StringComparison.OrdinalIgnoreCase)) + .Select(e => e.Id) + .ToList(); + return Task.FromResult(existing.Count > 0 + ? ContradictionResolution.NewerWins(existing) + : ContradictionResolution.None); + } + } + + [TestMethod] + public async Task SaveMemory_NonScopedCategory_StillUsesLlmExtractionPath() + { + var memory = new StubLongTermMemory(); + var llm = new StubChatClient(); + var tools = MakeToolsWith(memory, llm); + + await tools.SaveMemory( + "Loves dogs and lives in Minneapolis", + category: "user-preferences/pets"); + + // Wait for the LLM call (regression: the existing extractor path must remain wired up). + for (var i = 0; i < 50 && llm.CallCount == 0; i++) + await Task.Delay(20); + + Assert.AreEqual(1, llm.CallCount, + "Non-scoped categories should still go through the LLM extraction pass."); + } + + private static MemoryTools MakeToolsWith( + StubLongTermMemory memory, + StubChatClient llm, + IMemoryContradictionDetector? 
detector = null) => + new( + memory, + llm, + Microsoft.Extensions.Options.Options.Create(new AgentProfileOptions()), + NullLogger.Instance, + detector); + + private static async Task> WaitForSavedAsync( + StubLongTermMemory memory, int expected, int timeoutMs = 2000) + { + var deadline = DateTime.UtcNow.AddMilliseconds(timeoutMs); + while (DateTime.UtcNow < deadline) + { + var snapshot = memory.SnapshotAll(); + if (snapshot.Count >= expected) return snapshot; + await Task.Delay(20); + } + Assert.Fail($"Timed out waiting for {expected} saved entry/entries; got {memory.SnapshotAll().Count}."); + return []; + } } // --------------------------------------------------------------------------- @@ -344,10 +486,19 @@ internal sealed class StubLongTermMemory : ILongTermMemory public void Add(MemoryEntry entry) => _entries.Add(entry); + public IReadOnlyList SnapshotAll() + { + lock (_entries) + return _entries.ToList(); + } + public Task SaveAsync(MemoryEntry entry, CancellationToken cancellationToken = default) { - _entries.RemoveAll(e => e.Id == entry.Id); - _entries.Add(entry); + lock (_entries) + { + _entries.RemoveAll(e => e.Id == entry.Id); + _entries.Add(entry); + } return Task.CompletedTask; } @@ -382,13 +533,19 @@ public Task> ListCategoriesAsync(CancellationToken cancell /// internal sealed class StubChatClient : ILlmClient { + private int _callCount; + public bool IsIdle => true; + public int CallCount => Volatile.Read(ref _callCount); public Task GetResponseAsync( IEnumerable messages, ChatOptions? 
options = null, - CancellationToken cancellationToken = default) => - Task.FromResult(new ChatResponse(new ChatMessage(ChatRole.Assistant, "[]"))); + CancellationToken cancellationToken = default) + { + Interlocked.Increment(ref _callCount); + return Task.FromResult(new ChatResponse(new ChatMessage(ChatRole.Assistant, "[]"))); + } public Task GetResponseAsync( IEnumerable messages, diff --git a/tests/RockBot.Host.Tests/DreamServiceContradictionSweepTests.cs b/tests/RockBot.Host.Tests/DreamServiceContradictionSweepTests.cs new file mode 100644 index 0000000..d34c274 --- /dev/null +++ b/tests/RockBot.Host.Tests/DreamServiceContradictionSweepTests.cs @@ -0,0 +1,158 @@ +using Microsoft.Extensions.Logging.Abstractions; +using Microsoft.Extensions.Options; + +namespace RockBot.Host.Tests; + +/// +/// Phase 3 self-repair: dream-pass backstop tests. Exercise +/// directly with a +/// real so the supersession marker round-trips +/// through disk, then verify acceptance criterion 2 — existing user-correction +/// memories displace conflicting agent-self memories on the next sweep. 
+/// +[TestClass] +public class DreamServiceContradictionSweepTests +{ + private string _tempDir = null!; + + [TestInitialize] + public void Init() + { + _tempDir = Path.Combine(Path.GetTempPath(), "rockbot-sweep-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(_tempDir); + } + + [TestCleanup] + public void Cleanup() + { + if (Directory.Exists(_tempDir)) + Directory.Delete(_tempDir, recursive: true); + } + + [TestMethod] + public async Task Acceptance2_DreamSweep_UserCorrectionDisplacesAgentSelf() + { + var ltm = NewLtm(); + + var userCorrection = new MemoryEntry( + Id: "user-correction-1", + Content: "wrapper does pass arguments to get_calendar_events", + Category: "claim/capability/calendar-mcp/get_calendar_events", + Tags: ["correction", "capability-claim"], + CreatedAt: new DateTimeOffset(2026, 5, 1, 0, 0, 0, TimeSpan.Zero)); + + var agentSelf = new MemoryEntry( + Id: "agent-self-1", + Content: "wrapper cannot pass arguments to get_calendar_events", + Category: "claim/capability/calendar-mcp/get_calendar_events", + Tags: ["capability-claim"], + CreatedAt: new DateTimeOffset(2026, 5, 8, 0, 0, 0, TimeSpan.Zero)); + + await ltm.SaveAsync(userCorrection); + await ltm.SaveAsync(agentSelf); + + // LLM in the sweep would emit this pair: user-correction wins. 
+ var corpus = new[] { userCorrection, agentSelf }; + var pairs = new[] + { + new DreamService.ContradictionPairDto( + WinnerId: "user-correction-1", + LoserId: "agent-self-1", + Reason: "user correction"), + }; + + var count = await DreamService.ApplyContradictionSweepResultAsync( + ltm, corpus, pairs, NullLogger.Instance, CancellationToken.None); + + Assert.AreEqual(1, count); + + var live = await ltm.SearchAsync(new MemorySearchCriteria( + Category: CapabilityClaimCategories.Prefix, MaxResults: 50)); + Assert.AreEqual(1, live.Count, "Only the user correction should remain live after the sweep."); + Assert.AreEqual("user-correction-1", live[0].Id); + + var loser = await ltm.GetAsync("agent-self-1"); + Assert.IsNotNull(loser); + Assert.AreEqual("user-correction-1", loser!.SupersededBy); + } + + [TestMethod] + public async Task Sweep_ProtectsUserCorrection_FromBeingSupersededByNonCorrection() + { + var ltm = NewLtm(); + + var userCorrection = new MemoryEntry( + Id: "user-correction-1", + Content: "Always use bullet points", + Category: "feedback/from-user/style", + Tags: ["correction"], + CreatedAt: DateTimeOffset.UtcNow); + + var agentSelf = new MemoryEntry( + Id: "agent-self-1", + Content: "Never use bullet points", + Category: "feedback/from-agent/style", + Tags: [], + CreatedAt: DateTimeOffset.UtcNow); + + await ltm.SaveAsync(userCorrection); + await ltm.SaveAsync(agentSelf); + + // LLM erroneously proposes superseding the user correction with the agent-self entry. 
+ var corpus = new[] { userCorrection, agentSelf }; + var pairs = new[] + { + new DreamService.ContradictionPairDto( + WinnerId: "agent-self-1", + LoserId: "user-correction-1", + Reason: "more recent"), + }; + + var count = await DreamService.ApplyContradictionSweepResultAsync( + ltm, corpus, pairs, NullLogger.Instance, CancellationToken.None); + + Assert.AreEqual(0, count, "Sweep must refuse to supersede a user correction with a non-correction."); + + var fetched = await ltm.GetAsync("user-correction-1"); + Assert.IsNotNull(fetched); + Assert.IsNull(fetched!.SupersededBy); + } + + [TestMethod] + public async Task Sweep_IgnoresPairs_WithUnknownIdsOrSelfReferences() + { + var ltm = NewLtm(); + var entry = new MemoryEntry( + Id: "real-1", + Content: "Always X", + Category: "feedback/style", + Tags: [], + CreatedAt: DateTimeOffset.UtcNow); + await ltm.SaveAsync(entry); + + var pairs = new[] + { + new DreamService.ContradictionPairDto("ghost-1", "real-1", "no winner"), + new DreamService.ContradictionPairDto("real-1", "ghost-2", "no loser"), + new DreamService.ContradictionPairDto("real-1", "real-1", "self"), + new DreamService.ContradictionPairDto(null, "real-1", "missing"), + new DreamService.ContradictionPairDto("real-1", "", "empty"), + }; + + var count = await DreamService.ApplyContradictionSweepResultAsync( + ltm, [entry], pairs, NullLogger.Instance, CancellationToken.None); + + Assert.AreEqual(0, count); + + var fetched = await ltm.GetAsync("real-1"); + Assert.IsNull(fetched!.SupersededBy); + } + + private FileMemoryStore NewLtm() + { + var memOpts = Options.Create(new MemoryOptions { BasePath = Path.Combine(_tempDir, "ltm") }); + var profOpts = Options.Create(new AgentProfileOptions { BasePath = _tempDir }); + var embedOpts = Options.Create(new EmbeddingOptions()); + return new FileMemoryStore(memOpts, profOpts, embedOpts, NullLogger.Instance); + } +} diff --git a/tests/RockBot.Host.Tests/FileMemoryStoreSupersededByTests.cs 
b/tests/RockBot.Host.Tests/FileMemoryStoreSupersededByTests.cs new file mode 100644 index 0000000..0abf1d6 --- /dev/null +++ b/tests/RockBot.Host.Tests/FileMemoryStoreSupersededByTests.cs @@ -0,0 +1,116 @@ +using Microsoft.Extensions.Logging.Abstractions; +using Microsoft.Extensions.Options; + +namespace RockBot.Host.Tests; + +/// +/// Phase 3 self-repair: confirms that entries +/// are hidden from by default but remain +/// retrievable by for audit and supersession +/// traversal. Round-trip serialisation is also verified. +/// +[TestClass] +public class FileMemoryStoreSupersededByTests +{ + private string _tempDir = null!; + + [TestInitialize] + public void Init() + { + _tempDir = Path.Combine(Path.GetTempPath(), "rockbot-superseded-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(_tempDir); + } + + [TestCleanup] + public void Cleanup() + { + if (Directory.Exists(_tempDir)) + Directory.Delete(_tempDir, recursive: true); + } + + [TestMethod] + public async Task SearchAsync_HidesSupersededEntriesByDefault() + { + var store = NewStore(); + var winner = NewEntry("w-1", "wrapper does pass arguments", "claim/capability/calendar-mcp/get_calendar_events"); + var loser = NewEntry("l-1", "wrapper cannot pass arguments", "claim/capability/calendar-mcp/get_calendar_events") + with + { SupersededBy = "w-1" }; + + await store.SaveAsync(winner); + await store.SaveAsync(loser); + + var results = await store.SearchAsync(new MemorySearchCriteria( + Category: "claim/capability", MaxResults: 50)); + + Assert.AreEqual(1, results.Count, "Superseded entry should not appear in SearchAsync results."); + Assert.AreEqual("w-1", results[0].Id); + } + + [TestMethod] + public async Task SearchAsync_IncludeSuperseded_ReturnsAllEntries() + { + var store = NewStore(); + await store.SaveAsync(NewEntry("w-1", "wrapper does pass arguments", "claim/capability/x/y")); + await store.SaveAsync(NewEntry("l-1", "wrapper cannot pass arguments", "claim/capability/x/y") + with + { SupersededBy 
= "w-1" }); + + var results = await store.SearchAsync(new MemorySearchCriteria( + Category: "claim/capability", MaxResults: 50, IncludeSuperseded: true)); + + Assert.AreEqual(2, results.Count); + } + + [TestMethod] + public async Task GetAsync_ReturnsSupersededEntryByIdForAudit() + { + var store = NewStore(); + var loser = NewEntry("l-1", "wrapper cannot pass arguments", "claim/capability/x/y") + with + { SupersededBy = "w-1" }; + await store.SaveAsync(loser); + + var fetched = await store.GetAsync("l-1"); + + Assert.IsNotNull(fetched); + Assert.AreEqual("w-1", fetched!.SupersededBy); + } + + [TestMethod] + public async Task SaveAsync_RoundTripsSupersededByThroughDisk() + { + var saved = NewEntry("l-1", "old claim", "claim/capability/x/y") with { SupersededBy = "winner" }; + + // First store writes the entry; second store reads from disk via index. + var basePath = Path.Combine(_tempDir, "ltm"); + Directory.CreateDirectory(basePath); + + var store1 = NewStore(basePath); + await store1.SaveAsync(saved); + + var store2 = NewStore(basePath); + var loaded = await store2.GetAsync("l-1"); + + Assert.IsNotNull(loaded); + Assert.AreEqual("winner", loaded!.SupersededBy, + "SupersededBy must round-trip through JSON serialisation."); + } + + private FileMemoryStore NewStore(string? basePath = null) + { + var ltmPath = basePath ?? 
Path.Combine(_tempDir, Guid.NewGuid().ToString("N")); + var memOpts = Options.Create(new MemoryOptions { BasePath = ltmPath }); + var profOpts = Options.Create(new AgentProfileOptions { BasePath = _tempDir }); + var embedOpts = Options.Create(new EmbeddingOptions()); + return new FileMemoryStore(memOpts, profOpts, embedOpts, NullLogger.Instance); + } + + private static MemoryEntry NewEntry(string id, string content, string category) => + new( + Id: id, + Content: content, + Category: category, + Tags: [], + CreatedAt: DateTimeOffset.UtcNow); +} diff --git a/tests/RockBot.Host.Tests/MemoryContradictionDetectorTests.cs b/tests/RockBot.Host.Tests/MemoryContradictionDetectorTests.cs new file mode 100644 index 0000000..19c3be8 --- /dev/null +++ b/tests/RockBot.Host.Tests/MemoryContradictionDetectorTests.cs @@ -0,0 +1,363 @@ +using Microsoft.Extensions.Logging.Abstractions; + +namespace RockBot.Host.Tests; + +/// +/// Unit tests for the Phase 3 hot-path contradiction detector. +/// Covers narrow scope (capability claims and feedback only), valence inversion, +/// user-correction always-wins protection, and ambiguity-skip behaviour. 
+/// +[TestClass] +public class MemoryContradictionDetectorTests +{ + [TestMethod] + public async Task ResolveAsync_OutsideScopedCategories_ReturnsNoneWithoutScanningStore() + { + var memory = new RecordingMemory(); // SearchAsync throws if called + var detector = NewDetector(memory); + + var entry = NewEntry("user-preferences/pets", "Loves dogs", id: "x-1"); + + var resolution = await detector.ResolveAsync(entry); + + Assert.IsFalse(resolution.HasContradiction); + Assert.AreEqual(0, memory.SearchCallCount, + "Narrow scope: detector must not even query the store for non-claim, non-feedback writes."); + } + + [TestMethod] + public async Task ResolveAsync_CapabilityClaim_OppositeValence_NewerWins() + { + var memory = new RecordingMemory(); + var oldClaim = NewEntry( + category: "claim/capability/calendar-mcp/get_calendar_events", + content: "wrapper cannot pass arguments to get_calendar_events", + id: "old-1"); + memory.Existing.Add(oldClaim); + + var detector = NewDetector(memory); + var incoming = NewEntry( + category: "claim/capability/calendar-mcp/get_calendar_events", + content: "wrapper does pass arguments to get_calendar_events; verified by call", + id: "new-1"); + + var resolution = await detector.ResolveAsync(incoming); + + Assert.IsTrue(resolution.HasContradiction); + Assert.IsNull(resolution.IncomingSupersededBy); + CollectionAssert.AreEqual(new[] { "old-1" }, resolution.ExistingIdsToSupersede.ToArray()); + } + + [TestMethod] + public async Task ResolveAsync_CapabilityClaim_SameValence_NoContradiction() + { + var memory = new RecordingMemory(); + memory.Existing.Add(NewEntry( + category: "claim/capability/calendar-mcp/get_calendar_events", + content: "wrapper cannot pass arguments to get_calendar_events", + id: "old-1")); + + var detector = NewDetector(memory); + var incoming = NewEntry( + category: "claim/capability/calendar-mcp/get_calendar_events", + content: "wrapper cannot enumerate accounts either", + id: "new-1"); + + var resolution = await 
detector.ResolveAsync(incoming); + + Assert.IsFalse(resolution.HasContradiction); + } + + [TestMethod] + public async Task ResolveAsync_CapabilityClaim_DifferentTool_NoContradiction() + { + var memory = new RecordingMemory(); + memory.Existing.Add(NewEntry( + category: "claim/capability/calendar-mcp/list_accounts", + content: "list_accounts cannot return more than 10 entries", + id: "old-1")); + + var detector = NewDetector(memory); + var incoming = NewEntry( + category: "claim/capability/calendar-mcp/get_calendar_events", + content: "get_calendar_events does support timeZone", + id: "new-1"); + + var resolution = await detector.ResolveAsync(incoming); + + Assert.IsFalse(resolution.HasContradiction, + "Different (server, tool) pairs are different rule subjects."); + } + + [TestMethod] + public async Task ResolveAsync_CapabilityClaim_UserCorrectionExists_IncomingMarkedSuperseded() + { + var memory = new RecordingMemory(); + memory.Existing.Add(NewEntry( + category: "claim/capability/calendar-mcp/get_calendar_events", + content: "wrapper does pass arguments — verified manually", + id: "user-correction-1", + tags: ["correction"])); + + var detector = NewDetector(memory); + var incoming = NewEntry( + category: "claim/capability/calendar-mcp/get_calendar_events", + content: "wrapper cannot pass arguments to get_calendar_events", + id: "agent-self-1"); + + var resolution = await detector.ResolveAsync(incoming); + + Assert.IsTrue(resolution.HasContradiction); + Assert.AreEqual("user-correction-1", resolution.IncomingSupersededBy, + "User-correction should win even when the incoming claim was saved later."); + Assert.AreEqual(0, resolution.ExistingIdsToSupersede.Count); + } + + [TestMethod] + public async Task ResolveAsync_CapabilityClaim_SkipsAlreadySupersededExistingEntries() + { + var memory = new RecordingMemory(); + memory.Existing.Add(NewEntry( + category: "claim/capability/calendar-mcp/get_calendar_events", + content: "wrapper cannot pass arguments", + id: "old-1") 
with { SupersededBy = "even-older-winner" }); + + var detector = NewDetector(memory); + var incoming = NewEntry( + category: "claim/capability/calendar-mcp/get_calendar_events", + content: "wrapper does pass arguments", + id: "new-1"); + + var resolution = await detector.ResolveAsync(incoming); + + Assert.IsFalse(resolution.HasContradiction, + "Already-superseded entries should not appear as contradiction candidates."); + } + + [TestMethod] + public async Task ResolveAsync_Feedback_RealisticPhrasing_PluralAndTrailingClause_StillMatches() + { + // Mirrors the real prompt that previously slipped past the threshold: + // "report" vs "reports" plus a trailing clause introducing extra tokens. + // After plural-s stemming and the 0.3 Jaccard threshold, this must contradict. + var memory = new RecordingMemory(); + memory.Existing.Add(NewEntry( + category: "feedback/from-agent/status-reports", + content: "Always include a TL;DR section at the top of every status report I send", + id: "old-1")); + + var detector = NewDetector(memory); + var incoming = NewEntry( + category: "feedback/from-agent/status-reports", + content: "Never include a TL;DR section in status reports — they should be concise without one", + id: "new-1"); + + var resolution = await detector.ResolveAsync(incoming); + + Assert.IsTrue(resolution.HasContradiction, + "Singular/plural mismatch and trailing clauses must not break realistic supersession."); + CollectionAssert.AreEqual(new[] { "old-1" }, resolution.ExistingIdsToSupersede.ToArray()); + } + + [TestMethod] + public async Task ResolveAsync_Feedback_OppositeDirective_NewerWins() + { + var memory = new RecordingMemory(); + memory.Existing.Add(NewEntry( + category: "feedback/from-agent/style", + content: "Always use bullet points for status reports", + id: "old-1")); + + var detector = NewDetector(memory); + var incoming = NewEntry( + category: "feedback/from-agent/style", + content: "Never use bullet points for status reports", + id: "new-1"); + + var 
resolution = await detector.ResolveAsync(incoming); + + Assert.IsTrue(resolution.HasContradiction); + CollectionAssert.AreEqual(new[] { "old-1" }, resolution.ExistingIdsToSupersede.ToArray()); + } + + [TestMethod] + public async Task ResolveAsync_Feedback_UserCorrectionWins_OverAgentSelf() + { + var memory = new RecordingMemory(); + memory.Existing.Add(NewEntry( + category: "feedback/from-user/style", + content: "Always use bullet points for status reports", + id: "user-correction-1")); + + var detector = NewDetector(memory); + var incoming = NewEntry( + category: "feedback/from-agent/style", + content: "Never use bullet points for status reports", + id: "agent-self-1"); + + var resolution = await detector.ResolveAsync(incoming); + + Assert.IsTrue(resolution.HasContradiction); + Assert.AreEqual("user-correction-1", resolution.IncomingSupersededBy); + } + + [TestMethod] + public async Task ResolveAsync_Feedback_MultipleSameValenceMatches_AllSupersededByNewer() + { + // Every candidate in `contradicted` already shares the inverse valence of the + // incoming entry (the loop filters that), so multi-match is not ambiguous — + // it just means the same rule was written more than once before a reversal. + // The detector should supersede them all. 
+ var memory = new RecordingMemory(); + memory.Existing.Add(NewEntry( + category: "feedback/from-agent/style", + content: "Always use bullet points for status reports", + id: "old-1")); + memory.Existing.Add(NewEntry( + category: "feedback/from-agent/style", + content: "Always use bullet points for status reports as default", + id: "old-2")); + + var detector = NewDetector(memory); + var incoming = NewEntry( + category: "feedback/from-agent/style", + content: "Never use bullet points for status reports", + id: "new-1"); + + var resolution = await detector.ResolveAsync(incoming); + + Assert.IsTrue(resolution.HasContradiction); + CollectionAssert.AreEquivalent( + new[] { "old-1", "old-2" }, + resolution.ExistingIdsToSupersede.ToArray()); + } + + [TestMethod] + public async Task ResolveAsync_Feedback_UserCorrectionAmongMultipleCandidates_StillWins() + { + // The user-correction protection still trumps everything: if any candidate is a + // user correction, the incoming agent-self save is the one marked superseded, + // even when other candidates would otherwise be supersed-able. 
+        var memory = new RecordingMemory();
+        memory.Existing.Add(NewEntry(
+            category: "feedback/from-agent/style",
+            content: "Always use bullet points for status reports",
+            id: "old-1"));
+        memory.Existing.Add(NewEntry(
+            category: "feedback/from-user/style",
+            content: "Always use bullet points for status reports — user said so",
+            id: "user-correction-1",
+            tags: ["correction"]));
+
+        var detector = NewDetector(memory);
+        var incoming = NewEntry(
+            category: "feedback/from-agent/style",
+            content: "Never use bullet points for status reports",
+            id: "new-1");
+
+        var resolution = await detector.ResolveAsync(incoming);
+
+        Assert.IsTrue(resolution.HasContradiction);
+        Assert.AreEqual("user-correction-1", resolution.IncomingSupersededBy,
+            "User-correction protection trumps the supersede-all rule.");
+        Assert.AreEqual(0, resolution.ExistingIdsToSupersede.Count);
+    }
+
+    [TestMethod]
+    public async Task ResolveAsync_Feedback_DifferentSubject_NoContradiction()
+    {
+        var memory = new RecordingMemory();
+        memory.Existing.Add(NewEntry(
+            category: "feedback/from-agent/style",
+            content: "Always use bullet points for status reports",
+            id: "old-1"));
+
+        var detector = NewDetector(memory);
+        var incoming = NewEntry(
+            category: "feedback/from-agent/scheduling",
+            content: "Never schedule meetings on Friday afternoons",
+            id: "new-1");
+
+        var resolution = await detector.ResolveAsync(incoming);
+
+        Assert.IsFalse(resolution.HasContradiction,
+            "Different rule subjects (style vs scheduling) are not a contradiction.");
+    }
+
+    [TestMethod]
+    public async Task ResolveAsync_Feedback_AmbiguousValence_Skipped()
+    {
+        var memory = new RecordingMemory();
+        memory.Existing.Add(NewEntry(
+            category: "feedback/from-agent/style",
+            content: "Always use bullet points for status reports",
+            id: "old-1"));
+
+        var detector = NewDetector(memory);
+        var incoming = NewEntry(
+            category: "feedback/from-agent/style",
+            content: "Bullet points format for status reports", // no clear directive
+            id: "new-1");
+
+        var resolution = await detector.ResolveAsync(incoming);
+
+        Assert.IsFalse(resolution.HasContradiction);
+    }
+
+    // --- helpers --------------------------------------------------------------
+
+    private static MemoryContradictionDetector NewDetector(ILongTermMemory memory) =>
+        new(memory, NullLogger.Instance);
+
+    private static MemoryEntry NewEntry(
+        string category,
+        string content,
+        string id,
+        IReadOnlyList? tags = null) =>
+        new(
+            Id: id,
+            Content: content,
+            Category: category,
+            Tags: tags ?? [],
+            CreatedAt: DateTimeOffset.UtcNow);
+
+    private sealed class RecordingMemory : ILongTermMemory
+    {
+        public List Existing { get; } = new();
+        public int SearchCallCount { get; private set; }
+
+        public Task SaveAsync(MemoryEntry entry, CancellationToken cancellationToken = default) => Task.CompletedTask;
+
+        public Task> SearchAsync(MemorySearchCriteria criteria, CancellationToken cancellationToken = default)
+        {
+            SearchCallCount++;
+            // Simulate the file store: filter by category prefix, hide superseded by default.
+            IEnumerable q = Existing;
+            if (criteria.Category is not null)
+            {
+                var cat = criteria.Category;
+                q = q.Where(e => e.Category is not null
+                    && (string.Equals(e.Category, cat, StringComparison.OrdinalIgnoreCase)
+                        || e.Category.StartsWith(cat + "/", StringComparison.OrdinalIgnoreCase)));
+            }
+            if (!criteria.IncludeSuperseded)
+                q = q.Where(e => e.SupersededBy is null);
+            return Task.FromResult>(q.Take(criteria.MaxResults).ToList());
+        }
+
+        public Task GetAsync(string id, CancellationToken cancellationToken = default) =>
+            Task.FromResult(Existing.FirstOrDefault(e => string.Equals(e.Id, id, StringComparison.OrdinalIgnoreCase)));
+
+        public Task DeleteAsync(string id, CancellationToken cancellationToken = default)
+        {
+            Existing.RemoveAll(e => string.Equals(e.Id, id, StringComparison.OrdinalIgnoreCase)); // same id matching as GetAsync
+            return Task.CompletedTask;
+        }
+
+        public Task> ListTagsAsync(CancellationToken cancellationToken = default) =>
+            Task.FromResult>([]);
+
+        public Task> ListCategoriesAsync(CancellationToken cancellationToken = default) =>
+            Task.FromResult>([]);
+    }
+}
diff --git a/tests/RockBot.Host.Tests/Phase3ContradictionEndToEndTests.cs b/tests/RockBot.Host.Tests/Phase3ContradictionEndToEndTests.cs
new file mode 100644
index 0000000..3b8fe3a
--- /dev/null
+++ b/tests/RockBot.Host.Tests/Phase3ContradictionEndToEndTests.cs
@@ -0,0 +1,174 @@
+using System.Text.Json;
+using Microsoft.Extensions.Logging.Abstractions;
+using Microsoft.Extensions.Options;
+
+namespace RockBot.Host.Tests;
+
+/// <summary>
+/// Phase 3 self-repair acceptance tests against the real <see cref="MemoryContradictionDetector"/>,
+/// <see cref="CapabilityClaimWriter"/> and <see cref="FileMemoryStore"/>. Mirrors the acceptance criteria on
+/// GitHub issue #347:
+/// <list type="number">
+/// <item>Saving "wrapper does pass arguments" supersedes the older "wrapper cannot pass arguments" claim.</item>
+/// <item>Existing user-correction memories displace conflicting agent-self memories.</item>
+/// <item>Saves outside <c>claim/capability/*</c> and <c>feedback/*</c> are unaffected.</item>
+/// </list>
+/// </summary>
+[TestClass]
+public class Phase3ContradictionEndToEndTests
+{
+    private string _tempDir = null!;
+
+    /// <summary>Creates a unique scratch directory for this test's file-backed store.</summary>
+    [TestInitialize]
+    public void Init()
+    {
+        _tempDir = Path.Combine(Path.GetTempPath(), "rockbot-phase3-" + Guid.NewGuid().ToString("N"));
+        Directory.CreateDirectory(_tempDir);
+    }
+
+    /// <summary>Removes the scratch directory, if it exists.</summary>
+    [TestCleanup]
+    public void Cleanup()
+    {
+        if (Directory.Exists(_tempDir))
+            Directory.Delete(_tempDir, recursive: true);
+    }
+
+    [TestMethod]
+    public async Task Acceptance1_NewerCapabilityClaim_SupersedesOlderOppositeClaim()
+    {
+        var ltm = NewLtm();
+        var detector = new MemoryContradictionDetector(ltm, NullLogger.Instance);
+        var writer = new CapabilityClaimWriter(ltm, detector, NullLogger.Instance);
+
+        // 1. Save the older (negative) claim.
+        await writer.SaveCapabilityClaimAsync(new CapabilityClaim(
+            Server: "calendar-mcp",
+            Tool: "get_calendar_events",
+            Statement: "wrapper cannot pass arguments to get_calendar_events",
+            Verify: NewVerify(),
+            Evidence: ["recovery exhausted"],
+            CreatedAt: new DateTimeOffset(2026, 5, 1, 0, 0, 0, TimeSpan.Zero)));
+
+        // 2. Save the contradicting (positive) claim later.
+        await writer.SaveCapabilityClaimAsync(new CapabilityClaim(
+            Server: "calendar-mcp",
+            Tool: "get_calendar_events",
+            Statement: "wrapper does pass arguments to get_calendar_events; verified",
+            Verify: NewVerify(),
+            Evidence: ["recovery succeeded"],
+            CreatedAt: new DateTimeOffset(2026, 5, 8, 0, 0, 0, TimeSpan.Zero)));
+
+        // 3. Search returns only the newer claim — the older one is hidden as superseded.
+        var live = await ltm.SearchAsync(new MemorySearchCriteria(
+            Category: CapabilityClaimCategories.Prefix, MaxResults: 50));
+        Assert.AreEqual(1, live.Count, "Only the newer claim should be live after supersession.");
+        StringAssert.Contains(live[0].Content, "does pass arguments");
+
+        // 4. Including superseded entries shows both, with the older marked.
+        var all = await ltm.SearchAsync(new MemorySearchCriteria(
+            Category: CapabilityClaimCategories.Prefix, MaxResults: 50, IncludeSuperseded: true));
+        Assert.AreEqual(2, all.Count);
+        var loser = all.Single(e => e.Content.Contains("cannot pass"));
+        Assert.IsNotNull(loser.SupersededBy, "Older claim must carry SupersededBy pointing at the winner.");
+        var winner = all.Single(e => e.Content.Contains("does pass"));
+        Assert.AreEqual(winner.Id, loser.SupersededBy);
+    }
+
+    [TestMethod]
+    public async Task Acceptance2_UserCorrectionWinsOverAgentSelf()
+    {
+        var ltm = NewLtm();
+        var detector = new MemoryContradictionDetector(ltm, NullLogger.Instance);
+
+        // 1. Pre-existing user correction (saved directly with the correction tag).
+        await ltm.SaveAsync(new MemoryEntry(
+            Id: "user-correction-1",
+            Content: "wrapper does pass arguments to get_calendar_events",
+            Category: "claim/capability/calendar-mcp/get_calendar_events",
+            Tags: ["correction", "capability-claim"],
+            CreatedAt: new DateTimeOffset(2026, 5, 1, 0, 0, 0, TimeSpan.Zero)));
+
+        // 2. Agent-self writes a contradicting claim through the writer.
+        var writer = new CapabilityClaimWriter(ltm, detector, NullLogger.Instance);
+        await writer.SaveCapabilityClaimAsync(new CapabilityClaim(
+            Server: "calendar-mcp",
+            Tool: "get_calendar_events",
+            Statement: "wrapper cannot pass arguments to get_calendar_events",
+            Verify: NewVerify(),
+            Evidence: ["agent observation"],
+            CreatedAt: new DateTimeOffset(2026, 5, 8, 0, 0, 0, TimeSpan.Zero)));
+
+        // 3. Live search shows only the user correction; agent-self claim is hidden.
+        var live = await ltm.SearchAsync(new MemorySearchCriteria(
+            Category: CapabilityClaimCategories.Prefix, MaxResults: 50));
+        Assert.AreEqual(1, live.Count);
+        Assert.AreEqual("user-correction-1", live[0].Id);
+
+        // 4. The agent-self claim is on disk but marked superseded by the user correction.
+        var all = await ltm.SearchAsync(new MemorySearchCriteria(
+            Category: CapabilityClaimCategories.Prefix, MaxResults: 50, IncludeSuperseded: true));
+        Assert.AreEqual(2, all.Count);
+        var agentSelf = all.Single(e => e.Id != "user-correction-1");
+        Assert.AreEqual("user-correction-1", agentSelf.SupersededBy);
+    }
+
+    [TestMethod]
+    public async Task Acceptance3_SavesOutsideScopedCategories_DoNotInvokeDetector()
+    {
+        var ltm = NewLtm();
+        var detector = new MemoryContradictionDetector(ltm, NullLogger.Instance);
+
+        // Pre-seed an entry that *would* contradict if the detector wasn't narrow.
+        await ltm.SaveAsync(new MemoryEntry(
+            Id: "old-1",
+            Content: "Does not like dogs",
+            Category: "user-preferences/pets",
+            Tags: [],
+            CreatedAt: new DateTimeOffset(2026, 5, 1, 0, 0, 0, TimeSpan.Zero)));
+
+        // Direct LTM saves in this fixture never pass through a detector, so the
+        // narrow-scope contract is checked on the detector itself: ResolveAsync on a
+        // non-scoped entry must short-circuit.
+        var entry = new MemoryEntry(
+            Id: "x-1",
+            Content: "Loves dogs and lives in Minneapolis",
+            Category: "user-preferences/pets",
+            Tags: [],
+            CreatedAt: new DateTimeOffset(2026, 5, 8, 0, 0, 0, TimeSpan.Zero));
+
+        var resolution = await detector.ResolveAsync(entry);
+
+        Assert.IsFalse(resolution.HasContradiction,
+            "Acceptance criterion 3: detector must not affect saves outside claim/capability/* and feedback/*.");
+
+        // The pre-seeded entry must still be live, i.e. not hidden as superseded.
+        var live = await ltm.SearchAsync(new MemorySearchCriteria(
+            Category: "user-preferences/pets", MaxResults: 50));
+        Assert.AreEqual(1, live.Count, "Resolving a non-scoped entry must leave existing entries untouched.");
+        Assert.AreEqual("old-1", live[0].Id);
+    }
+
+    /// <summary>Builds a <see cref="FileMemoryStore"/> rooted under this test's scratch directory.</summary>
+    private FileMemoryStore NewLtm()
+    {
+        var memOpts = Options.Create(new MemoryOptions { BasePath = Path.Combine(_tempDir, "ltm") });
+        var profOpts = Options.Create(new AgentProfileOptions { BasePath = _tempDir });
+        var embedOpts = Options.Create(new EmbeddingOptions());
+        return new FileMemoryStore(memOpts, profOpts, embedOpts, NullLogger.Instance);
+    }
+
+    /// <summary>Builds the verify shape shared by the capability claims in these tests.</summary>
+    private static VerifyShape NewVerify()
+    {
+        // JsonDocument is IDisposable and backed by pooled buffers, so dispose it here and
+        // hand the VerifyShape a Clone() of the root that stays valid after disposal.
+        using var document = JsonDocument.Parse("""{"accountId":"x","timeZone":"America/Chicago","startDate":"2026-05-08","endDate":"2026-05-08"}""");
+        return new(
+            Server: "calendar-mcp",
+            Tool: "get_calendar_events",
+            Arguments: document.RootElement.Clone(),
+            Expect: new VerifyExpectation(VerifyExpectationKind.Success));
+    }
+}