From c364fd46d78b3e625b3df8824fcc51013947c879 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 7 Apr 2026 11:28:23 -0400 Subject: [PATCH 1/3] Port changes from https://github.com/sillsdev/machine.py/pull/279 --- src/SIL.Machine/Translation/HybridTranslationEngine.cs | 1 + src/SIL.Machine/Translation/TransferEngine.cs | 1 + src/SIL.Machine/Translation/TranslationExtensions.cs | 1 + src/SIL.Machine/Translation/TranslationResult.cs | 3 +++ src/SIL.Machine/Translation/TranslationResultBuilder.cs | 4 ++++ 5 files changed, 10 insertions(+) diff --git a/src/SIL.Machine/Translation/HybridTranslationEngine.cs b/src/SIL.Machine/Translation/HybridTranslationEngine.cs index 34af37ad2..181cdc78e 100644 --- a/src/SIL.Machine/Translation/HybridTranslationEngine.cs +++ b/src/SIL.Machine/Translation/HybridTranslationEngine.cs @@ -488,6 +488,7 @@ private TranslationResult Merge(TranslationResult interactiveResult, Translation interactiveResult.SourceTokens, mergedTargetSegment, mergedConfidences, + -1.0, mergedSources, alignment, interactiveResult.Phrases diff --git a/src/SIL.Machine/Translation/TransferEngine.cs b/src/SIL.Machine/Translation/TransferEngine.cs index 56ac1d6eb..7ba7ff021 100644 --- a/src/SIL.Machine/Translation/TransferEngine.cs +++ b/src/SIL.Machine/Translation/TransferEngine.cs @@ -186,6 +186,7 @@ public IReadOnlyList Translate(int n, IReadOnlyList s segment, targetTokens, confidences, + -1.0, sources, alignment, new[] { new Phrase(Range.Create(0, normalizedSourceTokens.Count), targetWords.Count) } diff --git a/src/SIL.Machine/Translation/TranslationExtensions.cs b/src/SIL.Machine/Translation/TranslationExtensions.cs index a25e2c71a..dd9e07736 100644 --- a/src/SIL.Machine/Translation/TranslationExtensions.cs +++ b/src/SIL.Machine/Translation/TranslationExtensions.cs @@ -120,6 +120,7 @@ public static TranslationResult Truecase( result.SourceTokens, targetTokens, result.Confidences, + result.SequenceConfidence, result.Sources, result.Alignment, result.Phrases diff --git a/src/SIL.Machine/Translation/TranslationResult.cs b/src/SIL.Machine/Translation/TranslationResult.cs index 8fd257fe6..2e373dc5f 100644 --- a/src/SIL.Machine/Translation/TranslationResult.cs +++ b/src/SIL.Machine/Translation/TranslationResult.cs @@ -11,6 +11,7 @@ public TranslationResult( IEnumerable sourceTokens, IEnumerable targetTokens, IEnumerable confidences, + double sequenceConfidence, IEnumerable sources, WordAlignmentMatrix alignment, IEnumerable phrases @@ -27,6 +28,7 @@ IEnumerable phrases nameof(confidences) ); } + SequenceConfidence = sequenceConfidence; Sources = sources.ToArray(); if (Sources.Count != TargetTokens.Count) { @@ -58,6 +60,7 @@ IEnumerable phrases public IReadOnlyList SourceTokens { get; } public IReadOnlyList TargetTokens { get; } public IReadOnlyList Confidences { get; } + public double SequenceConfidence { get; } public IReadOnlyList Sources { get; } public WordAlignmentMatrix Alignment { get; } public IReadOnlyList Phrases { get; } diff --git a/src/SIL.Machine/Translation/TranslationResultBuilder.cs b/src/SIL.Machine/Translation/TranslationResultBuilder.cs index 461e20b98..d71ca616f 100644 --- a/src/SIL.Machine/Translation/TranslationResultBuilder.cs +++ b/src/SIL.Machine/Translation/TranslationResultBuilder.cs @@ -11,6 +11,7 @@ public class TranslationResultBuilder private readonly List _confidences; private readonly List _sources; private readonly List _phrases; + private readonly double _sequenceConfidences; public TranslationResultBuilder(IReadOnlyList sourceTokens) { @@ -19,6 +20,7 @@ public TranslationResultBuilder(IReadOnlyList sourceTokens) _confidences = new List(); _sources = new List(); _phrases = new List(); + _sequenceConfidences = -1.0; } public IReadOnlyList SourceTokens { get; } @@ -29,6 +31,7 @@ public TranslationResultBuilder(IReadOnlyList sourceTokens) public IReadOnlyList Confidences => _confidences; public IReadOnlyList Sources => _sources; public IReadOnlyList Phrases => _phrases; + public double SequenceConfidences => _sequenceConfidences; public void AppendToken(string token, TranslationSources source, double confidence) { @@ -246,6 +249,7 @@ public TranslationResult ToResult(string translation = null) SourceTokens, _targetTokens, _confidences, + _sequenceConfidences, sources, alignment, phrases From 4ff7fb3a21aa170e24e9f4778c237f78d3c9c60c Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Thu, 9 Apr 2026 14:27:29 -0400 Subject: [PATCH 2/3] Compute sequence confidences for TranslationEngines --- .../ChrF3QualityEstimator.cs | 27 ++++++------------- .../Translation/HybridTranslationEngine.cs | 3 ++- src/SIL.Machine/Translation/TransferEngine.cs | 3 ++- src/SIL.Machine/Utils/ConfidenceHelper.cs | 25 +++++++++++++++++ 4 files changed, 37 insertions(+), 21 deletions(-) create mode 100644 src/SIL.Machine/Utils/ConfidenceHelper.cs diff --git a/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimator.cs b/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimator.cs index c0a3e0bf3..444240b37 100644 --- a/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimator.cs +++ b/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimator.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Linq; using SIL.Machine.Corpora; +using SIL.Machine.Utils; namespace SIL.Machine.QualityEstimation { @@ -83,22 +84,6 @@ ScriptureBookScores bookScores return ComputeSegmentUsability(segmentScores, chapterScores, bookScores); } - /// - /// Calculates the geometric mean for a collection of values. - /// - /// - /// The geometric mean. - private static double GeometricMean(IList values) - { - // Geometric mean requires positive values - if (values == null || !values.Any() || values.Any(x => x <= 0)) - return 0; - - // Compute the sum of the natural logarithms of all values, - // and divide by the count of numbers and take the exponential - return Math.Exp(values.Sum(Math.Log) / values.Count); - } - private double CalculateUsableProbability(double chrF3) { double usableWeight = Math.Exp(-Math.Pow(chrF3 - Usable.Mean, 2) / (2 * Usable.Variance)) * Usable.Count; @@ -267,7 +252,7 @@ private List ComputeTextUsability(TextScores textScores) { textScores.AddScore( textIdConfidences.Key, - new Score(_slope, confidence: GeometricMean(textIdConfidences.Value), _intercept) + new Score(_slope, confidence: ConfidenceHelper.GeometricMean(textIdConfidences.Value), _intercept) ); } @@ -325,7 +310,11 @@ out List bookAndChapterConfidences chapterScores.AddScore( bookAndChapterConfidences.Key.Book, bookAndChapterConfidences.Key.Chapter, - new Score(_slope, confidence: GeometricMean(bookAndChapterConfidences.Value), _intercept) + new Score( + _slope, + confidence: ConfidenceHelper.GeometricMean(bookAndChapterConfidences.Value), + _intercept + ) ); } @@ -334,7 +323,7 @@ out List bookAndChapterConfidences { bookScores.AddScore( bookConfidences.Key, - new Score(_slope, confidence: GeometricMean(bookConfidences.Value), _intercept) + new Score(_slope, confidence: ConfidenceHelper.GeometricMean(bookConfidences.Value), _intercept) ); } diff --git a/src/SIL.Machine/Translation/HybridTranslationEngine.cs b/src/SIL.Machine/Translation/HybridTranslationEngine.cs index 181cdc78e..37dda202b 100644 --- a/src/SIL.Machine/Translation/HybridTranslationEngine.cs +++ b/src/SIL.Machine/Translation/HybridTranslationEngine.cs @@ -4,6 +4,7 @@ using System.Threading; using System.Threading.Tasks; using SIL.Machine.Tokenization; +using SIL.Machine.Utils; using SIL.ObjectModel; namespace SIL.Machine.Translation @@ -488,7 +489,7 @@ private TranslationResult Merge(TranslationResult interactiveResult, Translation interactiveResult.SourceTokens, mergedTargetSegment, mergedConfidences, - -1.0, + ConfidenceHelper.GeometricMean(mergedConfidences), mergedSources, alignment, interactiveResult.Phrases diff --git a/src/SIL.Machine/Translation/TransferEngine.cs b/src/SIL.Machine/Translation/TransferEngine.cs index 7ba7ff021..50a490da9 100644 --- a/src/SIL.Machine/Translation/TransferEngine.cs +++ b/src/SIL.Machine/Translation/TransferEngine.cs @@ -6,6 +6,7 @@ using SIL.Machine.Corpora; using SIL.Machine.Morphology; using SIL.Machine.Tokenization; +using SIL.Machine.Utils; using SIL.ObjectModel; namespace SIL.Machine.Translation @@ -186,7 +187,7 @@ public IReadOnlyList Translate(int n, IReadOnlyList s segment, targetTokens, confidences, - -1.0, + ConfidenceHelper.GeometricMean(confidences), sources, alignment, new[] { new Phrase(Range.Create(0, normalizedSourceTokens.Count), targetWords.Count) } diff --git a/src/SIL.Machine/Utils/ConfidenceHelper.cs b/src/SIL.Machine/Utils/ConfidenceHelper.cs new file mode 100644 index 000000000..b6bfc18cb --- /dev/null +++ b/src/SIL.Machine/Utils/ConfidenceHelper.cs @@ -0,0 +1,25 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace SIL.Machine.Utils +{ + public static class ConfidenceHelper + { + /// + /// Calculates the geometric mean for a collection of values. + /// + /// + /// The geometric mean. + public static double GeometricMean(IList values) + { + // Geometric mean requires positive values + if (values == null || !values.Any() || values.Any(x => x <= 0)) + return 0; + + // Compute the sum of the natural logarithms of all values, + // and divide by the count of numbers and take the exponential + return Math.Exp(values.Sum(Math.Log) / values.Count); + } + } +} From c1a8c07cb988ee8dc4306989950633f392c4e5ac Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Thu, 9 Apr 2026 17:02:43 -0400 Subject: [PATCH 3/3] Move geometric mean to StatisticalMethods --- .../ChrF3QualityEstimator.cs | 8 +++--- .../Statistics/StatisticalMethods.cs | 16 ++++++++++++ .../Translation/HybridTranslationEngine.cs | 4 +-- src/SIL.Machine/Translation/TransferEngine.cs | 4 +-- src/SIL.Machine/Utils/ConfidenceHelper.cs | 25 ------------------- 5 files changed, 24 insertions(+), 33 deletions(-) delete mode 100644 src/SIL.Machine/Utils/ConfidenceHelper.cs diff --git a/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimator.cs b/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimator.cs index 444240b37..0249835c6 100644 --- a/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimator.cs +++ b/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimator.cs @@ -2,7 +2,7 @@ using System.Collections.Generic; using System.Linq; using SIL.Machine.Corpora; -using SIL.Machine.Utils; +using SIL.Machine.Statistics; namespace SIL.Machine.QualityEstimation { @@ -252,7 +252,7 @@ private List ComputeTextUsability(TextScores textScores) { textScores.AddScore( textIdConfidences.Key, - new Score(_slope, confidence: ConfidenceHelper.GeometricMean(textIdConfidences.Value), _intercept) + new Score(_slope, confidence: StatisticalMethods.GeometricMean(textIdConfidences.Value), _intercept) ); } @@ -312,7 +312,7 @@ out List bookAndChapterConfidences bookAndChapterConfidences.Key.Chapter, new Score( _slope, - confidence: ConfidenceHelper.GeometricMean(bookAndChapterConfidences.Value), + confidence: StatisticalMethods.GeometricMean(bookAndChapterConfidences.Value), _intercept ) ); @@ -323,7 +323,7 @@ out List bookAndChapterConfidences { bookScores.AddScore( bookConfidences.Key, - new Score(_slope, confidence: ConfidenceHelper.GeometricMean(bookConfidences.Value), _intercept) + new Score(_slope, confidence: StatisticalMethods.GeometricMean(bookConfidences.Value), _intercept) ); } diff --git a/src/SIL.Machine/Statistics/StatisticalMethods.cs b/src/SIL.Machine/Statistics/StatisticalMethods.cs index 081818c91..33e1b69ee 100644 --- a/src/SIL.Machine/Statistics/StatisticalMethods.cs +++ b/src/SIL.Machine/Statistics/StatisticalMethods.cs @@ -43,5 +43,21 @@ public static double KullbackLeiblerDivergence(IEnumerable dist1, IEnume { return dist1.Zip(dist2, (p1, p2) => p1 == 0 || p2 == 0 ? 0 : Math.Log(p1 / p2, 2) * p1).Sum(); } + + /// + /// Calculates the geometric mean for a collection of values. + /// + /// + /// The geometric mean. + public static double GeometricMean(IList values) + { + // Geometric mean requires positive values + if (values == null || !values.Any() || values.Any(x => x <= 0)) + return 0; + + // Compute the sum of the natural logarithms of all values, + // and divide by the count of numbers and take the exponential + return Math.Exp(values.Sum(Math.Log) / values.Count); + } } } diff --git a/src/SIL.Machine/Translation/HybridTranslationEngine.cs b/src/SIL.Machine/Translation/HybridTranslationEngine.cs index 37dda202b..f0e5a9033 100644 --- a/src/SIL.Machine/Translation/HybridTranslationEngine.cs +++ b/src/SIL.Machine/Translation/HybridTranslationEngine.cs @@ -3,8 +3,8 @@ using System.Linq; using System.Threading; using System.Threading.Tasks; +using SIL.Machine.Statistics; using SIL.Machine.Tokenization; -using SIL.Machine.Utils; using SIL.ObjectModel; namespace SIL.Machine.Translation @@ -489,7 +489,7 @@ private TranslationResult Merge(TranslationResult interactiveResult, Translation interactiveResult.SourceTokens, mergedTargetSegment, mergedConfidences, - ConfidenceHelper.GeometricMean(mergedConfidences), + StatisticalMethods.GeometricMean(mergedConfidences), mergedSources, alignment, interactiveResult.Phrases diff --git a/src/SIL.Machine/Translation/TransferEngine.cs b/src/SIL.Machine/Translation/TransferEngine.cs index 50a490da9..9551de0c7 100644 --- a/src/SIL.Machine/Translation/TransferEngine.cs +++ b/src/SIL.Machine/Translation/TransferEngine.cs @@ -5,8 +5,8 @@ using SIL.Machine.Annotations; using SIL.Machine.Corpora; using SIL.Machine.Morphology; +using SIL.Machine.Statistics; using SIL.Machine.Tokenization; -using SIL.Machine.Utils; using SIL.ObjectModel; namespace SIL.Machine.Translation @@ -187,7 +187,7 @@ public IReadOnlyList Translate(int n, IReadOnlyList s segment, targetTokens, confidences, - ConfidenceHelper.GeometricMean(confidences), + StatisticalMethods.GeometricMean(confidences), sources, alignment, new[] { new Phrase(Range.Create(0, normalizedSourceTokens.Count), targetWords.Count) } diff --git a/src/SIL.Machine/Utils/ConfidenceHelper.cs b/src/SIL.Machine/Utils/ConfidenceHelper.cs deleted file mode 100644 index b6bfc18cb..000000000 --- a/src/SIL.Machine/Utils/ConfidenceHelper.cs +++ /dev/null @@ -1,25 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; - -namespace SIL.Machine.Utils -{ - public static class ConfidenceHelper - { - /// - /// Calculates the geometric mean for a collection of values. - /// - /// - /// The geometric mean. - public static double GeometricMean(IList values) - { - // Geometric mean requires positive values - if (values == null || !values.Any() || values.Any(x => x <= 0)) - return 0; - - // Compute the sum of the natural logarithms of all values, - // and divide by the count of numbers and take the exponential - return Math.Exp(values.Sum(Math.Log) / values.Count); - } - } -}