Skip to content

Commit aaea78a

Browse files
committed
WIP: Porting usability score computation from confidence scores
TODO: Input data contract. TODO: Instance config, clearing. TODO: Interface for CI, Visibility.
1 parent d1390f7 commit aaea78a

19 files changed

+461
-0
lines changed
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using SIL.Machine.QualityEstimation.Scores;
5+
using SIL.Machine.QualityEstimation.Thresholds;
6+
using SIL.Machine.QualityEstimation.Usability;
7+
8+
namespace SIL.Machine.QualityEstimation
9+
{
10+
public class QualityEstimation
11+
{
12+
public BookThresholds BookThresholds { get; set; } = new BookThresholds();
13+
14+
public ChapterThresholds ChapterThresholds { get; set; } = new ChapterThresholds();
15+
16+
public VerseThresholds VerseThresholds { get; set; } = new VerseThresholds();
17+
18+
public UsabilityParameters Usable { get; set; } = UsabilityParameters.Usable;
19+
20+
public UsabilityParameters Unusable { get; set; } = UsabilityParameters.Unusable;
21+
22+
public List<BookUsability> UsabilityBooks { get; } = new List<BookUsability>();
23+
24+
public List<ChapterUsability> UsabilityChapters { get; } = new List<ChapterUsability>();
25+
26+
public List<SequenceUsability> UsabilitySequences { get; } = new List<SequenceUsability>();
27+
28+
public List<TxtFileUsability> UsabilityTxtFiles { get; } = new List<TxtFileUsability>();
29+
30+
public List<VerseUsability> UsabilityVerses { get; } = new List<VerseUsability>();
31+
32+
public double CalculateUsableProbability(double chrF3)
33+
{
34+
double usableWeight = Math.Exp(-Math.Pow(chrF3 - Usable.Mean, 2) / (2 * Usable.Variance)) * Usable.Count;
35+
double unusableWeight =
36+
Math.Exp(-Math.Pow(chrF3 - Unusable.Mean, 2) / (2 * Unusable.Variance)) * Unusable.Count;
37+
return usableWeight / (usableWeight + unusableWeight);
38+
}
39+
40+
public void ComputeBookUsability(BookScores bookScores)
41+
{
42+
foreach (string book in bookScores.Scores.Keys)
43+
{
44+
Score score = bookScores.GetScore(book);
45+
if (score is null)
46+
{
47+
continue;
48+
}
49+
50+
List<double> bookUsabilities = bookScores.GetVerseUsabilities(book);
51+
double averageProbability = bookUsabilities.Average();
52+
UsabilityBooks.Add(
53+
new BookUsability
54+
{
55+
Book = book,
56+
Usability = averageProbability,
57+
ProjectedChrF3 = score.ProjectedChrF3,
58+
Label = BookThresholds.ReturnLabel(averageProbability),
59+
}
60+
);
61+
}
62+
}
63+
64+
public void ComputeChapterUsability(ChapterScores chapterScores)
65+
{
66+
foreach (KeyValuePair<string, Dictionary<int, Score>> chapterScoresByBook in chapterScores.Scores)
67+
{
68+
string book = chapterScoresByBook.Key;
69+
foreach (int chapter in chapterScoresByBook.Value.Keys)
70+
{
71+
Score score = chapterScores.GetScore(book, chapter);
72+
if (score is null)
73+
{
74+
continue;
75+
}
76+
77+
List<double> chapterUsabilities = chapterScores.GetVerseUsabilities(book, chapter);
78+
double averageProbability = chapterUsabilities.Average();
79+
UsabilityChapters.Add(
80+
new ChapterUsability
81+
{
82+
Book = book,
83+
Chapter = chapter,
84+
Usability = averageProbability,
85+
ProjectedChrF3 = score.ProjectedChrF3,
86+
Label = ChapterThresholds.ReturnLabel(averageProbability),
87+
}
88+
);
89+
}
90+
}
91+
}
92+
93+
public void ComputeTxtFileUsability(TxtFileScores txtFileScores)
94+
{
95+
foreach (string targetDraftFileStem in txtFileScores.Scores.Keys)
96+
{
97+
Score score = txtFileScores.GetScore(targetDraftFileStem);
98+
if (score is null)
99+
{
100+
continue;
101+
}
102+
103+
List<double> txtFileUsabilities = txtFileScores.GetSequenceUsabilities(targetDraftFileStem);
104+
double averageProbability = txtFileUsabilities.Average();
105+
UsabilityTxtFiles.Add(
106+
new TxtFileUsability
107+
{
108+
TargetDraftFile = targetDraftFileStem,
109+
Usability = averageProbability,
110+
ProjectedChrF3 = score.ProjectedChrF3,
111+
Label = VerseThresholds.ReturnLabel(averageProbability),
112+
}
113+
);
114+
}
115+
}
116+
117+
public void ComputeUsableProportions(
118+
List<VerseScore> verseScores,
119+
ref ChapterScores chapterScores,
120+
ref BookScores bookScores
121+
)
122+
{
123+
foreach (VerseScore verseScore in verseScores.Where(v => v.VerseRef.VerseNum > 0))
124+
{
125+
double probability = CalculateUsableProbability(verseScore.ProjectedChrF3);
126+
chapterScores.AppendVerseUsability(
127+
verseScore.VerseRef.Book,
128+
verseScore.VerseRef.ChapterNum,
129+
probability
130+
);
131+
bookScores.AppendVerseUsability(verseScore.VerseRef.Book, probability);
132+
UsabilityVerses.Add(
133+
new VerseUsability
134+
{
135+
Book = verseScore.VerseRef.Book,
136+
Chapter = verseScore.VerseRef.ChapterNum,
137+
Verse = verseScore.VerseRef.Verse,
138+
Usability = probability,
139+
ProjectedChrF3 = verseScore.ProjectedChrF3,
140+
Label = VerseThresholds.ReturnLabel(probability),
141+
}
142+
);
143+
}
144+
145+
ComputeChapterUsability(chapterScores);
146+
ComputeBookUsability(bookScores);
147+
}
148+
149+
public void ComputeUsableProportions(List<SequenceScore> sequenceScores, ref TxtFileScores txtFileScores)
150+
{
151+
foreach (SequenceScore sequenceScore in sequenceScores)
152+
{
153+
double probability = CalculateUsableProbability(sequenceScore.ProjectedChrF3);
154+
txtFileScores.AppendSequenceUsability(sequenceScore.TargetDraftFileStem, probability);
155+
UsabilitySequences.Add(
156+
new SequenceUsability
157+
{
158+
TargetDraftFile = sequenceScore.TargetDraftFileStem,
159+
SequenceNumber = sequenceScore.SequenceNumber,
160+
Usability = probability,
161+
ProjectedChrF3 = sequenceScore.ProjectedChrF3,
162+
Label = VerseThresholds.ReturnLabel(probability),
163+
}
164+
);
165+
}
166+
167+
ComputeTxtFileUsability(txtFileScores);
168+
}
169+
}
170+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
using System.Collections.Generic;
2+
3+
namespace SIL.Machine.QualityEstimation.Scores
4+
{
5+
public class BookScores
6+
{
7+
private readonly Dictionary<string, List<double>> _verseUsabilities = new Dictionary<string, List<double>>();
8+
9+
public readonly Dictionary<string, Score> Scores = new Dictionary<string, Score>();
10+
11+
public void AddScore(string book, Score score) => Scores[book] = score;
12+
13+
public Score GetScore(string book) => Scores.TryGetValue(book, out Score score) ? score : null;
14+
15+
public void AppendVerseUsability(string book, double usability)
16+
{
17+
if (!_verseUsabilities.TryGetValue(book, out List<double> list))
18+
{
19+
list = new List<double>();
20+
_verseUsabilities[book] = list;
21+
}
22+
23+
list.Add(usability);
24+
}
25+
26+
public List<double> GetVerseUsabilities(string book) =>
27+
_verseUsabilities.TryGetValue(book, out List<double> list) ? new List<double>(list) : new List<double>();
28+
}
29+
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
using System.Collections.Generic;
2+
3+
namespace SIL.Machine.QualityEstimation.Scores
4+
{
5+
public class ChapterScores
6+
{
7+
private readonly Dictionary<string, Dictionary<int, List<double>>> _verseUsabilities =
8+
new Dictionary<string, Dictionary<int, List<double>>>();
9+
10+
public readonly Dictionary<string, Dictionary<int, Score>> Scores =
11+
new Dictionary<string, Dictionary<int, Score>>();
12+
13+
public void AddScore(string book, int chapter, Score score)
14+
{
15+
if (!Scores.TryGetValue(book, out Dictionary<int, Score> chapters))
16+
{
17+
chapters = new Dictionary<int, Score>();
18+
Scores[book] = chapters;
19+
}
20+
21+
chapters[chapter] = score;
22+
}
23+
24+
public Score GetScore(string book, int chapter) =>
25+
Scores.TryGetValue(book, out Dictionary<int, Score> chapters)
26+
&& chapters.TryGetValue(chapter, out Score score)
27+
? score
28+
: null;
29+
30+
public void AppendVerseUsability(string book, int chapter, double usability)
31+
{
32+
if (!_verseUsabilities.TryGetValue(book, out Dictionary<int, List<double>> chapters))
33+
{
34+
chapters = new Dictionary<int, List<double>>();
35+
_verseUsabilities[book] = chapters;
36+
}
37+
38+
if (!chapters.TryGetValue(chapter, out List<double> list))
39+
{
40+
list = new List<double>();
41+
chapters[chapter] = list;
42+
}
43+
44+
list.Add(usability);
45+
}
46+
47+
public List<double> GetVerseUsabilities(string book, int chapter) =>
48+
_verseUsabilities.TryGetValue(book, out Dictionary<int, List<double>> chapters)
49+
&& chapters.TryGetValue(chapter, out List<double> list)
50+
? new List<double>(list)
51+
: new List<double>();
52+
}
53+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
namespace SIL.Machine.QualityEstimation.Scores
2+
{
3+
public class Score
4+
{
5+
public Score(double slope, double confidence, double intercept)
6+
{
7+
Confidence = confidence;
8+
ProjectedChrF3 = slope * confidence + intercept;
9+
}
10+
11+
public double Confidence { get; }
12+
13+
public double ProjectedChrF3 { get; }
14+
}
15+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
namespace SIL.Machine.QualityEstimation.Scores
2+
{
3+
public class SequenceScore : Score
4+
{
5+
public SequenceScore(
6+
double slope,
7+
double confidence,
8+
double intercept,
9+
string sequenceNumber,
10+
string targetDraftFileStem
11+
)
12+
: base(slope, confidence, intercept)
13+
{
14+
SequenceNumber = sequenceNumber;
15+
TargetDraftFileStem = targetDraftFileStem;
16+
}
17+
18+
public string SequenceNumber { get; }
19+
public string TargetDraftFileStem { get; }
20+
}
21+
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
using System.Collections.Generic;
2+
3+
namespace SIL.Machine.QualityEstimation.Scores
4+
{
5+
public class TxtFileScores
6+
{
7+
private readonly Dictionary<string, List<double>> _sequenceUsabilities = new Dictionary<string, List<double>>();
8+
9+
public readonly Dictionary<string, Score> Scores = new Dictionary<string, Score>();
10+
11+
public void AddScore(string targetDraftFileStem, Score score) => Scores[targetDraftFileStem] = score;
12+
13+
public Score GetScore(string targetDraftFileStem) =>
14+
Scores.TryGetValue(targetDraftFileStem, out Score score) ? score : null;
15+
16+
public void AppendSequenceUsability(string targetDraftFileStem, double usability)
17+
{
18+
if (!_sequenceUsabilities.TryGetValue(targetDraftFileStem, out List<double> list))
19+
{
20+
list = new List<double>();
21+
_sequenceUsabilities[targetDraftFileStem] = list;
22+
}
23+
24+
list.Add(usability);
25+
}
26+
27+
public List<double> GetSequenceUsabilities(string targetDraftFileStem) =>
28+
_sequenceUsabilities.TryGetValue(targetDraftFileStem, out List<double> list)
29+
? new List<double>(list)
30+
: new List<double>();
31+
}
32+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
using SIL.Scripture;
2+
3+
namespace SIL.Machine.QualityEstimation.Scores
4+
{
5+
public class VerseScore : Score
6+
{
7+
public VerseScore(double slope, double confidence, double intercept, VerseRef verseRef)
8+
: base(slope, confidence, intercept)
9+
{
10+
VerseRef = verseRef;
11+
}
12+
13+
public VerseRef VerseRef { get; }
14+
}
15+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
namespace SIL.Machine.QualityEstimation.Thresholds
2+
{
3+
public class BookThresholds : Thresholds
4+
{
5+
public override double GreenThreshold => 0.745;
6+
7+
public override double YellowThreshold => 0.62;
8+
}
9+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
namespace SIL.Machine.QualityEstimation.Thresholds
2+
{
3+
public class ChapterThresholds : Thresholds
4+
{
5+
public override double GreenThreshold => 0.745;
6+
7+
public override double YellowThreshold => 0.62;
8+
}
9+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
namespace SIL.Machine.QualityEstimation.Thresholds
2+
{
3+
public abstract class Thresholds
4+
{
5+
public abstract double GreenThreshold { get; }
6+
7+
public abstract double YellowThreshold { get; }
8+
9+
public UsabilityLabel ReturnLabel(double probability) =>
10+
probability >= GreenThreshold ? UsabilityLabel.Green
11+
: probability >= YellowThreshold ? UsabilityLabel.Yellow
12+
: UsabilityLabel.Red;
13+
}
14+
}

0 commit comments

Comments
 (0)