From d25e88ab1d6c902ed6d0c9db298f3a9b69d01dbe Mon Sep 17 00:00:00 2001 From: omaus Date: Fri, 28 Jan 2022 00:12:15 +0100 Subject: [PATCH 1/3] Add U test :sparkles: --- src/FSharp.Stats/FSharp.Stats.fsproj | 1 + src/FSharp.Stats/Testing/TestStatistics.fs | 24 +++++++++- src/FSharp.Stats/Testing/UTest.fs | 51 ++++++++++++++++++++++ 3 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 src/FSharp.Stats/Testing/UTest.fs diff --git a/src/FSharp.Stats/FSharp.Stats.fsproj b/src/FSharp.Stats/FSharp.Stats.fsproj index fd456944..ce317cc5 100644 --- a/src/FSharp.Stats/FSharp.Stats.fsproj +++ b/src/FSharp.Stats/FSharp.Stats.fsproj @@ -113,6 +113,7 @@ + diff --git a/src/FSharp.Stats/Testing/TestStatistics.fs b/src/FSharp.Stats/Testing/TestStatistics.fs index e81a5279..9b5839f0 100644 --- a/src/FSharp.Stats/Testing/TestStatistics.fs +++ b/src/FSharp.Stats/Testing/TestStatistics.fs @@ -96,4 +96,26 @@ module TestStatistics = let cdf = Distributions.Continuous.Normal.CDF 0. 1. statistic let pvalue = 1.- cdf let pvalueTwoTailed = pvalue * 2. - {Statistic=statistic; PValueLeft=pvalue;PValueRight = cdf; PValueTwoTailed = pvalueTwoTailed} \ No newline at end of file + {Statistic=statistic; PValueLeft=pvalue;PValueRight = cdf; PValueTwoTailed = pvalueTwoTailed} + + + /// + /// Result of a Mann-Whitney U-test: the test statistic and the p-values derived from it. + /// + /// The test statistic. + /// One Tailed/Sided. + /// Two Tailed/Sided. + type UTestTestStatistics = { + Statistic : float + PValueLeft : float + PValueRight : float + PValueTwoTailed : float + } + let createUTest statistic : UTestTestStatistics = + let cdf = Distributions.Continuous.Normal.CDF 0. 1. statistic + { + Statistic = statistic + PValueLeft = 1. - cdf + PValueRight = cdf + PValueTwoTailed = 2. * min cdf (1. - cdf) // double the smaller tail so the result stays within [0, 1] even when statistic > 0
+ } \ No newline at end of file diff --git a/src/FSharp.Stats/Testing/UTest.fs b/src/FSharp.Stats/Testing/UTest.fs new file mode 100644 index 00000000..35bcbf66 --- /dev/null +++ b/src/FSharp.Stats/Testing/UTest.fs @@ -0,0 +1,51 @@ +namespace FSharp.Stats.Testing + +// taken/implemented from: https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test#U_statistic +module UTest = + + open FSharp.Stats + open FSharp.Stats.Testing + + // TODO: Bergmann et al. (2000) showed that there are different implementations of this test that lead to different results. + // They implied that some of them use an incorrect algorithm. Check whether the mathematical derivation linked above is wrong too. + // See: https://www.jstor.org/stable/2685616 + let inline private compute (seq1 : seq<'T>) (seq2 : seq<'T>) = + let sortedMerge = + (seq1 |> Seq.map (fun v -> float v, 0), seq2 |> Seq.map (fun v -> float v, 1)) // 0 = first group; 1 = second group + ||> Seq.append + |> Seq.sortByDescending (fun (v,groupIndex) -> v) + |> Array.ofSeq + // let abundance = // method for equal ranks instead of mean ranks when identical values occur. + // sortedMerge + // |> Array.map ( + // fun v -> Array.filter (fun v2 -> v2 = v) sortedMerge + // >> Array.length + // ) + // let myMap = sortedMerge |> Array.mapi (fun i x -> x, i + 2 - Array.item i abundance) |> Map // wrong: must return mean of ranksums with equal ranks, not always the same rank! + // let rankedMerge = sortedMerge |> Array.map (fun (v,group) -> float myMap.[(v,group)],v,group) + let rankedMerge = // method for mean ranks instead of equal ranks when identical values occur. + sortedMerge + |> Array.map fst + |> Rank.rankAverage + |> fun res -> + (sortedMerge, res) + ||> Array.map2 (fun (v,group) rank -> rank, v, group) + let calcRankSum group = + rankedMerge + |> Array.filter (fun (rank,v,group') -> group' = group) + |> Array.fold (fun state (rank,v,group') -> state + rank) 0. 
+ let rankSumSeq1 = calcRankSum 0 + let rankSumSeq2 = calcRankSum 1 + let seq1Length = Seq.length seq1 |> float + let seq2Length = Seq.length seq2 |> float + let u1 = seq1Length * seq2Length + (seq1Length * (seq1Length + 1.) / 2.) - rankSumSeq1 + let u2 = seq1Length * seq2Length + (seq2Length * (seq2Length + 1.) / 2.) - rankSumSeq2 + let uMin = min u1 u2 + let z = (uMin - seq1Length * seq2Length / 2.) / System.Math.Sqrt (seq1Length * seq2Length * (seq1Length + seq2Length + 1.) / 12.) + z + + /// Computes a Mann-Whitney U-test, also known as the Wilcoxon-Mann-Whitney test or Wilcoxon rank-sum test. + /// Use this test for independent samples; for dependent (paired) samples use the Wilcoxon signed-rank test instead. + let inline computeUtest (seq1 : seq<'T>) (seq2 : seq<'T>) = + let z = compute seq1 seq2 + TestStatistics.createUTest z \ No newline at end of file From 1dc5b74effd998acb914e67c6c89b5e2e383a6f4 Mon Sep 17 00:00:00 2001 From: omaus Date: Fri, 28 Jan 2022 00:12:38 +0100 Subject: [PATCH 2/3] Work on unit test for U test :construction: --- tests/FSharp.Stats.Tests/Testing.fs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/FSharp.Stats.Tests/Testing.fs b/tests/FSharp.Stats.Tests/Testing.fs index a3d475b9..be2416cf 100644 --- a/tests/FSharp.Stats.Tests/Testing.fs +++ b/tests/FSharp.Stats.Tests/Testing.fs @@ -235,6 +235,32 @@ let tTestTests = Expect.floatClose Accuracy.low tTest4.Statistic 0.514 "t statistic should be equal." 
] + +let uTestTests = + // taken from https://de.wikipedia.org/wiki/Wilcoxon-Mann-Whitney-Test#Beispiel + let testList1 = + ([0;400;500;550;600;650;750;800;900;950;1000;1100;1200;1500;1600;1800;1900;2000;2200;3500 ],["M";"W";"M";"W";"M";"W";"M";"M";"W";"W";"M";"M";"W";"M";"W";"M";"M";"M";"M";"M"]) + ||> List.map2 (fun pay sex -> sex, pay) |> List.sortBy fst + + let testList1A = testList1 |> List.choose (fun (sex,pay) -> if sex = "W" then Some pay else None) + let testList1B = testList1 |> List.choose (fun (sex,pay) -> if sex = "M" then Some pay else None) + + let observedResult1 = UTest.computeUtest testList1A testList1B + let expectedResult1 : TestStatistics.UTestTestStatistics = { + Statistic = -1.15 + PValueTwoTailed = 0.267 + PV + } + + testList "Testing.UTest" [ + testCase "TwoSample" <| fun () -> + Expect.floatClose Accuracy.low observedResult1.PValueLeft expectedResult1.PValueLeft "left p-value should be equal" + Expect.floatClose Accuracy.low observedResult1.PValueRight expectedResult1.PValueRight "right p-value should be equal" + Expect.floatClose Accuracy.low observedResult1.PValueTwoTailed expectedResult1.PValueTwoTailed "p-value should be equal" + Expect.floatClose Accuracy.low observedResult1.Statistic expectedResult1.Statistic "test statistic should be equal" + ] + + [] let chiSquaredTests = // ChiSquared https://www.graphpad.com/quickcalcs/chisquared2/ From 619442421542fc6c8e16612c7fd422716d325f78 Mon Sep 17 00:00:00 2001 From: omaus Date: Fri, 28 Jan 2022 16:28:39 +0100 Subject: [PATCH 3/3] Finish unit test for U test :white_check_mark: --- tests/FSharp.Stats.Tests/Testing.fs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/FSharp.Stats.Tests/Testing.fs b/tests/FSharp.Stats.Tests/Testing.fs index be2416cf..cad3b779 100644 --- a/tests/FSharp.Stats.Tests/Testing.fs +++ b/tests/FSharp.Stats.Tests/Testing.fs @@ -236,6 +236,7 @@ let tTestTests = ] +[] let uTestTests = // taken from 
https://de.wikipedia.org/wiki/Wilcoxon-Mann-Whitney-Test#Beispiel let testList1 = @@ -248,8 +249,9 @@ let uTestTests = let observedResult1 = UTest.computeUtest testList1A testList1B let expectedResult1 : TestStatistics.UTestTestStatistics = { Statistic = -1.15 - PValueTwoTailed = 0.267 - PV + PValueTwoTailed = 0.2505 + PValueLeft = 0.875 + PValueRight = 0.1253 } testList "Testing.UTest" [