33# Modifying it to work with WeightedOnlineStats
44# #############################################################
55
6+ import LinearAlgebra
# Common supertype of every weighted histogram statistic.
abstract type WeightedHistogramStat{T} <: WeightedOnlineStat{T} end
# Supertype of the fixed-edge histograms `WeightedHist1D` and `WeightedHistND`.
abstract type WeightedHist{T} <: WeightedHistogramStat{T} end

# Generic fallbacks: every statistic below is approximated from the bin
# midpoints, using the bin counts as frequency weights.
function split_candidates(o::WeightedHistogramStat)
    return midpoints(o)
end
function Statistics.mean(o::WeightedHistogramStat)
    return mean(midpoints(o), fweights(counts(o)))
end
function Statistics.var(o::WeightedHistogramStat)
    return var(midpoints(o), fweights(counts(o)); corrected=true)
end
# Broadcast `sqrt` so that a tuple/array of variances is handled as well.
function Statistics.std(o::WeightedHistogramStat)
    return sqrt.(var(o))
end
function Statistics.median(o::WeightedHistogramStat)
    return quantile(o, .5)
end
1214
1315function Base. show (io:: IO , o:: WeightedHistogramStat )
@@ -25,7 +27,12 @@ Create a histogram with bin partition defined by `edges`.
2527- If `left`, the bins will be left-closed.
2628- If `closed`, the bin on the end will be closed.
2729 - E.g. for a two bin histogram ``[a, b), [b, c)`` vs. ``[a, b), [b, c]``
28- # Example
30+
31+ If `edges` is a tuple instead of an array, a multidimensional histogram will be
32+ generated that behaves like a `WeightedOnlineStat{VectorOb}`.
33+
34+ # Examples
35+
2936 o = fit!(WeightedHist(-5:.1:5), randn(10^6))
3037
3138 # approximate statistics
@@ -38,68 +45,152 @@ Create a histogram with bin partition defined by `edges`.
3845 extrema(o)
3946 area(o)
4047 pdf(o)
48+
49+ ## 2d Histogram
50+
51+ hist2d = fit!(WeightedHist((-5:1:5, -5:1:5) ), randn(10000,2), rand(10000))
52+ value(hist2d).y
4153"""
struct WeightedHist1D{R} <: WeightedHist{Float64}
    # Bin edges of the single dimension.
    edges::R
    # Number of observations that fell into each bin.
    counts::Vector{Int}
    # Average weight of the observations in each bin.
    meanw::Vector{Float64}
    # Observation count / average weight for values outside the edges; the
    # three slots are addressed by `outindex` (1 = below, 2 = inside, 3 = above).
    outcount::Vector{Int}
    meanwout::Vector{Float64}
    # `left`: bins are left-closed. `closed`: the bin on the end is closed.
    left::Bool
    closed::Bool
end

# N-dimensional counterpart of `WeightedHist1D`: `edges` is a tuple with one
# edge collection per dimension and every bookkeeping array has `N` dimensions.
struct WeightedHistND{R, N} <: WeightedHist{OnlineStats.VectorOb}
    edges::R
    counts::Array{Int, N}
    meanw::Array{Float64, N}
    outcount::Array{Int, N}
    meanwout::Array{Float64, N}
    left::Bool
    closed::Bool
end
4872
# Build a weighted histogram from `edges`.
#
# A single edge collection yields a `WeightedHist1D`; a tuple of edge
# collections yields a `WeightedHistND` with one dimension per tuple entry.
# `left` and `closed` are stored unchanged and later forwarded to
# `OnlineStats.binindex`.
function WeightedHist(edges; left::Bool = true, closed::Bool = true)
    edgetuple = edges isa Tuple ? edges : (edges,)
    ndim = length(edgetuple)

    # One bin between every pair of neighbouring edges, per dimension.
    binshape = map(e -> length(e) - 1, edgetuple)
    # Three "outside" slots per dimension (below / inside / above).
    outshape = ntuple(_ -> 3, ndim)

    counts = zeros(Int, binshape)
    meanw = zeros(Float64, binshape)
    outcount = zeros(Int, outshape)
    meanwout = zeros(Float64, outshape)

    if ndim == 1
        return WeightedHist1D(edgetuple[1], counts, meanw, outcount, meanwout, left, closed)
    end
    return WeightedHistND{typeof(edgetuple), ndim}(
        edgetuple, counts, meanw, outcount, meanwout, left, closed
    )
end
# Total number of observations, inside and outside the bin range.
function nobs(o::WeightedHist)
    return sum(o.counts) + sum(o.outcount)
end

# Total weight: per-slot count times per-slot mean weight, summed over the
# in-range bins and the out-of-range slots.
function weightsum(o::WeightedHist)
    inside = LinearAlgebra.dot(o.counts, o.meanw)
    outside = LinearAlgebra.dot(o.outcount, o.meanwout)
    return inside + outside
end

# `x` are the edges, `y` the accumulated weight (count * mean weight) per bin.
function value(o::WeightedHist)
    return (x = edges(o), y = o.counts .* o.meanw)
end

# Bin lookup. A vector observation is first converted to an N-tuple.
function binindices(o::WeightedHistND{<:Any, N}, x::AbstractVector) where {N}
    return binindices(o, ntuple(i -> x[i], N))
end
function binindices(o::WeightedHist1D, x)
    return OnlineStats.binindex(o.edges, x, o.left, o.closed)
end
function binindices(o::WeightedHistND, x)
    perdim = map(o.edges, x) do e, xi
        OnlineStats.binindex(e, xi, o.left, o.closed)
    end
    return CartesianIndex(perdim)
end

# Bin midpoints; for the N-d histogram this is the lazy product of the
# per-dimension midpoints.
function midpoints(o::WeightedHistND)
    return Iterators.product(map(midpoints, o.edges)...)
end
function midpoints(o::WeightedHist1D)
    return midpoints(edges(o))
end

function counts(o::WeightedHist)
    return o.counts
end
function edges(o::WeightedHist)
    return o.edges
end
# Approximate mean from the bin midpoints, weighted by the accumulated weight
# of each bin. Returns a scalar for a 1-d histogram and otherwise a tuple with
# one mean per dimension.
function Statistics.mean(o::WeightedHist)
    binweights = fweights(value(o).y)
    mids = midpoints(o)
    ndim = ndims(o.counts)
    perdim = ntuple(ndim) do d
        coords = map(m -> m[d], mids)
        mean(coords, binweights)
    end
    return ndim == 1 ? perdim[1] : perdim
end
# Approximate (corrected) variance from the bin midpoints, weighted by the
# accumulated weight of each bin. Returns a scalar for a 1-d histogram and
# otherwise a tuple with one variance per dimension.
function Statistics.var(o::WeightedHist)
    binweights = fweights(value(o).y)
    mids = midpoints(o)
    ndim = ndims(o.counts)
    perdim = ntuple(ndim) do d
        coords = map(m -> m[d], mids)
        var(coords, binweights, corrected=true)
    end
    return ndim == 1 ? perdim[1] : perdim
end
# Standard deviation; `sqrt` is broadcast because `var` returns a tuple for
# multidimensional histograms.
function Statistics.std(o::WeightedHist)
    return sqrt.(var(o))
end

# Median, expressed as the 0.5 quantile.
function Statistics.median(o::WeightedHist)
    return quantile(o, .5)
end
60116
# Midpoints of the first and last non-empty bin.
#
# Throws an `ArgumentError` when no in-range observation has been recorded
# (previously this surfaced as an opaque "invalid index: nothing" error).
function Base.extrema(o::WeightedHist1D)
    x, y = midpoints(o), counts(o)
    lo = findfirst(!iszero, y)
    lo === nothing &&
        throw(ArgumentError("cannot compute extrema of a histogram without observations"))
    return x[lo], x[findlast(!iszero, y)]
end

# Per-dimension `(min, max)` midpoints of the non-empty bins, returned as an
# `N`-tuple of pairs.
#
# Throws an `ArgumentError` when no in-range observation has been recorded.
function Base.extrema(o::WeightedHistND{<:Any,N}) where {N}
    y = counts(o)
    any(!iszero, y) ||
        throw(ArgumentError("cannot compute extrema of a histogram without observations"))
    return ntuple(N) do idim
        # Per-dimension midpoints are computed directly from the edges instead
        # of reaching into the internal fields of the `Iterators.product` object.
        mids = midpoints(o.edges[idim])
        # Collapse every other dimension: is any bin along `idim` occupied?
        occupied = vec(any(!iszero, y, dims = setdiff(1:N, idim)))
        mids[findfirst(occupied)], mids[findlast(occupied)]
    end
end
128+
# Approximate quantiles at probabilities `p`, computed from the midpoints of
# the non-empty bins with the bin counts as frequency weights.
#
# Returns the quantiles directly for a 1-d histogram and otherwise a tuple
# holding the quantiles of each dimension.
function Statistics.quantile(o::WeightedHist, p = [0, .25, .5, .75, 1])
    x, y = midpoints(o), counts(o)
    N = ndims(y)
    inds = findall(!iszero, y)
    # Built once and shared by all dimensions.
    yweights = fweights(y[inds])
    subset = collect(x)[inds]
    r = ntuple(N) do idim
        data = map(m -> m[idim], subset)
        quantile(data, yweights, p)
    end
    return N == 1 ? r[1] : r
end
70145
# Sum over all bins of `count * bin volume`, where the bin volume is the
# product of the bin widths along every dimension.
function area(o::WeightedHist)
    c = o.counts
    # A 1-d histogram stores its edges as a bare collection, an N-d histogram
    # as a tuple of collections. Wrap the 1-d case so that the per-dimension
    # `map` below pairs each edge collection with its bin index instead of
    # iterating over the individual edge values.
    e = o.edges isa Tuple ? o.edges : (o.edges,)
    return mapreduce(+, CartesianIndices(c)) do I
        binvolume = prod(map((ed, i) -> ed[i + 1] - ed[i], e, I.I))
        c[I] * binvolume
    end
end
80154
# Classify a bin index per dimension: 1 = below the first bin, 2 = inside the
# bin range, 3 = above the last bin. The result indexes the 3-slot "outside"
# arrays; an observation is in range when every component equals 2.
function outindex(o, ci::CartesianIndex)
    slots = map(ci.I, size(o.counts)) do i, nbins
        # 1 when i < 1, 2 when 1 <= i <= nbins, 3 when i > nbins
        1 + (i >= 1) + (i > nbins)
    end
    return CartesianIndex(slots)
end
function outindex(o, ci::Int)
    nbins = length(o.counts)
    return CartesianIndex(1 + (ci >= 1) + (ci > nbins))
end
# Density estimate at `y`: the accumulated weight of the bin containing `y`,
# divided by `area(o)` and by `weightsum(o)`. Returns 0.0 when `y` lies
# outside the bin range in any dimension.
function pdf(o::WeightedHist, y)
    ci = binindices(o, y)
    inrange = all(isequal(2), outindex(o, ci).I)
    inrange || return 0.0
    return o.counts[ci] * o.meanw[ci] / area(o) / weightsum(o)
end
89165
# Record one observation `x` with weight `wt`.
#
# In-range observations update the bin they fall into; all others update the
# matching "outside" slot. In both cases the count is incremented and the
# stored mean weight is updated via `smooth` with rate `1 / count`.
# NOTE(review): the number of components of `x` is not validated here.
function _fit!(o::WeightedHist, x, wt)
    ci = binindices(o, x)
    oi = outindex(o, ci)
    inrange = all(isequal(2), oi.I)
    if inrange
        _addweighted!(o.counts, o.meanw, ci, wt)
    else
        _addweighted!(o.outcount, o.meanwout, oi, wt)
    end
end

# Increment `tally[idx]` and fold `wt` into the mean weight `avgw[idx]`.
function _addweighted!(tally, avgw, idx, wt)
    tally[idx] += 1
    avgw[idx] = smooth(avgw[idx], wt, 1.0 / tally[idx])
end
98178
99179function _merge! (o:: WeightedHist , o2:: WeightedHist )
100180 if o. edges == o2. edges
101181 for j in eachindex (o. counts)
102- o. counts[j] += o2. counts[j]
182+ newcount = o. counts[j] + o2. counts[j]
183+ if newcount > 0
184+ o. meanw[j] = (o. meanw[j]* o. counts[j] + o2. meanw[j]* o2. counts[j])/ newcount
185+ end
186+ o. counts[j] = newcount
187+ end
188+ for j in eachindex (o. outcount)
189+ newcount = o. outcount[j] + o2. outcount[j]
190+ if newcount > 0
191+ o. meanwout[j] = (o. meanwout[j]* o. outcount[j] + o2. meanwout[j]* o2. outcount[j])/ newcount
192+ end
193+ o. outcount[j] = newcount
103194 end
104195 else
105196 @warn (" WeightedHistogram edges do not align. Merging is approximate." )
0 commit comments