Skip to content

Commit 8e315f1

Browse files
committed
big speedup for "Last N" statistics
Use O(1) algorithms to compute the "Last N" min/max/mean/stddev statistics.
1 parent 8073aaa commit 8e315f1

1 file changed

Lines changed: 66 additions & 50 deletions

File tree

prettyping

Lines changed: 66 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -377,23 +377,63 @@ function print_newlines_if_needed() {
377377
378378
# Clears the data structure: empties d and resets every statistic and deque to its initial state.
# NOTE: d["min"] and d["max"] are not set here; they stay unset until the first store() call.
379379
function clear(d) {
380-
d["index"] = 0 # The next position to store a value
381-
d["size"] = 0 # The array size, goes up to LAST_N
380+
split("", d) # delete every element of d
381+
d["n"] = 0 # number of vals in the window, goes up to LAST_N
382+
d["avg"] = 0 # running mean of the vals in the window
383+
d["s2"] = 0 # sum of squared differences from the current mean, for Welfords method
384+
d["sd"] = 0 # standard deviation, recomputed by store() as sqrt(abs(s2) / n)
385+
d["min","-"] = d["min","+"] = 0 # min deque: "-" is the head index, "+" is one past the tail
386+
d["max","-"] = d["max","+"] = 0 # max deque, same head/tail layout as the min deque
387+
d["vals","-"] = d["vals","+"] = 0 # vals deque (the window contents, oldest at the head), same layout
382388
}
383389
384-
# This function stores the value to the passed data structure.
385-
# The data structure holds at most LAST_N values. When it is full,
386-
# a new value overwrite the oldest one.
387-
function store(d, value) {
388-
d[d["index"]] = value
389-
d["index"]++
390-
if ( d["index"] >= d["size"] ) {
391-
if ( d["size"] < LAST_N ) {
392-
d["size"]++
393-
} else {
394-
d["index"] = 0
395-
}
390+
# Maintains a sliding-window minimum or maximum in a monotonic deque,
# e.g. https://www.nayuki.io/page/sliding-window-minimum-maximum-algorithm
#   d      - the data structure; the deque entries live in d[s,i], with
#            d[s,"-"] the head index and d[s,"+"] one past the tail index
#   s      - deque name ("min" or "max"); the current extreme is published in d[s]
#   sign   - -1 for the min deque, +1 for the max deque
#   val    - the value entering the window
#   oldval - the value leaving the window, or "none" while the window is still growing
391+
function update_minmax(d, s, sign, val, oldval) {
392+
# Remove vals that cannot possibly be the min or max anymore, since they have been superseded by this new val
# The comparison is strict, so entries equal to val are kept; each duplicate keeps its own
# deque entry, which is what makes the equality test against oldval below remove exactly one.
393+
while (d[s,"+"] > d[s,"-"] && (val - d[s,d[s,"+"]-1]) * sign > 0)
394+
delete d[s,--d[s,"+"]]
395+
# Add our new val
396+
d[s,d[s,"+"]++] = val
397+
# Remove an old min or max that has rolled out of the window
# (the head is the oldest surviving entry, so it is the only one that can match oldval)
398+
if (d[s,d[s,"-"]] == oldval)
399+
delete d[s,d[s,"-"]++]
400+
# Store the current min or max for easy access
401+
d[s] = d[s,d[s,"-"]]
402+
}
403+
404+
# Stores val into the sliding window d and refreshes d["n"], d["avg"], d["s2"], d["sd"],
# d["min"] and d["max"]. Once more than LAST_N vals have been stored, the oldest one is evicted.
# Uses Welfords method for computing variance (online and numerically stable)
405+
# e.g. https://github.com/ajcr/rolling/blob/master/rolling/stats/variance.py
#   d      - the data structure, as initialised by clear()
#   val    - the new value to add
#   oldval, delta - scratch variables; both are always assigned below before being read
# NOTE(review): the comments here avoid apostrophes, presumably because this awk program is
# embedded in a single-quoted shell string - confirm before adding any.
406+
function store(d, val,
407+
oldval, delta) {
408+
# Add new val
409+
d["vals",d["vals","+"]++] = val
410+
d["n"]++
411+
412+
# Update Welfords method with new val
413+
delta = val - d["avg"]
414+
d["avg"] += delta / d["n"]
415+
d["s2"] += delta * (val - d["avg"])
416+
417+
if (d["n"] <= LAST_N) {
418+
# Window is still growing, so do not need to remove the oldest val
# "none" is a sentinel that update_minmax compares against the deque head
419+
oldval = "none"
396420
}
421+
else {
422+
# Remove old val that has rolled out of the window
# (the new val was added first, so n was LAST_N + 1 for a moment)
423+
oldval = d["vals",d["vals","-"]]
424+
delete d["vals",d["vals","-"]++]
425+
d["n"]--
426+
427+
# Update Welfords method in reverse to remove old val
# d["n"] has already been decremented, so the divisor is the post-removal count
428+
delta = oldval - d["avg"]
429+
d["avg"] -= delta / d["n"]
430+
d["s2"] -= delta * (oldval - d["avg"])
431+
}
432+
# Standard deviation in its population form (divides by n, not n - 1).
# abs() presumably guards against s2 drifting slightly below zero through rounding.
433+
d["sd"] = sqrt(abs(d["s2"]) / d["n"])
434+
# Both deques receive the same new val and the same evicted oldval
435+
update_minmax(d, "min", -1, val, oldval)
436+
update_minmax(d, "max", +1, val, oldval)
397437
}
398438
399439
############################################################
@@ -520,44 +560,20 @@ function print_global_stats(percentage_lost, avg_rtt) {
520560
}
521561
}
522562
523-
# All arguments are just local variables.
524-
function print_recent_stats(i, percentage_lost, sum, min, avg, max, diffs) {
525-
# Calculate and print the lost packets statistics
526-
sum = 0
527-
for ( i=0 ; i<lastn_lost["size"] ; i++ ) {
528-
sum += lastn_lost[i]
529-
}
530-
percentage_lost = (lastn_lost["size"] > 0) ? (sum*100/lastn_lost["size"]) : 0
531-
printf( "%2d/%3d (%2d%%) lost; ",
532-
sum,
533-
lastn_lost["size"],
534-
percentage_lost )
535-
536-
# Calculate the min/avg/max rtt times
537-
sum = diffs = 0
538-
min = max = lastn_rtt[0]
539-
for ( i=0 ; i<lastn_rtt["size"] ; i++ ) {
540-
sum += lastn_rtt[i]
541-
if ( lastn_rtt[i] < min ) min = lastn_rtt[i]
542-
if ( lastn_rtt[i] > max ) max = lastn_rtt[i]
543-
}
544-
avg = (lastn_rtt["size"]) ? (sum/lastn_rtt["size"]) : 0
545-
546-
# Calculate mdev (mean absolute deviation)
547-
for ( i=0 ; i<lastn_rtt["size"] ; i++ ) {
548-
diffs += abs(lastn_rtt[i] - avg)
549-
}
550-
if ( lastn_rtt["size"] > 0 ) {
551-
diffs /= lastn_rtt["size"]
552-
}
563+
# Prints the "last N" statistics from the precomputed fields of the global arrays
# lastn_lost and lastn_rtt; nothing is recalculated here.
# NOTE(review): assumes lastn_lost holds one 0/1 value per packet, so avg * n is the
# lost count - confirm against the caller that stores into it.
function print_recent_stats() {
564+
# Print the lost packets statistics
565+
printf( "%2.0f/%3d (%2d%%) lost; ",
566+
lastn_lost["avg"] * lastn_lost["n"], # lost count rebuilt from the mean; %2.0f rounds it
567+
lastn_lost["n"],
568+
lastn_lost["avg"] * 100.0 ) # printed with %2d, so the percentage is truncated, not rounded
553569
554570
# Print the rtt statistics
555571
printf( "%4.0f/" ESC_BOLD "%4.0f" ESC_DEFAULT "/%4.0f/%4.0fms (last %d)",
556-
min,
557-
avg,
558-
max,
559-
diffs,
560-
lastn_rtt["size"] )
572+
lastn_rtt["min"],
573+
lastn_rtt["avg"],
574+
lastn_rtt["max"],
575+
lastn_rtt["sd"], # standard deviation (this field was previously the mean absolute deviation)
576+
lastn_rtt["n"] )
561577
}
562578
563579
function print_statistics_bar() {
@@ -779,7 +795,7 @@ BEGIN {
779795
# $4 = time
780796
781797
# This must be called before incrementing the last_seq variable!
782-
rtt = int($4)
798+
rtt = $4 + 0.0
783799
process_rtt(rtt)
784800
785801
seq = int($2)

0 commit comments

Comments
 (0)