caio.co/de/go-tdigest

Add MergeDestructive

Id
fc4de7be449bb6d60ead8feb924dd8649d8256da
Author
Vladimir Mihailenco
Commit time
2018-11-08T15:29:24+02:00

Modified summary.go

@@ -1,8 +1,9
package tdigest

import (
"fmt"
"math"
+ "math/rand"
"sort"
)

@@ -159,6 +160,24
means: append([]float64{}, s.means...),
counts: append([]uint32{}, s.counts...),
}
+}
+
+// Randomly shuffles summary contents, so they can be added to another summary
+// with being pathological. Renders summary invalid.
+func (s *summary) shuffle(rng RNG) {
+ for i := len(s.means) - 1; i > 1; i-- {
+ s.Swap(i, rand.Intn(i+1))
+ }
+}
+
+// for sort.Interface
+func (s *summary) Swap(i, j int) {
+ s.means[i], s.means[j] = s.means[j], s.means[i]
+ s.counts[i], s.counts[j] = s.counts[j], s.counts[i]
+}
+
+func (s *summary) Less(i, j int) bool {
+ return s.means[i] < s.means[j]
}

// A simple loop unroll saves a surprising amount of time.

Modified tdigest.go

@@ -228,7 +228,8
t.summary = newSummary(estimateCapacity(t.compression))
t.count = 0

- oldTree.Perm(t.rng, func(mean float64, count uint32) bool {
+ oldTree.shuffle(t.rng)
+ oldTree.ForEach(func(mean float64, count uint32) bool {
err = t.AddWeighted(mean, count)
return err == nil
})
@@ -247,6 +248,20
}

other.summary.Perm(t.rng, func(mean float64, count uint32) bool {
+ err = t.AddWeighted(mean, count)
+ return err == nil
+ })
+ return err
+}
+
+// As Merge, above, but leaves other in a scrambled state
+func (t *TDigest) MergeDestructive(other *TDigest) (err error) {
+ if other.summary.Len() == 0 {
+ return nil
+ }
+
+ other.summary.shuffle(t.rng)
+ other.summary.ForEach(func(mean float64, count uint32) bool {
err = t.AddWeighted(mean, count)
return err == nil
})

Modified tdigest_test.go

@@ -298,7 +298,15
return data[int(index)+1]*(index-float64(int(index))) + data[int(index)]*(float64(int(index)+1)-index)
}

-func TestMerge(t *testing.T) {
+func TestMergeNormal(t *testing.T) {
+ testMerge(t, false)
+}
+
+func TestMergeDescructive(t *testing.T) {
+ testMerge(t, true)
+}
+
+func testMerge(t *testing.T, destructive bool) {
if testing.Short() {
t.Skipf("Skipping merge test. Short flag is on")
}
@@ -326,7 +334,12

dist2 := uncheckedNew()
for i := 0; i < numSubs; i++ {
- _ = dist2.Merge(subs[i])
+ if destructive {
+ _ = dist2.MergeDestructive(subs[i])
+ } else {
+ _ = dist2.Merge(subs[i])
+ }
+
}

if dist.Count() != dist2.Count() {