caio.co/de/go-tdigest

Make Add take only one parameter, introduce AddWeighted

This patch renames the previous Add(float64, uint32) to AddWeighted
and introduces a new method Add(float64), which is simply an alias
for AddWeighted(float64, 1).
Id
15346ee3016d9fa00f85b08b1fb6bd46fbe59637
Author
Caio
Commit time
2017-10-25T14:57:35+02:00

Modified README.md

@@ -44,7 +44,7
var t = tdigest.New()

for i := 0; i < 10000; i++ {
- t.Add(rand.Float64(), 1)
+ t.Add(rand.Float64())
}

fmt.Printf("p(.5) = %.6f\n", t.Quantile(0.5))

Modified serialization.go

@@ -109,7 +109,7
return nil, err
}

- err = t.Add(means[i], decUint)
+ err = t.AddWeighted(means[i], decUint)
if err != nil {
return nil, err
}

Modified serialization_test.go

@@ -28,7 +28,7
func TestSerialization(t *testing.T) {
t1, _ := New()
for i := 0; i < 100; i++ {
- _ = t1.Add(rand.Float64(), 1)
+ _ = t1.Add(rand.Float64())
}

serialized, _ := t1.AsBytes()

Modified tdigest.go

@@ -106,12 +106,15
return x1*w1/(w1+w2) + x2*w2/(w1+w2)
}

-// Add registers a new sample in the digest.
+// AddWeighted registers a new sample in the digest.
+//
// It's the main entry point for the digest and very likely the only
// method to be used for collecting samples. The count parameter is for
// when you are registering a sample that occurred multiple times - the
// most common value for this is 1.
-func (t *TDigest) Add(value float64, count uint32) (err error) {
+//
+// This will emit an error if `value` is NaN or if `count` is zero.
+func (t *TDigest) AddWeighted(value float64, count uint32) (err error) {

if count == 0 {
return fmt.Errorf("Illegal datapoint <value: %.4f, count: %d>", value, count)
@@ -180,6 +183,12
return err
}

+// Add is an alias for AddWeighted(value, 1).
+// Read the documentation for AddWeighted for more details.
+func (t *TDigest) Add(value float64) error {
+ return t.AddWeighted(value, 1)
+}
+
// Compress tries to reduce the number of individual centroids stored
// in the digest.
// Compression trades off accuracy for performance and happens
@@ -198,7 +207,7
shuffle(nodes)

for _, item := range nodes {
- err := t.Add(item.mean, item.count)
+ err := t.AddWeighted(item.mean, item.count)
if err != nil {
return err
}
@@ -221,7 +230,7
shuffle(nodes)

for _, item := range nodes {
- err := t.Add(item.mean, item.count)
+ err := t.AddWeighted(item.mean, item.count)
if err != nil {
return err
}

Modified tdigest_test.go

@@ -29,22 +29,22
t.Errorf("Quantile() on an empty digest should return NaN. Got: %.4f", tdigest.Quantile(0.1))
}

- _ = tdigest.Add(0.4, 1)
+ _ = tdigest.Add(0.4)

if tdigest.Quantile(0.1) != 0.4 {
t.Errorf("Quantile() on a single-sample digest should return the samples's mean. Got %.4f", tdigest.Quantile(0.1))
}

- _ = tdigest.Add(0.5, 1)
+ _ = tdigest.Add(0.5)

if tdigest.summary.Len() != 2 {
t.Errorf("Expected size 2, got %d", tdigest.summary.Len())
}

- err := tdigest.Add(0, 0)
+ err := tdigest.AddWeighted(0, 0)

if err == nil {
- t.Errorf("Expected Add() to error out with input (0,0)")
+ t.Errorf("Expected AddWeighted() to error out with input (0,0)")
}
}

@@ -67,7 +67,7
tdigest := uncheckedNew()

for i := 0; i < 100000; i++ {
- _ = tdigest.Add(rand.Float64(), 1)
+ _ = tdigest.Add(rand.Float64())
}

assertDifferenceSmallerThan(tdigest, 0.5, 0.02, t)
@@ -100,7 +100,7
}

for i := 0; i < len(data); i++ {
- _ = tdigest.Add(data[i], 1)
+ _ = tdigest.Add(data[i])

assertDifferenceFromQuantile(data[:i+1], tdigest, 0.001, 1.0+0.001*float64(i), t)
assertDifferenceFromQuantile(data[:i+1], tdigest, 0.01, 1.0+0.005*float64(i), t)
@@ -127,7 +127,7
sorted := make([]float64, 0, len(data))

for i := 0; i < len(data); i++ {
- _ = tdigest.Add(data[i], 1)
+ _ = tdigest.Add(data[i])
sorted = append(sorted, data[i])

// Estimated quantiles are all over the place for low counts, which is
@@ -157,9 +157,9
func TestSingletonInACrowd(t *testing.T) {
tdigest := uncheckedNew()
for i := 0; i < 10000; i++ {
- tdigest.Add(10, 1)
+ tdigest.Add(10)
}
- tdigest.Add(20, 1)
+ tdigest.Add(20)
tdigest.Compress()

for _, q := range []float64{0, 0.5, 0.8, 0.9, 0.99, 0.999} {
@@ -185,7 +185,7

data := []float64{0, 279, 2, 281}
for _, f := range data {
- tdigest.Add(f, 1)
+ tdigest.Add(f)
}

quantiles := []float64{0.01, 0.25, 0.5, 0.75, 0.999}
@@ -206,7 +206,7
// Create data slice with repeats matching weights we gave to tdigest
data := []float64{}
for i := 0; i < 100; i++ {
- _ = tdigest.Add(float64(i), uint32(i))
+ _ = tdigest.AddWeighted(float64(i), uint32(i))

for j := 0; j < i; j++ {
data = append(data, float64(i))
@@ -227,9 +227,9
func TestIntegers(t *testing.T) {
tdigest := uncheckedNew()

- _ = tdigest.Add(1, 1)
- _ = tdigest.Add(2, 1)
- _ = tdigest.Add(3, 1)
+ _ = tdigest.Add(1)
+ _ = tdigest.Add(2)
+ _ = tdigest.Add(3)

if tdigest.Quantile(0.5) != 2 {
t.Errorf("Expected p(0.5) = 2, Got %.2f instead", tdigest.Quantile(0.5))
@@ -238,7 +238,7
tdigest = uncheckedNew()

for _, i := range []float64{1, 2, 2, 2, 2, 2, 2, 2, 3} {
- _ = tdigest.Add(i, 1)
+ _ = tdigest.Add(i)
}

if tdigest.Quantile(0.5) != 2 {
@@ -289,8 +289,8
num := rand.Float64()

data[i] = num
- _ = dist.Add(num, 1)
- subs[i%numSubs].Add(num, 1)
+ _ = dist.Add(num)
+ subs[i%numSubs].Add(num)
}

dist.Compress()
@@ -334,7 +334,7
tdigest := uncheckedNew()

for i := 0; i < 1000; i++ {
- _ = tdigest.Add(rand.Float64(), 1)
+ _ = tdigest.Add(rand.Float64())
}

initialCount := tdigest.count
@@ -375,7 +375,7
tdigest := uncheckedNew(Compression(10))

for i := 0; i < 100; i++ {
- _ = tdigest.Add(float64(i), 1)
+ _ = tdigest.Add(float64(i))
}

// Iterate limited number.
@@ -409,7 +409,7

b.ResetTimer()
for n := 0; n < b.N; n++ {
- err := t.Add(data[n], 1)
+ err := t.AddWeighted(data[n], 1)
if err != nil {
b.Error(err)
}