Make Add take only one parameter, introduce AddWeighted
This patch renames the previous Add(float64,uint32) to AddWeighted and introduces a method Add(float64), which is simply an alias for AddWeighted(float64,1).
- Id
- 15346ee3016d9fa00f85b08b1fb6bd46fbe59637
- Author
- Caio
- Commit time
- 2017-10-25T14:57:35+02:00
Modified README.md
var t = tdigest.New()
for i := 0; i < 10000; i++ {
- t.Add(rand.Float64(), 1)
+ t.Add(rand.Float64())
}
fmt.Printf("p(.5) = %.6f\n", t.Quantile(0.5))
Modified serialization.go
return nil, err
}
- err = t.Add(means[i], decUint)
+ err = t.AddWeighted(means[i], decUint)
if err != nil {
return nil, err
}
Modified serialization_test.go
func TestSerialization(t *testing.T) {
t1, _ := New()
for i := 0; i < 100; i++ {
- _ = t1.Add(rand.Float64(), 1)
+ _ = t1.Add(rand.Float64())
}
serialized, _ := t1.AsBytes()
Modified tdigest.go
return x1*w1/(w1+w2) + x2*w2/(w1+w2)
}
-// Add registers a new sample in the digest.
+// AddWeighted registers a new sample in the digest.
+//
// It's the main entry point for the digest and very likely the only
// method to be used for collecting samples. The count parameter is for
// when you are registering a sample that occurred multiple times - the
// most common value for this is 1.
-func (t *TDigest) Add(value float64, count uint32) (err error) {
+//
+// This will emit an error if `value` is NaN or if `count` is zero.
+func (t *TDigest) AddWeighted(value float64, count uint32) (err error) {
if count == 0 {
return fmt.Errorf("Illegal datapoint <value: %.4f, count: %d>", value, count)
return err
}
+// Add is an alias for AddWeighted(value, 1).
+// Read the documentation for AddWeighted for more details.
+func (t *TDigest) Add(value float64) error {
+ return t.AddWeighted(value, 1)
+}
+
// Compress tries to reduce the number of individual centroids stored
// in the digest.
// Compression trades off accuracy for performance and happens
shuffle(nodes)
for _, item := range nodes {
- err := t.Add(item.mean, item.count)
+ err := t.AddWeighted(item.mean, item.count)
if err != nil {
return err
}
shuffle(nodes)
for _, item := range nodes {
- err := t.Add(item.mean, item.count)
+ err := t.AddWeighted(item.mean, item.count)
if err != nil {
return err
}
Modified tdigest_test.go
t.Errorf("Quantile() on an empty digest should return NaN. Got: %.4f", tdigest.Quantile(0.1))
}
- _ = tdigest.Add(0.4, 1)
+ _ = tdigest.Add(0.4)
if tdigest.Quantile(0.1) != 0.4 {
t.Errorf("Quantile() on a single-sample digest should return the samples's mean. Got %.4f", tdigest.Quantile(0.1))
}
- _ = tdigest.Add(0.5, 1)
+ _ = tdigest.Add(0.5)
if tdigest.summary.Len() != 2 {
t.Errorf("Expected size 2, got %d", tdigest.summary.Len())
}
- err := tdigest.Add(0, 0)
+ err := tdigest.AddWeighted(0, 0)
if err == nil {
- t.Errorf("Expected Add() to error out with input (0,0)")
+ t.Errorf("Expected AddWeighted() to error out with input (0,0)")
}
}
tdigest := uncheckedNew()
for i := 0; i < 100000; i++ {
- _ = tdigest.Add(rand.Float64(), 1)
+ _ = tdigest.Add(rand.Float64())
}
assertDifferenceSmallerThan(tdigest, 0.5, 0.02, t)
}
for i := 0; i < len(data); i++ {
- _ = tdigest.Add(data[i], 1)
+ _ = tdigest.Add(data[i])
assertDifferenceFromQuantile(data[:i+1], tdigest, 0.001, 1.0+0.001*float64(i), t)
assertDifferenceFromQuantile(data[:i+1], tdigest, 0.01, 1.0+0.005*float64(i), t)
sorted := make([]float64, 0, len(data))
for i := 0; i < len(data); i++ {
- _ = tdigest.Add(data[i], 1)
+ _ = tdigest.Add(data[i])
sorted = append(sorted, data[i])
// Estimated quantiles are all over the place for low counts, which is
func TestSingletonInACrowd(t *testing.T) {
tdigest := uncheckedNew()
for i := 0; i < 10000; i++ {
- tdigest.Add(10, 1)
+ tdigest.Add(10)
}
- tdigest.Add(20, 1)
+ tdigest.Add(20)
tdigest.Compress()
for _, q := range []float64{0, 0.5, 0.8, 0.9, 0.99, 0.999} {
data := []float64{0, 279, 2, 281}
for _, f := range data {
- tdigest.Add(f, 1)
+ tdigest.Add(f)
}
quantiles := []float64{0.01, 0.25, 0.5, 0.75, 0.999}
// Create data slice with repeats matching weights we gave to tdigest
data := []float64{}
for i := 0; i < 100; i++ {
- _ = tdigest.Add(float64(i), uint32(i))
+ _ = tdigest.AddWeighted(float64(i), uint32(i))
for j := 0; j < i; j++ {
data = append(data, float64(i))
func TestIntegers(t *testing.T) {
tdigest := uncheckedNew()
- _ = tdigest.Add(1, 1)
- _ = tdigest.Add(2, 1)
- _ = tdigest.Add(3, 1)
+ _ = tdigest.Add(1)
+ _ = tdigest.Add(2)
+ _ = tdigest.Add(3)
if tdigest.Quantile(0.5) != 2 {
t.Errorf("Expected p(0.5) = 2, Got %.2f instead", tdigest.Quantile(0.5))
tdigest = uncheckedNew()
for _, i := range []float64{1, 2, 2, 2, 2, 2, 2, 2, 3} {
- _ = tdigest.Add(i, 1)
+ _ = tdigest.Add(i)
}
if tdigest.Quantile(0.5) != 2 {
num := rand.Float64()
data[i] = num
- _ = dist.Add(num, 1)
- subs[i%numSubs].Add(num, 1)
+ _ = dist.Add(num)
+ subs[i%numSubs].Add(num)
}
dist.Compress()
tdigest := uncheckedNew()
for i := 0; i < 1000; i++ {
- _ = tdigest.Add(rand.Float64(), 1)
+ _ = tdigest.Add(rand.Float64())
}
initialCount := tdigest.count
tdigest := uncheckedNew(Compression(10))
for i := 0; i < 100; i++ {
- _ = tdigest.Add(float64(i), 1)
+ _ = tdigest.Add(float64(i))
}
// Iterate limited number.
b.ResetTimer()
for n := 0; n < b.N; n++ {
- err := t.Add(data[n], 1)
+ err := t.AddWeighted(data[n], 1)
if err != nil {
b.Error(err)
}