caio.co/de/go-tdigest

Make New() return an error instead of panic()ing

This patch now makes New() return a (*TDigest,error) tuple,
which makes deserialization safe without having to trap
for panic()s.

The only remaining panic() is for bad input in a public
function (`Quantile(float64)`). I'm keen on keeping it.
Id
c6dea31b738119c8399b925f6ccf533b1346966d
Author
Caio
Commit time
2017-10-25T14:36:06+02:00

Modified options.go

@@ -1,6 +1,8
package tdigest

-type tdigestOption func(*TDigest)
+import "errors"
+
+type tdigestOption func(*TDigest) error

// Compression sets the digest compression
//
@@ -13,13 +15,14
// (thus: better precision), which means a bigger serialization payload,
// higher memory footprint and slower addition of new samples.
//
-// Compression must be a value greater of equal to 1, will panic
-// otherwise.
+// Compression must be a value greater of equal to 1, will yield an
+// error otherwise.
func Compression(compression uint32) tdigestOption {
- if compression < 1 {
- panic("Compression should be >= 1")
- }
- return func(t *TDigest) {
+ return func(t *TDigest) error {
+ if compression < 1 {
+ return errors.New("Compression should be >= 1")
+ }
t.compression = float64(compression)
+ return nil
}
}

Modified options_test.go

@@ -1,9 +1,13
package tdigest

import "testing"

func TestDefaults(t *testing.T) {
- digest := New()
+ digest, err := New()
+
+ if err != nil {
+ t.Errorf("Creating a default TDigest should never error out. Got %s", err)
+ }

if digest.compression != 100 {
t.Errorf("The default compression should be 100")
@@ -11,9 +15,13
}

func TestCompression(t *testing.T) {
- if New(Compression(40)).compression != 40 {
+ digest, _ := New(Compression(40))
+ if digest.compression != 40 {
t.Errorf("The compression option should change the new digest compression")
}

- shouldPanic(func() { Compression(0) }, t, "Compression < 1 should panic")
+ digest, err := New(Compression(0))
+ if err == nil || digest != nil {
+ t.Errorf("Trying to create a digest with bad compression should give an error")
+ }
}

Modified serialization.go

@@ -76,7 +76,10
return nil, err
}

- t := New(Compression(uint32(compression)))
+ t, err := New(Compression(uint32(compression)))
+ if err != nil {
+ return nil, err
+ }

var numCentroids int32
err = binary.Read(buf, endianess, &numCentroids)

Modified serialization_test.go

@@ -26,7 +26,7
}

func TestSerialization(t *testing.T) {
- t1 := New()
+ t1, _ := New()
for i := 0; i < 100; i++ {
_ = t1.Add(rand.Float64(), 1)
}

Modified tdigest.go

@@ -24,18 +24,21
//
// By default the digest is constructed with a configuration that
// should be useful for most use-cases.
-func New(options ...tdigestOption) *TDigest {
+func New(options ...tdigestOption) (*TDigest, error) {
tdigest := &TDigest{
compression: 100,
count: 0,
}

for _, option := range options {
- option(tdigest)
+ err := option(tdigest)
+ if err != nil {
+ return nil, err
+ }
}

tdigest.summary = newSummary(estimateCapacity(tdigest.compression))
- return tdigest
+ return tdigest, nil
}

func _quantile(index float64, previousIndex float64, nextIndex float64, previousMean float64, nextMean float64) float64 {

Modified tdigest_test.go

@@ -11,6 +11,11
rand.Seed(0xDEADBEE)
}

+func uncheckedNew(options ...tdigestOption) *TDigest {
+ t, _ := New(options...)
+ return t
+}
+
// Test of tdigest internals and accuracy. Note no t.Parallel():
// during tests the default random seed is consistent, but varying
// concurrency scheduling mixes up the random values used in each test.
@@ -18,7 +23,7
// for all tests. So, no test concurrency here.

func TestTInternals(t *testing.T) {
- tdigest := New()
+ tdigest := uncheckedNew()

if !math.IsNaN(tdigest.Quantile(0.1)) {
t.Errorf("Quantile() on an empty digest should return NaN. Got: %.4f", tdigest.Quantile(0.1))
@@ -59,7 +64,7
}

func TestUniformDistribution(t *testing.T) {
- tdigest := New()
+ tdigest := uncheckedNew()

for i := 0; i < 100000; i++ {
_ = tdigest.Add(rand.Float64(), 1)
@@ -87,7 +92,7
}

func TestSequentialInsertion(t *testing.T) {
- tdigest := New()
+ tdigest := uncheckedNew()

data := make([]float64, 10000)
for i := 0; i < len(data); i++ {
@@ -110,7 +115,7
}

func TestNonSequentialInsertion(t *testing.T) {
- tdigest := New()
+ tdigest := uncheckedNew()

// Not quite a uniform distribution, but close.
data := make([]float64, 1000)
@@ -150,7 +155,7
}

func TestSingletonInACrowd(t *testing.T) {
- tdigest := New()
+ tdigest := uncheckedNew()
for i := 0; i < 10000; i++ {
tdigest.Add(10, 1)
}
@@ -176,7 +181,7
}

func TestRespectBounds(t *testing.T) {
- tdigest := New(Compression(10))
+ tdigest := uncheckedNew(Compression(10))

data := []float64{0, 279, 2, 281}
for _, f := range data {
@@ -196,7 +201,7
}

func TestWeights(t *testing.T) {
- tdigest := New(Compression(10))
+ tdigest := uncheckedNew(Compression(10))

// Create data slice with repeats matching weights we gave to tdigest
data := []float64{}
@@ -220,7 +225,7
}

func TestIntegers(t *testing.T) {
- tdigest := New()
+ tdigest := uncheckedNew()

_ = tdigest.Add(1, 1)
_ = tdigest.Add(2, 1)
@@ -230,7 +235,7
t.Errorf("Expected p(0.5) = 2, Got %.2f instead", tdigest.Quantile(0.5))
}

- tdigest = New()
+ tdigest = uncheckedNew()

for _, i := range []float64{1, 2, 2, 2, 2, 2, 2, 2, 3} {
_ = tdigest.Add(i, 1)
@@ -276,10 +281,10

subs := make([]*TDigest, numSubs)
for i := 0; i < numSubs; i++ {
- subs[i] = New()
+ subs[i] = uncheckedNew()
}

- dist := New()
+ dist := uncheckedNew()
for i := 0; i < numItems; i++ {
num := rand.Float64()

@@ -290,7 +295,7

dist.Compress()

- dist2 := New()
+ dist2 := uncheckedNew()
for i := 0; i < numSubs; i++ {
dist2.Merge(subs[i])
}
@@ -326,7 +331,7
}

func TestCompressDoesntChangeCount(t *testing.T) {
- tdigest := New()
+ tdigest := uncheckedNew()

for i := 0; i < 1000; i++ {
_ = tdigest.Add(rand.Float64(), 1)
@@ -355,7 +360,7
}

func TestPanic(t *testing.T) {
- tdigest := New()
+ tdigest := uncheckedNew()

shouldPanic(func() {
tdigest.Quantile(-42)
@@ -367,7 +372,7
}

func TestForEachCentroid(t *testing.T) {
- tdigest := New(Compression(10))
+ tdigest := uncheckedNew(Compression(10))

for i := 0; i < 100; i++ {
_ = tdigest.Add(float64(i), 1)
@@ -395,7 +400,7
}

func benchmarkAdd(compression uint32, b *testing.B) {
- t := New(Compression(compression))
+ t := uncheckedNew(Compression(compression))

data := make([]float64, b.N)
for n := 0; n < b.N; n++ {