caio.co/de/go-tdigest

Add new CDF(float64) public method

Id
921f89b03a40b4e5748f2ff0d115de06c292f889
Author
Caio
Commit time
2017-10-28T16:36:22+02:00

Modified README.md

@@ -55,6 +55,7
}

fmt.Printf("p(.5) = %.6f\n", t.Quantile(0.5))
+ fmt.Printf("CDF(Quantile(.5)) = %.6f\n", t.CDF(t.Quantile(0.5)))
}
```

Modified tdigest.go

@@ -277,6 +277,52
return err
}

+// CDF computes the fraction in which all samples are less than
+// or equal to the given value.
+func (t *TDigest) CDF(value float64) float64 {
+ if t.summary.Len() == 0 {
+ return math.NaN()
+ } else if t.summary.Len() == 1 {
+ if value < t.summary.Mean(0) {
+ return 0
+ } else {
+ return 1
+ }
+ }
+
+ // We have at least 2 centroids
+ left := (t.summary.Mean(1) - t.summary.Mean(0)) / 2
+ right := left
+ tot := 0.0
+
+ for i := 1; i < t.summary.Len()-1; i++ {
+ prevMean := t.summary.Mean(i - 1)
+ if value < prevMean+right {
+ v := (tot + float64(t.summary.Count(i-1))*interpolate(value, prevMean-left, prevMean+right)) / float64(t.Count())
+ if v > 0 {
+ return v
+ }
+ return 0
+ }
+
+ tot += float64(t.summary.Count(i - 1))
+ left = right
+ right = (t.summary.Mean(i+1) - t.summary.Mean(i)) / 2
+ }
+
+ // last centroid
+ lastMean := t.summary.Mean(t.summary.Len() - 1)
+ if value < lastMean+right {
+ lastCount := float64(t.summary.Count(t.summary.Len() - 1))
+ return (tot + lastCount*interpolate(value, lastMean-left, lastMean+right)) / 2
+ }
+ return 1
+}
+
// interpolate reports where x sits relative to the interval [x0, x1] as
// a ratio: 0 at x0, 1 at x1, and proportionally in between. Values
// outside the interval give ratios below 0 or above 1. No guard is
// applied for x0 == x1, so the floating-point division decides the
// result in that case.
func interpolate(x, x0, x1 float64) float64 {
	offset := x - x0
	width := x1 - x0
	return offset / width
}
+
// ForEachCentroid calls the specified function for each centroid.
//
// Iteration stops when the supplied function returns false, or when all

Modified tdigest_test.go

@@ -29,10 +29,22
t.Errorf("Quantile() on an empty digest should return NaN. Got: %.4f", tdigest.Quantile(0.1))
}

+ if !math.IsNaN(tdigest.CDF(1)) {
+ t.Errorf("CDF() on an empty digest should return NaN. Got: %.4f", tdigest.CDF(1))
+ }
+
_ = tdigest.Add(0.4)

if tdigest.Quantile(0.1) != 0.4 {
t.Errorf("Quantile() on a single-sample digest should return the samples's mean. Got %.4f", tdigest.Quantile(0.1))
+ }
+
+ if tdigest.CDF(0.3) != 0 {
+ t.Errorf("CDF(x) on digest with a single centroid should return 0 if x < mean")
+ }
+
+ if tdigest.CDF(0.5) != 1 {
+ t.Errorf("CDF(x) on digest with a single centroid should return 1 if x >= mean")
}

_ = tdigest.Add(0.5)
@@ -256,6 +268,19
}
}

// cdf computes the empirical CDF of data at x. It averages the number
// of samples strictly below x with the number at or below x, then
// divides by the sample count, so samples exactly equal to x count for
// one half each.
func cdf(x float64, data []float64) float64 {
	below, atOrBelow := 0, 0
	for _, sample := range data {
		switch {
		case sample < x:
			// Strictly smaller samples belong to both tallies.
			below++
			atOrBelow++
		case sample <= x:
			// Only reachable when sample == x.
			atOrBelow++
		}
	}
	return float64(below+atOrBelow) / 2.0 / float64(len(data))
}
+
func quantile(q float64, data []float64) float64 {
if len(data) == 0 {
return math.NaN()
@@ -325,6 +350,20
if math.Abs(e2) >= 0.015 {
t.Errorf("e2 >= 0.015: parts=%3d q=%.3f e1=%.4f e2=%.4f rel=%.3f real=%.3f",
numSubs, q, e1, e2, math.Abs(e2)/q, z-q)
+ }
+
+ z = cdf(q, data)
+ e1 = dist.CDF(q) - z
+ e2 = dist2.CDF(q) - z
+
+ if math.Abs(e2)/q > 0.3 {
+ t.Errorf("CDF e2 < 0.015: parts=%3d q=%.3f e1=%.4f e2=%.4f rel=%.3f",
+ numSubs, q, e1, e2, math.Abs(e2)/q)
+ }
+
+ if math.Abs(e2) >= 0.015 {
+ t.Errorf("CDF e2 < 0.015: parts=%3d q=%.3f e1=%.4f e2=%.4f rel=%.3f",
+ numSubs, q, e1, e2, math.Abs(e2)/q)
}
}
}