124 lines
3.1 KiB
Go
124 lines
3.1 KiB
Go
// Copyright 2021 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package stats
|
|
|
|
import (
|
|
"container/heap"
|
|
"math"
|
|
)
|
|
|
|
type maxHeap []float64
|
|
|
|
func (h maxHeap) Len() int { return len(h) }
|
|
func (h maxHeap) Less(i, j int) bool { return h[i] > h[j] }
|
|
func (h maxHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
|
|
func (h *maxHeap) Push(x interface{}) {
|
|
*h = append(*h, x.(float64))
|
|
}
|
|
func (h *maxHeap) Pop() interface{} {
|
|
old := *h
|
|
n := len(old)
|
|
x := old[n-1]
|
|
*h = old[0 : n-1]
|
|
return x
|
|
}
|
|
|
|
type minHeap []float64
|
|
|
|
func (h minHeap) Len() int { return len(h) }
|
|
func (h minHeap) Less(i, j int) bool { return h[i] < h[j] }
|
|
func (h minHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
|
|
func (h *minHeap) Push(x interface{}) {
|
|
*h = append(*h, x.(float64))
|
|
}
|
|
func (h *minHeap) Pop() interface{} {
|
|
old := *h
|
|
n := len(old)
|
|
x := old[n-1]
|
|
*h = old[0 : n-1]
|
|
return x
|
|
}
|
|
|
|
// addToHeaps adds a value to the appropriate heap,and keeps their sizes close.
|
|
// This is a direct translation of the addToHeaps function described in the
|
|
// paper.
|
|
func addToHeaps(min *minHeap, max *maxHeap, x float64) {
|
|
// NB: There's an ambiguity in the paper here related to if-then/else
|
|
// evaluation. This structure seems to yield the desired results.
|
|
if min.Len() == 0 || x < (*min)[0] {
|
|
heap.Push(max, x)
|
|
} else {
|
|
heap.Push(min, x)
|
|
}
|
|
if min.Len() > max.Len()+1 {
|
|
heap.Push(max, heap.Pop(min))
|
|
} else if max.Len() > min.Len()+1 {
|
|
heap.Push(min, heap.Pop(max))
|
|
}
|
|
}
|
|
|
|
// getMedian finds the median of the min and max heaps.
|
|
// This is a direct translation of the getMedian function described in the
|
|
// paper.
|
|
func getMedian(min minHeap, max maxHeap) float64 {
|
|
if min.Len() > max.Len() {
|
|
return min[0]
|
|
}
|
|
if max.Len() > min.Len() {
|
|
return max[0]
|
|
}
|
|
return (max[0] + min[0]) / 2
|
|
}
|
|
|
|
// edmx implements the EDM-X algorithm.
|
|
// This algorithm runs in place, modifying the data.
|
|
func edmx(input []float64, delta int) int {
|
|
input = normalize(input)
|
|
var lmax, rmax maxHeap
|
|
var lmin, rmin minHeap
|
|
heap.Init(&lmax)
|
|
heap.Init(&rmax)
|
|
heap.Init(&lmin)
|
|
heap.Init(&rmin)
|
|
bestStat := math.Inf(-1)
|
|
bestLoc := -1
|
|
|
|
for i := 0; i < delta-1; i++ {
|
|
addToHeaps(&lmin, &lmax, input[i])
|
|
}
|
|
for i := delta; i < len(input)-delta+1; i++ {
|
|
addToHeaps(&lmin, &lmax, input[i-1])
|
|
ml := getMedian(lmin, lmax)
|
|
rmax, rmin = rmax[:0], rmin[:0]
|
|
for j := i; j < i+delta-1; j++ {
|
|
addToHeaps(&rmin, &rmax, input[j])
|
|
}
|
|
for j := i + delta; j < len(input)+1; j++ {
|
|
addToHeaps(&rmin, &rmax, input[j-1])
|
|
mr := getMedian(rmin, rmax)
|
|
stat := float64(i*(j-i)) / float64(j) * (ml - mr) * (ml - mr)
|
|
if stat > bestStat {
|
|
bestStat = stat
|
|
bestLoc = i
|
|
}
|
|
}
|
|
}
|
|
|
|
return bestLoc
|
|
}
|
|
|
|
// EDMX runs the EDM-X algorithm on a slice of floats.
|
|
func EDMX(input []float64, delta int) int {
|
|
// edmx modfies the input, don't do that.
|
|
c := make([]float64, len(input))
|
|
copy(c, input)
|
|
return edmx(c, delta)
|
|
}
|
|
|
|
// EDMXInt runs the EDM-X algorithm on a slice of integer datapoints.
|
|
func EDMXInt(input []int, delta int) int {
|
|
return edmx(toFloat(input), delta)
|
|
}
|