package topk import ( "sort" ) // http://www.cs.ucsb.edu/research/tech_reports/reports/2005-23.pdf type Element struct { Value string Count int } type Samples []*Element func (sm Samples) Len() int { return len(sm) } func (sm Samples) Less(i, j int) bool { return sm[i].Count < sm[j].Count } func (sm Samples) Swap(i, j int) { sm[i], sm[j] = sm[j], sm[i] } type Stream struct { k int mon map[string]*Element // the minimum Element min *Element } func New(k int) *Stream { s := new(Stream) s.k = k s.mon = make(map[string]*Element) s.min = &Element{} // Track k+1 so that less frequenet items contended for that spot, // resulting in k being more accurate. return s } func (s *Stream) Insert(x string) { s.insert(&Element{x, 1}) } func (s *Stream) Merge(sm Samples) { for _, e := range sm { s.insert(e) } } func (s *Stream) insert(in *Element) { e := s.mon[in.Value] if e != nil { e.Count++ } else { if len(s.mon) < s.k+1 { e = &Element{in.Value, in.Count} s.mon[in.Value] = e } else { e = s.min delete(s.mon, e.Value) e.Value = in.Value e.Count += in.Count s.min = e } } if e.Count < s.min.Count { s.min = e } } func (s *Stream) Query() Samples { var sm Samples for _, e := range s.mon { sm = append(sm, e) } sort.Sort(sort.Reverse(sm)) if len(sm) < s.k { return sm } return sm[:s.k] }