// badTypeError is the panic value used by ModifySlice when its first
// argument is not a pointer to a slice.
type badTypeError struct {
	// typ is the dynamic type of the offending argument.
	// It is nil when the argument was an untyped nil.
	typ reflect.Type
}

// Error implements the error interface.
func (e badTypeError) Error() string {
	return fmt.Sprintf("uniq.ModifySlice's first argument must have type *[]T, got %v", e.typ)
}

// ModifySlice removes adjacent duplicate elements from the slice pointed to by sliceptr.
// It adjusts the length of the slice appropriately and zeros the tail.
// eq reports whether (*sliceptr)[i] and (*sliceptr)[j] are equal.
// ModifySlice does O(len(*sliceptr)) operations.
//
// ModifySlice panics if sliceptr does not have type *[]T (an untyped nil
// included) or if sliceptr is a nil pointer.
func ModifySlice(sliceptr interface{}, eq func(i, j int) bool) {
	// Validate the static shape through reflect.TypeOf: unlike
	// reflect.ValueOf(x).Type(), it does not panic for an untyped nil,
	// so every bad argument is reported with the same badTypeError.
	typ := reflect.TypeOf(sliceptr)
	if typ == nil || typ.Kind() != reflect.Ptr || typ.Elem().Kind() != reflect.Slice {
		panic(badTypeError{typ})
	}
	rvp := reflect.ValueOf(sliceptr)
	if rvp.IsNil() {
		// The type is right but there is no slice to modify.
		panic(fmt.Errorf("uniq.ModifySlice's first argument must not be a nil pointer"))
	}
	rv := rvp.Elem()

	// dst is the index of the last element kept so far; eq(dst, i) therefore
	// compares the candidate against the most recent unique element.
	length := rv.Len()
	dst := 0
	for i := 1; i < length; i++ {
		if eq(dst, i) {
			continue
		}
		dst++
		if dst != i {
			// slice[dst] = slice[i]
			// (skipped while nothing has been dropped yet: copying an
			// element onto itself is wasted reflection work)
			rv.Index(dst).Set(rv.Index(i))
		}
	}

	end := dst + 1
	if end >= length {
		// Nothing was removed (this also covers the empty slice).
		return
	}

	// for i := range slice[end:] {
	//	slice[end+i] = 0/nil/{}
	// }
	// Zeroing the tail releases any references held by dropped elements.
	zero := reflect.Zero(typ.Elem().Elem())
	for i := end; i < length; i++ {
		rv.Index(i).Set(zero)
	}

	// slice = slice[:end]
	rv.SetLen(end)
}
+ +package uniq_test + +import ( + "reflect" + "strconv" + "testing" + + "tailscale.com/util/uniq" +) + +func TestModifySlice(t *testing.T) { + tests := []struct { + in []int + want []int + }{ + {in: []int{0, 1, 2}, want: []int{0, 1, 2}}, + {in: []int{0, 1, 2, 2}, want: []int{0, 1, 2}}, + {in: []int{0, 0, 1, 2}, want: []int{0, 1, 2}}, + {in: []int{0, 1, 0, 2}, want: []int{0, 1, 0, 2}}, + {in: []int{0}, want: []int{0}}, + {in: []int{0, 0}, want: []int{0}}, + {in: []int{}, want: []int{}}, + } + + for _, test := range tests { + in := make([]int, len(test.in)) + copy(in, test.in) + uniq.ModifySlice(&test.in, func(i, j int) bool { return test.in[i] == test.in[j] }) + if !reflect.DeepEqual(test.in, test.want) { + t.Errorf("uniq.Slice(%v) = %v, want %v", in, test.in, test.want) + } + start := len(test.in) + test.in = test.in[:cap(test.in)] + for i := start; i < len(in); i++ { + if test.in[i] != 0 { + t.Errorf("uniq.Slice(%v): non-0 in tail of %v at index %v", in, test.in, i) + } + } + } +} + +func Benchmark(b *testing.B) { + benches := []struct { + name string + reset func(s []byte) + }{ + {name: "AllDups", + reset: func(s []byte) { + for i := range s { + s[i] = '*' + } + }, + }, + {name: "NoDups", + reset: func(s []byte) { + for i := range s { + s[i] = byte(i) + } + }, + }, + } + + for _, bb := range benches { + b.Run(bb.name, func(b *testing.B) { + for size := 1; size <= 4096; size *= 16 { + b.Run(strconv.Itoa(size), func(b *testing.B) { + benchmark(b, 64, bb.reset) + }) + } + }) + } +} + +func benchmark(b *testing.B, size int64, reset func(s []byte)) { + b.ReportAllocs() + b.SetBytes(size) + s := make([]byte, size) + b.ResetTimer() + for i := 0; i < b.N; i++ { + s = s[:size] + reset(s) + uniq.ModifySlice(&s, func(i, j int) bool { return s[i] == s[j] }) + } +}