|
1 | 1 | package pattern
|
2 | 2 |
|
3 | 3 | import (
|
| 4 | + "math/rand" |
| 5 | + "strconv" |
| 6 | + "strings" |
4 | 7 | "testing"
|
5 | 8 |
|
6 | 9 | "github.com/stretchr/testify/assert"
|
7 | 10 | )
|
8 | 11 |
|
9 |
| -func TestPrefixFunction(t *testing.T) { |
| 12 | +func testFindSequence(a *assert.Assertions, cnt int, needles []string, haystack string) { |
| 13 | + var needlesB [][]byte |
| 14 | + for _, needle := range needles { |
| 15 | + needlesB = append(needlesB, []byte(needle)) |
| 16 | + } |
| 17 | + res := findSequence([]byte(haystack), needlesB) |
| 18 | + a.Equal(cnt, res, "wrong total number of matches") |
| 19 | +} |
| 20 | + |
| 21 | +func TestFindSequence(t *testing.T) { |
10 | 22 | a := assert.New(t)
|
11 |
| - var str *substring |
12 | 23 |
|
13 |
| - str = newSubstringPattern([]byte("aabaaab")) |
14 |
| - a.Equal([]int32{0, 1, 0, 1, 2, 2, 3}, str.prefFunc, "wrong prefix function") |
| 24 | + testFindSequence(a, 2, []string{"abra", "ada"}, "abracadabra") |
| 25 | + testFindSequence(a, 2, []string{"aba", "aba"}, "abacaba") |
| 26 | + testFindSequence(a, 2, []string{"aba", "caba"}, "abacaba") |
| 27 | + testFindSequence(a, 1, []string{"abacaba"}, "abacaba") |
| 28 | + testFindSequence(a, 0, []string{"abacaba"}, "aba") |
| 29 | + testFindSequence(a, 1, []string{"aba"}, "abacaba") |
| 30 | + testFindSequence(a, 0, []string{"dad"}, "abacaba") |
| 31 | + testFindSequence(a, 1, []string{"aba", "dad"}, "abacaba") |
| 32 | + testFindSequence(a, 0, []string{"dad", "aba"}, "abacaba") |
15 | 33 |
|
16 |
| - str = newSubstringPattern([]byte("abacaba")) |
17 |
| - a.Equal([]int32{0, 0, 1, 0, 1, 2, 3}, str.prefFunc, "wrong prefix function") |
| 34 | + testFindSequence(a, 2, []string{"needle", "haystack"}, "can you find a needle in a haystack?") |
| 35 | + testFindSequence(a, 2, []string{"k8s_pod", "_prod"}, "\"k8s_pod\":{\"main_prod\"}") |
18 | 36 |
|
19 |
| - str = newSubstringPattern([]byte("abracadabra")) |
20 |
| - a.Equal([]int32{0, 0, 0, 1, 0, 1, 0, 1, 2, 3, 4}, str.prefFunc, "wrong prefix function") |
| 37 | + testFindSequence(a, 2, []string{"!13", "37#"}, "woah!13@37#test") |
21 | 38 |
|
22 |
| - str = newSubstringPattern([]byte("abacdadba")) |
23 |
| - a.Equal([]int32{0, 0, 1, 0, 0, 1, 0, 0, 1}, str.prefFunc, "wrong prefix function") |
| 39 | + testFindSequence(a, 1, []string{"abc"}, strings.Repeat("ab", 1024)+"c") |
| 40 | +} |
24 | 41 |
|
25 |
| - str = newSubstringPattern([]byte("!@#\"123{}();'!@#")) |
26 |
| - a.Equal([]int32{1, 2, 3}, str.prefFunc[len(str.prefFunc)-3:], "wrong prefix function") |
| 42 | +func BenchmarkFindSequence_Deterministic(b *testing.B) { |
| 43 | + type testCase struct { |
| 44 | + haystack []byte |
| 45 | + needles [][]byte |
| 46 | + } |
27 | 47 |
|
28 |
| - str = newSubstringPattern([]byte("template#find template in templates text")) |
29 |
| - a.Equal(int32(8), str.prefFunc[21], "wrong prefix function") |
30 |
| - a.Equal(int32(8), str.prefFunc[33], "wrong prefix function") |
31 |
| - a.Equal(int32(0), str.prefFunc[34], "wrong prefix function") |
32 |
| -} |
| 48 | + type namedTestCase struct { |
| 49 | + name string |
| 50 | + cases []testCase |
| 51 | + } |
33 | 52 |
|
34 |
| -func testSubstring(a *assert.Assertions, cnt int, substr, text string) { |
35 |
| - subs := newSubstringPattern([]byte(substr)) |
36 |
| - str := []byte(text) |
37 |
| - total := 0 |
38 |
| - for { |
39 |
| - to := findSubstring(str, subs) |
40 |
| - if to == -1 { |
41 |
| - break |
42 |
| - } |
43 |
| - total++ |
44 |
| - a.Equal(string(subs.val), string(str[to-len(subs.val):to]), "substring doesn't match") |
45 |
| - str = str[to:] |
| 53 | + testCases := []namedTestCase{ |
| 54 | + { |
| 55 | + name: "regular-cases", |
| 56 | + cases: []testCase{ |
| 57 | + {bb("Hello, world!"), [][]byte{bb("orl")}}, |
| 58 | + {bb("some-k8s-service"), [][]byte{bb("k8s")}}, |
| 59 | + }, |
| 60 | + }, |
| 61 | + { |
| 62 | + name: "corner-cases", |
| 63 | + cases: []testCase{ |
| 64 | + {bb(strings.Repeat("ab", 32) + "c"), [][]byte{bb("abc")}}, |
| 65 | + {bb(strings.Repeat("ab", 64) + "c"), [][]byte{bb("abc")}}, |
| 66 | + {bb(strings.Repeat("ab", 1024) + "c"), [][]byte{bb("abc")}}, |
| 67 | + {bb(strings.Repeat("ab", 16384) + "c"), [][]byte{bb("abc")}}, |
| 68 | + }, |
| 69 | + }, |
46 | 70 | }
|
47 |
| - a.Equal(cnt, total, "wrong total number of matches") |
48 |
| -} |
49 | 71 |
|
50 |
| -func testSequence(a *assert.Assertions, cnt int, substr []string, text string) { |
51 |
| - subs := make([]*substring, len(substr)) |
52 |
| - for i, s := range substr { |
53 |
| - subs[i] = newSubstringPattern([]byte(s)) |
| 72 | + for _, tc := range testCases { |
| 73 | + for i, c := range tc.cases { |
| 74 | + b.Run(tc.name+"-"+strconv.Itoa(i), func(b *testing.B) { |
| 75 | + for b.Loop() { |
| 76 | + findSequence([]byte(c.haystack), c.needles) |
| 77 | + } |
| 78 | + }) |
| 79 | + } |
54 | 80 | }
|
55 |
| - res := findSequence([]byte(text), subs) |
56 |
| - a.Equal(cnt, res, "wrong total number of matches") |
57 | 81 | }
|
58 | 82 |
|
59 |
| -func TestSubstring(t *testing.T) { |
60 |
| - a := assert.New(t) |
| 83 | +func BenchmarkFindSequence_Random(b *testing.B) { |
| 84 | + sizes := []struct { |
| 85 | + name string |
| 86 | + haystackSize int |
| 87 | + needleSize int |
| 88 | + needleCount int |
| 89 | + }{ |
| 90 | + {"tiny", 64, 3, 2}, |
| 91 | + {"small", 256, 10, 3}, |
| 92 | + {"medium", 1024, 50, 5}, |
| 93 | + {"large", 16384, 200, 10}, |
| 94 | + {"extra-large", 1048576, 1024, 100}, |
| 95 | + } |
61 | 96 |
|
62 |
| - testSubstring(a, 2, "aba", "abacaba") |
63 |
| - testSubstring(a, 0, "abc", "abacaba") |
64 |
| - testSubstring(a, 1, "abacaba", "abacaba") |
65 |
| - testSubstring(a, 0, "abacaba", "aba") |
66 |
| - testSubstring(a, 0, "longtext", "a") |
67 |
| - testSubstring(a, 4, "a", "abacaba") |
68 |
| - testSubstring(a, 0, "d", "abacaba") |
69 |
| - testSubstring(a, 1, "aca", "abacaba") |
70 |
| - testSubstring(a, 3, "aab", "aabaaabaab") |
71 |
| - testSubstring(a, 2, "aa", "aaaaa") // actually there are 4, but for our purposes we want this behaviour |
72 |
| - testSubstring(a, 1, "abaab", "abaabaab") // actually there are 2 |
73 |
| - |
74 |
| - testSubstring(a, 1, "needle", "can you find a needle in a haystack?") |
75 |
| - testSubstring(a, 1, "haystack", "can you find a needle in a haystack?") |
76 |
| - testSubstring(a, 0, "elephant", "can you find a needle in a haystack?") |
77 |
| - |
78 |
| - testSubstring(a, 1, "@", "symbols@test") |
79 |
| - testSubstring(a, 1, "!1337#", "woah!1337#test") |
| 97 | + for _, size := range sizes { |
| 98 | + b.Run(size.name, func(b *testing.B) { |
| 99 | + haystack, needles := generateTestData( |
| 100 | + size.haystackSize, size.needleSize, size.needleCount, 256, |
| 101 | + ) |
| 102 | + b.ResetTimer() |
| 103 | + for b.Loop() { |
| 104 | + findSequence(haystack, needles) |
| 105 | + b.SetBytes(int64(len(haystack))) |
| 106 | + } |
| 107 | + }) |
| 108 | + } |
80 | 109 | }
|
81 | 110 |
|
82 |
| -func TestSequence(t *testing.T) { |
83 |
| - a := assert.New(t) |
| 111 | +func generateTestData(haystackSize, needleSize, needleCount, charset int) ([]byte, [][]byte) { |
| 112 | + haystack := generateRandomBytes(haystackSize, charset) |
84 | 113 |
|
85 |
| - testSequence(a, 2, []string{"abra", "ada"}, "abracadabra") |
86 |
| - testSequence(a, 2, []string{"aba", "aba"}, "abacaba") |
87 |
| - testSequence(a, 2, []string{"aba", "caba"}, "abacaba") |
88 |
| - testSequence(a, 1, []string{"abacaba"}, "abacaba") |
89 |
| - testSequence(a, 0, []string{"abacaba"}, "aba") |
90 |
| - testSequence(a, 1, []string{"aba"}, "abacaba") |
91 |
| - testSequence(a, 0, []string{"dad"}, "abacaba") |
92 |
| - testSequence(a, 1, []string{"aba", "dad"}, "abacaba") |
93 |
| - testSequence(a, 0, []string{"dad", "aba"}, "abacaba") |
| 114 | + needles := make([][]byte, needleCount) |
| 115 | + for i := range needleCount { |
| 116 | + pattern := generateRandomBytes(needleSize, charset) |
| 117 | + pos := rand.Intn(len(haystack) - needleSize) |
| 118 | + copy(haystack[pos:], pattern) |
| 119 | + needles[i] = pattern |
| 120 | + } |
| 121 | + |
| 122 | + return haystack, needles |
| 123 | +} |
94 | 124 |
|
95 |
| - testSequence(a, 2, []string{"needle", "haystack"}, "can you find a needle in a haystack?") |
96 |
| - testSequence(a, 2, []string{"k8s_pod", "_prod"}, "\"k8s_pod\":{\"main_prod\"}") |
// generateRandomBytes returns size bytes, each the result of
// byte(rand.Intn(charset)). rand.Intn panics when charset is not positive.
func generateRandomBytes(size, charset int) []byte {
	out := make([]byte, 0, size)
	for len(out) < size {
		out = append(out, byte(rand.Intn(charset)))
	}
	return out
}
97 | 132 |
|
98 |
| - testSequence(a, 2, []string{"!13", "37#"}, "woah!13@37#test") |
// bb is shorthand for converting a string into a new byte slice; it keeps
// the benchmark tables compact.
func bb(s string) []byte {
	converted := []byte(s)
	return converted
}
|
0 commit comments