-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathchar.go
More file actions
90 lines (82 loc) · 1.61 KB
/
char.go
File metadata and controls
90 lines (82 loc) · 1.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
package shingle
import (
"fmt"
"unicode/utf8"
"github.com/koykov/byteseq"
)
// char is the character-based Shingler implementation: it cuts text into
// overlapping windows of k characters (runes).
type char[T byteseq.Q] struct {
	// base is embedded; this file relies on its cset and cbuf fields and on
	// its init, clean and reset methods.
	base[T]
	// k is the shingle width in characters (runes). Zero disables splitting:
	// the whole cleaned text is then emitted as a single shingle.
	k uint64
	// w is a reusable scratch list holding the byte offset at which each rune
	// of the cleaned text starts.
	w []uint64
}
// NewChar builds a character-based shingler.
//
// k is the shingle width in characters; cleanSet is stored as the base's cset
// (presumably the set of characters stripped during cleaning — confirm in
// base.clean). The embedded base is initialised via init before the value is
// returned.
func NewChar[T byteseq.Q](k uint64, cleanSet string) Shingler[T] {
	var sh char[T]
	sh.base.cset = cleanSet
	sh.k = k
	sh.init()
	return &sh
}
// Shingle splits s into character shingles and returns them in a freshly
// allocated slice. It is a convenience wrapper around AppendShingle.
//
// The result is pre-sized for len(s)-k+1 entries: overlapping windows of k
// units over n units yield n-k+1 results, and the byte length of s is an upper
// bound on its rune count. This is only a capacity hint (append still grows
// the slice if needed). The arithmetic is done in uint64 so that a huge k
// (above MaxInt64) cannot turn into a negative capacity and make the
// allocation panic.
func (sh *char[T]) Shingle(s T) []T {
	// k == 0 or k >= len(s) always produces exactly one shingle.
	bcap := 1
	if n := uint64(len(s)); sh.k > 0 && n > sh.k {
		// n-k < n <= MaxInt, so the conversion back to int cannot overflow.
		bcap = int(n-sh.k) + 1
	}
	buf := make([]T, 0, bcap)
	return sh.AppendShingle(buf, s)
}
// AppendShingle passes s through sh.clean, cuts the result into overlapping
// shingles of k characters (runes) and appends them to dst, returning the
// extended slice.
//
// If k is zero, or the cleaned text is at most k bytes long, the whole cleaned
// text is appended as a single shingle. Otherwise one shingle is emitted per
// window position; if the text has k runes or fewer, that is again the whole
// text. Each shingle is a sub-slice of the cleaned text converted with
// byteseq.B2Q.
func (sh *char[T]) AppendShingle(dst []T, s T) []T {
	b := sh.clean(s, false)
	sc := byteseq.B2Q[T](b)
	if sh.k == 0 || uint64(len(b)) <= sh.k {
		return append(dst, sc)
	}

	// Rebuild the rune-offset index from scratch. The window below always
	// starts at index 0, so offsets left over from a previous call (when the
	// caller did not Reset in between) would otherwise be used for this text.
	sh.w = sh.w[:0]
	bl := uint64(len(b))
	_ = b[bl-1] // bounds-check hint; b is non-empty here since len(b) > k >= 1
	for i := uint64(0); i < bl; {
		_, l := utf8.DecodeRune(b[i:])
		sh.w = append(sh.w, i)
		i += uint64(l)
	}

	// Slide a window of k runes: sh.w[lo] is the byte offset of its first rune
	// and sh.w[hi] the byte offset just past its last one.
	lo, hi := uint64(0), sh.k
	_, _ = sh.w[len(sh.w)-1], sc[len(sc)-1] // bounds-check hints
	for hi < uint64(len(sh.w)) {
		if sh.w[hi] == 0 {
			// NOTE(review): leftover diagnostic. With the index rebuilt on
			// every call, offsets are strictly increasing and hi >= 1, so this
			// cannot fire. It is kept only because it is this file's sole use
			// of fmt; delete it together with the import.
			fmt.Println(string(sh.base.cbuf))
			fmt.Println(lo, hi)
			fmt.Printf("%+v\n", sh.w)
		}
		dst = append(dst, sc[sh.w[lo]:sh.w[hi]])
		lo++
		hi++
	}
	// The last window runs to the end of the text, so it has no closing offset
	// in sh.w.
	dst = append(dst, sc[sh.w[lo]:])
	return dst
}
// Each passes s through sh.clean, cuts the result into overlapping shingles of
// k characters (runes) and calls fn once per shingle, in order of position.
//
// If k is zero, or the cleaned text is at most k bytes long, fn is called
// exactly once with the whole cleaned text. Each shingle is a sub-slice of the
// cleaned text converted with byteseq.B2Q.
func (sh *char[T]) Each(s T, fn func(T)) {
	b := sh.clean(s, false)
	sc := byteseq.B2Q[T](b)
	if sh.k == 0 || uint64(len(b)) <= sh.k {
		fn(sc)
		return
	}

	// Rebuild the rune-offset index from scratch. The window below always
	// starts at index 0, so offsets left over from a previous call (when the
	// caller did not Reset in between) would otherwise be used for this text.
	sh.w = sh.w[:0]
	bl := uint64(len(b))
	_ = b[bl-1] // bounds-check hint; b is non-empty here since len(b) > k >= 1
	for i := uint64(0); i < bl; {
		_, l := utf8.DecodeRune(b[i:])
		sh.w = append(sh.w, i)
		i += uint64(l)
	}

	// Slide a window of k runes: sh.w[lo] is the byte offset of its first rune
	// and sh.w[hi] the byte offset just past its last one.
	lo, hi := uint64(0), sh.k
	_, _ = sh.w[len(sh.w)-1], sc[len(sc)-1] // bounds-check hints
	for hi < uint64(len(sh.w)) {
		fn(sc[sh.w[lo]:sh.w[hi]])
		lo++
		hi++
	}
	// The last window runs to the end of the text, so it has no closing offset
	// in sh.w.
	fn(sc[sh.w[lo]:])
}
// Reset prepares the shingler for reuse: the rune-offset scratch list is
// truncated to zero length (its backing array is kept) and the embedded base
// is reset.
func (sh *char[T]) Reset() {
	sh.w = sh.w[:0]
	sh.base.reset()
}