Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 21 additions & 11 deletions fst_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,8 @@ type Iterator interface {
// lexicographic order. Iterators should be constructed with the FSTIterator
// method on the parent FST structure.
type FSTIterator struct {
f *FST
aut Automaton
maxQ int
f *FST
aut Automaton

cache fstIteratorCache

Expand All @@ -71,7 +70,8 @@ type FSTIterator struct {
valsStack []uint64
autStatesStack []int

nextStart []byte
nextStart []byte
lastOffset int
}

type fstIteratorCache struct {
Expand Down Expand Up @@ -200,7 +200,9 @@ func (i *FSTIterator) prepare(key []byte) error {
continue
}

i.maxQ = maxQ
i.nextStart = append(i.nextStart[:0], i.keysStack...)
i.lastOffset = maxQ

return nil
}

Expand All @@ -224,29 +226,33 @@ func (i *FSTIterator) Current() ([]byte, uint64) {
// or the advancement goes beyond the configured endKeyExclusive, then
// ErrIteratorDone is returned.
func (i *FSTIterator) Next() error {
	// Resume from the offset recorded on the iterator. The negative node
	// budget disables the yield check in nextStep, so the traversal runs
	// until it finds a key, fails, or is done.
	return i.next(i.lastOffset, -1)
}

// Step advances the iterator from the last recorded offset. When maxNodes is
// positive, the traversal stops once maxNodes loop iterations have been
// performed and returns ErrIteratorYield; the offset reached is stored so a
// later call can pick up from there. It returns the number of iterations
// performed together with the error reported by the traversal.
func (i *FSTIterator) Step(maxNodes int) (int, error) {
	return i.nextStep(i.lastOffset, maxNodes)
}

// next runs nextStep from lastOffset with the given node budget and reports
// only its error, discarding the iteration count.
func (i *FSTIterator) next(lastOffset int, maxNodes int) error {
	if _, stepErr := i.nextStep(lastOffset, maxNodes); stepErr != nil {
		return stepErr
	}
	return nil
}

func (i *FSTIterator) nextStep(lastOffset int, maxNodes int) (int, error) {
// remember where we started
// prepareForNext records the current key stack as the starting point for the
// following traversal and resets the stored offset. nextStep only accepts
// keys that compare greater than nextStart, and with lastOffset at -1 the
// next traversal begins at offset 0.
func (i *FSTIterator) prepareForNext() {
	// Reuse nextStart's backing array instead of allocating a fresh slice.
	i.nextStart = append(i.nextStart[:0], i.keysStack...)
	i.lastOffset = -1
}

func (i *FSTIterator) nextStep(lastOffset int, maxNodes int) (int, error) {
// remember where we started
nextOffset := lastOffset + 1
iterations := 0

OUTER:
for true {
if maxNodes > 0 && iterations == maxNodes {
i.lastOffset = nextOffset - 1
return iterations, ErrIteratorYield
}

Expand All @@ -257,6 +263,7 @@ OUTER:
if curr.Final() && i.aut.IsMatch(autCurr) &&
bytes.Compare(i.keysStack, i.nextStart) > 0 {
// in final state greater than start key
i.prepareForNext()
return iterations, nil
}

Expand All @@ -276,6 +283,7 @@ OUTER:
// push onto stack
next, err := i.stateGet(nextAddr)
if err != nil {
i.prepareForNext()
return iterations, err
}

Expand All @@ -288,6 +296,7 @@ OUTER:
// check to see if new keystack might have gone too far
if i.endKeyExclusive != nil &&
bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 {
i.prepareForNext()
return iterations, ErrIteratorDone
}

Expand Down Expand Up @@ -316,6 +325,7 @@ OUTER:
i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1]
}

i.prepareForNext()
return iterations, ErrIteratorDone
}

Expand All @@ -336,7 +346,7 @@ func (i *FSTIterator) seek(key []byte) error {
if !i.statesStack[len(i.statesStack)-1].Final() ||
!i.aut.IsMatch(i.autStatesStack[len(i.autStatesStack)-1]) ||
bytes.Compare(i.keysStack, key) < 0 {
return i.next(i.maxQ, -1)
return i.next(i.lastOffset, -1)
}

return nil
Expand Down
227 changes: 221 additions & 6 deletions fst_iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ func TestRegexpSearch(t *testing.T) {
}
}

func TestIteratorRegexpLazySearchNext(t *testing.T) {
func TestIterator_Regexp_LazySearch(t *testing.T) {
var buf bytes.Buffer
b, err := New(&buf, nil)
if err != nil {
Expand Down Expand Up @@ -705,24 +705,239 @@ func TestIteratorRegexpLazySearchNext(t *testing.T) {
t.Fatalf("error expected lazily initialized search")
}

_, err = itr.Step(1)
niterations, err := itr.Step(1)
if niterations != 1 {
t.Fatalf("error expected only one node iteration")
}

if err != ErrIteratorYield {
t.Fatalf("error expected yield but received: %v", err)
}

_, err = itr.Step(20)
for {
niterations, err = itr.Step(1)
if niterations != 1 {
t.Fatalf("error expected only one node iteration")
}

if err != nil {
if err == ErrIteratorYield {
continue
} else if err == ErrIteratorDone {
break
}

t.Fatalf("unexpected iterator step error: %v", err)
}

key, val := itr.Current()
got[string(key)] = val
}

if !reflect.DeepEqual(want, got) {
t.Errorf("expected %v, got: %v", want, got)
}
}

// TestIterator_Regexp_LazySearch_Next builds an FST from smallSample, runs a
// lazy regexp search for `.*ur.*`, and checks that the first Next call lands
// on the expected entry while the second reports ErrIteratorDone.
func TestIterator_Regexp_LazySearch_Next(t *testing.T) {
	var buf bytes.Buffer
	b, err := New(&buf, nil)
	if err != nil {
		t.Fatalf("error creating builder: %v", err)
	}

	err = insertStringMap(b, smallSample)
	if err != nil {
		t.Fatalf("error building: %v", err)
	}

	err = b.Close()
	if err != nil {
		t.Fatalf("error closing: %v", err)
	}

	fst, err := Load(buf.Bytes())
	if err != nil {
		t.Fatalf("error loading set: %v", err)
	}

	r, err := regexp.New(`.*ur.*`)
	if err != nil {
		t.Fatalf("error building regexp automaton: %v", err)
	}

	want := map[string]uint64{
		"thurs": 5,
	}

	got := map[string]uint64{}
	itr, err := fst.LazySearch(r, nil, nil)
	if err != nil {
		t.Fatalf("error creating fst iterator: %v", err)
	}

	// The first Next must position the iterator on a matching key.
	err = itr.Next()
	if err != nil {
		t.Fatalf("error iterating next: %v", err)
	}

	key, val := itr.Current()
	got[string(key)] = val

	// The second Next is expected to exhaust the iterator.
	err = itr.Next()
	if err != ErrIteratorDone {
		t.Fatalf("error expected done: %v", err)
	}

	if !reflect.DeepEqual(want, got) {
		t.Errorf("expected %v, got: %v", want, got)
	}
}

// TestIterator_Regexp_LazySearch_Step drives a lazy regexp search for `.*u.*`
// one node at a time via Step(1), collecting every key the iterator stops on,
// and compares the collected entries with the expected set.
func TestIterator_Regexp_LazySearch_Step(t *testing.T) {
	var out bytes.Buffer
	builder, err := New(&out, nil)
	if err != nil {
		t.Fatalf("error creating builder: %v", err)
	}
	if err = insertStringMap(builder, smallSample); err != nil {
		t.Fatalf("error building: %v", err)
	}
	if err = builder.Close(); err != nil {
		t.Fatalf("error closing: %v", err)
	}

	loaded, err := Load(out.Bytes())
	if err != nil {
		t.Fatalf("error loading set: %v", err)
	}

	re, err := regexp.New(`.*u.*`)
	if err != nil {
		t.Fatalf("error building regexp automaton: %v", err)
	}

	expected := map[string]uint64{
		"thurs": 5,
		"tues":  3,
	}
	collected := make(map[string]uint64)

	it, err := loaded.LazySearch(re, nil, nil)
	if err != nil {
		t.Fatalf("error creating fst iterator: %v", err)
	}

	// The very first single-node step is expected to yield without a result.
	steps, err := it.Step(1)
	if steps != 1 {
		t.Fatalf("error expected only one node iteration")
	}
	if err != ErrIteratorYield {
		t.Fatalf("error expected yield but received: %v", err)
	}

	// Keep stepping one node at a time until the iterator reports done.
	for finished := false; !finished; {
		steps, err = it.Step(1)
		if steps != 1 {
			t.Fatalf("error expected only one node iteration")
		}

		switch err {
		case nil:
			// Stopped on a matching key: record it.
			k, v := it.Current()
			collected[string(k)] = v
		case ErrIteratorYield:
			// Budget exhausted before a match; step again.
		case ErrIteratorDone:
			finished = true
		default:
			t.Fatalf("unexpected iterator step error: %v", err)
		}
	}

	if !reflect.DeepEqual(expected, collected) {
		t.Errorf("expected %v, got: %v", expected, collected)
	}
}

// TestIterator_Regexp_LazySearch_LargeStep runs a lazy regexp search for
// `.*u.*` with a node budget of 50 per Step call. It expects each of the
// wanted entries to be reached by one Step call without error, and the call
// after the last entry to report ErrIteratorDone.
func TestIterator_Regexp_LazySearch_LargeStep(t *testing.T) {
	const budget = 50

	var buf bytes.Buffer
	b, err := New(&buf, nil)
	if err != nil {
		t.Fatalf("error creating builder: %v", err)
	}

	err = insertStringMap(b, smallSample)
	if err != nil {
		t.Fatalf("error building: %v", err)
	}

	err = b.Close()
	if err != nil {
		t.Fatalf("error closing: %v", err)
	}

	fst, err := Load(buf.Bytes())
	if err != nil {
		t.Fatalf("error loading set: %v", err)
	}

	r, err := regexp.New(`.*u.*`)
	if err != nil {
		t.Fatalf("error building regexp automaton: %v", err)
	}

	want := map[string]uint64{
		"tues":  3,
		"thurs": 5,
	}

	got := map[string]uint64{}
	itr, err := fst.LazySearch(r, nil, nil)
	if err != nil {
		t.Fatalf("error creating fst iterator: %v", err)
	}

	var niterations int

	// One Step call per expected entry: each must stay within the budget
	// and stop on a key without error.
	for n := 0; n < len(want); n++ {
		niterations, err = itr.Step(budget)
		if niterations > budget {
			t.Fatalf("error expected at most %d node iterations, got %d", budget, niterations)
		}

		if err != nil {
			t.Fatalf("error iterating: %v", err)
		}

		key, val := itr.Current()
		got[string(key)] = val
	}

	// The call after the last expected entry must exhaust the iterator; its
	// own error is checked rather than that of a further Step call.
	niterations, err = itr.Step(budget)
	if niterations > budget {
		t.Fatalf("error expected at most %d node iterations, got %d", budget, niterations)
	}

	if err != ErrIteratorDone {
		t.Fatalf("error iterating, expected done: %v", err)
	}

	if !reflect.DeepEqual(want, got) {
		t.Errorf("expected %v, got: %v", want, got)
	}
}