Update to last common bleve (#3986)
This commit is contained in:
parent
1b7cd3d0b0
commit
917b9641ec
184 changed files with 39576 additions and 121 deletions
254
vendor/github.com/couchbase/vellum/fst.go
generated
vendored
Normal file
254
vendor/github.com/couchbase/vellum/fst.go
generated
vendored
Normal file
|
@ -0,0 +1,254 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package vellum
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"github.com/willf/bitset"
|
||||
)
|
||||
|
||||
// FST is an in-memory representation of a finite state transducer,
|
||||
// capable of returning the uint64 value associated with
|
||||
// each []byte key stored, as well as enumerating all of the keys
|
||||
// in order.
|
||||
type FST struct {
|
||||
f io.Closer
|
||||
ver int
|
||||
len int
|
||||
typ int
|
||||
data []byte
|
||||
decoder decoder
|
||||
}
|
||||
|
||||
func new(data []byte, f io.Closer) (rv *FST, err error) {
|
||||
rv = &FST{
|
||||
data: data,
|
||||
f: f,
|
||||
}
|
||||
|
||||
rv.ver, rv.typ, err = decodeHeader(data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rv.decoder, err = loadDecoder(rv.ver, rv.data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rv.len = rv.decoder.getLen()
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// Contains returns true if this FST contains the specified key.
|
||||
func (f *FST) Contains(val []byte) (bool, error) {
|
||||
_, exists, err := f.Get(val)
|
||||
return exists, err
|
||||
}
|
||||
|
||||
// Get returns the value associated with the key. NOTE: a value of zero
|
||||
// does not imply the key does not exist, you must consult the second
|
||||
// return value as well.
|
||||
func (f *FST) Get(input []byte) (uint64, bool, error) {
|
||||
return f.get(input, nil)
|
||||
}
|
||||
|
||||
func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) {
|
||||
var total uint64
|
||||
curr := f.decoder.getRoot()
|
||||
state, err := f.decoder.stateAt(curr, prealloc)
|
||||
if err != nil {
|
||||
return 0, false, err
|
||||
}
|
||||
for i := range input {
|
||||
_, curr, output := state.TransitionFor(input[i])
|
||||
if curr == noneAddr {
|
||||
return 0, false, nil
|
||||
}
|
||||
|
||||
state, err = f.decoder.stateAt(curr, state)
|
||||
if err != nil {
|
||||
return 0, false, err
|
||||
}
|
||||
|
||||
total += output
|
||||
}
|
||||
|
||||
if state.Final() {
|
||||
total += state.FinalOutput()
|
||||
return total, true, nil
|
||||
}
|
||||
return 0, false, nil
|
||||
}
|
||||
|
||||
// Version returns the encoding version used by this FST instance.
|
||||
func (f *FST) Version() int {
|
||||
return f.ver
|
||||
}
|
||||
|
||||
// Len returns the number of entries in this FST instance.
|
||||
func (f *FST) Len() int {
|
||||
return f.len
|
||||
}
|
||||
|
||||
// Type returns the type of this FST instance.
|
||||
func (f *FST) Type() int {
|
||||
return f.typ
|
||||
}
|
||||
|
||||
// Close will unmap any mmap'd data (if managed by vellum) and it will close
|
||||
// the backing file (if managed by vellum). You MUST call Close() for any
|
||||
// FST instance that is created.
|
||||
func (f *FST) Close() error {
|
||||
if f.f != nil {
|
||||
err := f.f.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
f.data = nil
|
||||
f.decoder = nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// Start returns the start state of this Automaton
|
||||
func (f *FST) Start() int {
|
||||
return f.decoder.getRoot()
|
||||
}
|
||||
|
||||
// IsMatch returns if this state is a matching state in this Automaton
|
||||
func (f *FST) IsMatch(addr int) bool {
|
||||
match, _ := f.IsMatchWithVal(addr)
|
||||
return match
|
||||
}
|
||||
|
||||
// CanMatch returns if this state can ever transition to a matching state
|
||||
// in this Automaton
|
||||
func (f *FST) CanMatch(addr int) bool {
|
||||
if addr == noneAddr {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// WillAlwaysMatch returns if from this state the Automaton will always
|
||||
// be in a matching state
|
||||
func (f *FST) WillAlwaysMatch(int) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// Accept returns the next state for this Automaton on input of byte b
|
||||
func (f *FST) Accept(addr int, b byte) int {
|
||||
next, _ := f.AcceptWithVal(addr, b)
|
||||
return next
|
||||
}
|
||||
|
||||
// IsMatchWithVal returns if this state is a matching state in this Automaton
|
||||
// and also returns the final output value for this state
|
||||
func (f *FST) IsMatchWithVal(addr int) (bool, uint64) {
|
||||
s, err := f.decoder.stateAt(addr, nil)
|
||||
if err != nil {
|
||||
return false, 0
|
||||
}
|
||||
return s.Final(), s.FinalOutput()
|
||||
}
|
||||
|
||||
// AcceptWithVal returns the next state for this Automaton on input of byte b
|
||||
// and also returns the output value for the transition
|
||||
func (f *FST) AcceptWithVal(addr int, b byte) (int, uint64) {
|
||||
s, err := f.decoder.stateAt(addr, nil)
|
||||
if err != nil {
|
||||
return noneAddr, 0
|
||||
}
|
||||
_, next, output := s.TransitionFor(b)
|
||||
return next, output
|
||||
}
|
||||
|
||||
// Iterator returns a new Iterator capable of enumerating the key/value pairs
|
||||
// between the provided startKeyInclusive and endKeyExclusive.
|
||||
func (f *FST) Iterator(startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
|
||||
return newIterator(f, startKeyInclusive, endKeyExclusive, nil)
|
||||
}
|
||||
|
||||
// Search returns a new Iterator capable of enumerating the key/value pairs
|
||||
// between the provided startKeyInclusive and endKeyExclusive that also
|
||||
// satisfy the provided automaton.
|
||||
func (f *FST) Search(aut Automaton, startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
|
||||
return newIterator(f, startKeyInclusive, endKeyExclusive, aut)
|
||||
}
|
||||
|
||||
// Debug is only intended for debug purposes, it simply asks the underlying
|
||||
// decoder visit each state, and pass it to the provided callback.
|
||||
func (f *FST) Debug(callback func(int, interface{}) error) error {
|
||||
|
||||
addr := f.decoder.getRoot()
|
||||
set := bitset.New(uint(addr))
|
||||
stack := addrStack{addr}
|
||||
|
||||
stateNumber := 0
|
||||
stack, addr = stack[:len(stack)-1], stack[len(stack)-1]
|
||||
for addr != noneAddr {
|
||||
if set.Test(uint(addr)) {
|
||||
stack, addr = stack.Pop()
|
||||
continue
|
||||
}
|
||||
set.Set(uint(addr))
|
||||
state, err := f.decoder.stateAt(addr, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = callback(stateNumber, state)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for i := 0; i < state.NumTransitions(); i++ {
|
||||
tchar := state.TransitionAt(i)
|
||||
_, dest, _ := state.TransitionFor(tchar)
|
||||
stack = append(stack, dest)
|
||||
}
|
||||
stateNumber++
|
||||
stack, addr = stack.Pop()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type addrStack []int
|
||||
|
||||
func (a addrStack) Pop() (addrStack, int) {
|
||||
l := len(a)
|
||||
if l < 1 {
|
||||
return a, noneAddr
|
||||
}
|
||||
return a[:l-1], a[l-1]
|
||||
}
|
||||
|
||||
// Reader() returns a Reader instance that a single thread may use to
|
||||
// retrieve data from the FST
|
||||
func (f *FST) Reader() (*Reader, error) {
|
||||
return &Reader{f: f}, nil
|
||||
}
|
||||
|
||||
// A Reader is meant for a single threaded use
|
||||
type Reader struct {
|
||||
f *FST
|
||||
prealloc fstStateV1
|
||||
}
|
||||
|
||||
func (r *Reader) Get(input []byte) (uint64, bool, error) {
|
||||
return r.f.get(input, &r.prealloc)
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue