Update bleve dependency to latest master revision (#6100)
* update bleve to master b17287a86f6cac923a5d886e10618df994eeb54b6724eac2e3b8dde89cfbe3a2 * remove unused pkg from dep file * change bleve from master to recent revision
This commit is contained in:
parent
11e316654e
commit
a380cfd8e0
161 changed files with 9911 additions and 4233 deletions
2
vendor/github.com/couchbase/vellum/automaton.go
generated
vendored
2
vendor/github.com/couchbase/vellum/automaton.go
generated
vendored
|
@ -81,5 +81,5 @@ func (m *AlwaysMatch) Accept(int, byte) int {
|
|||
return 0
|
||||
}
|
||||
|
||||
// creating an alwaysMatchAutomaton to avoid unnecesary repeated allocations.
|
||||
// creating an alwaysMatchAutomaton to avoid unnecessary repeated allocations.
|
||||
var alwaysMatchAutomaton = &AlwaysMatch{}
|
||||
|
|
159
vendor/github.com/couchbase/vellum/builder.go
generated
vendored
159
vendor/github.com/couchbase/vellum/builder.go
generated
vendored
|
@ -38,8 +38,7 @@ type Builder struct {
|
|||
encoder encoder
|
||||
opts *BuilderOpts
|
||||
|
||||
builderNodePool builderNodePool
|
||||
transitionPool transitionPool
|
||||
builderNodePool *builderNodePool
|
||||
}
|
||||
|
||||
const noneAddr = 1
|
||||
|
@ -51,12 +50,14 @@ func newBuilder(w io.Writer, opts *BuilderOpts) (*Builder, error) {
|
|||
if opts == nil {
|
||||
opts = defaultBuilderOpts
|
||||
}
|
||||
builderNodePool := &builderNodePool{}
|
||||
rv := &Builder{
|
||||
registry: newRegistry(opts.RegistryTableSize, opts.RegistryMRUSize),
|
||||
opts: opts,
|
||||
lastAddr: noneAddr,
|
||||
unfinished: newUnfinishedNodes(builderNodePool),
|
||||
registry: newRegistry(builderNodePool, opts.RegistryTableSize, opts.RegistryMRUSize),
|
||||
builderNodePool: builderNodePool,
|
||||
opts: opts,
|
||||
lastAddr: noneAddr,
|
||||
}
|
||||
rv.unfinished = newUnfinishedNodes(&rv.builderNodePool)
|
||||
|
||||
var err error
|
||||
rv.encoder, err = loadEncoder(opts.Encoder, w)
|
||||
|
@ -71,9 +72,7 @@ func newBuilder(w io.Writer, opts *BuilderOpts) (*Builder, error) {
|
|||
}
|
||||
|
||||
func (b *Builder) Reset(w io.Writer) error {
|
||||
b.transitionPool.reset()
|
||||
b.builderNodePool.reset()
|
||||
b.unfinished.Reset(&b.builderNodePool)
|
||||
b.unfinished.Reset()
|
||||
b.registry.Reset()
|
||||
b.lastAddr = noneAddr
|
||||
b.encoder.reset(w)
|
||||
|
@ -107,7 +106,7 @@ func (b *Builder) Insert(key []byte, val uint64) error {
|
|||
return err
|
||||
}
|
||||
b.copyLastKey(key)
|
||||
b.unfinished.addSuffix(key[prefixLen:], out, &b.builderNodePool)
|
||||
b.unfinished.addSuffix(key[prefixLen:], out)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
@ -142,7 +141,7 @@ func (b *Builder) compileFrom(iState int) error {
|
|||
if addr == noneAddr {
|
||||
node = b.unfinished.popEmpty()
|
||||
} else {
|
||||
node = b.unfinished.popFreeze(addr, &b.transitionPool)
|
||||
node = b.unfinished.popFreeze(addr)
|
||||
}
|
||||
var err error
|
||||
addr, err = b.compile(node)
|
||||
|
@ -150,7 +149,7 @@ func (b *Builder) compileFrom(iState int) error {
|
|||
return nil
|
||||
}
|
||||
}
|
||||
b.unfinished.topLastFreeze(addr, &b.transitionPool)
|
||||
b.unfinished.topLastFreeze(addr)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -183,22 +182,25 @@ type unfinishedNodes struct {
|
|||
// this means calls get() and pushXYZ() must be paired,
|
||||
// as well as calls put() and popXYZ()
|
||||
cache []builderNodeUnfinished
|
||||
|
||||
builderNodePool *builderNodePool
|
||||
}
|
||||
|
||||
func (u *unfinishedNodes) Reset(p *builderNodePool) {
|
||||
func (u *unfinishedNodes) Reset() {
|
||||
u.stack = u.stack[:0]
|
||||
for i := 0; i < len(u.cache); i++ {
|
||||
u.cache[i] = builderNodeUnfinished{}
|
||||
}
|
||||
u.pushEmpty(false, p)
|
||||
u.pushEmpty(false)
|
||||
}
|
||||
|
||||
func newUnfinishedNodes(p *builderNodePool) *unfinishedNodes {
|
||||
rv := &unfinishedNodes{
|
||||
stack: make([]*builderNodeUnfinished, 0, 64),
|
||||
cache: make([]builderNodeUnfinished, 64),
|
||||
stack: make([]*builderNodeUnfinished, 0, 64),
|
||||
cache: make([]builderNodeUnfinished, 64),
|
||||
builderNodePool: p,
|
||||
}
|
||||
rv.pushEmpty(false, p)
|
||||
rv.pushEmpty(false)
|
||||
return rv
|
||||
}
|
||||
|
||||
|
@ -249,9 +251,9 @@ func (u *unfinishedNodes) findCommonPrefixAndSetOutput(key []byte,
|
|||
return i, out
|
||||
}
|
||||
|
||||
func (u *unfinishedNodes) pushEmpty(final bool, p *builderNodePool) {
|
||||
func (u *unfinishedNodes) pushEmpty(final bool) {
|
||||
next := u.get()
|
||||
next.node = p.alloc()
|
||||
next.node = u.builderNodePool.Get()
|
||||
next.node.final = final
|
||||
u.stack = append(u.stack, next)
|
||||
}
|
||||
|
@ -265,11 +267,11 @@ func (u *unfinishedNodes) popRoot() *builderNode {
|
|||
return rv
|
||||
}
|
||||
|
||||
func (u *unfinishedNodes) popFreeze(addr int, tp *transitionPool) *builderNode {
|
||||
func (u *unfinishedNodes) popFreeze(addr int) *builderNode {
|
||||
l := len(u.stack)
|
||||
var unfinished *builderNodeUnfinished
|
||||
u.stack, unfinished = u.stack[:l-1], u.stack[l-1]
|
||||
unfinished.lastCompiled(addr, tp)
|
||||
unfinished.lastCompiled(addr)
|
||||
rv := unfinished.node
|
||||
u.put()
|
||||
return rv
|
||||
|
@ -289,12 +291,12 @@ func (u *unfinishedNodes) setRootOutput(out uint64) {
|
|||
u.stack[0].node.finalOutput = out
|
||||
}
|
||||
|
||||
func (u *unfinishedNodes) topLastFreeze(addr int, tp *transitionPool) {
|
||||
func (u *unfinishedNodes) topLastFreeze(addr int) {
|
||||
last := len(u.stack) - 1
|
||||
u.stack[last].lastCompiled(addr, tp)
|
||||
u.stack[last].lastCompiled(addr)
|
||||
}
|
||||
|
||||
func (u *unfinishedNodes) addSuffix(bs []byte, out uint64, p *builderNodePool) {
|
||||
func (u *unfinishedNodes) addSuffix(bs []byte, out uint64) {
|
||||
if len(bs) == 0 {
|
||||
return
|
||||
}
|
||||
|
@ -304,13 +306,13 @@ func (u *unfinishedNodes) addSuffix(bs []byte, out uint64, p *builderNodePool) {
|
|||
u.stack[last].lastOut = out
|
||||
for _, b := range bs[1:] {
|
||||
next := u.get()
|
||||
next.node = p.alloc()
|
||||
next.node = u.builderNodePool.Get()
|
||||
next.hasLastT = true
|
||||
next.lastIn = b
|
||||
next.lastOut = 0
|
||||
u.stack = append(u.stack, next)
|
||||
}
|
||||
u.pushEmpty(true, p)
|
||||
u.pushEmpty(true)
|
||||
}
|
||||
|
||||
type builderNodeUnfinished struct {
|
||||
|
@ -320,17 +322,17 @@ type builderNodeUnfinished struct {
|
|||
hasLastT bool
|
||||
}
|
||||
|
||||
func (b *builderNodeUnfinished) lastCompiled(addr int, tp *transitionPool) {
|
||||
func (b *builderNodeUnfinished) lastCompiled(addr int) {
|
||||
if b.hasLastT {
|
||||
transIn := b.lastIn
|
||||
transOut := b.lastOut
|
||||
b.hasLastT = false
|
||||
b.lastOut = 0
|
||||
trans := tp.alloc()
|
||||
trans.in = transIn
|
||||
trans.out = transOut
|
||||
trans.addr = addr
|
||||
b.node.trans = append(b.node.trans, trans)
|
||||
b.node.trans = append(b.node.trans, transition{
|
||||
in: transIn,
|
||||
out: transOut,
|
||||
addr: addr,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -338,8 +340,8 @@ func (b *builderNodeUnfinished) addOutputPrefix(prefix uint64) {
|
|||
if b.node.final {
|
||||
b.node.finalOutput = outputCat(prefix, b.node.finalOutput)
|
||||
}
|
||||
for _, t := range b.node.trans {
|
||||
t.out = outputCat(prefix, t.out)
|
||||
for i := range b.node.trans {
|
||||
b.node.trans[i].out = outputCat(prefix, b.node.trans[i].out)
|
||||
}
|
||||
if b.hasLastT {
|
||||
b.lastOut = outputCat(prefix, b.lastOut)
|
||||
|
@ -348,8 +350,22 @@ func (b *builderNodeUnfinished) addOutputPrefix(prefix uint64) {
|
|||
|
||||
type builderNode struct {
|
||||
finalOutput uint64
|
||||
trans []*transition
|
||||
trans []transition
|
||||
final bool
|
||||
|
||||
// intrusive linked list
|
||||
next *builderNode
|
||||
}
|
||||
|
||||
// reset resets the receiver builderNode to a re-usable state.
|
||||
func (n *builderNode) reset() {
|
||||
n.final = false
|
||||
n.finalOutput = 0
|
||||
for i := range n.trans {
|
||||
n.trans[i] = emptyTransition
|
||||
}
|
||||
n.trans = n.trans[:0]
|
||||
n.next = nil
|
||||
}
|
||||
|
||||
func (n *builderNode) equiv(o *builderNode) bool {
|
||||
|
@ -377,6 +393,8 @@ func (n *builderNode) equiv(o *builderNode) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
var emptyTransition = transition{}
|
||||
|
||||
type transition struct {
|
||||
out uint64
|
||||
addr int
|
||||
|
@ -398,56 +416,37 @@ func outputCat(l, r uint64) uint64 {
|
|||
return l + r
|
||||
}
|
||||
|
||||
// the next builderNode to alloc() will be all[nextOuter][nextInner]
|
||||
// builderNodePool pools builderNodes using a singly linked list.
|
||||
//
|
||||
// NB: builderNode lifecylce is described by the following interactions -
|
||||
// +------------------------+ +----------------------+
|
||||
// | Unfinished Nodes | Transfer once | Registry |
|
||||
// |(not frozen builderNode)|-----builderNode is ------->| (frozen builderNode) |
|
||||
// +------------------------+ marked frozen +----------------------+
|
||||
// ^ |
|
||||
// | |
|
||||
// | Put()
|
||||
// | Get() on +-------------------+ when
|
||||
// +-new char--------| builderNode Pool |<-----------evicted
|
||||
// +-------------------+
|
||||
type builderNodePool struct {
|
||||
all [][]builderNode
|
||||
nextOuter int
|
||||
nextInner int
|
||||
head *builderNode
|
||||
}
|
||||
|
||||
func (p *builderNodePool) reset() {
|
||||
p.nextOuter = 0
|
||||
p.nextInner = 0
|
||||
}
|
||||
|
||||
func (p *builderNodePool) alloc() *builderNode {
|
||||
if p.nextOuter >= len(p.all) {
|
||||
p.all = append(p.all, make([]builderNode, 256))
|
||||
func (p *builderNodePool) Get() *builderNode {
|
||||
if p.head == nil {
|
||||
return &builderNode{}
|
||||
}
|
||||
rv := &p.all[p.nextOuter][p.nextInner]
|
||||
p.nextInner += 1
|
||||
if p.nextInner >= len(p.all[p.nextOuter]) {
|
||||
p.nextOuter += 1
|
||||
p.nextInner = 0
|
||||
}
|
||||
rv.finalOutput = 0
|
||||
rv.trans = rv.trans[:0]
|
||||
rv.final = false
|
||||
return rv
|
||||
head := p.head
|
||||
p.head = p.head.next
|
||||
return head
|
||||
}
|
||||
|
||||
// the next transition to alloc() will be all[nextOuter][nextInner]
|
||||
type transitionPool struct {
|
||||
all [][]transition
|
||||
nextOuter int
|
||||
nextInner int
|
||||
}
|
||||
|
||||
func (p *transitionPool) reset() {
|
||||
p.nextOuter = 0
|
||||
p.nextInner = 0
|
||||
}
|
||||
|
||||
func (p *transitionPool) alloc() *transition {
|
||||
if p.nextOuter >= len(p.all) {
|
||||
p.all = append(p.all, make([]transition, 256))
|
||||
func (p *builderNodePool) Put(v *builderNode) {
|
||||
if v == nil {
|
||||
return
|
||||
}
|
||||
rv := &p.all[p.nextOuter][p.nextInner]
|
||||
p.nextInner += 1
|
||||
if p.nextInner >= len(p.all[p.nextOuter]) {
|
||||
p.nextOuter += 1
|
||||
p.nextInner = 0
|
||||
}
|
||||
*rv = transition{}
|
||||
return rv
|
||||
v.reset()
|
||||
v.next = p.head
|
||||
p.head = v
|
||||
}
|
||||
|
|
4
vendor/github.com/couchbase/vellum/decoder_v1.go
generated
vendored
4
vendor/github.com/couchbase/vellum/decoder_v1.go
generated
vendored
|
@ -29,8 +29,6 @@ func init() {
|
|||
|
||||
type decoderV1 struct {
|
||||
data []byte
|
||||
root uint64
|
||||
len uint64
|
||||
}
|
||||
|
||||
func newDecoderV1(data []byte) *decoderV1 {
|
||||
|
@ -219,7 +217,7 @@ func (f *fstStateV1) Final() bool {
|
|||
}
|
||||
|
||||
func (f *fstStateV1) FinalOutput() uint64 {
|
||||
if f.numTrans > 0 && f.final && f.outSize > 0 {
|
||||
if f.final && f.outSize > 0 {
|
||||
return readPackedUint(f.data[f.outFinal : f.outFinal+f.outSize])
|
||||
}
|
||||
return 0
|
||||
|
|
50
vendor/github.com/couchbase/vellum/fst.go
generated
vendored
50
vendor/github.com/couchbase/vellum/fst.go
generated
vendored
|
@ -74,8 +74,8 @@ func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) {
|
|||
if err != nil {
|
||||
return 0, false, err
|
||||
}
|
||||
for i := range input {
|
||||
_, curr, output := state.TransitionFor(input[i])
|
||||
for _, c := range input {
|
||||
_, curr, output := state.TransitionFor(c)
|
||||
if curr == noneAddr {
|
||||
return 0, false, nil
|
||||
}
|
||||
|
@ -243,6 +243,52 @@ func (f *FST) Reader() (*Reader, error) {
|
|||
return &Reader{f: f}, nil
|
||||
}
|
||||
|
||||
func (f *FST) GetMinKey() ([]byte, error) {
|
||||
var rv []byte
|
||||
|
||||
curr := f.decoder.getRoot()
|
||||
state, err := f.decoder.stateAt(curr, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for !state.Final() {
|
||||
nextTrans := state.TransitionAt(0)
|
||||
_, curr, _ = state.TransitionFor(nextTrans)
|
||||
state, err = f.decoder.stateAt(curr, state)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rv = append(rv, nextTrans)
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (f *FST) GetMaxKey() ([]byte, error) {
|
||||
var rv []byte
|
||||
|
||||
curr := f.decoder.getRoot()
|
||||
state, err := f.decoder.stateAt(curr, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for state.NumTransitions() > 0 {
|
||||
nextTrans := state.TransitionAt(state.NumTransitions() - 1)
|
||||
_, curr, _ = state.TransitionFor(nextTrans)
|
||||
state, err = f.decoder.stateAt(curr, state)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rv = append(rv, nextTrans)
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
// A Reader is meant for a single threaded use
|
||||
type Reader struct {
|
||||
f *FST
|
||||
|
|
133
vendor/github.com/couchbase/vellum/fst_iterator.go
generated
vendored
133
vendor/github.com/couchbase/vellum/fst_iterator.go
generated
vendored
|
@ -76,7 +76,8 @@ func newIterator(f *FST, startKeyInclusive, endKeyExclusive []byte,
|
|||
|
||||
// Reset resets the Iterator' internal state to allow for iterator
|
||||
// reuse (e.g. pooling).
|
||||
func (i *FSTIterator) Reset(f *FST, startKeyInclusive, endKeyExclusive []byte, aut Automaton) error {
|
||||
func (i *FSTIterator) Reset(f *FST,
|
||||
startKeyInclusive, endKeyExclusive []byte, aut Automaton) error {
|
||||
if aut == nil {
|
||||
aut = alwaysMatchAutomaton
|
||||
}
|
||||
|
@ -91,14 +92,14 @@ func (i *FSTIterator) Reset(f *FST, startKeyInclusive, endKeyExclusive []byte, a
|
|||
|
||||
// pointTo attempts to point us to the specified location
|
||||
func (i *FSTIterator) pointTo(key []byte) error {
|
||||
|
||||
// tried to seek before start
|
||||
if bytes.Compare(key, i.startKeyInclusive) < 0 {
|
||||
key = i.startKeyInclusive
|
||||
}
|
||||
|
||||
// trid to see past end
|
||||
if i.endKeyExclusive != nil && bytes.Compare(key, i.endKeyExclusive) > 0 {
|
||||
// tried to see past end
|
||||
if i.endKeyExclusive != nil &&
|
||||
bytes.Compare(key, i.endKeyExclusive) > 0 {
|
||||
key = i.endKeyExclusive
|
||||
}
|
||||
|
||||
|
@ -121,21 +122,23 @@ func (i *FSTIterator) pointTo(key []byte) error {
|
|||
i.statesStack = append(i.statesStack, root)
|
||||
i.autStatesStack = append(i.autStatesStack, autStart)
|
||||
for j := 0; j < len(key); j++ {
|
||||
keyJ := key[j]
|
||||
curr := i.statesStack[len(i.statesStack)-1]
|
||||
autCurr := i.autStatesStack[len(i.autStatesStack)-1]
|
||||
|
||||
pos, nextAddr, nextVal := curr.TransitionFor(key[j])
|
||||
pos, nextAddr, nextVal := curr.TransitionFor(keyJ)
|
||||
if nextAddr == noneAddr {
|
||||
// needed transition doesn't exist
|
||||
// find last trans before the one we needed
|
||||
for q := 0; q < curr.NumTransitions(); q++ {
|
||||
if curr.TransitionAt(q) < key[j] {
|
||||
for q := curr.NumTransitions() - 1; q >= 0; q-- {
|
||||
if curr.TransitionAt(q) < keyJ {
|
||||
maxQ = q
|
||||
break
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
autNext := i.aut.Accept(autCurr, key[j])
|
||||
autNext := i.aut.Accept(autCurr, keyJ)
|
||||
|
||||
next, err := i.f.decoder.stateAt(nextAddr, nil)
|
||||
if err != nil {
|
||||
|
@ -143,14 +146,16 @@ func (i *FSTIterator) pointTo(key []byte) error {
|
|||
}
|
||||
|
||||
i.statesStack = append(i.statesStack, next)
|
||||
i.keysStack = append(i.keysStack, key[j])
|
||||
i.keysStack = append(i.keysStack, keyJ)
|
||||
i.keysPosStack = append(i.keysPosStack, pos)
|
||||
i.valsStack = append(i.valsStack, nextVal)
|
||||
i.autStatesStack = append(i.autStatesStack, autNext)
|
||||
continue
|
||||
}
|
||||
|
||||
if !i.statesStack[len(i.statesStack)-1].Final() || !i.aut.IsMatch(i.autStatesStack[len(i.autStatesStack)-1]) || bytes.Compare(i.keysStack, key) < 0 {
|
||||
if !i.statesStack[len(i.statesStack)-1].Final() ||
|
||||
!i.aut.IsMatch(i.autStatesStack[len(i.autStatesStack)-1]) ||
|
||||
bytes.Compare(i.keysStack, key) < 0 {
|
||||
return i.next(maxQ)
|
||||
}
|
||||
|
||||
|
@ -181,15 +186,12 @@ func (i *FSTIterator) Next() error {
|
|||
}
|
||||
|
||||
func (i *FSTIterator) next(lastOffset int) error {
|
||||
|
||||
// remember where we started
|
||||
if cap(i.nextStart) < len(i.keysStack) {
|
||||
i.nextStart = make([]byte, len(i.keysStack))
|
||||
} else {
|
||||
i.nextStart = i.nextStart[0:len(i.keysStack)]
|
||||
}
|
||||
copy(i.nextStart, i.keysStack)
|
||||
i.nextStart = append(i.nextStart[:0], i.keysStack...)
|
||||
|
||||
nextOffset := lastOffset + 1
|
||||
|
||||
OUTER:
|
||||
for true {
|
||||
curr := i.statesStack[len(i.statesStack)-1]
|
||||
autCurr := i.autStatesStack[len(i.autStatesStack)-1]
|
||||
|
@ -200,58 +202,62 @@ func (i *FSTIterator) next(lastOffset int) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
nextOffset := lastOffset + 1
|
||||
if nextOffset < curr.NumTransitions() {
|
||||
numTrans := curr.NumTransitions()
|
||||
|
||||
INNER:
|
||||
for nextOffset < numTrans {
|
||||
t := curr.TransitionAt(nextOffset)
|
||||
autNext := i.aut.Accept(autCurr, t)
|
||||
if i.aut.CanMatch(autNext) {
|
||||
pos, nextAddr, v := curr.TransitionFor(t)
|
||||
|
||||
// the next slot in the statesStack might have an
|
||||
// fstState instance that we can reuse
|
||||
var nextPrealloc fstState
|
||||
if len(i.statesStack) < cap(i.statesStack) {
|
||||
nextPrealloc = i.statesStack[0:cap(i.statesStack)][len(i.statesStack)]
|
||||
}
|
||||
|
||||
// push onto stack
|
||||
next, err := i.f.decoder.stateAt(nextAddr, nextPrealloc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
i.statesStack = append(i.statesStack, next)
|
||||
i.keysStack = append(i.keysStack, t)
|
||||
i.keysPosStack = append(i.keysPosStack, pos)
|
||||
i.valsStack = append(i.valsStack, v)
|
||||
i.autStatesStack = append(i.autStatesStack, autNext)
|
||||
lastOffset = -1
|
||||
|
||||
// check to see if new keystack might have gone too far
|
||||
if i.endKeyExclusive != nil && bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 {
|
||||
return ErrIteratorDone
|
||||
}
|
||||
} else {
|
||||
lastOffset = nextOffset
|
||||
if !i.aut.CanMatch(autNext) {
|
||||
nextOffset += 1
|
||||
continue INNER
|
||||
}
|
||||
|
||||
continue
|
||||
pos, nextAddr, v := curr.TransitionFor(t)
|
||||
|
||||
// the next slot in the statesStack might have an
|
||||
// fstState instance that we can reuse
|
||||
var nextPrealloc fstState
|
||||
if len(i.statesStack) < cap(i.statesStack) {
|
||||
nextPrealloc = i.statesStack[0:cap(i.statesStack)][len(i.statesStack)]
|
||||
}
|
||||
|
||||
// push onto stack
|
||||
next, err := i.f.decoder.stateAt(nextAddr, nextPrealloc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
i.statesStack = append(i.statesStack, next)
|
||||
i.keysStack = append(i.keysStack, t)
|
||||
i.keysPosStack = append(i.keysPosStack, pos)
|
||||
i.valsStack = append(i.valsStack, v)
|
||||
i.autStatesStack = append(i.autStatesStack, autNext)
|
||||
|
||||
// check to see if new keystack might have gone too far
|
||||
if i.endKeyExclusive != nil &&
|
||||
bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 {
|
||||
return ErrIteratorDone
|
||||
}
|
||||
|
||||
nextOffset = 0
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
if len(i.statesStack) > 1 {
|
||||
// no transitions, and still room to pop
|
||||
i.statesStack = i.statesStack[:len(i.statesStack)-1]
|
||||
i.keysStack = i.keysStack[:len(i.keysStack)-1]
|
||||
lastOffset = i.keysPosStack[len(i.keysPosStack)-1]
|
||||
|
||||
i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-1]
|
||||
i.valsStack = i.valsStack[:len(i.valsStack)-1]
|
||||
i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1]
|
||||
continue
|
||||
} else {
|
||||
if len(i.statesStack) <= 1 {
|
||||
// stack len is 1 (root), can't go back further, we're done
|
||||
break
|
||||
}
|
||||
|
||||
// no transitions, and still room to pop
|
||||
i.statesStack = i.statesStack[:len(i.statesStack)-1]
|
||||
i.keysStack = i.keysStack[:len(i.keysStack)-1]
|
||||
|
||||
nextOffset = i.keysPosStack[len(i.keysPosStack)-1] + 1
|
||||
|
||||
i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-1]
|
||||
i.valsStack = i.valsStack[:len(i.valsStack)-1]
|
||||
i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1]
|
||||
}
|
||||
|
||||
return ErrIteratorDone
|
||||
|
@ -262,15 +268,12 @@ func (i *FSTIterator) next(lastOffset int) error {
|
|||
// seek operation would go past the last key, or outside the configured
|
||||
// startKeyInclusive/endKeyExclusive then ErrIteratorDone is returned.
|
||||
func (i *FSTIterator) Seek(key []byte) error {
|
||||
err := i.pointTo(key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
return i.pointTo(key)
|
||||
}
|
||||
|
||||
// Close will free any resources held by this iterator.
|
||||
func (i *FSTIterator) Close() error {
|
||||
// at the moment we don't do anything, but wanted this for API completeness
|
||||
// at the moment we don't do anything,
|
||||
// but wanted this for API completeness
|
||||
return nil
|
||||
}
|
||||
|
|
203
vendor/github.com/couchbase/vellum/levenshtein2/LICENSE
generated
vendored
Normal file
203
vendor/github.com/couchbase/vellum/levenshtein2/LICENSE
generated
vendored
Normal file
|
@ -0,0 +1,203 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
125
vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go
generated
vendored
Normal file
125
vendor/github.com/couchbase/vellum/levenshtein2/alphabet.go
generated
vendored
Normal file
|
@ -0,0 +1,125 @@
|
|||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package levenshtein2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// FullCharacteristicVector is the characteristic bitset for one query
// character, packed into 32-bit buckets (one bit per query position).
type FullCharacteristicVector []uint32

// shiftAndMask returns the bits of the vector starting at the given bit
// offset, truncated by mask. When the offset is not bucket-aligned the
// result is stitched together from two adjacent buckets.
func (fcv FullCharacteristicVector) shiftAndMask(offset, mask uint32) uint32 {
	bucket := offset / 32
	shift := offset % 32
	if shift == 0 {
		return fcv[bucket] & mask
	}
	low := fcv[bucket] >> shift
	high := fcv[bucket+1] << (32 - shift)
	return (low | high) & mask
}

// tuple pairs a query character with its characteristic vector.
type tuple struct {
	char rune
	fcv  FullCharacteristicVector
}
|
||||
|
||||
// sortRunes implements sort.Interface over a slice of runes.
type sortRunes []rune

func (s sortRunes) Len() int           { return len(s) }
func (s sortRunes) Less(i, j int) bool { return s[i] < s[j] }
func (s sortRunes) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }

// sortRune sorts r in place in ascending order and returns it.
func sortRune(r []rune) []rune {
	sort.Sort(sortRunes(r))
	return r
}
|
||||
|
||||
type Alphabet struct {
|
||||
charset []tuple
|
||||
index uint32
|
||||
}
|
||||
|
||||
func (a *Alphabet) resetNext() {
|
||||
a.index = 0
|
||||
}
|
||||
|
||||
func (a *Alphabet) next() (rune, FullCharacteristicVector, error) {
|
||||
if int(a.index) >= len(a.charset) {
|
||||
return 0, nil, fmt.Errorf("eof")
|
||||
}
|
||||
|
||||
rv := a.charset[a.index]
|
||||
a.index++
|
||||
return rv.char, rv.fcv, nil
|
||||
}
|
||||
|
||||
// dedupe returns in with duplicate runes removed, keeping the first
// occurrence of each rune in its original order.
func dedupe(in string) string {
	seen := make(map[rune]struct{}, len(in))
	out := make([]rune, 0, len(in))
	for len(in) > 0 {
		r, n := utf8.DecodeRuneInString(in)
		in = in[n:]
		if _, dup := seen[r]; dup {
			continue
		}
		seen[r] = struct{}{}
		out = append(out, r)
	}
	return string(out)
}
|
||||
|
||||
func queryChars(qChars string) Alphabet {
|
||||
chars := dedupe(qChars)
|
||||
inChars := sortRune([]rune(chars))
|
||||
charsets := make([]tuple, 0, len(inChars))
|
||||
|
||||
for _, c := range inChars {
|
||||
tempChars := qChars
|
||||
var bits []uint32
|
||||
for len(tempChars) > 0 {
|
||||
var chunk string
|
||||
if len(tempChars) > 32 {
|
||||
chunk = tempChars[0:32]
|
||||
tempChars = tempChars[32:]
|
||||
} else {
|
||||
chunk = tempChars
|
||||
tempChars = tempChars[:0]
|
||||
}
|
||||
|
||||
chunkBits := uint32(0)
|
||||
bit := uint32(1)
|
||||
for _, chr := range chunk {
|
||||
if chr == c {
|
||||
chunkBits |= bit
|
||||
}
|
||||
bit <<= 1
|
||||
}
|
||||
bits = append(bits, chunkBits)
|
||||
}
|
||||
bits = append(bits, 0)
|
||||
charsets = append(charsets, tuple{char: c, fcv: FullCharacteristicVector(bits)})
|
||||
}
|
||||
return Alphabet{charset: charsets}
|
||||
}
|
250
vendor/github.com/couchbase/vellum/levenshtein2/dfa.go
generated
vendored
Normal file
250
vendor/github.com/couchbase/vellum/levenshtein2/dfa.go
generated
vendored
Normal file
|
@ -0,0 +1,250 @@
|
|||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package levenshtein2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
)
|
||||
|
||||
const SinkState = uint32(0)
|
||||
|
||||
type DFA struct {
|
||||
transitions [][256]uint32
|
||||
distances []Distance
|
||||
initState int
|
||||
ed uint8
|
||||
}
|
||||
|
||||
/// Returns the initial state
|
||||
func (d *DFA) initialState() int {
|
||||
return d.initState
|
||||
}
|
||||
|
||||
/// Returns the Levenshtein distance associated to the
|
||||
/// current state.
|
||||
func (d *DFA) distance(stateId int) Distance {
|
||||
return d.distances[stateId]
|
||||
}
|
||||
|
||||
/// Returns the number of states in the `DFA`.
|
||||
func (d *DFA) numStates() int {
|
||||
return len(d.transitions)
|
||||
}
|
||||
|
||||
/// Returns the destination state reached after consuming a given byte.
|
||||
func (d *DFA) transition(fromState int, b uint8) int {
|
||||
return int(d.transitions[fromState][b])
|
||||
}
|
||||
|
||||
func (d *DFA) eval(bytes []uint8) Distance {
|
||||
state := d.initialState()
|
||||
|
||||
for _, b := range bytes {
|
||||
state = d.transition(state, b)
|
||||
}
|
||||
|
||||
return d.distance(state)
|
||||
}
|
||||
|
||||
func (d *DFA) Start() int {
|
||||
return int(d.initialState())
|
||||
}
|
||||
|
||||
func (d *DFA) IsMatch(state int) bool {
|
||||
if _, ok := d.distance(state).(Exact); ok {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (d *DFA) CanMatch(state int) bool {
|
||||
return state > 0 && state < d.numStates()
|
||||
}
|
||||
|
||||
func (d *DFA) Accept(state int, b byte) int {
|
||||
return int(d.transition(state, b))
|
||||
}
|
||||
|
||||
// WillAlwaysMatch returns if the specified state will always end in a
|
||||
// matching state.
|
||||
func (d *DFA) WillAlwaysMatch(state int) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// fill sets every element of dest to val.
func fill(dest []uint32, val uint32) {
	for idx := 0; idx < len(dest); idx++ {
		dest[idx] = val
	}
}

// fillTransitions sets every entry of a 256-way transition row to val.
func fillTransitions(dest *[256]uint32, val uint32) {
	for idx := 0; idx < len(dest); idx++ {
		dest[idx] = val
	}
}
|
||||
|
||||
type Utf8DFAStateBuilder struct {
|
||||
dfaBuilder *Utf8DFABuilder
|
||||
stateID uint32
|
||||
defaultSuccessor []uint32
|
||||
}
|
||||
|
||||
func (sb *Utf8DFAStateBuilder) addTransitionID(fromStateID uint32, b uint8,
|
||||
toStateID uint32) {
|
||||
sb.dfaBuilder.transitions[fromStateID][b] = toStateID
|
||||
}
|
||||
|
||||
func (sb *Utf8DFAStateBuilder) addTransition(in rune, toStateID uint32) {
|
||||
fromStateID := sb.stateID
|
||||
chars := []byte(string(in))
|
||||
lastByte := chars[len(chars)-1]
|
||||
|
||||
for i, ch := range chars[:len(chars)-1] {
|
||||
remNumBytes := len(chars) - i - 1
|
||||
defaultSuccessor := sb.defaultSuccessor[remNumBytes]
|
||||
intermediateStateID := sb.dfaBuilder.transitions[fromStateID][ch]
|
||||
|
||||
if intermediateStateID == defaultSuccessor {
|
||||
intermediateStateID = sb.dfaBuilder.allocate()
|
||||
fillTransitions(&sb.dfaBuilder.transitions[intermediateStateID],
|
||||
sb.defaultSuccessor[remNumBytes-1])
|
||||
}
|
||||
|
||||
sb.addTransitionID(fromStateID, ch, intermediateStateID)
|
||||
fromStateID = intermediateStateID
|
||||
}
|
||||
|
||||
toStateIDDecoded := sb.dfaBuilder.getOrAllocate(original(toStateID))
|
||||
sb.addTransitionID(fromStateID, lastByte, toStateIDDecoded)
|
||||
}
|
||||
|
||||
// Utf8StateId encodes a unicode-level state id together with the
// number of pending bytes (0-3) still to be consumed to reach it.
type Utf8StateId uint32

// original is the encoding with zero pending bytes.
func original(stateId uint32) Utf8StateId {
	return predecessor(stateId, 0)
}

// predecessor encodes the state from which numSteps more bytes lead to
// stateId.
func predecessor(stateId uint32, numSteps uint8) Utf8StateId {
	return Utf8StateId(stateId*4 + uint32(numSteps))
}
|
||||
|
||||
// Utf8DFABuilder makes it possible to define a DFA
|
||||
// that takes unicode character, and build a `DFA`
|
||||
// that operates on utf-8 encoded
|
||||
type Utf8DFABuilder struct {
|
||||
index []uint32
|
||||
distances []Distance
|
||||
transitions [][256]uint32
|
||||
initialState uint32
|
||||
numStates uint32
|
||||
maxNumStates uint32
|
||||
}
|
||||
|
||||
func withMaxStates(maxStates uint32) *Utf8DFABuilder {
|
||||
rv := &Utf8DFABuilder{
|
||||
index: make([]uint32, maxStates*2+100),
|
||||
distances: make([]Distance, 0, maxStates),
|
||||
transitions: make([][256]uint32, 0, maxStates),
|
||||
maxNumStates: maxStates,
|
||||
}
|
||||
|
||||
for i := range rv.index {
|
||||
rv.index[i] = math.MaxUint32
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func (dfab *Utf8DFABuilder) allocate() uint32 {
|
||||
newState := dfab.numStates
|
||||
dfab.numStates++
|
||||
|
||||
dfab.distances = append(dfab.distances, Atleast{d: 255})
|
||||
dfab.transitions = append(dfab.transitions, [256]uint32{})
|
||||
|
||||
return newState
|
||||
}
|
||||
|
||||
func (dfab *Utf8DFABuilder) getOrAllocate(state Utf8StateId) uint32 {
|
||||
if int(state) >= cap(dfab.index) {
|
||||
cloneIndex := make([]uint32, int(state)*2)
|
||||
copy(cloneIndex, dfab.index)
|
||||
dfab.index = cloneIndex
|
||||
}
|
||||
if dfab.index[state] != math.MaxUint32 {
|
||||
return dfab.index[state]
|
||||
}
|
||||
|
||||
nstate := dfab.allocate()
|
||||
dfab.index[state] = nstate
|
||||
|
||||
return nstate
|
||||
}
|
||||
|
||||
func (dfab *Utf8DFABuilder) setInitialState(iState uint32) {
|
||||
decodedID := dfab.getOrAllocate(original(iState))
|
||||
dfab.initialState = decodedID
|
||||
}
|
||||
|
||||
func (dfab *Utf8DFABuilder) build(ed uint8) *DFA {
|
||||
return &DFA{
|
||||
transitions: dfab.transitions,
|
||||
distances: dfab.distances,
|
||||
initState: int(dfab.initialState),
|
||||
ed: ed,
|
||||
}
|
||||
}
|
||||
|
||||
func (dfab *Utf8DFABuilder) addState(state, default_suc_orig uint32,
|
||||
distance Distance) (*Utf8DFAStateBuilder, error) {
|
||||
if state > dfab.maxNumStates {
|
||||
return nil, fmt.Errorf("State id is larger than maxNumStates")
|
||||
}
|
||||
|
||||
stateID := dfab.getOrAllocate(original(state))
|
||||
dfab.distances[stateID] = distance
|
||||
|
||||
defaultSuccID := dfab.getOrAllocate(original(default_suc_orig))
|
||||
// creates a chain of states of predecessors of `default_suc_orig`.
|
||||
// Accepting k-bytes (whatever the bytes are) from `predecessor_states[k-1]`
|
||||
// leads to the `default_suc_orig` state.
|
||||
predecessorStates := []uint32{defaultSuccID,
|
||||
defaultSuccID,
|
||||
defaultSuccID,
|
||||
defaultSuccID}
|
||||
|
||||
for numBytes := uint8(1); numBytes < 4; numBytes++ {
|
||||
predecessorState := predecessor(default_suc_orig, numBytes)
|
||||
predecessorStateID := dfab.getOrAllocate(predecessorState)
|
||||
predecessorStates[numBytes] = predecessorStateID
|
||||
succ := predecessorStates[numBytes-1]
|
||||
fillTransitions(&dfab.transitions[predecessorStateID], succ)
|
||||
}
|
||||
|
||||
// 1-byte encoded chars.
|
||||
fill(dfab.transitions[stateID][0:192], predecessorStates[0])
|
||||
// 2-bytes encoded chars.
|
||||
fill(dfab.transitions[stateID][192:224], predecessorStates[1])
|
||||
// 3-bytes encoded chars.
|
||||
fill(dfab.transitions[stateID][224:240], predecessorStates[2])
|
||||
// 4-bytes encoded chars.
|
||||
fill(dfab.transitions[stateID][240:256], predecessorStates[3])
|
||||
|
||||
return &Utf8DFAStateBuilder{
|
||||
dfaBuilder: dfab,
|
||||
stateID: stateID,
|
||||
defaultSuccessor: predecessorStates}, nil
|
||||
}
|
64
vendor/github.com/couchbase/vellum/levenshtein2/levenshtein.go
generated
vendored
Normal file
64
vendor/github.com/couchbase/vellum/levenshtein2/levenshtein.go
generated
vendored
Normal file
|
@ -0,0 +1,64 @@
|
|||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package levenshtein2
|
||||
|
||||
import "fmt"
|
||||
|
||||
// StateLimit is the maximum number of states allowed
|
||||
const StateLimit = 10000
|
||||
|
||||
// ErrTooManyStates is returned if you attempt to build a Levenshtein
|
||||
// automaton which requires too many states.
|
||||
var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states",
|
||||
StateLimit)
|
||||
|
||||
// LevenshteinAutomatonBuilder wraps a precomputed
|
||||
// datastructure that allows to produce small (but not minimal) DFA.
|
||||
type LevenshteinAutomatonBuilder struct {
|
||||
pDfa *ParametricDFA
|
||||
}
|
||||
|
||||
// NewLevenshteinAutomatonBuilder creates a
|
||||
// reusable, threadsafe Levenshtein automaton builder.
|
||||
// `maxDistance` - maximum distance considered by the automaton.
|
||||
// `transposition` - assign a distance of 1 for transposition
|
||||
//
|
||||
// Building this automaton builder is computationally intensive.
|
||||
// While it takes only a few milliseconds for `d=2`, it grows
|
||||
// exponentially with `d`. It is only reasonable to `d <= 5`.
|
||||
func NewLevenshteinAutomatonBuilder(maxDistance uint8,
|
||||
transposition bool) (*LevenshteinAutomatonBuilder, error) {
|
||||
lnfa := newLevenshtein(maxDistance, transposition)
|
||||
|
||||
pdfa, err := fromNfa(lnfa)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &LevenshteinAutomatonBuilder{pDfa: pdfa}, nil
|
||||
}
|
||||
|
||||
// BuildDfa builds the levenshtein automaton for serving
|
||||
// queries with a given edit distance.
|
||||
func (lab *LevenshteinAutomatonBuilder) BuildDfa(query string,
|
||||
fuzziness uint8) (*DFA, error) {
|
||||
return lab.pDfa.buildDfa(query, fuzziness, false)
|
||||
}
|
||||
|
||||
// MaxDistance returns the MaxEdit distance supported by the
|
||||
// LevenshteinAutomatonBuilder builder.
|
||||
func (lab *LevenshteinAutomatonBuilder) MaxDistance() uint8 {
|
||||
return lab.pDfa.maxDistance
|
||||
}
|
292
vendor/github.com/couchbase/vellum/levenshtein2/levenshtein_nfa.go
generated
vendored
Normal file
292
vendor/github.com/couchbase/vellum/levenshtein2/levenshtein_nfa.go
generated
vendored
Normal file
|
@ -0,0 +1,292 @@
|
|||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package levenshtein2
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// Distance is a Levenshtein distance computed by a Levenshtein
// automaton. Such automata can only compute the exact distance up to a
// configured maximum; beyond it they invariably report
// Atleast{maxDistance + 1}.
type Distance interface {
	distance() uint8
}

// Exact is a precisely-known distance.
type Exact struct {
	d uint8
}

func (e Exact) distance() uint8 {
	return e.d
}

// Atleast is a lower bound: the true distance is >= d.
type Atleast struct {
	d uint8
}

func (a Atleast) distance() uint8 {
	return a.d
}
|
||||
|
||||
// characteristicVector returns a bitset whose i-th bit is set iff
// query[i] equals c.
func characteristicVector(query []rune, c rune) uint64 {
	var chi uint64
	for i, qc := range query {
		if qc == c {
			chi |= uint64(1) << uint64(i)
		}
	}
	return chi
}
|
||||
|
||||
// NFAState is one state of the Levenshtein NFA: an offset into the
// query, the edit distance accumulated so far, and a flag marking a
// transposition in progress.
type NFAState struct {
	Offset      uint32
	Distance    uint8
	InTranspose bool
}

// NFAStates orders states by offset, then distance, then the
// transpose flag (non-transposing first).
type NFAStates []NFAState

func (ns NFAStates) Len() int { return len(ns) }

func (ns NFAStates) Swap(i, j int) { ns[i], ns[j] = ns[j], ns[i] }

func (ns NFAStates) Less(i, j int) bool {
	left, right := ns[i], ns[j]
	switch {
	case left.Offset != right.Offset:
		return left.Offset < right.Offset
	case left.Distance != right.Distance:
		return left.Distance < right.Distance
	default:
		return !left.InTranspose && right.InTranspose
	}
}

// imply reports whether ns subsumes other: anything reachable from
// other is reachable from ns at no greater cost, making other
// redundant.
func (ns *NFAState) imply(other NFAState) bool {
	transposeImply := ns.InTranspose || !other.InTranspose

	deltaOffset := other.Offset - ns.Offset
	if ns.Offset > other.Offset {
		deltaOffset = ns.Offset - other.Offset
	}

	if transposeImply {
		return uint32(other.Distance) >= (uint32(ns.Distance) + deltaOffset)
	}
	return uint32(other.Distance) > (uint32(ns.Distance) + deltaOffset)
}
|
||||
|
||||
type MultiState struct {
|
||||
states []NFAState
|
||||
}
|
||||
|
||||
func (ms *MultiState) States() []NFAState {
|
||||
return ms.states
|
||||
}
|
||||
|
||||
func (ms *MultiState) Clear() {
|
||||
ms.states = ms.states[:0]
|
||||
}
|
||||
|
||||
func newMultiState() *MultiState {
|
||||
return &MultiState{states: make([]NFAState, 0)}
|
||||
}
|
||||
|
||||
func (ms *MultiState) normalize() uint32 {
|
||||
minOffset := uint32(math.MaxUint32)
|
||||
|
||||
for _, s := range ms.states {
|
||||
if s.Offset < minOffset {
|
||||
minOffset = s.Offset
|
||||
}
|
||||
}
|
||||
if minOffset == uint32(math.MaxUint32) {
|
||||
minOffset = 0
|
||||
}
|
||||
|
||||
for i := 0; i < len(ms.states); i++ {
|
||||
ms.states[i].Offset -= minOffset
|
||||
}
|
||||
|
||||
sort.Sort(NFAStates(ms.states))
|
||||
|
||||
return minOffset
|
||||
}
|
||||
|
||||
func (ms *MultiState) addStates(nState NFAState) {
|
||||
|
||||
for _, s := range ms.states {
|
||||
if s.imply(nState) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
i := 0
|
||||
for i < len(ms.states) {
|
||||
if nState.imply(ms.states[i]) {
|
||||
ms.states = append(ms.states[:i], ms.states[i+1:]...)
|
||||
} else {
|
||||
i++
|
||||
}
|
||||
}
|
||||
ms.states = append(ms.states, nState)
|
||||
|
||||
}
|
||||
|
||||
// extractBit reports whether bit pos of bitset is set.
func extractBit(bitset uint64, pos uint8) bool {
	return (bitset>>pos)&1 == 1
}

// dist returns |left - right| for unsigned operands.
func dist(left, right uint32) uint32 {
	if left > right {
		return left - right
	}
	return right - left
}
|
||||
|
||||
type LevenshteinNFA struct {
|
||||
mDistance uint8
|
||||
damerau bool
|
||||
}
|
||||
|
||||
func newLevenshtein(maxD uint8, transposition bool) *LevenshteinNFA {
|
||||
return &LevenshteinNFA{mDistance: maxD,
|
||||
damerau: transposition,
|
||||
}
|
||||
}
|
||||
|
||||
func (la *LevenshteinNFA) maxDistance() uint8 {
|
||||
return la.mDistance
|
||||
}
|
||||
|
||||
func (la *LevenshteinNFA) msDiameter() uint8 {
|
||||
return 2*la.mDistance + 1
|
||||
}
|
||||
|
||||
func (la *LevenshteinNFA) initialStates() *MultiState {
|
||||
ms := MultiState{}
|
||||
nfaState := NFAState{}
|
||||
ms.addStates(nfaState)
|
||||
return &ms
|
||||
}
|
||||
|
||||
func (la *LevenshteinNFA) multistateDistance(ms *MultiState,
|
||||
queryLen uint32) Distance {
|
||||
minDistance := Atleast{d: la.mDistance + 1}
|
||||
for _, s := range ms.states {
|
||||
t := s.Distance + uint8(dist(queryLen, s.Offset))
|
||||
if t <= uint8(la.mDistance) {
|
||||
if minDistance.distance() > t {
|
||||
minDistance.d = t
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if minDistance.distance() == la.mDistance+1 {
|
||||
return Atleast{d: la.mDistance + 1}
|
||||
}
|
||||
|
||||
return minDistance
|
||||
}
|
||||
|
||||
func (la *LevenshteinNFA) simpleTransition(state NFAState,
|
||||
symbol uint64, ms *MultiState) {
|
||||
|
||||
if state.Distance < la.mDistance {
|
||||
// insertion
|
||||
ms.addStates(NFAState{Offset: state.Offset,
|
||||
Distance: state.Distance + 1,
|
||||
InTranspose: false})
|
||||
|
||||
// substitution
|
||||
ms.addStates(NFAState{Offset: state.Offset + 1,
|
||||
Distance: state.Distance + 1,
|
||||
InTranspose: false})
|
||||
|
||||
n := la.mDistance + 1 - state.Distance
|
||||
for d := uint8(1); d < n; d++ {
|
||||
if extractBit(symbol, d) {
|
||||
// for d > 0, as many deletion and character match
|
||||
ms.addStates(NFAState{Offset: state.Offset + 1 + uint32(d),
|
||||
Distance: state.Distance + d,
|
||||
InTranspose: false})
|
||||
}
|
||||
}
|
||||
|
||||
if la.damerau && extractBit(symbol, 1) {
|
||||
ms.addStates(NFAState{
|
||||
Offset: state.Offset,
|
||||
Distance: state.Distance + 1,
|
||||
InTranspose: true})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if extractBit(symbol, 0) {
|
||||
ms.addStates(NFAState{Offset: state.Offset + 1,
|
||||
Distance: state.Distance,
|
||||
InTranspose: false})
|
||||
}
|
||||
|
||||
if state.InTranspose && extractBit(symbol, 0) {
|
||||
ms.addStates(NFAState{Offset: state.Offset + 2,
|
||||
Distance: state.Distance,
|
||||
InTranspose: false})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func (la *LevenshteinNFA) transition(cState *MultiState,
|
||||
dState *MultiState, scv uint64) {
|
||||
dState.Clear()
|
||||
mask := (uint64(1) << la.msDiameter()) - uint64(1)
|
||||
|
||||
for _, state := range cState.states {
|
||||
cv := (scv >> state.Offset) & mask
|
||||
la.simpleTransition(state, cv, dState)
|
||||
}
|
||||
|
||||
sort.Sort(NFAStates(dState.states))
|
||||
}
|
||||
|
||||
func (la *LevenshteinNFA) computeDistance(query, other []rune) Distance {
|
||||
cState := la.initialStates()
|
||||
nState := newMultiState()
|
||||
|
||||
for _, i := range other {
|
||||
nState.Clear()
|
||||
chi := characteristicVector(query, i)
|
||||
la.transition(cState, nState, chi)
|
||||
cState, nState = nState, cState
|
||||
}
|
||||
|
||||
return la.multistateDistance(cState, uint32(len(query)))
|
||||
}
|
349
vendor/github.com/couchbase/vellum/levenshtein2/parametric_dfa.go
generated
vendored
Normal file
349
vendor/github.com/couchbase/vellum/levenshtein2/parametric_dfa.go
generated
vendored
Normal file
|
@ -0,0 +1,349 @@
|
|||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package levenshtein2
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
)
|
||||
|
||||
// ParametricState identifies a state of the parametric Levenshtein
// automaton: a shape id plus an offset into the query.
type ParametricState struct {
	shapeID uint32
	offset  uint32
}

// newParametricState returns the dead-end (sink) state.
func newParametricState() ParametricState {
	return ParametricState{}
}

// isDeadEnd reports whether this is the sink state (shape 0).
func (ps *ParametricState) isDeadEnd() bool {
	return ps.shapeID == 0
}

// Transition moves a parametric state to a destination shape while
// advancing its offset.
type Transition struct {
	destShapeID uint32
	deltaOffset uint32
}

// apply returns the state reached from state through this transition.
// The dead state keeps offset 0 so there is exactly one dead state.
func (t *Transition) apply(state ParametricState) ParametricState {
	next := ParametricState{shapeID: t.destShapeID}
	if t.destShapeID != 0 {
		next.offset = state.offset + t.deltaOffset
	}
	return next
}

// ParametricStateIndex assigns dense ids to parametric states as they
// are discovered.
type ParametricStateIndex struct {
	stateIndex []uint32
	stateQueue []ParametricState
	numOffsets uint32
}

// newParametricStateIndex sizes the index for a query of queryLen
// runes and numParamState parametric shapes.
func newParametricStateIndex(queryLen,
	numParamState uint32) ParametricStateIndex {
	numOffsets := queryLen + 1
	if numParamState == 0 {
		numParamState = numOffsets
	}

	total := numParamState * numOffsets
	psi := ParametricStateIndex{
		stateIndex: make([]uint32, total),
		stateQueue: make([]ParametricState, 0, 150),
		numOffsets: numOffsets,
	}
	// math.MaxUint32 marks "no id assigned yet".
	for i := range psi.stateIndex {
		psi.stateIndex[i] = math.MaxUint32
	}
	return psi
}

// numStates returns how many states have been assigned ids so far.
func (psi *ParametricStateIndex) numStates() int {
	return len(psi.stateQueue)
}

// maxNumStates returns the index capacity.
func (psi *ParametricStateIndex) maxNumStates() int {
	return len(psi.stateIndex)
}

// get returns the state registered under stateID.
func (psi *ParametricStateIndex) get(stateID uint32) ParametricState {
	return psi.stateQueue[stateID]
}

// getOrAllocate returns the dense id for ps, assigning the next free
// id on first sight.
func (psi *ParametricStateIndex) getOrAllocate(ps ParametricState) uint32 {
	bucket := ps.shapeID*psi.numOffsets + ps.offset
	if bucket < uint32(len(psi.stateIndex)) &&
		psi.stateIndex[bucket] != math.MaxUint32 {
		return psi.stateIndex[bucket]
	}

	id := uint32(len(psi.stateQueue))
	psi.stateQueue = append(psi.stateQueue, ps)

	psi.stateIndex[bucket] = id
	return id
}
|
||||
|
||||
type ParametricDFA struct {
|
||||
distance []uint8
|
||||
transitions []Transition
|
||||
maxDistance uint8
|
||||
transitionStride uint32
|
||||
diameter uint32
|
||||
}
|
||||
|
||||
func (pdfa *ParametricDFA) initialState() ParametricState {
|
||||
return ParametricState{shapeID: 1}
|
||||
}
|
||||
|
||||
// Returns true iff whatever characters come afterward,
|
||||
// we will never reach a shorter distance
|
||||
func (pdfa *ParametricDFA) isPrefixSink(state ParametricState, queryLen uint32) bool {
|
||||
if state.isDeadEnd() {
|
||||
return true
|
||||
}
|
||||
|
||||
remOffset := queryLen - state.offset
|
||||
if remOffset < pdfa.diameter {
|
||||
stateDistances := pdfa.distance[pdfa.diameter*state.shapeID:]
|
||||
prefixDistance := stateDistances[remOffset]
|
||||
if prefixDistance > pdfa.maxDistance {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, d := range stateDistances {
|
||||
if d < prefixDistance {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (pdfa *ParametricDFA) numStates() int {
|
||||
return len(pdfa.transitions) / int(pdfa.transitionStride)
|
||||
}
|
||||
|
||||
func min(x, y uint32) uint32 {
|
||||
if x < y {
|
||||
return x
|
||||
}
|
||||
return y
|
||||
}
|
||||
|
||||
func (pdfa *ParametricDFA) transition(state ParametricState,
|
||||
chi uint32) Transition {
|
||||
return pdfa.transitions[pdfa.transitionStride*state.shapeID+chi]
|
||||
}
|
||||
|
||||
func (pdfa *ParametricDFA) getDistance(state ParametricState,
|
||||
qLen uint32) Distance {
|
||||
remainingOffset := qLen - state.offset
|
||||
if state.isDeadEnd() || remainingOffset >= pdfa.diameter {
|
||||
return Atleast{d: pdfa.maxDistance + 1}
|
||||
}
|
||||
dist := pdfa.distance[int(pdfa.diameter*state.shapeID)+int(remainingOffset)]
|
||||
if dist > pdfa.maxDistance {
|
||||
return Atleast{d: dist}
|
||||
}
|
||||
return Exact{d: dist}
|
||||
}
|
||||
|
||||
func (pdfa *ParametricDFA) computeDistance(left, right string) Distance {
|
||||
state := pdfa.initialState()
|
||||
leftChars := []rune(left)
|
||||
for _, chr := range []rune(right) {
|
||||
start := state.offset
|
||||
stop := min(start+pdfa.diameter, uint32(len(leftChars)))
|
||||
chi := characteristicVector(leftChars[start:stop], chr)
|
||||
transition := pdfa.transition(state, uint32(chi))
|
||||
state = transition.apply(state)
|
||||
if state.isDeadEnd() {
|
||||
return Atleast{d: pdfa.maxDistance + 1}
|
||||
}
|
||||
}
|
||||
return pdfa.getDistance(state, uint32(len(left)))
|
||||
}
|
||||
|
||||
func (pdfa *ParametricDFA) buildDfa(query string, distance uint8,
|
||||
prefix bool) (*DFA, error) {
|
||||
qLen := uint32(len([]rune(query)))
|
||||
alphabet := queryChars(query)
|
||||
|
||||
psi := newParametricStateIndex(qLen, uint32(pdfa.numStates()))
|
||||
maxNumStates := psi.maxNumStates()
|
||||
deadEndStateID := psi.getOrAllocate(newParametricState())
|
||||
if deadEndStateID != 0 {
|
||||
return nil, fmt.Errorf("Invalid dead end state")
|
||||
}
|
||||
|
||||
initialStateID := psi.getOrAllocate(pdfa.initialState())
|
||||
dfaBuilder := withMaxStates(uint32(maxNumStates))
|
||||
mask := uint32((1 << pdfa.diameter) - 1)
|
||||
|
||||
var stateID int
|
||||
for stateID = 0; stateID < StateLimit; stateID++ {
|
||||
if stateID == psi.numStates() {
|
||||
break
|
||||
}
|
||||
state := psi.get(uint32(stateID))
|
||||
if prefix && pdfa.isPrefixSink(state, qLen) {
|
||||
distance := pdfa.getDistance(state, qLen)
|
||||
dfaBuilder.addState(uint32(stateID), uint32(stateID), distance)
|
||||
} else {
|
||||
transition := pdfa.transition(state, 0)
|
||||
defSuccessor := transition.apply(state)
|
||||
defSuccessorID := psi.getOrAllocate(defSuccessor)
|
||||
distance := pdfa.getDistance(state, qLen)
|
||||
stateBuilder, err := dfaBuilder.addState(uint32(stateID), defSuccessorID, distance)
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parametric_dfa: buildDfa, err: %v", err)
|
||||
}
|
||||
|
||||
alphabet.resetNext()
|
||||
chr, cv, err := alphabet.next()
|
||||
for err == nil {
|
||||
chi := cv.shiftAndMask(state.offset, mask)
|
||||
|
||||
transition := pdfa.transition(state, chi)
|
||||
|
||||
destState := transition.apply(state)
|
||||
|
||||
destStateID := psi.getOrAllocate(destState)
|
||||
|
||||
stateBuilder.addTransition(chr, destStateID)
|
||||
|
||||
chr, cv, err = alphabet.next()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if stateID == StateLimit {
|
||||
return nil, ErrTooManyStates
|
||||
}
|
||||
|
||||
dfaBuilder.setInitialState(initialStateID)
|
||||
return dfaBuilder.build(distance), nil
|
||||
}
|
||||
|
||||
func fromNfa(nfa *LevenshteinNFA) (*ParametricDFA, error) {
|
||||
lookUp := newHash()
|
||||
lookUp.getOrAllocate(*newMultiState())
|
||||
initialState := nfa.initialStates()
|
||||
lookUp.getOrAllocate(*initialState)
|
||||
|
||||
maxDistance := nfa.maxDistance()
|
||||
msDiameter := nfa.msDiameter()
|
||||
|
||||
numChi := 1 << msDiameter
|
||||
chiValues := make([]uint64, numChi)
|
||||
for i := 0; i < numChi; i++ {
|
||||
chiValues[i] = uint64(i)
|
||||
}
|
||||
|
||||
transitions := make([]Transition, 0, numChi*int(msDiameter))
|
||||
var stateID int
|
||||
for stateID = 0; stateID < StateLimit; stateID++ {
|
||||
if stateID == len(lookUp.items) {
|
||||
break
|
||||
}
|
||||
|
||||
for _, chi := range chiValues {
|
||||
destMs := newMultiState()
|
||||
|
||||
ms := lookUp.getFromID(stateID)
|
||||
|
||||
nfa.transition(ms, destMs, chi)
|
||||
|
||||
translation := destMs.normalize()
|
||||
|
||||
destID := lookUp.getOrAllocate(*destMs)
|
||||
|
||||
transitions = append(transitions, Transition{
|
||||
destShapeID: uint32(destID),
|
||||
deltaOffset: translation,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if stateID == StateLimit {
|
||||
return nil, ErrTooManyStates
|
||||
}
|
||||
|
||||
ns := len(lookUp.items)
|
||||
diameter := int(msDiameter)
|
||||
|
||||
distances := make([]uint8, 0, diameter*ns)
|
||||
for stateID := 0; stateID < ns; stateID++ {
|
||||
ms := lookUp.getFromID(stateID)
|
||||
for offset := 0; offset < diameter; offset++ {
|
||||
dist := nfa.multistateDistance(ms, uint32(offset))
|
||||
distances = append(distances, dist.distance())
|
||||
}
|
||||
}
|
||||
|
||||
return &ParametricDFA{
|
||||
diameter: uint32(msDiameter),
|
||||
transitions: transitions,
|
||||
maxDistance: maxDistance,
|
||||
transitionStride: uint32(numChi),
|
||||
distance: distances,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type hash struct {
|
||||
index map[[16]byte]int
|
||||
items []MultiState
|
||||
}
|
||||
|
||||
func newHash() *hash {
|
||||
return &hash{
|
||||
index: make(map[[16]byte]int, 100),
|
||||
items: make([]MultiState, 0, 100),
|
||||
}
|
||||
}
|
||||
|
||||
func (h *hash) getOrAllocate(m MultiState) int {
|
||||
size := len(h.items)
|
||||
var exists bool
|
||||
var pos int
|
||||
md5 := getHash(&m)
|
||||
if pos, exists = h.index[md5]; !exists {
|
||||
h.index[md5] = size
|
||||
pos = size
|
||||
h.items = append(h.items, m)
|
||||
}
|
||||
return pos
|
||||
}
|
||||
|
||||
func (h *hash) getFromID(id int) *MultiState {
|
||||
return &h.items[id]
|
||||
}
|
||||
|
||||
func getHash(ms *MultiState) [16]byte {
|
||||
msBytes := []byte{}
|
||||
for _, state := range ms.states {
|
||||
jsonBytes, _ := json.Marshal(&state)
|
||||
msBytes = append(msBytes, jsonBytes...)
|
||||
}
|
||||
return md5.Sum(msBytes)
|
||||
}
|
79
vendor/github.com/couchbase/vellum/regexp/compile.go
generated
vendored
79
vendor/github.com/couchbase/vellum/regexp/compile.go
generated
vendored
|
@ -18,17 +18,27 @@ import (
|
|||
"regexp/syntax"
|
||||
"unicode"
|
||||
|
||||
unicode_utf8 "unicode/utf8"
|
||||
|
||||
"github.com/couchbase/vellum/utf8"
|
||||
)
|
||||
|
||||
type compiler struct {
|
||||
sizeLimit uint
|
||||
insts prog
|
||||
instsPool []inst
|
||||
|
||||
sequences utf8.Sequences
|
||||
rangeStack utf8.RangeStack
|
||||
startBytes []byte
|
||||
endBytes []byte
|
||||
}
|
||||
|
||||
func newCompiler(sizeLimit uint) *compiler {
|
||||
return &compiler{
|
||||
sizeLimit: sizeLimit,
|
||||
sizeLimit: sizeLimit,
|
||||
startBytes: make([]byte, unicode_utf8.UTFMax),
|
||||
endBytes: make([]byte, unicode_utf8.UTFMax),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -37,13 +47,13 @@ func (c *compiler) compile(ast *syntax.Regexp) (prog, error) {
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c.insts = append(c.insts, &inst{
|
||||
op: OpMatch,
|
||||
})
|
||||
inst := c.allocInst()
|
||||
inst.op = OpMatch
|
||||
c.insts = append(c.insts, inst)
|
||||
return c.insts, nil
|
||||
}
|
||||
|
||||
func (c *compiler) c(ast *syntax.Regexp) error {
|
||||
func (c *compiler) c(ast *syntax.Regexp) (err error) {
|
||||
if ast.Flags&syntax.NonGreedy > 1 {
|
||||
return ErrNoLazy
|
||||
}
|
||||
|
@ -67,11 +77,12 @@ func (c *compiler) c(ast *syntax.Regexp) error {
|
|||
next.Rune = next.Rune0[0:2]
|
||||
return c.c(&next)
|
||||
}
|
||||
seqs, err := utf8.NewSequences(r, r)
|
||||
c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
|
||||
r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, seq := range seqs {
|
||||
for _, seq := range c.sequences {
|
||||
c.compileUtf8Ranges(seq)
|
||||
}
|
||||
}
|
||||
|
@ -106,8 +117,7 @@ func (c *compiler) c(ast *syntax.Regexp) error {
|
|||
if len(ast.Sub) == 0 {
|
||||
return nil
|
||||
}
|
||||
jmpsToEnd := []uint{}
|
||||
|
||||
jmpsToEnd := make([]uint, 0, len(ast.Sub)-1)
|
||||
// does not handle last entry
|
||||
for i := 0; i < len(ast.Sub)-1; i++ {
|
||||
sub := ast.Sub[i]
|
||||
|
@ -188,7 +198,8 @@ func (c *compiler) c(ast *syntax.Regexp) error {
|
|||
return err
|
||||
}
|
||||
}
|
||||
var splits, starts []uint
|
||||
splits := make([]uint, 0, ast.Max-ast.Min)
|
||||
starts := make([]uint, 0, ast.Max-ast.Min)
|
||||
for i := ast.Min; i < ast.Max; i++ {
|
||||
splits = append(splits, c.emptySplit())
|
||||
starts = append(starts, uint(len(c.insts)))
|
||||
|
@ -218,8 +229,7 @@ func (c *compiler) compileClass(ast *syntax.Regexp) error {
|
|||
if len(ast.Rune) == 0 {
|
||||
return nil
|
||||
}
|
||||
var jmps []uint
|
||||
|
||||
jmps := make([]uint, 0, len(ast.Rune)-2)
|
||||
// does not do last pair
|
||||
for i := 0; i < len(ast.Rune)-2; i += 2 {
|
||||
rstart := ast.Rune[i]
|
||||
|
@ -249,16 +259,16 @@ func (c *compiler) compileClass(ast *syntax.Regexp) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (c *compiler) compileClassRange(startR, endR rune) error {
|
||||
seqs, err := utf8.NewSequences(startR, endR)
|
||||
func (c *compiler) compileClassRange(startR, endR rune) (err error) {
|
||||
c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
|
||||
startR, endR, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var jmps []uint
|
||||
|
||||
jmps := make([]uint, 0, len(c.sequences)-1)
|
||||
// does not do last entry
|
||||
for i := 0; i < len(seqs)-1; i++ {
|
||||
seq := seqs[i]
|
||||
for i := 0; i < len(c.sequences)-1; i++ {
|
||||
seq := c.sequences[i]
|
||||
split := c.emptySplit()
|
||||
j1 := c.top()
|
||||
c.compileUtf8Ranges(seq)
|
||||
|
@ -267,7 +277,7 @@ func (c *compiler) compileClassRange(startR, endR rune) error {
|
|||
c.setSplit(split, j1, j2)
|
||||
}
|
||||
// handle last entry
|
||||
c.compileUtf8Ranges(seqs[len(seqs)-1])
|
||||
c.compileUtf8Ranges(c.sequences[len(c.sequences)-1])
|
||||
end := c.top()
|
||||
for _, jmp := range jmps {
|
||||
c.setJump(jmp, end)
|
||||
|
@ -278,25 +288,25 @@ func (c *compiler) compileClassRange(startR, endR rune) error {
|
|||
|
||||
func (c *compiler) compileUtf8Ranges(seq utf8.Sequence) {
|
||||
for _, r := range seq {
|
||||
c.insts = append(c.insts, &inst{
|
||||
op: OpRange,
|
||||
rangeStart: r.Start,
|
||||
rangeEnd: r.End,
|
||||
})
|
||||
inst := c.allocInst()
|
||||
inst.op = OpRange
|
||||
inst.rangeStart = r.Start
|
||||
inst.rangeEnd = r.End
|
||||
c.insts = append(c.insts, inst)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *compiler) emptySplit() uint {
|
||||
c.insts = append(c.insts, &inst{
|
||||
op: OpSplit,
|
||||
})
|
||||
inst := c.allocInst()
|
||||
inst.op = OpSplit
|
||||
c.insts = append(c.insts, inst)
|
||||
return c.top() - 1
|
||||
}
|
||||
|
||||
func (c *compiler) emptyJump() uint {
|
||||
c.insts = append(c.insts, &inst{
|
||||
op: OpJmp,
|
||||
})
|
||||
inst := c.allocInst()
|
||||
inst.op = OpJmp
|
||||
c.insts = append(c.insts, inst)
|
||||
return c.top() - 1
|
||||
}
|
||||
|
||||
|
@ -314,3 +324,12 @@ func (c *compiler) setJump(i, pc uint) {
|
|||
func (c *compiler) top() uint {
|
||||
return uint(len(c.insts))
|
||||
}
|
||||
|
||||
func (c *compiler) allocInst() *inst {
|
||||
if len(c.instsPool) <= 0 {
|
||||
c.instsPool = make([]inst, 16)
|
||||
}
|
||||
inst := &c.instsPool[0]
|
||||
c.instsPool = c.instsPool[1:]
|
||||
return inst
|
||||
}
|
||||
|
|
38
vendor/github.com/couchbase/vellum/regexp/dfa.go
generated
vendored
38
vendor/github.com/couchbase/vellum/regexp/dfa.go
generated
vendored
|
@ -23,7 +23,7 @@ import (
|
|||
const StateLimit = 10000
|
||||
|
||||
// ErrTooManyStates is returned if you attempt to build a Levenshtein
|
||||
// automaton which requries too many states.
|
||||
// automaton which requires too many states.
|
||||
var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states",
|
||||
StateLimit)
|
||||
|
||||
|
@ -37,12 +37,12 @@ func newDfaBuilder(insts prog) *dfaBuilder {
|
|||
d := &dfaBuilder{
|
||||
dfa: &dfa{
|
||||
insts: insts,
|
||||
states: make([]*state, 0, 16),
|
||||
states: make([]state, 0, 16),
|
||||
},
|
||||
cache: make(map[string]int, 1024),
|
||||
}
|
||||
// add 0 state that is invalid
|
||||
d.dfa.states = append(d.dfa.states, &state{
|
||||
d.dfa.states = append(d.dfa.states, state{
|
||||
next: make([]int, 256),
|
||||
match: false,
|
||||
})
|
||||
|
@ -54,13 +54,15 @@ func (d *dfaBuilder) build() (*dfa, error) {
|
|||
next := newSparseSet(uint(len(d.dfa.insts)))
|
||||
|
||||
d.dfa.add(cur, 0)
|
||||
states := intStack{d.cachedState(cur)}
|
||||
ns, instsReuse := d.cachedState(cur, nil)
|
||||
states := intStack{ns}
|
||||
seen := make(map[int]struct{})
|
||||
var s int
|
||||
states, s = states.Pop()
|
||||
for s != 0 {
|
||||
for b := 0; b < 256; b++ {
|
||||
ns := d.runState(cur, next, s, byte(b))
|
||||
var ns int
|
||||
ns, instsReuse = d.runState(cur, next, s, byte(b), instsReuse)
|
||||
if ns != 0 {
|
||||
if _, ok := seen[ns]; !ok {
|
||||
seen[ns] = struct{}{}
|
||||
|
@ -76,15 +78,17 @@ func (d *dfaBuilder) build() (*dfa, error) {
|
|||
return d.dfa, nil
|
||||
}
|
||||
|
||||
func (d *dfaBuilder) runState(cur, next *sparseSet, state int, b byte) int {
|
||||
func (d *dfaBuilder) runState(cur, next *sparseSet, state int, b byte, instsReuse []uint) (
|
||||
int, []uint) {
|
||||
cur.Clear()
|
||||
for _, ip := range d.dfa.states[state].insts {
|
||||
cur.Add(ip)
|
||||
}
|
||||
d.dfa.run(cur, next, b)
|
||||
nextState := d.cachedState(next)
|
||||
var nextState int
|
||||
nextState, instsReuse = d.cachedState(next, instsReuse)
|
||||
d.dfa.states[state].next[b] = nextState
|
||||
return nextState
|
||||
return nextState, instsReuse
|
||||
}
|
||||
|
||||
func instsKey(insts []uint, buf []byte) []byte {
|
||||
|
@ -99,8 +103,12 @@ func instsKey(insts []uint, buf []byte) []byte {
|
|||
return buf
|
||||
}
|
||||
|
||||
func (d *dfaBuilder) cachedState(set *sparseSet) int {
|
||||
var insts []uint
|
||||
func (d *dfaBuilder) cachedState(set *sparseSet,
|
||||
instsReuse []uint) (int, []uint) {
|
||||
insts := instsReuse[:0]
|
||||
if cap(insts) == 0 {
|
||||
insts = make([]uint, 0, set.Len())
|
||||
}
|
||||
var isMatch bool
|
||||
for i := uint(0); i < uint(set.Len()); i++ {
|
||||
ip := set.Get(i)
|
||||
|
@ -113,26 +121,26 @@ func (d *dfaBuilder) cachedState(set *sparseSet) int {
|
|||
}
|
||||
}
|
||||
if len(insts) == 0 {
|
||||
return 0
|
||||
return 0, insts
|
||||
}
|
||||
d.keyBuf = instsKey(insts, d.keyBuf)
|
||||
v, ok := d.cache[string(d.keyBuf)]
|
||||
if ok {
|
||||
return v
|
||||
return v, insts
|
||||
}
|
||||
d.dfa.states = append(d.dfa.states, &state{
|
||||
d.dfa.states = append(d.dfa.states, state{
|
||||
insts: insts,
|
||||
next: make([]int, 256),
|
||||
match: isMatch,
|
||||
})
|
||||
newV := len(d.dfa.states) - 1
|
||||
d.cache[string(d.keyBuf)] = newV
|
||||
return newV
|
||||
return newV, nil
|
||||
}
|
||||
|
||||
type dfa struct {
|
||||
insts prog
|
||||
states []*state
|
||||
states []state
|
||||
}
|
||||
|
||||
func (d *dfa) add(set *sparseSet, ip uint) {
|
||||
|
|
2
vendor/github.com/couchbase/vellum/regexp/inst.go
generated
vendored
2
vendor/github.com/couchbase/vellum/regexp/inst.go
generated
vendored
|
@ -27,7 +27,7 @@ const (
|
|||
OpRange
|
||||
)
|
||||
|
||||
// instSize is the approxmiate size of the an inst struct in bytes
|
||||
// instSize is the approximate size of the an inst struct in bytes
|
||||
const instSize = 40
|
||||
|
||||
type inst struct {
|
||||
|
|
10
vendor/github.com/couchbase/vellum/regexp/regexp.go
generated
vendored
10
vendor/github.com/couchbase/vellum/regexp/regexp.go
generated
vendored
|
@ -35,6 +35,8 @@ var ErrNoLazy = fmt.Errorf("lazy quantifiers are not allowed")
|
|||
// too many instructions
|
||||
var ErrCompiledTooBig = fmt.Errorf("too many instructions")
|
||||
|
||||
var DefaultLimit = uint(10 * (1 << 20))
|
||||
|
||||
// Regexp implements the vellum.Automaton interface for matcing a user
|
||||
// specified regular expression.
|
||||
type Regexp struct {
|
||||
|
@ -47,7 +49,7 @@ type Regexp struct {
|
|||
// compiled finite state automaton. If this size is exceeded,
|
||||
// ErrCompiledTooBig will be returned.
|
||||
func New(expr string) (*Regexp, error) {
|
||||
return NewWithLimit(expr, 10*(1<<20))
|
||||
return NewWithLimit(expr, DefaultLimit)
|
||||
}
|
||||
|
||||
// NewRegexpWithLimit creates a new Regular Expression automaton with
|
||||
|
@ -59,6 +61,10 @@ func NewWithLimit(expr string, size uint) (*Regexp, error) {
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return NewParsedWithLimit(expr, parsed, size)
|
||||
}
|
||||
|
||||
func NewParsedWithLimit(expr string, parsed *syntax.Regexp, size uint) (*Regexp, error) {
|
||||
compiler := newCompiler(size)
|
||||
insts, err := compiler.compile(parsed)
|
||||
if err != nil {
|
||||
|
@ -103,7 +109,7 @@ func (r *Regexp) WillAlwaysMatch(int) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// Accept returns the new state, resulting from the transite byte b
|
||||
// Accept returns the new state, resulting from the transition byte b
|
||||
// when currently in the state s.
|
||||
func (r *Regexp) Accept(s int, b byte) int {
|
||||
if s < len(r.dfa.states) {
|
||||
|
|
36
vendor/github.com/couchbase/vellum/registry.go
generated
vendored
36
vendor/github.com/couchbase/vellum/registry.go
generated
vendored
|
@ -14,39 +14,35 @@
|
|||
|
||||
package vellum
|
||||
|
||||
import (
|
||||
"hash"
|
||||
"hash/fnv"
|
||||
)
|
||||
|
||||
type registryCell struct {
|
||||
addr int
|
||||
node *builderNode
|
||||
}
|
||||
|
||||
type registry struct {
|
||||
table []registryCell
|
||||
tableSize uint
|
||||
mruSize uint
|
||||
hasher hash.Hash64
|
||||
builderNodePool *builderNodePool
|
||||
table []registryCell
|
||||
tableSize uint
|
||||
mruSize uint
|
||||
}
|
||||
|
||||
func newRegistry(tableSize, mruSize int) *registry {
|
||||
func newRegistry(p *builderNodePool, tableSize, mruSize int) *registry {
|
||||
nsize := tableSize * mruSize
|
||||
rv := ®istry{
|
||||
table: make([]registryCell, nsize),
|
||||
tableSize: uint(tableSize),
|
||||
mruSize: uint(mruSize),
|
||||
hasher: fnv.New64a(),
|
||||
builderNodePool: p,
|
||||
table: make([]registryCell, nsize),
|
||||
tableSize: uint(tableSize),
|
||||
mruSize: uint(mruSize),
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (r *registry) Reset() {
|
||||
for i := 0; i < len(r.table); i++ {
|
||||
r.table[i] = registryCell{}
|
||||
var empty registryCell
|
||||
for i := range r.table {
|
||||
r.builderNodePool.Put(r.table[i].node)
|
||||
r.table[i] = empty
|
||||
}
|
||||
r.hasher.Reset()
|
||||
}
|
||||
|
||||
func (r *registry) entry(node *builderNode) (bool, int, *registryCell) {
|
||||
|
@ -57,7 +53,7 @@ func (r *registry) entry(node *builderNode) (bool, int, *registryCell) {
|
|||
start := r.mruSize * uint(bucket)
|
||||
end := start + r.mruSize
|
||||
rc := registryCache(r.table[start:end])
|
||||
return rc.entry(node)
|
||||
return rc.entry(node, r.builderNodePool)
|
||||
}
|
||||
|
||||
const fnvPrime = 1099511628211
|
||||
|
@ -81,11 +77,12 @@ func (r *registry) hash(b *builderNode) int {
|
|||
|
||||
type registryCache []registryCell
|
||||
|
||||
func (r registryCache) entry(node *builderNode) (bool, int, *registryCell) {
|
||||
func (r registryCache) entry(node *builderNode, pool *builderNodePool) (bool, int, *registryCell) {
|
||||
if len(r) == 1 {
|
||||
if r[0].node != nil && r[0].node.equiv(node) {
|
||||
return true, r[0].addr, nil
|
||||
}
|
||||
pool.Put(r[0].node)
|
||||
r[0].node = node
|
||||
return false, 0, &r[0]
|
||||
}
|
||||
|
@ -98,6 +95,7 @@ func (r registryCache) entry(node *builderNode) (bool, int, *registryCell) {
|
|||
}
|
||||
// no match
|
||||
last := len(r) - 1
|
||||
pool.Put(r[last].node)
|
||||
r[last].node = node // discard LRU
|
||||
r.promote(last)
|
||||
return false, 0, &r[0]
|
||||
|
|
98
vendor/github.com/couchbase/vellum/utf8/utf8.go
generated
vendored
98
vendor/github.com/couchbase/vellum/utf8/utf8.go
generated
vendored
|
@ -25,19 +25,39 @@ type Sequences []Sequence
|
|||
// NewSequences constructs a collection of Sequence which describe the
|
||||
// byte ranges covered between the start and end runes.
|
||||
func NewSequences(start, end rune) (Sequences, error) {
|
||||
var rv Sequences
|
||||
rv, _, err := NewSequencesPrealloc(start, end, nil, nil, nil, nil)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
var rangeStack rangeStack
|
||||
rangeStack = rangeStack.Push(&scalarRange{start, end})
|
||||
func NewSequencesPrealloc(start, end rune,
|
||||
preallocSequences Sequences,
|
||||
preallocRangeStack RangeStack,
|
||||
preallocStartBytes, preallocEndBytes []byte) (Sequences, RangeStack, error) {
|
||||
rv := preallocSequences[:0]
|
||||
|
||||
startBytes := preallocStartBytes
|
||||
if cap(startBytes) < utf8.UTFMax {
|
||||
startBytes = make([]byte, utf8.UTFMax)
|
||||
}
|
||||
startBytes = startBytes[:utf8.UTFMax]
|
||||
|
||||
endBytes := preallocEndBytes
|
||||
if cap(endBytes) < utf8.UTFMax {
|
||||
endBytes = make([]byte, utf8.UTFMax)
|
||||
}
|
||||
endBytes = endBytes[:utf8.UTFMax]
|
||||
|
||||
rangeStack := preallocRangeStack[:0]
|
||||
rangeStack = rangeStack.Push(scalarRange{start, end})
|
||||
|
||||
rangeStack, r := rangeStack.Pop()
|
||||
TOP:
|
||||
for r != nil {
|
||||
for r != nilScalarRange {
|
||||
INNER:
|
||||
for {
|
||||
r1, r2 := r.split()
|
||||
if r1 != nil {
|
||||
rangeStack = rangeStack.Push(&scalarRange{r2.start, r2.end})
|
||||
if r1 != nilScalarRange {
|
||||
rangeStack = rangeStack.Push(scalarRange{r2.start, r2.end})
|
||||
r.start = r1.start
|
||||
r.end = r1.end
|
||||
continue INNER
|
||||
|
@ -49,13 +69,13 @@ TOP:
|
|||
for i := 1; i < utf8.UTFMax; i++ {
|
||||
max := maxScalarValue(i)
|
||||
if r.start <= max && max < r.end {
|
||||
rangeStack = rangeStack.Push(&scalarRange{max + 1, r.end})
|
||||
rangeStack = rangeStack.Push(scalarRange{max + 1, r.end})
|
||||
r.end = max
|
||||
continue INNER
|
||||
}
|
||||
}
|
||||
asciiRange := r.ascii()
|
||||
if asciiRange != nil {
|
||||
if asciiRange != nilRange {
|
||||
rv = append(rv, Sequence{
|
||||
asciiRange,
|
||||
})
|
||||
|
@ -66,23 +86,21 @@ TOP:
|
|||
m := rune((1 << (6 * i)) - 1)
|
||||
if (r.start & ^m) != (r.end & ^m) {
|
||||
if (r.start & m) != 0 {
|
||||
rangeStack = rangeStack.Push(&scalarRange{(r.start | m) + 1, r.end})
|
||||
rangeStack = rangeStack.Push(scalarRange{(r.start | m) + 1, r.end})
|
||||
r.end = r.start | m
|
||||
continue INNER
|
||||
}
|
||||
if (r.end & m) != m {
|
||||
rangeStack = rangeStack.Push(&scalarRange{r.end & ^m, r.end})
|
||||
rangeStack = rangeStack.Push(scalarRange{r.end & ^m, r.end})
|
||||
r.end = (r.end & ^m) - 1
|
||||
continue INNER
|
||||
}
|
||||
}
|
||||
}
|
||||
start := make([]byte, utf8.UTFMax)
|
||||
end := make([]byte, utf8.UTFMax)
|
||||
n, m := r.encode(start, end)
|
||||
seq, err := SequenceFromEncodedRange(start[0:n], end[0:m])
|
||||
n, m := r.encode(startBytes, endBytes)
|
||||
seq, err := SequenceFromEncodedRange(startBytes[0:n], endBytes[0:m])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, nil, err
|
||||
}
|
||||
rv = append(rv, seq)
|
||||
rangeStack, r = rangeStack.Pop()
|
||||
|
@ -90,11 +108,11 @@ TOP:
|
|||
}
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
return rv, rangeStack, nil
|
||||
}
|
||||
|
||||
// Sequence is a collection of *Range
|
||||
type Sequence []*Range
|
||||
// Sequence is a collection of Range
|
||||
type Sequence []Range
|
||||
|
||||
// SequenceFromEncodedRange creates sequence from the encoded bytes
|
||||
func SequenceFromEncodedRange(start, end []byte) (Sequence, error) {
|
||||
|
@ -104,21 +122,21 @@ func SequenceFromEncodedRange(start, end []byte) (Sequence, error) {
|
|||
switch len(start) {
|
||||
case 2:
|
||||
return Sequence{
|
||||
&Range{start[0], end[0]},
|
||||
&Range{start[1], end[1]},
|
||||
Range{start[0], end[0]},
|
||||
Range{start[1], end[1]},
|
||||
}, nil
|
||||
case 3:
|
||||
return Sequence{
|
||||
&Range{start[0], end[0]},
|
||||
&Range{start[1], end[1]},
|
||||
&Range{start[2], end[2]},
|
||||
Range{start[0], end[0]},
|
||||
Range{start[1], end[1]},
|
||||
Range{start[2], end[2]},
|
||||
}, nil
|
||||
case 4:
|
||||
return Sequence{
|
||||
&Range{start[0], end[0]},
|
||||
&Range{start[1], end[1]},
|
||||
&Range{start[2], end[2]},
|
||||
&Range{start[3], end[3]},
|
||||
Range{start[0], end[0]},
|
||||
Range{start[1], end[1]},
|
||||
Range{start[2], end[2]},
|
||||
Range{start[3], end[3]},
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@ -159,6 +177,8 @@ type Range struct {
|
|||
End byte
|
||||
}
|
||||
|
||||
var nilRange = Range{0xff, 0}
|
||||
|
||||
func (u Range) matches(b byte) bool {
|
||||
if u.Start <= b && b <= u.End {
|
||||
return true
|
||||
|
@ -178,37 +198,39 @@ type scalarRange struct {
|
|||
end rune
|
||||
}
|
||||
|
||||
var nilScalarRange = scalarRange{0xffff, 0}
|
||||
|
||||
func (s *scalarRange) String() string {
|
||||
return fmt.Sprintf("ScalarRange(%d,%d)", s.start, s.end)
|
||||
}
|
||||
|
||||
// split this scalar range if it overlaps with a surrogate codepoint
|
||||
func (s *scalarRange) split() (*scalarRange, *scalarRange) {
|
||||
func (s *scalarRange) split() (scalarRange, scalarRange) {
|
||||
if s.start < 0xe000 && s.end > 0xd7ff {
|
||||
return &scalarRange{
|
||||
return scalarRange{
|
||||
start: s.start,
|
||||
end: 0xd7ff,
|
||||
},
|
||||
&scalarRange{
|
||||
scalarRange{
|
||||
start: 0xe000,
|
||||
end: s.end,
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
return nilScalarRange, nilScalarRange
|
||||
}
|
||||
|
||||
func (s *scalarRange) valid() bool {
|
||||
return s.start <= s.end
|
||||
}
|
||||
|
||||
func (s *scalarRange) ascii() *Range {
|
||||
func (s *scalarRange) ascii() Range {
|
||||
if s.valid() && s.end <= 0x7f {
|
||||
return &Range{
|
||||
return Range{
|
||||
Start: byte(s.start),
|
||||
End: byte(s.end),
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return nilRange
|
||||
}
|
||||
|
||||
// start and end MUST have capacity for utf8.UTFMax bytes
|
||||
|
@ -218,16 +240,16 @@ func (s *scalarRange) encode(start, end []byte) (int, int) {
|
|||
return n, m
|
||||
}
|
||||
|
||||
type rangeStack []*scalarRange
|
||||
type RangeStack []scalarRange
|
||||
|
||||
func (s rangeStack) Push(v *scalarRange) rangeStack {
|
||||
func (s RangeStack) Push(v scalarRange) RangeStack {
|
||||
return append(s, v)
|
||||
}
|
||||
|
||||
func (s rangeStack) Pop() (rangeStack, *scalarRange) {
|
||||
func (s RangeStack) Pop() (RangeStack, scalarRange) {
|
||||
l := len(s)
|
||||
if l < 1 {
|
||||
return s, nil
|
||||
return s, nilScalarRange
|
||||
}
|
||||
return s[:l-1], s[l-1]
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue