Update to last common bleve (#3986)
This commit is contained in:
parent
1b7cd3d0b0
commit
917b9641ec
184 changed files with 39576 additions and 121 deletions
316
vendor/github.com/couchbase/vellum/regexp/compile.go
generated
vendored
Normal file
316
vendor/github.com/couchbase/vellum/regexp/compile.go
generated
vendored
Normal file
|
@ -0,0 +1,316 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package regexp
|
||||
|
||||
import (
|
||||
"regexp/syntax"
|
||||
"unicode"
|
||||
|
||||
"github.com/couchbase/vellum/utf8"
|
||||
)
|
||||
|
||||
type compiler struct {
|
||||
sizeLimit uint
|
||||
insts prog
|
||||
}
|
||||
|
||||
func newCompiler(sizeLimit uint) *compiler {
|
||||
return &compiler{
|
||||
sizeLimit: sizeLimit,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *compiler) compile(ast *syntax.Regexp) (prog, error) {
|
||||
err := c.c(ast)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c.insts = append(c.insts, &inst{
|
||||
op: OpMatch,
|
||||
})
|
||||
return c.insts, nil
|
||||
}
|
||||
|
||||
func (c *compiler) c(ast *syntax.Regexp) error {
|
||||
if ast.Flags&syntax.NonGreedy > 1 {
|
||||
return ErrNoLazy
|
||||
}
|
||||
|
||||
switch ast.Op {
|
||||
case syntax.OpEndLine, syntax.OpBeginLine,
|
||||
syntax.OpBeginText, syntax.OpEndText:
|
||||
return ErrNoEmpty
|
||||
case syntax.OpWordBoundary, syntax.OpNoWordBoundary:
|
||||
return ErrNoWordBoundary
|
||||
case syntax.OpEmptyMatch:
|
||||
return nil
|
||||
case syntax.OpLiteral:
|
||||
for _, r := range ast.Rune {
|
||||
if ast.Flags&syntax.FoldCase > 0 {
|
||||
next := syntax.Regexp{
|
||||
Op: syntax.OpCharClass,
|
||||
Flags: ast.Flags & syntax.FoldCase,
|
||||
Rune0: [2]rune{r, r},
|
||||
}
|
||||
next.Rune = next.Rune0[0:2]
|
||||
return c.c(&next)
|
||||
}
|
||||
seqs, err := utf8.NewSequences(r, r)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, seq := range seqs {
|
||||
c.compileUtf8Ranges(seq)
|
||||
}
|
||||
}
|
||||
case syntax.OpAnyChar:
|
||||
next := syntax.Regexp{
|
||||
Op: syntax.OpCharClass,
|
||||
Flags: ast.Flags & syntax.FoldCase,
|
||||
Rune0: [2]rune{0, unicode.MaxRune},
|
||||
}
|
||||
next.Rune = next.Rune0[:2]
|
||||
return c.c(&next)
|
||||
case syntax.OpAnyCharNotNL:
|
||||
next := syntax.Regexp{
|
||||
Op: syntax.OpCharClass,
|
||||
Flags: ast.Flags & syntax.FoldCase,
|
||||
Rune: []rune{0, 0x09, 0x0B, unicode.MaxRune},
|
||||
}
|
||||
return c.c(&next)
|
||||
case syntax.OpCharClass:
|
||||
return c.compileClass(ast)
|
||||
case syntax.OpCapture:
|
||||
return c.c(ast.Sub[0])
|
||||
case syntax.OpConcat:
|
||||
for _, sub := range ast.Sub {
|
||||
err := c.c(sub)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
case syntax.OpAlternate:
|
||||
if len(ast.Sub) == 0 {
|
||||
return nil
|
||||
}
|
||||
jmpsToEnd := []uint{}
|
||||
|
||||
// does not handle last entry
|
||||
for i := 0; i < len(ast.Sub)-1; i++ {
|
||||
sub := ast.Sub[i]
|
||||
split := c.emptySplit()
|
||||
j1 := c.top()
|
||||
err := c.c(sub)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
jmpsToEnd = append(jmpsToEnd, c.emptyJump())
|
||||
j2 := c.top()
|
||||
c.setSplit(split, j1, j2)
|
||||
}
|
||||
// handle last entry
|
||||
err := c.c(ast.Sub[len(ast.Sub)-1])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
end := uint(len(c.insts))
|
||||
for _, jmpToEnd := range jmpsToEnd {
|
||||
c.setJump(jmpToEnd, end)
|
||||
}
|
||||
case syntax.OpQuest:
|
||||
split := c.emptySplit()
|
||||
j1 := c.top()
|
||||
err := c.c(ast.Sub[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
j2 := c.top()
|
||||
c.setSplit(split, j1, j2)
|
||||
|
||||
case syntax.OpStar:
|
||||
j1 := c.top()
|
||||
split := c.emptySplit()
|
||||
j2 := c.top()
|
||||
err := c.c(ast.Sub[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
jmp := c.emptyJump()
|
||||
j3 := uint(len(c.insts))
|
||||
|
||||
c.setJump(jmp, j1)
|
||||
c.setSplit(split, j2, j3)
|
||||
|
||||
case syntax.OpPlus:
|
||||
j1 := c.top()
|
||||
err := c.c(ast.Sub[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
split := c.emptySplit()
|
||||
j2 := c.top()
|
||||
c.setSplit(split, j1, j2)
|
||||
|
||||
case syntax.OpRepeat:
|
||||
if ast.Max == -1 {
|
||||
for i := 0; i < ast.Min; i++ {
|
||||
err := c.c(ast.Sub[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
next := syntax.Regexp{
|
||||
Op: syntax.OpStar,
|
||||
Flags: ast.Flags,
|
||||
Sub: ast.Sub,
|
||||
Sub0: ast.Sub0,
|
||||
Rune: ast.Rune,
|
||||
Rune0: ast.Rune0,
|
||||
}
|
||||
return c.c(&next)
|
||||
}
|
||||
for i := 0; i < ast.Min; i++ {
|
||||
err := c.c(ast.Sub[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
var splits, starts []uint
|
||||
for i := ast.Min; i < ast.Max; i++ {
|
||||
splits = append(splits, c.emptySplit())
|
||||
starts = append(starts, uint(len(c.insts)))
|
||||
err := c.c(ast.Sub[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
end := uint(len(c.insts))
|
||||
for i := 0; i < len(splits); i++ {
|
||||
c.setSplit(splits[i], starts[i], end)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return c.checkSize()
|
||||
}
|
||||
|
||||
func (c *compiler) checkSize() error {
|
||||
if uint(len(c.insts)*instSize) > c.sizeLimit {
|
||||
return ErrCompiledTooBig
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *compiler) compileClass(ast *syntax.Regexp) error {
|
||||
if len(ast.Rune) == 0 {
|
||||
return nil
|
||||
}
|
||||
var jmps []uint
|
||||
|
||||
// does not do last pair
|
||||
for i := 0; i < len(ast.Rune)-2; i += 2 {
|
||||
rstart := ast.Rune[i]
|
||||
rend := ast.Rune[i+1]
|
||||
|
||||
split := c.emptySplit()
|
||||
j1 := c.top()
|
||||
err := c.compileClassRange(rstart, rend)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
jmps = append(jmps, c.emptyJump())
|
||||
j2 := c.top()
|
||||
c.setSplit(split, j1, j2)
|
||||
}
|
||||
// handle last pair
|
||||
rstart := ast.Rune[len(ast.Rune)-2]
|
||||
rend := ast.Rune[len(ast.Rune)-1]
|
||||
err := c.compileClassRange(rstart, rend)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
end := c.top()
|
||||
for _, jmp := range jmps {
|
||||
c.setJump(jmp, end)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *compiler) compileClassRange(startR, endR rune) error {
|
||||
seqs, err := utf8.NewSequences(startR, endR)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var jmps []uint
|
||||
|
||||
// does not do last entry
|
||||
for i := 0; i < len(seqs)-1; i++ {
|
||||
seq := seqs[i]
|
||||
split := c.emptySplit()
|
||||
j1 := c.top()
|
||||
c.compileUtf8Ranges(seq)
|
||||
jmps = append(jmps, c.emptyJump())
|
||||
j2 := c.top()
|
||||
c.setSplit(split, j1, j2)
|
||||
}
|
||||
// handle last entry
|
||||
c.compileUtf8Ranges(seqs[len(seqs)-1])
|
||||
end := c.top()
|
||||
for _, jmp := range jmps {
|
||||
c.setJump(jmp, end)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *compiler) compileUtf8Ranges(seq utf8.Sequence) {
|
||||
for _, r := range seq {
|
||||
c.insts = append(c.insts, &inst{
|
||||
op: OpRange,
|
||||
rangeStart: r.Start,
|
||||
rangeEnd: r.End,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func (c *compiler) emptySplit() uint {
|
||||
c.insts = append(c.insts, &inst{
|
||||
op: OpSplit,
|
||||
})
|
||||
return c.top() - 1
|
||||
}
|
||||
|
||||
func (c *compiler) emptyJump() uint {
|
||||
c.insts = append(c.insts, &inst{
|
||||
op: OpJmp,
|
||||
})
|
||||
return c.top() - 1
|
||||
}
|
||||
|
||||
func (c *compiler) setSplit(i, pc1, pc2 uint) {
|
||||
split := c.insts[i]
|
||||
split.splitA = pc1
|
||||
split.splitB = pc2
|
||||
}
|
||||
|
||||
func (c *compiler) setJump(i, pc uint) {
|
||||
jmp := c.insts[i]
|
||||
jmp.to = pc
|
||||
}
|
||||
|
||||
func (c *compiler) top() uint {
|
||||
return uint(len(c.insts))
|
||||
}
|
188
vendor/github.com/couchbase/vellum/regexp/dfa.go
generated
vendored
Normal file
188
vendor/github.com/couchbase/vellum/regexp/dfa.go
generated
vendored
Normal file
|
@ -0,0 +1,188 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package regexp
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// StateLimit is the maximum number of states allowed
|
||||
const StateLimit = 10000
|
||||
|
||||
// ErrTooManyStates is returned if you attempt to build a Levenshtein
|
||||
// automaton which requries too many states.
|
||||
var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states",
|
||||
StateLimit)
|
||||
|
||||
type dfaBuilder struct {
|
||||
dfa *dfa
|
||||
cache map[string]int
|
||||
keyBuf []byte
|
||||
}
|
||||
|
||||
func newDfaBuilder(insts prog) *dfaBuilder {
|
||||
d := &dfaBuilder{
|
||||
dfa: &dfa{
|
||||
insts: insts,
|
||||
states: make([]*state, 0, 16),
|
||||
},
|
||||
cache: make(map[string]int, 1024),
|
||||
}
|
||||
// add 0 state that is invalid
|
||||
d.dfa.states = append(d.dfa.states, &state{
|
||||
next: make([]int, 256),
|
||||
match: false,
|
||||
})
|
||||
return d
|
||||
}
|
||||
|
||||
func (d *dfaBuilder) build() (*dfa, error) {
|
||||
cur := newSparseSet(uint(len(d.dfa.insts)))
|
||||
next := newSparseSet(uint(len(d.dfa.insts)))
|
||||
|
||||
d.dfa.add(cur, 0)
|
||||
states := intStack{d.cachedState(cur)}
|
||||
seen := make(map[int]struct{})
|
||||
var s int
|
||||
states, s = states.Pop()
|
||||
for s != 0 {
|
||||
for b := 0; b < 256; b++ {
|
||||
ns := d.runState(cur, next, s, byte(b))
|
||||
if ns != 0 {
|
||||
if _, ok := seen[ns]; !ok {
|
||||
seen[ns] = struct{}{}
|
||||
states = states.Push(ns)
|
||||
}
|
||||
}
|
||||
if len(d.dfa.states) > StateLimit {
|
||||
return nil, ErrTooManyStates
|
||||
}
|
||||
}
|
||||
states, s = states.Pop()
|
||||
}
|
||||
return d.dfa, nil
|
||||
}
|
||||
|
||||
func (d *dfaBuilder) runState(cur, next *sparseSet, state int, b byte) int {
|
||||
cur.Clear()
|
||||
for _, ip := range d.dfa.states[state].insts {
|
||||
cur.Add(ip)
|
||||
}
|
||||
d.dfa.run(cur, next, b)
|
||||
nextState := d.cachedState(next)
|
||||
d.dfa.states[state].next[b] = nextState
|
||||
return nextState
|
||||
}
|
||||
|
||||
func instsKey(insts []uint, buf []byte) []byte {
|
||||
if cap(buf) < 8*len(insts) {
|
||||
buf = make([]byte, 8*len(insts))
|
||||
} else {
|
||||
buf = buf[0 : 8*len(insts)]
|
||||
}
|
||||
for i, inst := range insts {
|
||||
binary.LittleEndian.PutUint64(buf[i*8:], uint64(inst))
|
||||
}
|
||||
return buf
|
||||
}
|
||||
|
||||
func (d *dfaBuilder) cachedState(set *sparseSet) int {
|
||||
var insts []uint
|
||||
var isMatch bool
|
||||
for i := uint(0); i < uint(set.Len()); i++ {
|
||||
ip := set.Get(i)
|
||||
switch d.dfa.insts[ip].op {
|
||||
case OpRange:
|
||||
insts = append(insts, ip)
|
||||
case OpMatch:
|
||||
isMatch = true
|
||||
insts = append(insts, ip)
|
||||
}
|
||||
}
|
||||
if len(insts) == 0 {
|
||||
return 0
|
||||
}
|
||||
d.keyBuf = instsKey(insts, d.keyBuf)
|
||||
v, ok := d.cache[string(d.keyBuf)]
|
||||
if ok {
|
||||
return v
|
||||
}
|
||||
d.dfa.states = append(d.dfa.states, &state{
|
||||
insts: insts,
|
||||
next: make([]int, 256),
|
||||
match: isMatch,
|
||||
})
|
||||
newV := len(d.dfa.states) - 1
|
||||
d.cache[string(d.keyBuf)] = newV
|
||||
return newV
|
||||
}
|
||||
|
||||
type dfa struct {
|
||||
insts prog
|
||||
states []*state
|
||||
}
|
||||
|
||||
func (d *dfa) add(set *sparseSet, ip uint) {
|
||||
if set.Contains(ip) {
|
||||
return
|
||||
}
|
||||
set.Add(ip)
|
||||
switch d.insts[ip].op {
|
||||
case OpJmp:
|
||||
d.add(set, d.insts[ip].to)
|
||||
case OpSplit:
|
||||
d.add(set, d.insts[ip].splitA)
|
||||
d.add(set, d.insts[ip].splitB)
|
||||
}
|
||||
}
|
||||
|
||||
func (d *dfa) run(from, to *sparseSet, b byte) bool {
|
||||
to.Clear()
|
||||
var isMatch bool
|
||||
for i := uint(0); i < uint(from.Len()); i++ {
|
||||
ip := from.Get(i)
|
||||
switch d.insts[ip].op {
|
||||
case OpMatch:
|
||||
isMatch = true
|
||||
case OpRange:
|
||||
if d.insts[ip].rangeStart <= b &&
|
||||
b <= d.insts[ip].rangeEnd {
|
||||
d.add(to, ip+1)
|
||||
}
|
||||
}
|
||||
}
|
||||
return isMatch
|
||||
}
|
||||
|
||||
type state struct {
|
||||
insts []uint
|
||||
next []int
|
||||
match bool
|
||||
}
|
||||
|
||||
type intStack []int
|
||||
|
||||
func (s intStack) Push(v int) intStack {
|
||||
return append(s, v)
|
||||
}
|
||||
|
||||
func (s intStack) Pop() (intStack, int) {
|
||||
l := len(s)
|
||||
if l < 1 {
|
||||
return s, 0
|
||||
}
|
||||
return s[:l-1], s[l-1]
|
||||
}
|
62
vendor/github.com/couchbase/vellum/regexp/inst.go
generated
vendored
Normal file
62
vendor/github.com/couchbase/vellum/regexp/inst.go
generated
vendored
Normal file
|
@ -0,0 +1,62 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package regexp
|
||||
|
||||
import "fmt"
|
||||
|
||||
// instOp represents a instruction operation
|
||||
type instOp int
|
||||
|
||||
// the enumeration of operations
|
||||
const (
|
||||
OpMatch instOp = iota
|
||||
OpJmp
|
||||
OpSplit
|
||||
OpRange
|
||||
)
|
||||
|
||||
// instSize is the approxmiate size of the an inst struct in bytes
|
||||
const instSize = 40
|
||||
|
||||
type inst struct {
|
||||
op instOp
|
||||
to uint
|
||||
splitA uint
|
||||
splitB uint
|
||||
rangeStart byte
|
||||
rangeEnd byte
|
||||
}
|
||||
|
||||
func (i *inst) String() string {
|
||||
switch i.op {
|
||||
case OpJmp:
|
||||
return fmt.Sprintf("JMP: %d", i.to)
|
||||
case OpSplit:
|
||||
return fmt.Sprintf("SPLIT: %d - %d", i.splitA, i.splitB)
|
||||
case OpRange:
|
||||
return fmt.Sprintf("RANGE: %x - %x", i.rangeStart, i.rangeEnd)
|
||||
}
|
||||
return "MATCH"
|
||||
}
|
||||
|
||||
type prog []*inst
|
||||
|
||||
func (p prog) String() string {
|
||||
rv := "\n"
|
||||
for i, pi := range p {
|
||||
rv += fmt.Sprintf("%d %v\n", i, pi)
|
||||
}
|
||||
return rv
|
||||
}
|
113
vendor/github.com/couchbase/vellum/regexp/regexp.go
generated
vendored
Normal file
113
vendor/github.com/couchbase/vellum/regexp/regexp.go
generated
vendored
Normal file
|
@ -0,0 +1,113 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package regexp
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp/syntax"
|
||||
)
|
||||
|
||||
// ErrNoEmpty returned when "zero width assertions" are used
|
||||
var ErrNoEmpty = fmt.Errorf("zero width assertions not allowed")
|
||||
|
||||
// ErrNoWordBoundary returned when word boundaries are used
|
||||
var ErrNoWordBoundary = fmt.Errorf("word boundaries are not allowed")
|
||||
|
||||
// ErrNoBytes returned when byte literals are used
|
||||
var ErrNoBytes = fmt.Errorf("byte literals are not allowed")
|
||||
|
||||
// ErrNoLazy returned when lazy quantifiers are used
|
||||
var ErrNoLazy = fmt.Errorf("lazy quantifiers are not allowed")
|
||||
|
||||
// ErrCompiledTooBig returned when regular expression parses into
|
||||
// too many instructions
|
||||
var ErrCompiledTooBig = fmt.Errorf("too many instructions")
|
||||
|
||||
// Regexp implements the vellum.Automaton interface for matcing a user
|
||||
// specified regular expression.
|
||||
type Regexp struct {
|
||||
orig string
|
||||
dfa *dfa
|
||||
}
|
||||
|
||||
// NewRegexp creates a new Regular Expression automaton with the specified
|
||||
// expression. By default it is limited to approximately 10MB for the
|
||||
// compiled finite state automaton. If this size is exceeded,
|
||||
// ErrCompiledTooBig will be returned.
|
||||
func New(expr string) (*Regexp, error) {
|
||||
return NewWithLimit(expr, 10*(1<<20))
|
||||
}
|
||||
|
||||
// NewRegexpWithLimit creates a new Regular Expression automaton with
|
||||
// the specified expression. The size of the compiled finite state
|
||||
// automaton exceeds the user specified size, ErrCompiledTooBig will be
|
||||
// returned.
|
||||
func NewWithLimit(expr string, size uint) (*Regexp, error) {
|
||||
parsed, err := syntax.Parse(expr, syntax.Perl)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
compiler := newCompiler(size)
|
||||
insts, err := compiler.compile(parsed)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dfaBuilder := newDfaBuilder(insts)
|
||||
dfa, err := dfaBuilder.build()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &Regexp{
|
||||
orig: expr,
|
||||
dfa: dfa,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Start returns the start state of this automaton.
|
||||
func (r *Regexp) Start() int {
|
||||
return 1
|
||||
}
|
||||
|
||||
// IsMatch returns if the specified state is a matching state.
|
||||
func (r *Regexp) IsMatch(s int) bool {
|
||||
if s < len(r.dfa.states) {
|
||||
return r.dfa.states[s].match
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// CanMatch returns if the specified state can ever transition to a matching
|
||||
// state.
|
||||
func (r *Regexp) CanMatch(s int) bool {
|
||||
if s < len(r.dfa.states) && s > 0 {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// WillAlwaysMatch returns if the specified state will always end in a
|
||||
// matching state.
|
||||
func (r *Regexp) WillAlwaysMatch(int) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// Accept returns the new state, resulting from the transite byte b
|
||||
// when currently in the state s.
|
||||
func (r *Regexp) Accept(s int, b byte) int {
|
||||
if s < len(r.dfa.states) {
|
||||
return r.dfa.states[s].next[b]
|
||||
}
|
||||
return 0
|
||||
}
|
54
vendor/github.com/couchbase/vellum/regexp/sparse.go
generated
vendored
Normal file
54
vendor/github.com/couchbase/vellum/regexp/sparse.go
generated
vendored
Normal file
|
@ -0,0 +1,54 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package regexp
|
||||
|
||||
type sparseSet struct {
|
||||
dense []uint
|
||||
sparse []uint
|
||||
size uint
|
||||
}
|
||||
|
||||
func newSparseSet(size uint) *sparseSet {
|
||||
return &sparseSet{
|
||||
dense: make([]uint, size),
|
||||
sparse: make([]uint, size),
|
||||
size: 0,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *sparseSet) Len() int {
|
||||
return int(s.size)
|
||||
}
|
||||
|
||||
func (s *sparseSet) Add(ip uint) uint {
|
||||
i := s.size
|
||||
s.dense[i] = ip
|
||||
s.sparse[ip] = i
|
||||
s.size++
|
||||
return i
|
||||
}
|
||||
|
||||
func (s *sparseSet) Get(i uint) uint {
|
||||
return s.dense[i]
|
||||
}
|
||||
|
||||
func (s *sparseSet) Contains(ip uint) bool {
|
||||
i := s.sparse[ip]
|
||||
return i < s.size && s.dense[i] == ip
|
||||
}
|
||||
|
||||
func (s *sparseSet) Clear() {
|
||||
s.size = 0
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue