Migrate to dep (#3972)
* Update makefile to use dep
* Migrate to dep
* Fix some deps
* Try to find a better version for golang.org/x/net
* Try to find a better version for golang.org/x/oauth2
This commit is contained in:
parent
d7fd9bf7bb
commit
3f3383dc0a
281 changed files with 12024 additions and 32676 deletions
285
vendor/github.com/blevesearch/segment/segment_words.rl
generated
vendored
285
vendor/github.com/blevesearch/segment/segment_words.rl
generated
vendored
|
@ -1,285 +0,0 @@
|
|||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
// +build BUILDTAGS
|
||||
|
||||
package segment
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// RagelFlags holds the literal placeholder "RAGELFLAGS" in this template.
// NOTE(review): presumably a build step substitutes the flags Ragel was invoked
// with (compare the BUILDTAGS placeholder in the build constraint above) —
// confirm against the generator script.
var RagelFlags = "RAGELFLAGS"
|
||||
|
||||
// ParseError is the error segmentWords returns when the scanner stops in a
// state below s_first_final, i.e. in a non-final state.
var ParseError = fmt.Errorf("unicode word segmentation parse error")
|
||||
|
||||
// Word types. segmentWords appends one of these values to its types result for
// every token it emits; the numeric values come from iota, so their order is
// part of the package interface.
const (
|
||||
// None (0) is appended by the finishNoneToken action (regional indicators,
// CR/LF/newline, extend/format runs and any other single code point).
None = iota
|
||||
// Number is appended by the finishNumericToken action.
Number
|
||||
// Letter is appended by the finishWordToken and finishHangulToken actions.
Letter
|
||||
// Kana is declared here but is not appended by any action in segmentWords.
Kana
|
||||
// Ideo is appended by the finishHanToken, finishHiraganaToken and
// finishKatakanaToken actions.
Ideo
|
||||
)
|
||||
|
||||
%%{
|
||||
machine s;
|
||||
write data;
|
||||
}%%
|
||||
|
||||
// segmentWords runs the Ragel scanner defined below over data and appends every
// token it emits to val and the matching word type (None, Number, Letter or
// Ideo) to types.
//
// Parameters:
//   - data: the bytes to scan. Emitted tokens are sub-slices of data
//     (data[startPos:endPos+1]), not copies.
//   - maxTokens: when > 0, the function returns as soon as len(val) reaches it.
//     It is also used as the initial capacity when val or types is nil; a
//     negative value selects a capacity of 1000 instead.
//   - atEOF: whether data ends the input. When false, the numeric and word
//     actions return without emitting, and the other actions return without
//     emitting a token that ends exactly at the end of data.
//   - val, types: optional slices to append to; allocated here when nil.
//
// Returns the (possibly grown) val and types slices, the byte offset just past
// the last emitted token (0 if none was emitted), and ParseError when the
// scanner ends in a non-final state, otherwise nil.
func segmentWords(data []byte, maxTokens int, atEOF bool, val [][]byte, types []int) ([][]byte, []int, int, error) {
|
||||
// cs, p and pe are the state, position and end-of-buffer variables used by
// the code Ragel generates for "write init" / "write exec".
cs, p, pe := 0, 0, len(data)
|
||||
// NOTE: this local shadows the builtin cap for the rest of the function.
cap := maxTokens
|
||||
if cap < 0 {
|
||||
cap = 1000
|
||||
}
|
||||
if val == nil {
|
||||
val = make([][]byte, 0, cap)
|
||||
}
|
||||
if types == nil {
|
||||
types = make([]int, 0, cap)
|
||||
}
|
||||
|
||||
// added for scanner: ts, te and act are the extra variables a Ragel scanner
// (the |* ... *| construct used for main below) requires.
ts := 0
|
||||
te := 0
|
||||
act := 0
|
||||
// eof is set to pe unconditionally, independent of atEOF.
eof := pe
|
||||
_ = ts // compiler not happy
|
||||
_ = te
|
||||
_ = act
|
||||
|
||||
// our state: startPos/endPos are the inclusive byte bounds of the token
// currently being matched; totalConsumed is the offset just past the last
// token that was actually emitted.
startPos := 0
|
||||
endPos := 0
|
||||
totalConsumed := 0
|
||||
%%{
|
||||
|
||||
include SCRIPTS "ragel/uscript.rl";
|
||||
include WB "ragel/uwb.rl";
|
||||
|
||||
action startToken {
|
||||
// Record the offset of the token's first byte.
startPos = p
|
||||
}
|
||||
|
||||
action endToken {
|
||||
// Record the offset of the latest byte matched by the token; endPos is
// inclusive (tokens are sliced as data[startPos:endPos+1]).
endPos = p
|
||||
}
|
||||
|
||||
action finishNumericToken {
|
||||
// Without atEOF this action never emits: it returns what has been
// collected so far and leaves totalConsumed unchanged.
if !atEOF {
|
||||
return val, types, totalConsumed, nil
|
||||
}
|
||||
|
||||
val = append(val, data[startPos:endPos+1])
|
||||
types = append(types, Number)
|
||||
totalConsumed = endPos+1
|
||||
// Stop early once the caller's token budget is reached.
if maxTokens > 0 && len(val) >= maxTokens {
|
||||
return val, types, totalConsumed, nil
|
||||
}
|
||||
}
|
||||
|
||||
action finishHangulToken {
|
||||
// Return without emitting when the token reaches the end of the buffer
// and more input may follow, or when the bytes after the token do not
// decode as UTF-8 (DecodeRune yields RuneError with size 1).
// NOTE(review): the second case also applies when atEOF is true, so the
// token stays unconsumed — confirm callers handle this.
if endPos+1 == pe && !atEOF {
|
||||
return val, types, totalConsumed, nil
|
||||
} else if dr, size := utf8.DecodeRune(data[endPos+1:]); dr == utf8.RuneError && size == 1 {
|
||||
return val, types, totalConsumed, nil
|
||||
}
|
||||
|
||||
val = append(val, data[startPos:endPos+1])
|
||||
// Hangul tokens are reported with the Letter type.
types = append(types, Letter)
|
||||
totalConsumed = endPos+1
|
||||
if maxTokens > 0 && len(val) >= maxTokens {
|
||||
return val, types, totalConsumed, nil
|
||||
}
|
||||
}
|
||||
|
||||
action finishKatakanaToken {
|
||||
// Same end-of-buffer / undecodable-follow-up guard as finishHangulToken.
if endPos+1 == pe && !atEOF {
|
||||
return val, types, totalConsumed, nil
|
||||
} else if dr, size := utf8.DecodeRune(data[endPos+1:]); dr == utf8.RuneError && size == 1 {
|
||||
return val, types, totalConsumed, nil
|
||||
}
|
||||
|
||||
val = append(val, data[startPos:endPos+1])
|
||||
// NOTE(review): Katakana is typed Ideo here although a Kana constant is
// declared in this file — confirm this is intended.
types = append(types, Ideo)
|
||||
totalConsumed = endPos+1
|
||||
if maxTokens > 0 && len(val) >= maxTokens {
|
||||
return val, types, totalConsumed, nil
|
||||
}
|
||||
}
|
||||
|
||||
action finishWordToken {
|
||||
// Like finishNumericToken: nothing is emitted unless atEOF is true.
if !atEOF {
|
||||
return val, types, totalConsumed, nil
|
||||
}
|
||||
val = append(val, data[startPos:endPos+1])
|
||||
types = append(types, Letter)
|
||||
totalConsumed = endPos+1
|
||||
if maxTokens > 0 && len(val) >= maxTokens {
|
||||
return val, types, totalConsumed, nil
|
||||
}
|
||||
}
|
||||
|
||||
action finishHanToken {
|
||||
// Same end-of-buffer / undecodable-follow-up guard as finishHangulToken.
if endPos+1 == pe && !atEOF {
|
||||
return val, types, totalConsumed, nil
|
||||
} else if dr, size := utf8.DecodeRune(data[endPos+1:]); dr == utf8.RuneError && size == 1 {
|
||||
return val, types, totalConsumed, nil
|
||||
}
|
||||
|
||||
val = append(val, data[startPos:endPos+1])
|
||||
types = append(types, Ideo)
|
||||
totalConsumed = endPos+1
|
||||
if maxTokens > 0 && len(val) >= maxTokens {
|
||||
return val, types, totalConsumed, nil
|
||||
}
|
||||
}
|
||||
|
||||
action finishHiraganaToken {
|
||||
// Same end-of-buffer / undecodable-follow-up guard as finishHangulToken.
if endPos+1 == pe && !atEOF {
|
||||
return val, types, totalConsumed, nil
|
||||
} else if dr, size := utf8.DecodeRune(data[endPos+1:]); dr == utf8.RuneError && size == 1 {
|
||||
return val, types, totalConsumed, nil
|
||||
}
|
||||
|
||||
val = append(val, data[startPos:endPos+1])
|
||||
// Hiragana tokens are reported with the Ideo type.
types = append(types, Ideo)
|
||||
totalConsumed = endPos+1
|
||||
if maxTokens > 0 && len(val) >= maxTokens {
|
||||
return val, types, totalConsumed, nil
|
||||
}
|
||||
}
|
||||
|
||||
action finishNoneToken {
|
||||
// Walk rune by rune from startPos until past endPos, so that endPos ends
// up on the last byte of the rune containing it, then move the scanner
// position p there as well.
lastPos := startPos
|
||||
for lastPos <= endPos {
|
||||
_, size := utf8.DecodeRune(data[lastPos:])
|
||||
lastPos += size
|
||||
}
|
||||
endPos = lastPos -1
|
||||
p = endPos
|
||||
|
||||
// Same end-of-buffer / undecodable-follow-up guard as the other actions.
if endPos+1 == pe && !atEOF {
|
||||
return val, types, totalConsumed, nil
|
||||
} else if dr, size := utf8.DecodeRune(data[endPos+1:]); dr == utf8.RuneError && size == 1 {
|
||||
return val, types, totalConsumed, nil
|
||||
}
|
||||
// otherwise, consume this as well
|
||||
val = append(val, data[startPos:endPos+1])
|
||||
types = append(types, None)
|
||||
totalConsumed = endPos+1
|
||||
if maxTokens > 0 && len(val) >= maxTokens {
|
||||
return val, types, totalConsumed, nil
|
||||
}
|
||||
}
|
||||
|
||||
HangulEx = Hangul ( Extend | Format )*;
|
||||
HebrewOrALetterEx = ( Hebrew_Letter | ALetter ) ( Extend | Format )*;
|
||||
NumericEx = Numeric ( Extend | Format )*;
|
||||
KatakanaEx = Katakana ( Extend | Format )*;
|
||||
MidLetterEx = ( MidLetter | MidNumLet | Single_Quote ) ( Extend | Format )*;
|
||||
MidNumericEx = ( MidNum | MidNumLet | Single_Quote ) ( Extend | Format )*;
|
||||
ExtendNumLetEx = ExtendNumLet ( Extend | Format )*;
|
||||
HanEx = Han ( Extend | Format )*;
|
||||
HiraganaEx = Hiragana ( Extend | Format )*;
|
||||
SingleQuoteEx = Single_Quote ( Extend | Format )*;
|
||||
DoubleQuoteEx = Double_Quote ( Extend | Format )*;
|
||||
HebrewLetterEx = Hebrew_Letter ( Extend | Format )*;
|
||||
RegionalIndicatorEx = Regional_Indicator ( Extend | Format )*;
|
||||
NLCRLF = Newline | CR | LF;
|
||||
OtherEx = ^(NLCRLF) ( Extend | Format )* ;
|
||||
|
||||
# UAX#29 WB8. Numeric × Numeric
|
||||
# WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
|
||||
# WB12. Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
|
||||
# WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
|
||||
# WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
|
||||
#
|
||||
WordNumeric = ( ( ExtendNumLetEx )* NumericEx ( ( ( ExtendNumLetEx )* | MidNumericEx ) NumericEx )* ( ExtendNumLetEx )* ) >startToken @endToken;
|
||||
|
||||
# subset of the below for typing purposes only!
|
||||
WordHangul = ( HangulEx )+ >startToken @endToken;
|
||||
WordKatakana = ( KatakanaEx )+ >startToken @endToken;
|
||||
|
||||
# UAX#29 WB5. (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
|
||||
# WB6. (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
|
||||
# WB7. (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter)
|
||||
# WB7a. Hebrew_Letter × Single_Quote
|
||||
# WB7b. Hebrew_Letter × Double_Quote Hebrew_Letter
|
||||
# WB7c. Hebrew_Letter Double_Quote × Hebrew_Letter
|
||||
# WB9. (ALetter | Hebrew_Letter) × Numeric
|
||||
# WB10. Numeric × (ALetter | Hebrew_Letter)
|
||||
# WB13. Katakana × Katakana
|
||||
# WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
|
||||
# WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
|
||||
#
|
||||
# Marty -deviated here to allow for (ExtendNumLetEx x ExtendNumLetEx) part of 13a
|
||||
#
|
||||
Word = ( ( ExtendNumLetEx )* ( KatakanaEx ( ( ExtendNumLetEx )* KatakanaEx )*
|
||||
| ( HebrewLetterEx ( SingleQuoteEx | DoubleQuoteEx HebrewLetterEx )
|
||||
| NumericEx ( ( ( ExtendNumLetEx )* | MidNumericEx ) NumericEx )*
|
||||
| HebrewOrALetterEx ( ( ( ExtendNumLetEx )* | MidLetterEx ) HebrewOrALetterEx )*
|
||||
|ExtendNumLetEx
|
||||
)+
|
||||
)
|
||||
(
|
||||
( ExtendNumLetEx )+ ( KatakanaEx ( ( ExtendNumLetEx )* KatakanaEx )*
|
||||
| ( HebrewLetterEx ( SingleQuoteEx | DoubleQuoteEx HebrewLetterEx )
|
||||
| NumericEx ( ( ( ExtendNumLetEx )* | MidNumericEx ) NumericEx )*
|
||||
| HebrewOrALetterEx ( ( ( ExtendNumLetEx )* | MidLetterEx ) HebrewOrALetterEx )*
|
||||
)+
|
||||
)
|
||||
)* ExtendNumLetEx*) >startToken @endToken;
|
||||
|
||||
# UAX#29 WB14. Any ÷ Any
|
||||
WordHan = HanEx >startToken @endToken;
|
||||
WordHiragana = HiraganaEx >startToken @endToken;
|
||||
|
||||
WordExt = ( ( Extend | Format )* ) >startToken @endToken; # maybe plus not star
|
||||
|
||||
WordCRLF = (CR LF) >startToken @endToken;
|
||||
|
||||
WordCR = CR >startToken @endToken;
|
||||
|
||||
WordLF = LF >startToken @endToken;
|
||||
|
||||
WordNL = Newline >startToken @endToken;
|
||||
|
||||
WordRegional = (RegionalIndicatorEx+) >startToken @endToken;
|
||||
|
||||
Other = OtherEx >startToken @endToken;
|
||||
|
||||
main := |*
|
||||
WordNumeric => finishNumericToken;
|
||||
WordHangul => finishHangulToken;
|
||||
WordKatakana => finishKatakanaToken;
|
||||
Word => finishWordToken;
|
||||
WordHan => finishHanToken;
|
||||
WordHiragana => finishHiraganaToken;
|
||||
WordRegional =>finishNoneToken;
|
||||
WordCRLF => finishNoneToken;
|
||||
WordCR => finishNoneToken;
|
||||
WordLF => finishNoneToken;
|
||||
WordNL => finishNoneToken;
|
||||
WordExt => finishNoneToken;
|
||||
Other => finishNoneToken;
|
||||
*|;
|
||||
|
||||
write init;
|
||||
write exec;
|
||||
}%%
|
||||
|
||||
// A final state below s_first_final means the scanner did not end in an
// accepting state; report that as ParseError together with whatever was
// collected so far.
if cs < s_first_final {
|
||||
return val, types, totalConsumed, ParseError
|
||||
}
|
||||
|
||||
return val, types, totalConsumed, nil
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue