deps: update and fix chardet import (#9351)
This commit is contained in:
parent
74179d1b5e
commit
81a52442a1
16 changed files with 9 additions and 10 deletions
83
vendor/github.com/gogs/chardet/recognizer.go
generated
vendored
Normal file
83
vendor/github.com/gogs/chardet/recognizer.go
generated
vendored
Normal file
|
@ -0,0 +1,83 @@
|
|||
package chardet
|
||||
|
||||
type recognizer interface {
|
||||
Match(*recognizerInput) recognizerOutput
|
||||
}
|
||||
|
||||
type recognizerOutput Result
|
||||
|
||||
type recognizerInput struct {
|
||||
raw []byte
|
||||
input []byte
|
||||
tagStripped bool
|
||||
byteStats []int
|
||||
hasC1Bytes bool
|
||||
}
|
||||
|
||||
func newRecognizerInput(raw []byte, stripTag bool) *recognizerInput {
|
||||
input, stripped := mayStripInput(raw, stripTag)
|
||||
byteStats := computeByteStats(input)
|
||||
return &recognizerInput{
|
||||
raw: raw,
|
||||
input: input,
|
||||
tagStripped: stripped,
|
||||
byteStats: byteStats,
|
||||
hasC1Bytes: computeHasC1Bytes(byteStats),
|
||||
}
|
||||
}
|
||||
|
||||
func mayStripInput(raw []byte, stripTag bool) (out []byte, stripped bool) {
|
||||
const inputBufferSize = 8192
|
||||
out = make([]byte, 0, inputBufferSize)
|
||||
var badTags, openTags int32
|
||||
var inMarkup bool = false
|
||||
stripped = false
|
||||
if stripTag {
|
||||
stripped = true
|
||||
for _, c := range raw {
|
||||
if c == '<' {
|
||||
if inMarkup {
|
||||
badTags += 1
|
||||
}
|
||||
inMarkup = true
|
||||
openTags += 1
|
||||
}
|
||||
if !inMarkup {
|
||||
out = append(out, c)
|
||||
if len(out) >= inputBufferSize {
|
||||
break
|
||||
}
|
||||
}
|
||||
if c == '>' {
|
||||
inMarkup = false
|
||||
}
|
||||
}
|
||||
}
|
||||
if openTags < 5 || openTags/5 < badTags || (len(out) < 100 && len(raw) > 600) {
|
||||
limit := len(raw)
|
||||
if limit > inputBufferSize {
|
||||
limit = inputBufferSize
|
||||
}
|
||||
out = make([]byte, limit)
|
||||
copy(out, raw[:limit])
|
||||
stripped = false
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func computeByteStats(input []byte) []int {
|
||||
r := make([]int, 256)
|
||||
for _, c := range input {
|
||||
r[c] += 1
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
func computeHasC1Bytes(byteStats []int) bool {
|
||||
for _, count := range byteStats[0x80 : 0x9F+1] {
|
||||
if count > 0 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue