update: macaron cores,gzip,session (#10522)

Co-authored-by: zeripath <art27@cantab.net>
This commit is contained in:
6543 2020-02-28 10:51:18 +01:00 committed by GitHub
parent 694f44660f
commit 8d2059a201
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
47 changed files with 2154 additions and 349 deletions

View file

@ -48,6 +48,8 @@ const (
maxHashOffset = 1 << 24
skipNever = math.MaxInt32
debugDeflate = false
)
type compressionLevel struct {
@ -59,15 +61,13 @@ type compressionLevel struct {
// See https://blog.klauspost.com/rebalancing-deflate-compression-levels/
var levels = []compressionLevel{
{}, // 0
// Level 1-4 uses specialized algorithm - values not used
// Level 1-6 uses specialized algorithm - values not used
{0, 0, 0, 0, 0, 1},
{0, 0, 0, 0, 0, 2},
{0, 0, 0, 0, 0, 3},
{0, 0, 0, 0, 0, 4},
// For levels 5-6 we don't bother trying with lazy matches.
// Lazy matching is at least 30% slower, with 1.5% increase.
{6, 0, 12, 8, 12, 5},
{8, 0, 24, 16, 16, 6},
{0, 0, 0, 0, 0, 5},
{0, 0, 0, 0, 0, 6},
// Levels 7-9 use increasingly more lazy matching
// and increasingly stringent conditions for "good enough".
{8, 8, 24, 16, skipNever, 7},
@ -203,9 +203,8 @@ func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error {
// This is much faster than doing a full encode.
// Should only be used after a start/reset.
func (d *compressor) fillWindow(b []byte) {
// Do not fill window if we are in store-only mode,
// use constant or Snappy compression.
if d.level == 0 {
// Do not fill window if we are in store-only or huffman mode.
if d.level <= 0 {
return
}
if d.fast != nil {
@ -368,7 +367,7 @@ func (d *compressor) deflateLazy() {
// Sanity enables additional runtime tests.
// It's intended to be used during development
// to supplement the currently ad-hoc unit tests.
const sanity = false
const sanity = debugDeflate
if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
return
@ -644,7 +643,7 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
d.fill = (*compressor).fillBlock
d.step = (*compressor).store
case level == ConstantCompression:
d.w.logReusePenalty = uint(4)
d.w.logNewTablePenalty = 4
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeHuff
@ -652,13 +651,13 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
level = 5
fallthrough
case level >= 1 && level <= 6:
d.w.logReusePenalty = uint(level + 1)
d.w.logNewTablePenalty = 6
d.fast = newFastEnc(level)
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeFast
case 7 <= level && level <= 9:
d.w.logReusePenalty = uint(level)
d.w.logNewTablePenalty = 10
d.state = &advancedState{}
d.compressionLevel = levels[level]
d.initDeflate()
@ -667,6 +666,7 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
default:
return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
}
d.level = level
return nil
}
@ -720,6 +720,7 @@ func (d *compressor) close() error {
return d.w.err
}
d.w.flush()
d.w.reset(nil)
return d.w.err
}
@ -750,8 +751,7 @@ func NewWriter(w io.Writer, level int) (*Writer, error) {
// can only be decompressed by a Reader initialized with the
// same dictionary.
func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
dw := &dictWriter{w}
zw, err := NewWriter(dw, level)
zw, err := NewWriter(w, level)
if err != nil {
return nil, err
}
@ -760,14 +760,6 @@ func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
return zw, err
}
type dictWriter struct {
w io.Writer
}
func (w *dictWriter) Write(b []byte) (n int, err error) {
return w.w.Write(b)
}
// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see NewWriter).
type Writer struct {
@ -805,11 +797,12 @@ func (w *Writer) Close() error {
// the result of NewWriter or NewWriterDict called with dst
// and w's level and dictionary.
func (w *Writer) Reset(dst io.Writer) {
if dw, ok := w.d.w.writer.(*dictWriter); ok {
if len(w.dict) > 0 {
// w was created with NewWriterDict
dw.w = dst
w.d.reset(dw)
w.d.fillWindow(w.dict)
w.d.reset(dst)
if dst != nil {
w.d.fillWindow(w.dict)
}
} else {
// w was created with NewWriter
w.d.reset(dst)

View file

@ -35,17 +35,17 @@ func newFastEnc(level int) fastEnc {
}
const (
tableBits = 16 // Bits used in the table
tableBits = 15 // Bits used in the table
tableSize = 1 << tableBits // Size of the table
tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
baseMatchOffset = 1 // The smallest match offset
baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5
maxMatchOffset = 1 << 15 // The largest match offset
bTableBits = 18 // Bits used in the big tables
bTableSize = 1 << bTableBits // Size of the table
allocHistory = maxMatchOffset * 10 // Size to preallocate for history.
bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize // Reset the buffer offset when reaching this.
bTableBits = 17 // Bits used in the big tables
bTableSize = 1 << bTableBits // Size of the table
allocHistory = maxStoreBlockSize * 10 // Size to preallocate for history.
bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this.
)
const (
@ -92,7 +92,6 @@ func hash(u uint32) uint32 {
}
type tableEntry struct {
val uint32
offset int32
}
@ -210,16 +209,14 @@ func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 {
// Reset the encoding table.
func (e *fastGen) Reset() {
if cap(e.hist) < int(maxMatchOffset*8) {
l := maxMatchOffset * 8
// Make it at least 1MB.
if l < 1<<20 {
l = 1 << 20
}
e.hist = make([]byte, 0, l)
if cap(e.hist) < allocHistory {
e.hist = make([]byte, 0, allocHistory)
}
// We offset current position so everything will be out of reach.
// If we are above the buffer reset it will be cleared anyway since len(hist) == 0.
if e.cur <= bufferReset {
e.cur += maxMatchOffset + int32(len(e.hist))
}
// We offset current position so everything will be out of reach
e.cur += maxMatchOffset + int32(len(e.hist))
e.hist = e.hist[:0]
}

View file

@ -0,0 +1,274 @@
// +build generate
//go:generate go run $GOFILE && gofmt -w inflate_gen.go
package main
import (
"os"
"strings"
)
func main() {
f, err := os.Create("inflate_gen.go")
if err != nil {
panic(err)
}
defer f.Close()
types := []string{"*bytes.Buffer", "*bytes.Reader", "*bufio.Reader", "*strings.Reader"}
names := []string{"BytesBuffer", "BytesReader", "BufioReader", "StringsReader"}
imports := []string{"bytes", "bufio", "io", "strings", "math/bits"}
f.WriteString(`// Code generated by go generate gen_inflate.go. DO NOT EDIT.
package flate
import (
`)
for _, imp := range imports {
f.WriteString("\t\"" + imp + "\"\n")
}
f.WriteString(")\n\n")
template := `
// Decode a single Huffman block from f.
// hl and hd are the Huffman states for the lit/length values
// and the distance values, respectively. If hd == nil, using the
// fixed distance encoding associated with fixed Huffman blocks.
func (f *decompressor) $FUNCNAME$() {
const (
stateInit = iota // Zero value must be stateInit
stateDict
)
fr := f.r.($TYPE$)
moreBits := func() error {
c, err := fr.ReadByte()
if err != nil {
return noEOF(err)
}
f.roffset++
f.b |= uint32(c) << f.nb
f.nb += 8
return nil
}
switch f.stepState {
case stateInit:
goto readLiteral
case stateDict:
goto copyHistory
}
readLiteral:
// Read literal and/or (length, distance) according to RFC section 3.2.3.
{
var v int
{
// Inlined v, err := f.huffSym(f.hl)
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for nb < n {
c, err := fr.ReadByte()
if err != nil {
f.b = b
f.nb = nb
f.err = noEOF(err)
return
}
f.roffset++
b |= uint32(c) << (nb & 31)
nb += 8
}
chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
f.b = b >> (n & 31)
f.nb = nb - n
v = int(chunk >> huffmanValueShift)
break
}
}
}
var n uint // number of bits extra
var length int
var err error
switch {
case v < 256:
f.dict.writeByte(byte(v))
if f.dict.availWrite() == 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).$FUNCNAME$
f.stepState = stateInit
return
}
goto readLiteral
case v == 256:
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
n = 0
case v < 269:
length = v*2 - (265*2 - 11)
n = 1
case v < 273:
length = v*4 - (269*4 - 19)
n = 2
case v < 277:
length = v*8 - (273*8 - 35)
n = 3
case v < 281:
length = v*16 - (277*16 - 67)
n = 4
case v < 285:
length = v*32 - (281*32 - 131)
n = 5
case v < maxNumLit:
length = 258
n = 0
default:
if debugDecode {
fmt.Println(v, ">= maxNumLit")
}
f.err = CorruptInputError(f.roffset)
return
}
if n > 0 {
for f.nb < n {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits n>0:", err)
}
f.err = err
return
}
}
length += int(f.b & uint32(1<<n-1))
f.b >>= n
f.nb -= n
}
var dist int
if f.hd == nil {
for f.nb < 5 {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
f.err = err
return
}
}
dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
f.b >>= 5
f.nb -= 5
} else {
if dist, err = f.huffSym(f.hd); err != nil {
if debugDecode {
fmt.Println("huffsym:", err)
}
f.err = err
return
}
}
switch {
case dist < 4:
dist++
case dist < maxNumDist:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << nb
for f.nb < nb {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<nb:", err)
}
f.err = err
return
}
}
extra |= int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
dist = 1<<(nb+1) + 1 + extra
default:
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
f.err = CorruptInputError(f.roffset)
return
}
// No check on length; encoding can be prescient.
if dist > f.dict.histSize() {
if debugDecode {
fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
}
f.err = CorruptInputError(f.roffset)
return
}
f.copyLen, f.copyDist = length, dist
goto copyHistory
}
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).$FUNCNAME$ // We need to continue this work
f.stepState = stateDict
return
}
goto readLiteral
}
}
`
for i, t := range types {
s := strings.Replace(template, "$FUNCNAME$", "huffman"+names[i], -1)
s = strings.Replace(s, "$TYPE$", t, -1)
f.WriteString(s)
}
f.WriteString("func (f *decompressor) huffmanBlockDecoder() func() {\n")
f.WriteString("\tswitch f.r.(type) {\n")
for i, t := range types {
f.WriteString("\t\tcase " + t + ":\n")
f.WriteString("\t\t\treturn f.huffman" + names[i] + "\n")
}
f.WriteString("\t\tdefault:\n")
f.WriteString("\t\t\treturn f.huffmanBlockGeneric")
f.WriteString("\t}\n}\n")
}

View file

@ -93,12 +93,12 @@ type huffmanBitWriter struct {
err error
lastHeader int
// Set between 0 (reused block can be up to 2x the size)
logReusePenalty uint
lastHuffMan bool
bytes [256]byte
literalFreq [lengthCodesStart + 32]uint16
offsetFreq [32]uint16
codegenFreq [codegenCodeCount]uint16
logNewTablePenalty uint
lastHuffMan bool
bytes [256]byte
literalFreq [lengthCodesStart + 32]uint16
offsetFreq [32]uint16
codegenFreq [codegenCodeCount]uint16
// codegen must have an extra space for the final symbol.
codegen [literalCount + offsetCodeCount + 1]uint8
@ -119,7 +119,7 @@ type huffmanBitWriter struct {
// If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid.
//
// An incoming block estimates the output size of a new table using a 'fresh' by calculating the
// optimal size and adding a penalty in 'logReusePenalty'.
// optimal size and adding a penalty in 'logNewTablePenalty'.
// A Huffman table is not optimal, which is why we add a penalty, and generating a new table
// is slower both for compression and decompression.
@ -135,7 +135,6 @@ func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
func (w *huffmanBitWriter) reset(writer io.Writer) {
w.writer = writer
w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
w.bytes = [256]byte{}
w.lastHeader = 0
w.lastHuffMan = false
}
@ -178,6 +177,11 @@ func (w *huffmanBitWriter) flush() {
w.nbits = 0
return
}
if w.lastHeader > 0 {
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
}
n := w.nbytes
for w.nbits != 0 {
w.bytes[n] = byte(w.bits)
@ -350,6 +354,13 @@ func (w *huffmanBitWriter) headerSize() (size, numCodegens int) {
int(w.codegenFreq[18])*7, numCodegens
}
// dynamicSize returns the size of dynamically encoded data in bits.
func (w *huffmanBitWriter) dynamicReuseSize(litEnc, offEnc *huffmanEncoder) (size int) {
size = litEnc.bitLength(w.literalFreq[:]) +
offEnc.bitLength(w.offsetFreq[:])
return size
}
// dynamicSize returns the size of dynamically encoded data in bits.
func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
header, numCodegens := w.headerSize()
@ -452,30 +463,30 @@ func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, n
i := 0
for {
var codeWord int = int(w.codegen[i])
var codeWord = uint32(w.codegen[i])
i++
if codeWord == badCode {
break
}
w.writeCode(w.codegenEncoding.codes[uint32(codeWord)])
w.writeCode(w.codegenEncoding.codes[codeWord])
switch codeWord {
case 16:
w.writeBits(int32(w.codegen[i]), 2)
i++
break
case 17:
w.writeBits(int32(w.codegen[i]), 3)
i++
break
case 18:
w.writeBits(int32(w.codegen[i]), 7)
i++
break
}
}
}
// writeStoredHeader will write a stored header.
// If the stored block is only used for EOF,
// it is replaced with a fixed huffman block.
func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
if w.err != nil {
return
@ -485,6 +496,16 @@ func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
}
// To write EOF, use a fixed encoding block. 10 bits instead of 5 bytes.
if length == 0 && isEof {
w.writeFixedHeader(isEof)
// EOB: 7 bits, value: 0
w.writeBits(0, 7)
w.flush()
return
}
var flag int32
if isEof {
flag = 1
@ -591,8 +612,8 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
tokens.AddEOB()
}
// We cannot reuse pure huffman table.
if w.lastHuffMan && w.lastHeader > 0 {
// We cannot reuse pure huffman table, and must mark as EOF.
if (w.lastHuffMan || eof) && w.lastHeader > 0 {
// We will not try to reuse.
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
@ -606,14 +627,14 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
var size int
// Check if we should reuse.
if w.lastHeader > 0 {
// Estimate size for using a new table
// Estimate size for using a new table.
// Use the previous header size as the best estimate.
newSize := w.lastHeader + tokens.EstimatedBits()
newSize += newSize >> w.logNewTablePenalty
// The estimated size is calculated as an optimal table.
// We add a penalty to make it more realistic and re-use a bit more.
newSize += newSize >> (w.logReusePenalty & 31)
extra := w.extraBitSize()
reuseSize, _ := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extra)
reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + w.extraBitSize()
// Check if a new table is better.
if newSize < reuseSize {
@ -805,21 +826,30 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
}
// Add everything as literals
estBits := histogramSize(input, w.literalFreq[:], !eof && !sync) + 15
// We have to estimate the header size.
// Assume header is around 70 bytes:
// https://stackoverflow.com/a/25454430
const guessHeaderSizeBits = 70 * 8
estBits, estExtra := histogramSize(input, w.literalFreq[:], !eof && !sync)
estBits += w.lastHeader + 15
if w.lastHeader == 0 {
estBits += guessHeaderSizeBits
}
estBits += estBits >> w.logNewTablePenalty
// Store bytes, if we don't get a reasonable improvement.
ssize, storable := w.storedSize(input)
if storable && ssize < (estBits+estBits>>4) {
if storable && ssize < estBits {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}
if w.lastHeader > 0 {
size, _ := w.dynamicSize(w.literalEncoding, huffOffset, w.lastHeader)
estBits += estBits >> (w.logReusePenalty)
reuseSize := w.literalEncoding.bitLength(w.literalFreq[:256])
estBits += estExtra
if estBits < size {
if estBits < reuseSize {
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0

View file

@ -7,7 +7,6 @@ package flate
import (
"math"
"math/bits"
"sort"
)
const (
@ -25,8 +24,6 @@ type huffmanEncoder struct {
codes []hcode
freqcache []literalNode
bitCount [17]int32
lns byLiteral // stored to avoid repeated allocation in generate
lfs byFreq // stored to avoid repeated allocation in generate
}
type literalNode struct {
@ -85,17 +82,14 @@ func generateFixedLiteralEncoding() *huffmanEncoder {
// size 8, 000110000 .. 10111111
bits = ch + 48
size = 8
break
case ch < 256:
// size 9, 110010000 .. 111111111
bits = ch + 400 - 144
size = 9
break
case ch < 280:
// size 7, 0000000 .. 0010111
bits = ch - 256
size = 7
break
default:
// size 8, 11000000 .. 11000111
bits = ch + 192 - 280
@ -115,8 +109,8 @@ func generateFixedOffsetEncoding() *huffmanEncoder {
return h
}
var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
var fixedLiteralEncoding = generateFixedLiteralEncoding()
var fixedOffsetEncoding = generateFixedOffsetEncoding()
func (h *huffmanEncoder) bitLength(freq []uint16) int {
var total int
@ -273,7 +267,7 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN
// assigned in literal order (not frequency order).
chunk := list[len(list)-int(bits):]
h.lns.sort(chunk)
sortByLiteral(chunk)
for _, node := range chunk {
h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)}
code++
@ -318,7 +312,7 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
}
return
}
h.lfs.sort(list)
sortByFreq(list)
// Get the number of literals for each bit count
bitCount := h.bitCounts(list, maxBits)
@ -326,59 +320,44 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
h.assignEncodingAndSize(bitCount, list)
}
type byLiteral []literalNode
func (s *byLiteral) sort(a []literalNode) {
*s = byLiteral(a)
sort.Sort(s)
}
func (s byLiteral) Len() int { return len(s) }
func (s byLiteral) Less(i, j int) bool {
return s[i].literal < s[j].literal
}
func (s byLiteral) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
type byFreq []literalNode
func (s *byFreq) sort(a []literalNode) {
*s = byFreq(a)
sort.Sort(s)
}
func (s byFreq) Len() int { return len(s) }
func (s byFreq) Less(i, j int) bool {
if s[i].freq == s[j].freq {
return s[i].literal < s[j].literal
func atLeastOne(v float32) float32 {
if v < 1 {
return 1
}
return s[i].freq < s[j].freq
return v
}
func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
// histogramSize accumulates a histogram of b in h.
// An estimated size in bits is returned.
// Unassigned values are assigned '1' in the histogram.
// len(h) must be >= 256, and h's elements must be all zeroes.
func histogramSize(b []byte, h []uint16, fill bool) int {
func histogramSize(b []byte, h []uint16, fill bool) (int, int) {
h = h[:256]
for _, t := range b {
h[t]++
}
invTotal := 1.0 / float64(len(b))
shannon := 0.0
single := math.Ceil(-math.Log2(invTotal))
for i, v := range h[:] {
if v > 0 {
n := float64(v)
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
} else if fill {
shannon += single
h[i] = 1
invTotal := 1.0 / float32(len(b))
shannon := float32(0.0)
var extra float32
if fill {
oneBits := atLeastOne(-mFastLog2(invTotal))
for i, v := range h[:] {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
} else {
h[i] = 1
extra += oneBits
}
}
} else {
for _, v := range h[:] {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
}
}
}
return int(shannon + 0.99)
return int(shannon + 0.99), int(extra + 0.99)
}

View file

@ -0,0 +1,178 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
// Sort sorts data.
// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
// data.Less and data.Swap. The sort is not guaranteed to be stable.
func sortByFreq(data []literalNode) {
n := len(data)
quickSortByFreq(data, 0, n, maxDepth(n))
}
func quickSortByFreq(data []literalNode, a, b, maxDepth int) {
for b-a > 12 { // Use ShellSort for slices <= 12 elements
if maxDepth == 0 {
heapSort(data, a, b)
return
}
maxDepth--
mlo, mhi := doPivotByFreq(data, a, b)
// Avoiding recursion on the larger subproblem guarantees
// a stack depth of at most lg(b-a).
if mlo-a < b-mhi {
quickSortByFreq(data, a, mlo, maxDepth)
a = mhi // i.e., quickSortByFreq(data, mhi, b)
} else {
quickSortByFreq(data, mhi, b, maxDepth)
b = mlo // i.e., quickSortByFreq(data, a, mlo)
}
}
if b-a > 1 {
// Do ShellSort pass with gap 6
// It could be written in this simplified form cause b-a <= 12
for i := a + 6; i < b; i++ {
if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq {
data[i], data[i-6] = data[i-6], data[i]
}
}
insertionSortByFreq(data, a, b)
}
}
// siftDownByFreq implements the heap property on data[lo, hi).
// first is an offset into the array where the root of the heap lies.
func siftDownByFreq(data []literalNode, lo, hi, first int) {
root := lo
for {
child := 2*root + 1
if child >= hi {
break
}
if child+1 < hi && (data[first+child].freq == data[first+child+1].freq && data[first+child].literal < data[first+child+1].literal || data[first+child].freq < data[first+child+1].freq) {
child++
}
if data[first+root].freq == data[first+child].freq && data[first+root].literal > data[first+child].literal || data[first+root].freq > data[first+child].freq {
return
}
data[first+root], data[first+child] = data[first+child], data[first+root]
root = child
}
}
func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) {
m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
if hi-lo > 40 {
// Tukey's ``Ninther,'' median of three medians of three.
s := (hi - lo) / 8
medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s)
medianOfThreeSortByFreq(data, m, m-s, m+s)
medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s)
}
medianOfThreeSortByFreq(data, lo, m, hi-1)
// Invariants are:
// data[lo] = pivot (set up by ChoosePivot)
// data[lo < i < a] < pivot
// data[a <= i < b] <= pivot
// data[b <= i < c] unexamined
// data[c <= i < hi-1] > pivot
// data[hi-1] >= pivot
pivot := lo
a, c := lo+1, hi-1
for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ {
}
b := a
for {
for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot
}
for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot
}
if b >= c {
break
}
// data[b] > pivot; data[c-1] <= pivot
data[b], data[c-1] = data[c-1], data[b]
b++
c--
}
// If hi-c<3 then there are duplicates (by property of median of nine).
// Let's be a bit more conservative, and set border to 5.
protect := hi-c < 5
if !protect && hi-c < (hi-lo)/4 {
// Lets test some points for equality to pivot
dups := 0
if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot
data[c], data[hi-1] = data[hi-1], data[c]
c++
dups++
}
if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot
b--
dups++
}
// m-lo = (hi-lo)/2 > 6
// b-lo > (hi-lo)*3/4-1 > 8
// ==> m < b ==> data[m] <= pivot
if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot
data[m], data[b-1] = data[b-1], data[m]
b--
dups++
}
// if at least 2 points are equal to pivot, assume skewed distribution
protect = dups > 1
}
if protect {
// Protect against a lot of duplicates
// Add invariant:
// data[a <= i < b] unexamined
// data[b <= i < c] = pivot
for {
for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot
}
for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot
}
if a >= b {
break
}
// data[a] == pivot; data[b-1] < pivot
data[a], data[b-1] = data[b-1], data[a]
a++
b--
}
}
// Swap pivot into middle
data[pivot], data[b-1] = data[b-1], data[pivot]
return b - 1, c
}
// Insertion sort
func insertionSortByFreq(data []literalNode, a, b int) {
for i := a + 1; i < b; i++ {
for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- {
data[j], data[j-1] = data[j-1], data[j]
}
}
}
// quickSortByFreq, loosely following Bentley and McIlroy,
// ``Engineering a Sort Function,'' SP&E November 1993.
// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) {
// sort 3 elements
if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
data[m1], data[m0] = data[m0], data[m1]
}
// data[m0] <= data[m1]
if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq {
data[m2], data[m1] = data[m1], data[m2]
// data[m0] <= data[m2] && data[m1] < data[m2]
if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
data[m1], data[m0] = data[m0], data[m1]
}
}
// now data[m0] <= data[m1] <= data[m2]
}

View file

@ -0,0 +1,201 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
// Sort sorts data.
// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
// data.Less and data.Swap. The sort is not guaranteed to be stable.
func sortByLiteral(data []literalNode) {
n := len(data)
quickSort(data, 0, n, maxDepth(n))
}
func quickSort(data []literalNode, a, b, maxDepth int) {
for b-a > 12 { // Use ShellSort for slices <= 12 elements
if maxDepth == 0 {
heapSort(data, a, b)
return
}
maxDepth--
mlo, mhi := doPivot(data, a, b)
// Avoiding recursion on the larger subproblem guarantees
// a stack depth of at most lg(b-a).
if mlo-a < b-mhi {
quickSort(data, a, mlo, maxDepth)
a = mhi // i.e., quickSort(data, mhi, b)
} else {
quickSort(data, mhi, b, maxDepth)
b = mlo // i.e., quickSort(data, a, mlo)
}
}
if b-a > 1 {
// Do ShellSort pass with gap 6
// It could be written in this simplified form cause b-a <= 12
for i := a + 6; i < b; i++ {
if data[i].literal < data[i-6].literal {
data[i], data[i-6] = data[i-6], data[i]
}
}
insertionSort(data, a, b)
}
}
func heapSort(data []literalNode, a, b int) {
first := a
lo := 0
hi := b - a
// Build heap with greatest element at top.
for i := (hi - 1) / 2; i >= 0; i-- {
siftDown(data, i, hi, first)
}
// Pop elements, largest first, into end of data.
for i := hi - 1; i >= 0; i-- {
data[first], data[first+i] = data[first+i], data[first]
siftDown(data, lo, i, first)
}
}
// siftDown implements the heap property on data[lo, hi).
// first is an offset into the array where the root of the heap lies.
func siftDown(data []literalNode, lo, hi, first int) {
root := lo
for {
child := 2*root + 1
if child >= hi {
break
}
if child+1 < hi && data[first+child].literal < data[first+child+1].literal {
child++
}
if data[first+root].literal > data[first+child].literal {
return
}
data[first+root], data[first+child] = data[first+child], data[first+root]
root = child
}
}
func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) {
m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
if hi-lo > 40 {
// Tukey's ``Ninther,'' median of three medians of three.
s := (hi - lo) / 8
medianOfThree(data, lo, lo+s, lo+2*s)
medianOfThree(data, m, m-s, m+s)
medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
}
medianOfThree(data, lo, m, hi-1)
// Invariants are:
// data[lo] = pivot (set up by ChoosePivot)
// data[lo < i < a] < pivot
// data[a <= i < b] <= pivot
// data[b <= i < c] unexamined
// data[c <= i < hi-1] > pivot
// data[hi-1] >= pivot
pivot := lo
a, c := lo+1, hi-1
for ; a < c && data[a].literal < data[pivot].literal; a++ {
}
b := a
for {
for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot
}
for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot
}
if b >= c {
break
}
// data[b] > pivot; data[c-1] <= pivot
data[b], data[c-1] = data[c-1], data[b]
b++
c--
}
// If hi-c<3 then there are duplicates (by property of median of nine).
// Let's be a bit more conservative, and set border to 5.
protect := hi-c < 5
if !protect && hi-c < (hi-lo)/4 {
// Lets test some points for equality to pivot
dups := 0
if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot
data[c], data[hi-1] = data[hi-1], data[c]
c++
dups++
}
if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot
b--
dups++
}
// m-lo = (hi-lo)/2 > 6
// b-lo > (hi-lo)*3/4-1 > 8
// ==> m < b ==> data[m] <= pivot
if data[m].literal > data[pivot].literal { // data[m] = pivot
data[m], data[b-1] = data[b-1], data[m]
b--
dups++
}
// if at least 2 points are equal to pivot, assume skewed distribution
protect = dups > 1
}
if protect {
// Protect against a lot of duplicates
// Add invariant:
// data[a <= i < b] unexamined
// data[b <= i < c] = pivot
for {
for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot
}
for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot
}
if a >= b {
break
}
// data[a] == pivot; data[b-1] < pivot
data[a], data[b-1] = data[b-1], data[a]
a++
b--
}
}
// Swap pivot into middle
data[pivot], data[b-1] = data[b-1], data[pivot]
return b - 1, c
}
// Insertion sort
func insertionSort(data []literalNode, a, b int) {
for i := a + 1; i < b; i++ {
for j := i; j > a && data[j].literal < data[j-1].literal; j-- {
data[j], data[j-1] = data[j-1], data[j]
}
}
}
// maxDepth returns a threshold at which quicksort should switch
// to heapsort. It returns 2*ceil(lg(n+1)).
func maxDepth(n int) int {
var depth int
for i := n; i > 0; i >>= 1 {
depth++
}
return depth * 2
}
// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
func medianOfThree(data []literalNode, m1, m0, m2 int) {
// sort 3 elements
if data[m1].literal < data[m0].literal {
data[m1], data[m0] = data[m0], data[m1]
}
// data[m0] <= data[m1]
if data[m2].literal < data[m1].literal {
data[m2], data[m1] = data[m1], data[m2]
// data[m0] <= data[m2] && data[m1] < data[m2]
if data[m1].literal < data[m0].literal {
data[m1], data[m0] = data[m0], data[m1]
}
}
// now data[m0] <= data[m1] <= data[m2]
}

View file

@ -106,7 +106,7 @@ const (
)
type huffmanDecoder struct {
min int // the minimum code length
maxRead int // the maximum number of bits we can read and not overread
chunks *[huffmanNumChunks]uint16 // chunks as described above
links [][]uint16 // overflow links
linkMask uint32 // mask the width of the link table
@ -126,12 +126,12 @@ func (h *huffmanDecoder) init(lengths []int) bool {
if h.chunks == nil {
h.chunks = &[huffmanNumChunks]uint16{}
}
if h.min != 0 {
if h.maxRead != 0 {
*h = huffmanDecoder{chunks: h.chunks, links: h.links}
}
// Count number of codes of each length,
// compute min and max length.
// compute maxRead and max length.
var count [maxCodeLen]int
var min, max int
for _, n := range lengths {
@ -178,7 +178,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
return false
}
h.min = min
h.maxRead = min
chunks := h.chunks[:]
for i := range chunks {
chunks[i] = 0
@ -342,7 +342,7 @@ func (f *decompressor) nextBlock() {
// compressed, fixed Huffman tables
f.hl = &fixedHuffmanDecoder
f.hd = nil
f.huffmanBlock()
f.huffmanBlockDecoder()()
case 2:
// compressed, dynamic Huffman tables
if f.err = f.readHuffman(); f.err != nil {
@ -350,7 +350,7 @@ func (f *decompressor) nextBlock() {
}
f.hl = &f.h1
f.hd = &f.h2
f.huffmanBlock()
f.huffmanBlockDecoder()()
default:
// 3 is reserved.
if debugDecode {
@ -543,12 +543,18 @@ func (f *decompressor) readHuffman() error {
return CorruptInputError(f.roffset)
}
// As an optimization, we can initialize the min bits to read at a time
// As an optimization, we can initialize the maxRead bits to read at a time
// for the HLIT tree to the length of the EOB marker since we know that
// every block must terminate with one. This preserves the property that
// we never read any extra bytes after the end of the DEFLATE stream.
if f.h1.min < f.bits[endBlockMarker] {
f.h1.min = f.bits[endBlockMarker]
if f.h1.maxRead < f.bits[endBlockMarker] {
f.h1.maxRead = f.bits[endBlockMarker]
}
if !f.final {
// If not the final block, the smallest block possible is
// a predefined table, BTYPE=01, with a single EOB marker.
// This will take up 3 + 7 bits.
f.h1.maxRead += 10
}
return nil
@ -558,7 +564,7 @@ func (f *decompressor) readHuffman() error {
// hl and hd are the Huffman states for the lit/length values
// and the distance values, respectively. If hd == nil, using the
// fixed distance encoding associated with fixed Huffman blocks.
func (f *decompressor) huffmanBlock() {
func (f *decompressor) huffmanBlockGeneric() {
const (
stateInit = iota // Zero value must be stateInit
stateDict
@ -574,19 +580,64 @@ func (f *decompressor) huffmanBlock() {
readLiteral:
// Read literal and/or (length, distance) according to RFC section 3.2.3.
{
v, err := f.huffSym(f.hl)
if err != nil {
f.err = err
return
var v int
{
// Inlined v, err := f.huffSym(f.hl)
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for nb < n {
c, err := f.r.ReadByte()
if err != nil {
f.b = b
f.nb = nb
f.err = noEOF(err)
return
}
f.roffset++
b |= uint32(c) << (nb & 31)
nb += 8
}
chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
f.b = b >> (n & 31)
f.nb = nb - n
v = int(chunk >> huffmanValueShift)
break
}
}
}
var n uint // number of bits extra
var length int
var err error
switch {
case v < 256:
f.dict.writeByte(byte(v))
if f.dict.availWrite() == 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBlock
f.step = (*decompressor).huffmanBlockGeneric
f.stepState = stateInit
return
}
@ -714,7 +765,7 @@ copyHistory:
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBlock // We need to continue this work
f.step = (*decompressor).huffmanBlockGeneric // We need to continue this work
f.stepState = stateDict
return
}
@ -726,21 +777,33 @@ copyHistory:
func (f *decompressor) dataBlock() {
// Uncompressed.
// Discard current half-byte.
f.nb = 0
f.b = 0
left := (f.nb) & 7
f.nb -= left
f.b >>= left
offBytes := f.nb >> 3
// Unfilled values will be overwritten.
f.buf[0] = uint8(f.b)
f.buf[1] = uint8(f.b >> 8)
f.buf[2] = uint8(f.b >> 16)
f.buf[3] = uint8(f.b >> 24)
f.roffset += int64(offBytes)
f.nb, f.b = 0, 0
// Length then ones-complement of length.
nr, err := io.ReadFull(f.r, f.buf[0:4])
nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
f.roffset += int64(nr)
if err != nil {
f.err = noEOF(err)
return
}
n := int(f.buf[0]) | int(f.buf[1])<<8
nn := int(f.buf[2]) | int(f.buf[3])<<8
if uint16(nn) != uint16(^n) {
n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
if nn != ^n {
if debugDecode {
fmt.Println("uint16(nn) != uint16(^n)", nn, ^n)
ncomp := ^n
fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
}
f.err = CorruptInputError(f.roffset)
return
@ -752,7 +815,7 @@ func (f *decompressor) dataBlock() {
return
}
f.copyLen = n
f.copyLen = int(n)
f.copyData()
}
@ -816,7 +879,7 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(h.min)
n := uint(h.maxRead)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.

View file

@ -0,0 +1,922 @@
// Code generated by go generate gen_inflate.go. DO NOT EDIT.
package flate
import (
"bufio"
"bytes"
"fmt"
"math/bits"
"strings"
)
// Decode a single Huffman block from f.
// hl and hd are the Huffman states for the lit/length values
// and the distance values, respectively. If hd == nil, using the
// fixed distance encoding associated with fixed Huffman blocks.
func (f *decompressor) huffmanBytesBuffer() {
const (
stateInit = iota // Zero value must be stateInit
stateDict
)
fr := f.r.(*bytes.Buffer)
moreBits := func() error {
c, err := fr.ReadByte()
if err != nil {
return noEOF(err)
}
f.roffset++
f.b |= uint32(c) << f.nb
f.nb += 8
return nil
}
switch f.stepState {
case stateInit:
goto readLiteral
case stateDict:
goto copyHistory
}
readLiteral:
// Read literal and/or (length, distance) according to RFC section 3.2.3.
{
var v int
{
// Inlined v, err := f.huffSym(f.hl)
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for nb < n {
c, err := fr.ReadByte()
if err != nil {
f.b = b
f.nb = nb
f.err = noEOF(err)
return
}
f.roffset++
b |= uint32(c) << (nb & 31)
nb += 8
}
chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
f.b = b >> (n & 31)
f.nb = nb - n
v = int(chunk >> huffmanValueShift)
break
}
}
}
var n uint // number of bits extra
var length int
var err error
switch {
case v < 256:
f.dict.writeByte(byte(v))
if f.dict.availWrite() == 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBytesBuffer
f.stepState = stateInit
return
}
goto readLiteral
case v == 256:
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
n = 0
case v < 269:
length = v*2 - (265*2 - 11)
n = 1
case v < 273:
length = v*4 - (269*4 - 19)
n = 2
case v < 277:
length = v*8 - (273*8 - 35)
n = 3
case v < 281:
length = v*16 - (277*16 - 67)
n = 4
case v < 285:
length = v*32 - (281*32 - 131)
n = 5
case v < maxNumLit:
length = 258
n = 0
default:
if debugDecode {
fmt.Println(v, ">= maxNumLit")
}
f.err = CorruptInputError(f.roffset)
return
}
if n > 0 {
for f.nb < n {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits n>0:", err)
}
f.err = err
return
}
}
length += int(f.b & uint32(1<<n-1))
f.b >>= n
f.nb -= n
}
var dist int
if f.hd == nil {
for f.nb < 5 {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
f.err = err
return
}
}
dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
f.b >>= 5
f.nb -= 5
} else {
if dist, err = f.huffSym(f.hd); err != nil {
if debugDecode {
fmt.Println("huffsym:", err)
}
f.err = err
return
}
}
switch {
case dist < 4:
dist++
case dist < maxNumDist:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << nb
for f.nb < nb {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<nb:", err)
}
f.err = err
return
}
}
extra |= int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
dist = 1<<(nb+1) + 1 + extra
default:
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
f.err = CorruptInputError(f.roffset)
return
}
// No check on length; encoding can be prescient.
if dist > f.dict.histSize() {
if debugDecode {
fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
}
f.err = CorruptInputError(f.roffset)
return
}
f.copyLen, f.copyDist = length, dist
goto copyHistory
}
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work
f.stepState = stateDict
return
}
goto readLiteral
}
}
// Decode a single Huffman block from f.
// hl and hd are the Huffman states for the lit/length values
// and the distance values, respectively. If hd == nil, using the
// fixed distance encoding associated with fixed Huffman blocks.
func (f *decompressor) huffmanBytesReader() {
const (
stateInit = iota // Zero value must be stateInit
stateDict
)
fr := f.r.(*bytes.Reader)
moreBits := func() error {
c, err := fr.ReadByte()
if err != nil {
return noEOF(err)
}
f.roffset++
f.b |= uint32(c) << f.nb
f.nb += 8
return nil
}
switch f.stepState {
case stateInit:
goto readLiteral
case stateDict:
goto copyHistory
}
readLiteral:
// Read literal and/or (length, distance) according to RFC section 3.2.3.
{
var v int
{
// Inlined v, err := f.huffSym(f.hl)
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for nb < n {
c, err := fr.ReadByte()
if err != nil {
f.b = b
f.nb = nb
f.err = noEOF(err)
return
}
f.roffset++
b |= uint32(c) << (nb & 31)
nb += 8
}
chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
f.b = b >> (n & 31)
f.nb = nb - n
v = int(chunk >> huffmanValueShift)
break
}
}
}
var n uint // number of bits extra
var length int
var err error
switch {
case v < 256:
f.dict.writeByte(byte(v))
if f.dict.availWrite() == 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBytesReader
f.stepState = stateInit
return
}
goto readLiteral
case v == 256:
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
n = 0
case v < 269:
length = v*2 - (265*2 - 11)
n = 1
case v < 273:
length = v*4 - (269*4 - 19)
n = 2
case v < 277:
length = v*8 - (273*8 - 35)
n = 3
case v < 281:
length = v*16 - (277*16 - 67)
n = 4
case v < 285:
length = v*32 - (281*32 - 131)
n = 5
case v < maxNumLit:
length = 258
n = 0
default:
if debugDecode {
fmt.Println(v, ">= maxNumLit")
}
f.err = CorruptInputError(f.roffset)
return
}
if n > 0 {
for f.nb < n {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits n>0:", err)
}
f.err = err
return
}
}
length += int(f.b & uint32(1<<n-1))
f.b >>= n
f.nb -= n
}
var dist int
if f.hd == nil {
for f.nb < 5 {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
f.err = err
return
}
}
dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
f.b >>= 5
f.nb -= 5
} else {
if dist, err = f.huffSym(f.hd); err != nil {
if debugDecode {
fmt.Println("huffsym:", err)
}
f.err = err
return
}
}
switch {
case dist < 4:
dist++
case dist < maxNumDist:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << nb
for f.nb < nb {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<nb:", err)
}
f.err = err
return
}
}
extra |= int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
dist = 1<<(nb+1) + 1 + extra
default:
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
f.err = CorruptInputError(f.roffset)
return
}
// No check on length; encoding can be prescient.
if dist > f.dict.histSize() {
if debugDecode {
fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
}
f.err = CorruptInputError(f.roffset)
return
}
f.copyLen, f.copyDist = length, dist
goto copyHistory
}
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBytesReader // We need to continue this work
f.stepState = stateDict
return
}
goto readLiteral
}
}
// Decode a single Huffman block from f.
// hl and hd are the Huffman states for the lit/length values
// and the distance values, respectively. If hd == nil, using the
// fixed distance encoding associated with fixed Huffman blocks.
func (f *decompressor) huffmanBufioReader() {
const (
stateInit = iota // Zero value must be stateInit
stateDict
)
fr := f.r.(*bufio.Reader)
moreBits := func() error {
c, err := fr.ReadByte()
if err != nil {
return noEOF(err)
}
f.roffset++
f.b |= uint32(c) << f.nb
f.nb += 8
return nil
}
switch f.stepState {
case stateInit:
goto readLiteral
case stateDict:
goto copyHistory
}
readLiteral:
// Read literal and/or (length, distance) according to RFC section 3.2.3.
{
var v int
{
// Inlined v, err := f.huffSym(f.hl)
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for nb < n {
c, err := fr.ReadByte()
if err != nil {
f.b = b
f.nb = nb
f.err = noEOF(err)
return
}
f.roffset++
b |= uint32(c) << (nb & 31)
nb += 8
}
chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
f.b = b >> (n & 31)
f.nb = nb - n
v = int(chunk >> huffmanValueShift)
break
}
}
}
var n uint // number of bits extra
var length int
var err error
switch {
case v < 256:
f.dict.writeByte(byte(v))
if f.dict.availWrite() == 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBufioReader
f.stepState = stateInit
return
}
goto readLiteral
case v == 256:
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
n = 0
case v < 269:
length = v*2 - (265*2 - 11)
n = 1
case v < 273:
length = v*4 - (269*4 - 19)
n = 2
case v < 277:
length = v*8 - (273*8 - 35)
n = 3
case v < 281:
length = v*16 - (277*16 - 67)
n = 4
case v < 285:
length = v*32 - (281*32 - 131)
n = 5
case v < maxNumLit:
length = 258
n = 0
default:
if debugDecode {
fmt.Println(v, ">= maxNumLit")
}
f.err = CorruptInputError(f.roffset)
return
}
if n > 0 {
for f.nb < n {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits n>0:", err)
}
f.err = err
return
}
}
length += int(f.b & uint32(1<<n-1))
f.b >>= n
f.nb -= n
}
var dist int
if f.hd == nil {
for f.nb < 5 {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
f.err = err
return
}
}
dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
f.b >>= 5
f.nb -= 5
} else {
if dist, err = f.huffSym(f.hd); err != nil {
if debugDecode {
fmt.Println("huffsym:", err)
}
f.err = err
return
}
}
switch {
case dist < 4:
dist++
case dist < maxNumDist:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << nb
for f.nb < nb {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<nb:", err)
}
f.err = err
return
}
}
extra |= int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
dist = 1<<(nb+1) + 1 + extra
default:
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
f.err = CorruptInputError(f.roffset)
return
}
// No check on length; encoding can be prescient.
if dist > f.dict.histSize() {
if debugDecode {
fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
}
f.err = CorruptInputError(f.roffset)
return
}
f.copyLen, f.copyDist = length, dist
goto copyHistory
}
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBufioReader // We need to continue this work
f.stepState = stateDict
return
}
goto readLiteral
}
}
// Decode a single Huffman block from f.
// hl and hd are the Huffman states for the lit/length values
// and the distance values, respectively. If hd == nil, using the
// fixed distance encoding associated with fixed Huffman blocks.
func (f *decompressor) huffmanStringsReader() {
const (
stateInit = iota // Zero value must be stateInit
stateDict
)
fr := f.r.(*strings.Reader)
moreBits := func() error {
c, err := fr.ReadByte()
if err != nil {
return noEOF(err)
}
f.roffset++
f.b |= uint32(c) << f.nb
f.nb += 8
return nil
}
switch f.stepState {
case stateInit:
goto readLiteral
case stateDict:
goto copyHistory
}
readLiteral:
// Read literal and/or (length, distance) according to RFC section 3.2.3.
{
var v int
{
// Inlined v, err := f.huffSym(f.hl)
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for nb < n {
c, err := fr.ReadByte()
if err != nil {
f.b = b
f.nb = nb
f.err = noEOF(err)
return
}
f.roffset++
b |= uint32(c) << (nb & 31)
nb += 8
}
chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
f.b = b >> (n & 31)
f.nb = nb - n
v = int(chunk >> huffmanValueShift)
break
}
}
}
var n uint // number of bits extra
var length int
var err error
switch {
case v < 256:
f.dict.writeByte(byte(v))
if f.dict.availWrite() == 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanStringsReader
f.stepState = stateInit
return
}
goto readLiteral
case v == 256:
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
n = 0
case v < 269:
length = v*2 - (265*2 - 11)
n = 1
case v < 273:
length = v*4 - (269*4 - 19)
n = 2
case v < 277:
length = v*8 - (273*8 - 35)
n = 3
case v < 281:
length = v*16 - (277*16 - 67)
n = 4
case v < 285:
length = v*32 - (281*32 - 131)
n = 5
case v < maxNumLit:
length = 258
n = 0
default:
if debugDecode {
fmt.Println(v, ">= maxNumLit")
}
f.err = CorruptInputError(f.roffset)
return
}
if n > 0 {
for f.nb < n {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits n>0:", err)
}
f.err = err
return
}
}
length += int(f.b & uint32(1<<n-1))
f.b >>= n
f.nb -= n
}
var dist int
if f.hd == nil {
for f.nb < 5 {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
f.err = err
return
}
}
dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
f.b >>= 5
f.nb -= 5
} else {
if dist, err = f.huffSym(f.hd); err != nil {
if debugDecode {
fmt.Println("huffsym:", err)
}
f.err = err
return
}
}
switch {
case dist < 4:
dist++
case dist < maxNumDist:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << nb
for f.nb < nb {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<nb:", err)
}
f.err = err
return
}
}
extra |= int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
dist = 1<<(nb+1) + 1 + extra
default:
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
f.err = CorruptInputError(f.roffset)
return
}
// No check on length; encoding can be prescient.
if dist > f.dict.histSize() {
if debugDecode {
fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
}
f.err = CorruptInputError(f.roffset)
return
}
f.copyLen, f.copyDist = length, dist
goto copyHistory
}
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanStringsReader // We need to continue this work
f.stepState = stateDict
return
}
goto readLiteral
}
}
func (f *decompressor) huffmanBlockDecoder() func() {
switch f.r.(type) {
case *bytes.Buffer:
return f.huffmanBytesBuffer
case *bytes.Reader:
return f.huffmanBytesReader
case *bufio.Reader:
return f.huffmanBufioReader
case *strings.Reader:
return f.huffmanStringsReader
default:
return f.huffmanBlockGeneric
}
}

View file

@ -1,5 +1,7 @@
package flate
import "fmt"
// fastGen maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
@ -14,6 +16,9 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
@ -76,12 +81,12 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
}
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
e.table[nextHash] = tableEntry{offset: s + e.cur}
nextHash = hash(uint32(now))
offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == candidate.val {
e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
@ -91,11 +96,11 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
nextS++
candidate = e.table[nextHash]
now >>= 8
e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
e.table[nextHash] = tableEntry{offset: s + e.cur}
offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == candidate.val {
e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
cv = uint32(now)
@ -134,7 +139,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
// Index first pair after match end.
if int(s+l+4) < len(src) {
cv := load3232(src, s)
e.table[hash(cv)] = tableEntry{offset: s + e.cur, val: cv}
e.table[hash(cv)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}
@ -148,14 +153,14 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
x := load6432(src, s-2)
o := e.cur + s - 2
prevHash := hash(uint32(x))
e.table[prevHash] = tableEntry{offset: o, val: uint32(x)}
e.table[prevHash] = tableEntry{offset: o}
x >>= 16
currHash := hash(uint32(x))
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x)}
e.table[currHash] = tableEntry{offset: o + 2}
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x) != candidate.val {
if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) {
cv = uint32(x >> 8)
s++
break

View file

@ -1,5 +1,7 @@
package flate
import "fmt"
// fastGen maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
@ -16,6 +18,10 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
@ -77,12 +83,12 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
}
candidate = e.table[nextHash]
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
e.table[nextHash] = tableEntry{offset: s + e.cur}
nextHash = hash4u(uint32(now), bTableBits)
offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == candidate.val {
e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
@ -92,10 +98,10 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
nextS++
candidate = e.table[nextHash]
now >>= 8
e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
e.table[nextHash] = tableEntry{offset: s + e.cur}
offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == candidate.val {
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
break
}
cv = uint32(now)
@ -142,7 +148,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
// Index first pair after match end.
if int(s+l+4) < len(src) {
cv := load3232(src, s)
e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur, val: cv}
e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}
@ -151,15 +157,15 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
for i := s - l + 2; i < s-5; i += 7 {
x := load6432(src, int32(i))
nextHash := hash4u(uint32(x), bTableBits)
e.table[nextHash] = tableEntry{offset: e.cur + i, val: uint32(x)}
e.table[nextHash] = tableEntry{offset: e.cur + i}
// Skip one
x >>= 16
nextHash = hash4u(uint32(x), bTableBits)
e.table[nextHash] = tableEntry{offset: e.cur + i + 2, val: uint32(x)}
e.table[nextHash] = tableEntry{offset: e.cur + i + 2}
// Skip one
x >>= 16
nextHash = hash4u(uint32(x), bTableBits)
e.table[nextHash] = tableEntry{offset: e.cur + i + 4, val: uint32(x)}
e.table[nextHash] = tableEntry{offset: e.cur + i + 4}
}
// We could immediately start working at s now, but to improve
@ -172,14 +178,14 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
o := e.cur + s - 2
prevHash := hash4u(uint32(x), bTableBits)
prevHash2 := hash4u(uint32(x>>8), bTableBits)
e.table[prevHash] = tableEntry{offset: o, val: uint32(x)}
e.table[prevHash2] = tableEntry{offset: o + 1, val: uint32(x >> 8)}
e.table[prevHash] = tableEntry{offset: o}
e.table[prevHash2] = tableEntry{offset: o + 1}
currHash := hash4u(uint32(x>>16), bTableBits)
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x >> 16)}
e.table[currHash] = tableEntry{offset: o + 2}
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x>>16) != candidate.val {
if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) {
cv = uint32(x >> 24)
s++
break

View file

@ -1,5 +1,7 @@
package flate
import "fmt"
// fastEncL3
type fastEncL3 struct {
fastGen
@ -13,6 +15,10 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
@ -75,22 +81,26 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
}
candidates := e.table[nextHash]
now := load3232(src, nextS)
e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}
// Safe offset distance until s + 4...
minOffset := e.cur + s - (maxMatchOffset - 4)
e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}}
// Check both candidates
candidate = candidates.Cur
offset := s - (candidate.offset - e.cur)
if cv == candidate.val {
if offset > maxMatchOffset {
cv = now
// Previous will also be invalid, we have nothing.
continue
}
o2 := s - (candidates.Prev.offset - e.cur)
if cv != candidates.Prev.val || o2 > maxMatchOffset {
if candidate.offset < minOffset {
cv = now
// Previous will also be invalid, we have nothing.
continue
}
if cv == load3232(src, candidate.offset-e.cur) {
if candidates.Prev.offset < minOffset || cv != load3232(src, candidates.Prev.offset-e.cur) {
break
}
// Both match and are valid, pick longest.
offset := s - (candidate.offset - e.cur)
o2 := s - (candidates.Prev.offset - e.cur)
l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:])
if l2 > l1 {
candidate = candidates.Prev
@ -100,11 +110,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
// We only check if value mismatches.
// Offset will always be invalid in other cases.
candidate = candidates.Prev
if cv == candidate.val {
offset := s - (candidate.offset - e.cur)
if offset <= maxMatchOffset {
break
}
if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
break
}
}
cv = now
@ -152,7 +159,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
nextHash := hash(cv)
e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + t, val: cv},
Cur: tableEntry{offset: e.cur + t},
}
}
goto emitRemainder
@ -164,21 +171,21 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
prevHash := hash(uint32(x))
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 3, val: uint32(x)},
Cur: tableEntry{offset: e.cur + s - 3},
}
x >>= 8
prevHash = hash(uint32(x))
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 2, val: uint32(x)},
Cur: tableEntry{offset: e.cur + s - 2},
}
x >>= 8
prevHash = hash(uint32(x))
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 1, val: uint32(x)},
Cur: tableEntry{offset: e.cur + s - 1},
}
x >>= 8
currHash := hash(uint32(x))
@ -186,21 +193,18 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
cv = uint32(x)
e.table[currHash] = tableEntryPrev{
Prev: candidates.Cur,
Cur: tableEntry{offset: s + e.cur, val: cv},
Cur: tableEntry{offset: s + e.cur},
}
// Check both candidates
candidate = candidates.Cur
if cv == candidate.val {
offset := s - (candidate.offset - e.cur)
if offset <= maxMatchOffset {
continue
}
} else {
minOffset := e.cur + s - (maxMatchOffset - 4)
if candidate.offset > minOffset && cv != load3232(src, candidate.offset-e.cur) {
// We only check if value mismatches.
// Offset will always be invalid in other cases.
candidate = candidates.Prev
if cv == candidate.val {
if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
offset := s - (candidate.offset - e.cur)
if offset <= maxMatchOffset {
continue

View file

@ -13,7 +13,9 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
@ -90,24 +92,24 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
entry := tableEntry{offset: s + e.cur}
e.table[nextHashS] = entry
e.bTable[nextHashL] = entry
t = lCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == lCandidate.val {
if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.offset-e.cur) {
// We got a long match. Use that.
break
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
// Found a 4 match...
lCandidate = e.bTable[hash7(next, tableBits)]
// If the next long is a candidate, check if we should use that instead...
lOff := nextS - (lCandidate.offset - e.cur)
if lOff < maxMatchOffset && lCandidate.val == uint32(next) {
if lOff < maxMatchOffset && load3232(src, lCandidate.offset-e.cur) == uint32(next) {
l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:])
if l2 > l1 {
s = nextS
@ -135,7 +137,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
if nextEmit < s {
emitLiteral(dst, src[nextEmit:s])
}
if false {
if debugDeflate {
if t >= s {
panic("s-t")
}
@ -158,8 +160,8 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
// Index first pair after match end.
if int(s+8) < len(src) {
cv := load6432(src, s)
e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)}
e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)}
e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur}
e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}
@ -169,20 +171,20 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
i := nextS
if i < s-1 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1}
t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1}
e.bTable[hash7(cv, tableBits)] = t
e.bTable[hash7(cv>>8, tableBits)] = t2
e.table[hash4u(t2.val, tableBits)] = t2
e.table[hash4u(uint32(cv>>8), tableBits)] = t2
i += 3
for ; i < s-1; i += 3 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1}
t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1}
e.bTable[hash7(cv, tableBits)] = t
e.bTable[hash7(cv>>8, tableBits)] = t2
e.table[hash4u(t2.val, tableBits)] = t2
e.table[hash4u(uint32(cv>>8), tableBits)] = t2
}
}
}
@ -193,8 +195,8 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
o := e.cur + s - 1
prevHashS := hash4x64(x, tableBits)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)}
e.bTable[prevHashL] = tableEntry{offset: o, val: uint32(x)}
e.table[prevHashS] = tableEntry{offset: o}
e.bTable[prevHashL] = tableEntry{offset: o}
cv = x >> 8
}

View file

@ -13,6 +13,9 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
@ -97,7 +100,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
entry := tableEntry{offset: s + e.cur}
e.table[nextHashS] = entry
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur
@ -107,14 +110,14 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
t = lCandidate.Cur.offset - e.cur
if s-t < maxMatchOffset {
if uint32(cv) == lCandidate.Cur.val {
if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
t2 := lCandidate.Prev.offset - e.cur
if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
l = e.matchlen(s+4, t+4, src) + 4
ml1 := e.matchlen(s+4, t2+4, src) + 4
if ml1 > l {
@ -126,30 +129,30 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
break
}
t = lCandidate.Prev.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
break
}
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
// Found a 4 match...
l = e.matchlen(s+4, t+4, src) + 4
lCandidate = e.bTable[nextHashL]
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
// If the next long is a candidate, use that...
t2 := lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset {
if lCandidate.Cur.val == uint32(next) {
if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
@ -160,7 +163,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
}
// If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) {
if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
@ -194,7 +197,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
if nextEmit < s {
emitLiteral(dst, src[nextEmit:s])
}
if false {
if debugDeflate {
if t >= s {
panic(fmt.Sprintln("s-t", s, t))
}
@ -223,31 +226,31 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
i := s - l + 1
if i < s-1 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
t := tableEntry{offset: i + e.cur}
e.table[hash4x64(cv, tableBits)] = t
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// Do an long at i+1
cv >>= 8
t = tableEntry{offset: t.offset + 1, val: uint32(cv)}
t = tableEntry{offset: t.offset + 1}
eLong = &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// We only have enough bits for a short entry at i+2
cv >>= 8
t = tableEntry{offset: t.offset + 1, val: uint32(cv)}
t = tableEntry{offset: t.offset + 1}
e.table[hash4x64(cv, tableBits)] = t
// Skip one - otherwise we risk hitting 's'
i += 4
for ; i < s-1; i += hashEvery {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)}
t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
e.table[hash4u(t2.val, tableBits)] = t2
e.table[hash4u(uint32(cv>>8), tableBits)] = t2
}
}
}
@ -258,9 +261,9 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
o := e.cur + s - 1
prevHashS := hash4x64(x, tableBits)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)}
e.table[prevHashS] = tableEntry{offset: o}
eLong := &e.bTable[prevHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: o, val: uint32(x)}, eLong.Cur
eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur
cv = x >> 8
}

View file

@ -13,6 +13,9 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
@ -98,7 +101,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
entry := tableEntry{offset: s + e.cur}
e.table[nextHashS] = entry
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur
@ -109,17 +112,17 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
t = lCandidate.Cur.offset - e.cur
if s-t < maxMatchOffset {
if uint32(cv) == lCandidate.Cur.val {
if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
// Long candidate matches at least 4 bytes.
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
// Check the previous long candidate as well.
t2 := lCandidate.Prev.offset - e.cur
if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
l = e.matchlen(s+4, t+4, src) + 4
ml1 := e.matchlen(s+4, t2+4, src) + 4
if ml1 > l {
@ -132,17 +135,17 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
}
// Current value did not match, but check if previous long value does.
t = lCandidate.Prev.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
break
}
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
// Found a 4 match...
l = e.matchlen(s+4, t+4, src) + 4
@ -150,9 +153,9 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
lCandidate = e.bTable[nextHashL]
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
// Check repeat at s + repOff
const repOff = 1
@ -171,7 +174,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
// If the next long is a candidate, use that...
t2 = lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset {
if lCandidate.Cur.val == uint32(next) {
if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
@ -182,7 +185,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
}
// If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) {
if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
@ -241,9 +244,9 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
// Index after match end.
for i := nextS + 1; i < int32(len(src))-8; i += 2 {
cv := load6432(src, i)
e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur, val: uint32(cv)}
e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur, val: uint32(cv)}, eLong.Cur
eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur
}
goto emitRemainder
}
@ -252,8 +255,8 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
if true {
for i := nextS + 1; i < s-1; i += 2 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)}
t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong2 := &e.bTable[hash7(cv>>8, tableBits)]
e.table[hash4x64(cv, tableBits)] = t

View file

@ -3,10 +3,13 @@ package flate
import (
"io"
"math"
"sync"
)
const (
maxStatelessBlock = math.MaxInt16
// dictionary will be taken from maxStatelessBlock, so limit it.
maxStatelessDict = 8 << 10
slTableBits = 13
slTableSize = 1 << slTableBits
@ -24,11 +27,11 @@ func (s *statelessWriter) Close() error {
}
s.closed = true
// Emit EOF block
return StatelessDeflate(s.dst, nil, true)
return StatelessDeflate(s.dst, nil, true, nil)
}
func (s *statelessWriter) Write(p []byte) (n int, err error) {
err = StatelessDeflate(s.dst, p, false)
err = StatelessDeflate(s.dst, p, false, nil)
if err != nil {
return 0, err
}
@ -49,11 +52,27 @@ func NewStatelessWriter(dst io.Writer) io.WriteCloser {
return &statelessWriter{dst: dst}
}
// bitWriterPool contains bit writers that can be reused.
var bitWriterPool = sync.Pool{
New: func() interface{} {
return newHuffmanBitWriter(nil)
},
}
// StatelessDeflate allows to compress directly to a Writer without retaining state.
// When returning everything will be flushed.
func StatelessDeflate(out io.Writer, in []byte, eof bool) error {
// Up to 8KB of an optional dictionary can be given which is presumed to presumed to precede the block.
// Longer dictionaries will be truncated and will still produce valid output.
// Sending nil dictionary is perfectly fine.
func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
var dst tokens
bw := newHuffmanBitWriter(out)
bw := bitWriterPool.Get().(*huffmanBitWriter)
bw.reset(out)
defer func() {
// don't keep a reference to our output
bw.reset(nil)
bitWriterPool.Put(bw)
}()
if eof && len(in) == 0 {
// Just write an EOF block.
// Could be faster...
@ -62,35 +81,53 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool) error {
return bw.err
}
// Truncate dict
if len(dict) > maxStatelessDict {
dict = dict[len(dict)-maxStatelessDict:]
}
for len(in) > 0 {
todo := in
if len(todo) > maxStatelessBlock {
todo = todo[:maxStatelessBlock]
if len(todo) > maxStatelessBlock-len(dict) {
todo = todo[:maxStatelessBlock-len(dict)]
}
in = in[len(todo):]
uncompressed := todo
if len(dict) > 0 {
// combine dict and source
bufLen := len(todo) + len(dict)
combined := make([]byte, bufLen)
copy(combined, dict)
copy(combined[len(dict):], todo)
todo = combined
}
// Compress
statelessEnc(&dst, todo)
statelessEnc(&dst, todo, int16(len(dict)))
isEof := eof && len(in) == 0
if dst.n == 0 {
bw.writeStoredHeader(len(todo), isEof)
bw.writeStoredHeader(len(uncompressed), isEof)
if bw.err != nil {
return bw.err
}
bw.writeBytes(todo)
} else if int(dst.n) > len(todo)-len(todo)>>4 {
bw.writeBytes(uncompressed)
} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
// If we removed less than 1/16th, huffman compress the block.
bw.writeBlockHuff(isEof, todo, false)
bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
} else {
bw.writeBlockDynamic(&dst, isEof, todo, false)
bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
}
if len(in) > 0 {
// Retain a dict if we have more
dict = todo[len(todo)-maxStatelessDict:]
dst.Reset()
}
if bw.err != nil {
return bw.err
}
dst.Reset()
}
if !eof {
// Align.
// Align, only a stored block can do that.
bw.writeStoredHeader(0, false)
}
bw.flush()
@ -116,7 +153,7 @@ func load6416(b []byte, i int16) uint64 {
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
func statelessEnc(dst *tokens, src []byte) {
func statelessEnc(dst *tokens, src []byte, startAt int16) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
@ -130,15 +167,23 @@ func statelessEnc(dst *tokens, src []byte) {
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
if len(src)-int(startAt) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
dst.n = 0
return
}
// Index until startAt
if startAt > 0 {
cv := load3232(src, 0)
for i := int16(0); i < startAt; i++ {
table[hashSL(cv)] = tableEntry{offset: i}
cv = (cv >> 8) | (uint32(src[i+4]) << 24)
}
}
s := int16(1)
nextEmit := int16(0)
s := startAt + 1
nextEmit := startAt
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.

View file

@ -184,9 +184,7 @@ func (t *tokens) indexTokens(in []token) {
t.Reset()
for _, tok := range in {
if tok < matchType {
t.tokens[t.n] = tok
t.litHist[tok]++
t.n++
t.AddLiteral(tok.literal())
continue
}
t.AddMatch(uint32(tok.length()), tok.offset())
@ -211,50 +209,60 @@ func (t *tokens) AddLiteral(lit byte) {
t.nLits++
}
// from https://stackoverflow.com/a/28730362
func mFastLog2(val float32) float32 {
ux := int32(math.Float32bits(val))
log2 := (float32)(((ux >> 23) & 255) - 128)
ux &= -0x7f800001
ux += 127 << 23
uval := math.Float32frombits(uint32(ux))
log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759
return log2
}
// EstimatedBits will return an minimum size estimated by an *optimal*
// compression of the block.
// The size of the block
func (t *tokens) EstimatedBits() int {
shannon := float64(0)
shannon := float32(0)
bits := int(0)
nMatches := 0
if t.nLits > 0 {
invTotal := 1.0 / float64(t.nLits)
invTotal := 1.0 / float32(t.nLits)
for _, v := range t.litHist[:] {
if v > 0 {
n := float64(v)
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
}
}
// Just add 15 for EOB
shannon += 15
for _, v := range t.extraHist[1 : literalCount-256] {
for i, v := range t.extraHist[1 : literalCount-256] {
if v > 0 {
n := float64(v)
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
bits += int(lengthExtraBits[v&31]) * int(v)
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
bits += int(lengthExtraBits[i&31]) * int(v)
nMatches += int(v)
}
}
}
if nMatches > 0 {
invTotal := 1.0 / float64(nMatches)
for _, v := range t.offHist[:offsetCodeCount] {
invTotal := 1.0 / float32(nMatches)
for i, v := range t.offHist[:offsetCodeCount] {
if v > 0 {
n := float64(v)
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
bits += int(offsetExtraBits[v&31]) * int(n)
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
bits += int(offsetExtraBits[i&31]) * int(v)
}
}
}
return int(shannon) + bits
}
// AddMatch adds a match to the tokens.
// This function is very sensitive to inlining and right on the border.
func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
if debugDecode {
if debugDeflate {
if xlength >= maxMatchLength+baseMatchLength {
panic(fmt.Errorf("invalid length: %v", xlength))
}
@ -273,7 +281,7 @@ func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
// AddMatchLong adds a match to the tokens, potentially longer than max match length.
// Length should NOT have the base subtracted, only offset should.
func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
if debugDecode {
if debugDeflate {
if xoffset >= maxMatchOffset+baseMatchOffset {
panic(fmt.Errorf("invalid offset: %v", xoffset))
}

View file

@ -207,7 +207,7 @@ func (z *Writer) Write(p []byte) (int, error) {
z.size += uint32(len(p))
z.digest = crc32.Update(z.digest, crc32.IEEETable, p)
if z.level == StatelessCompression {
return len(p), flate.StatelessDeflate(z.w, p, false)
return len(p), flate.StatelessDeflate(z.w, p, false, nil)
}
n, z.err = z.compressor.Write(p)
return n, z.err
@ -255,7 +255,7 @@ func (z *Writer) Close() error {
}
}
if z.level == StatelessCompression {
z.err = flate.StatelessDeflate(z.w, nil, true)
z.err = flate.StatelessDeflate(z.w, nil, true, nil)
} else {
z.err = z.compressor.Close()
}