Upgrade blevesearch to v0.8.1 (#9177)

For #1441

a91b427b59
This commit is contained in:
Mura Li 2019-11-27 17:23:33 +08:00 committed by Lauris BH
parent b50dee5a61
commit 9591185c8f
180 changed files with 43400 additions and 41105 deletions

20
vendor/github.com/RoaringBitmap/roaring/.drone.yml generated vendored Normal file
View file

@ -0,0 +1,20 @@
kind: pipeline
name: default
workspace:
base: /go
path: src/github.com/RoaringBitmap/roaring
steps:
- name: test
image: golang
commands:
- go get -t
- go test
- go test -race -run TestConcurrent*
- go build -tags appengine
- go test -tags appengine
- GOARCH=386 go build
- GOARCH=386 go test
- GOARCH=arm go build
- GOARCH=arm64 go build

View file

@ -8,10 +8,12 @@ install:
notifications:
email: false
go:
- 1.7.x
- 1.8.x
- 1.9.x
- 1.10.x
- "1.7.x"
- "1.8.x"
- "1.9.x"
- "1.10.x"
- "1.11.x"
- "1.12.x"
- tip
# whitelist
@ -21,10 +23,14 @@ branches:
script:
- goveralls -v -service travis-ci -ignore arraycontainer_gen.go,bitmapcontainer_gen.go,rle16_gen.go,rle_gen.go,roaringarray_gen.go,rle.go || go test
- go test -race -run TestConcurrent*
- go build -tags appengine
- go test -tags appengine
- GOARCH=arm64 go build
- GOARCH=386 go build
- GOARCH=386 go test
- GOARCH=arm go build
- GOARCH=arm64 go build
matrix:
allow_failures:
- go: tip

View file

@ -7,4 +7,5 @@ Bob Potter (@bpot),
Tyson Maly (@tvmaly),
Will Glynn (@willglynn),
Brent Pedersen (@brentp)
Maciej Biłas (@maciej)
Maciej Biłas (@maciej),
Joe Nall (@joenall)

View file

@ -9,4 +9,8 @@ Will Glynn (@willglynn),
Brent Pedersen (@brentp),
Jason E. Aten (@glycerine),
Vali Malinoiu (@0x4139),
Forud Ghafouri (@fzerorubigd)
Forud Ghafouri (@fzerorubigd),
Joe Nall (@joenall),
(@fredim),
Edd Robinson (@e-dard),
Alexander Petrov (@alldroll)

View file

@ -200,3 +200,36 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================================================
Portions of runcontainer.go are from the Go standard library, which is licensed
under:
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -1,4 +1,4 @@
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke rle backrle ser fetch-real-roaring-datasets
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke ser fetch-real-roaring-datasets
@ -63,7 +63,7 @@ qa: fmtcheck test vet lint
# Get the dependencies
deps:
GOPATH=$(GOPATH) go get github.com/smartystreets/goconvey/convey
GOPATH=$(GOPATH) go get github.com/stretchr/testify
GOPATH=$(GOPATH) go get github.com/willf/bitset
GOPATH=$(GOPATH) go get github.com/golang/lint/golint
GOPATH=$(GOPATH) go get github.com/mschoch/smat
@ -97,18 +97,8 @@ nuke:
rm -rf ./target
GOPATH=$(GOPATH) go clean -i ./...
rle:
cp rle.go rle16.go
perl -pi -e 's/32/16/g' rle16.go
cp rle_test.go rle16_test.go
perl -pi -e 's/32/16/g' rle16_test.go
backrle:
cp rle16.go rle.go
perl -pi -e 's/16/32/g' rle.go
perl -pi -e 's/2032/2016/g' rle.go
ser: rle
ser:
go generate
cover:

View file

@ -1,4 +1,5 @@
roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https://travis-ci.org/RoaringBitmap/roaring) [![Coverage Status](https://coveralls.io/repos/github/RoaringBitmap/roaring/badge.svg?branch=master)](https://coveralls.io/github/RoaringBitmap/roaring?branch=master) [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring)
[![Build Status](https://cloud.drone.io/api/badges/RoaringBitmap/roaring/status.svg)](https://cloud.drone.io/RoaringBitmap/roaring)
=============
This is a go version of the Roaring bitmap data structure.
@ -6,12 +7,12 @@ This is a go version of the Roaring bitmap data structure.
Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin].
[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin].
[lucene]: https://lucene.apache.org/
[solr]: https://lucene.apache.org/solr/
[elasticsearch]: https://www.elastic.co/products/elasticsearch
[druid]: http://druid.io/
[druid]: https://druid.apache.org/
[spark]: https://spark.apache.org/
[opensearchserver]: http://www.opensearchserver.com
[cloudtorrent]: https://github.com/jpillora/cloud-torrent
@ -61,7 +62,6 @@ http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/r
Dependencies are fetched automatically by giving the `-t` flag to `go get`.
they include
- github.com/smartystreets/goconvey/convey
- github.com/willf/bitset
- github.com/mschoch/smat
- github.com/glycerine/go-unsnap-stream
@ -133,6 +133,7 @@ func main() {
if rb1.Equals(newrb) {
fmt.Println("I wrote the content to a byte stream and read it back.")
}
// you can iterate over bitmaps using ReverseIterator(), Iterator, ManyIterator()
}
```
@ -206,7 +207,7 @@ You can use roaring with gore:
- go get -u github.com/motemen/gore
- Make sure that ``$GOPATH/bin`` is in your ``$PATH``.
- go get github/RoaringBitmap/roaring
- go get github.com/RoaringBitmap/roaring
```go
$ gore

View file

@ -24,12 +24,16 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uin
}
}
func (ac *arrayContainer) getShortIterator() shortIterable {
func (ac *arrayContainer) getShortIterator() shortPeekable {
return &shortIterator{ac.content, 0}
}
func (ac *arrayContainer) getReverseIterator() shortIterable {
return &reverseIterator{ac.content, len(ac.content) - 1}
}
func (ac *arrayContainer) getManyIterator() manyIterable {
return &manyIterator{ac.content, 0}
return &shortIterator{ac.content, 0}
}
func (ac *arrayContainer) minimum() uint16 {
@ -115,7 +119,6 @@ func (ac *arrayContainer) iremoveRange(firstOfRange, endx int) container {
// flip the values in the range [firstOfRange,endx)
func (ac *arrayContainer) not(firstOfRange, endx int) container {
if firstOfRange >= endx {
//p("arrayContainer.not(): exiting early with ac.clone()")
return ac.clone()
}
return ac.notClose(firstOfRange, endx-1) // remove everything in [firstOfRange,endx-1]
@ -124,18 +127,15 @@ func (ac *arrayContainer) not(firstOfRange, endx int) container {
// flip the values in the range [firstOfRange,lastOfRange]
func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange]
//p("arrayContainer.notClose(): exiting early with ac.clone()")
return ac.clone()
}
// determine the span of array indices to be affected^M
startIndex := binarySearch(ac.content, uint16(firstOfRange))
//p("startIndex=%v", startIndex)
if startIndex < 0 {
startIndex = -startIndex - 1
}
lastIndex := binarySearch(ac.content, uint16(lastOfRange))
//p("lastIndex=%v", lastIndex)
if lastIndex < 0 {
lastIndex = -lastIndex - 2
}
@ -144,9 +144,7 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
newValuesInRange := spanToBeFlipped - currentValuesInRange
cardinalityChange := newValuesInRange - currentValuesInRange
newCardinality := len(ac.content) + cardinalityChange
//p("new card is %v", newCardinality)
if newCardinality > arrayDefaultMaxSize {
//p("new card over arrayDefaultMaxSize, so returning bitmap")
return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1)
}
answer := newArrayContainer()
@ -503,7 +501,6 @@ func (ac *arrayContainer) lazyorArray(value2 *arrayContainer) container {
}
func (ac *arrayContainer) and(a container) container {
//p("ac.and() called")
switch x := a.(type) {
case *arrayContainer:
return ac.andArray(x)
@ -550,7 +547,7 @@ func (ac *arrayContainer) iand(a container) container {
return ac.iandBitmap(x)
case *runContainer16:
if x.isFull() {
return ac.clone()
return ac
}
return x.andArray(ac)
}
@ -722,7 +719,6 @@ func (ac *arrayContainer) inot(firstOfRange, endx int) container {
// flip the values in the range [firstOfRange,lastOfRange]
func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
//p("ac.inotClose() starting")
if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange]
return ac
}
@ -745,7 +741,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
if cardinalityChange > 0 {
if newCardinality > len(ac.content) {
if newCardinality > arrayDefaultMaxSize {
//p("ac.inotClose() converting to bitmap and doing inot there")
bcRet := ac.toBitmapContainer()
bcRet.inot(firstOfRange, lastOfRange+1)
*ac = *bcRet.toArrayContainer()
@ -766,7 +761,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
}
}
ac.content = ac.content[:newCardinality]
//p("bottom of ac.inotClose(): returning ac")
return ac
}
@ -958,3 +952,17 @@ func (ac *arrayContainer) toEfficientContainer() container {
func (ac *arrayContainer) containerType() contype {
return arrayContype
}
func (ac *arrayContainer) addOffset(x uint16) []container {
low := &arrayContainer{}
high := &arrayContainer{}
for _, val := range ac.content {
y := uint32(val) + uint32(x)
if highbits(y) > 0 {
high.content = append(high.content, lowbits(y))
} else {
low.content = append(low.content, lowbits(y))
}
}
return []container{low, high}
}

View file

@ -6,7 +6,7 @@ package roaring
import "github.com/tinylib/msgp/msgp"
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -49,7 +49,7 @@ func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 1
// write "content"
@ -70,7 +70,7 @@ func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 1
@ -83,7 +83,7 @@ func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -127,7 +127,7 @@ func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *arrayContainer) Msgsize() (s int) {
s = 1 + 8 + msgp.ArrayHeaderSize + (len(z.content) * (msgp.Uint16Size))
return

View file

@ -110,14 +110,54 @@ func (bcsi *bitmapContainerShortIterator) hasNext() bool {
return bcsi.i >= 0
}
func (bcsi *bitmapContainerShortIterator) peekNext() uint16 {
return uint16(bcsi.i)
}
func (bcsi *bitmapContainerShortIterator) advanceIfNeeded(minval uint16) {
if bcsi.hasNext() && bcsi.peekNext() < minval {
bcsi.i = bcsi.ptr.NextSetBit(int(minval))
}
}
func newBitmapContainerShortIterator(a *bitmapContainer) *bitmapContainerShortIterator {
return &bitmapContainerShortIterator{a, a.NextSetBit(0)}
}
func (bc *bitmapContainer) getShortIterator() shortIterable {
func (bc *bitmapContainer) getShortIterator() shortPeekable {
return newBitmapContainerShortIterator(bc)
}
type reverseBitmapContainerShortIterator struct {
ptr *bitmapContainer
i int
}
func (bcsi *reverseBitmapContainerShortIterator) next() uint16 {
if bcsi.i == -1 {
panic("reverseBitmapContainerShortIterator.next() going beyond what is available")
}
j := bcsi.i
bcsi.i = bcsi.ptr.PrevSetBit(bcsi.i - 1)
return uint16(j)
}
func (bcsi *reverseBitmapContainerShortIterator) hasNext() bool {
return bcsi.i >= 0
}
func newReverseBitmapContainerShortIterator(a *bitmapContainer) *reverseBitmapContainerShortIterator {
if a.cardinality == 0 {
return &reverseBitmapContainerShortIterator{a, -1}
}
return &reverseBitmapContainerShortIterator{a, int(a.maximum())}
}
func (bc *bitmapContainer) getReverseIterator() shortIterable {
return newReverseBitmapContainerShortIterator(bc)
}
type bitmapContainerManyIterator struct {
ptr *bitmapContainer
base int
@ -131,7 +171,7 @@ func (bcmi *bitmapContainerManyIterator) nextMany(hs uint32, buf []uint32) int {
for n < len(buf) {
if bitset == 0 {
base += 1
base++
if base >= len(bcmi.ptr.bitmap) {
bcmi.base = base
bcmi.bitset = bitset
@ -177,16 +217,13 @@ func bitmapContainerSizeInBytes() int {
func bitmapEquals(a, b []uint64) bool {
if len(a) != len(b) {
//p("bitmaps differ on length. len(a)=%v; len(b)=%v", len(a), len(b))
return false
}
for i, v := range a {
if v != b[i] {
//p("bitmaps differ on element i=%v", i)
return false
}
}
//p("bitmapEquals returning true")
return true
}
@ -209,9 +246,7 @@ func (bc *bitmapContainer) fillLeastSignificant16bits(x []uint32, i int, mask ui
func (bc *bitmapContainer) equals(o container) bool {
srb, ok := o.(*bitmapContainer)
if ok {
//p("bitmapContainers.equals: both are bitmapContainers")
if srb.cardinality != bc.cardinality {
//p("bitmapContainers.equals: card differs: %v vs %v", srb.cardinality, bc.cardinality)
return false
}
return bitmapEquals(bc.bitmap, srb.bitmap)
@ -261,12 +296,6 @@ func (bc *bitmapContainer) iremoveReturnMinimized(i uint16) container {
// iremove returns true if i was found.
func (bc *bitmapContainer) iremove(i uint16) bool {
/* branchless code
w := bc.bitmap[i>>6]
mask := uint64(1) << (i % 64)
neww := w &^ mask
bc.cardinality -= int((w ^ neww) >> (i % 64))
bc.bitmap[i>>6] = neww */
if bc.contains(i) {
bc.cardinality--
bc.bitmap[i/64] &^= (uint64(1) << (i % 64))
@ -306,14 +335,10 @@ func (bc *bitmapContainer) iremoveRange(firstOfRange, lastOfRange int) container
// flip all values in range [firstOfRange,endx)
func (bc *bitmapContainer) inot(firstOfRange, endx int) container {
p("bc.inot() called with [%v, %v)", firstOfRange, endx)
if endx-firstOfRange == maxCapacity {
//p("endx-firstOfRange == maxCapacity")
flipBitmapRange(bc.bitmap, firstOfRange, endx)
bc.cardinality = maxCapacity - bc.cardinality
//p("bc.cardinality is now %v", bc.cardinality)
} else if endx-firstOfRange > maxCapacity/2 {
//p("endx-firstOfRange > maxCapacity/2")
flipBitmapRange(bc.bitmap, firstOfRange, endx)
bc.computeCardinality()
} else {
@ -517,11 +542,31 @@ func (bc *bitmapContainer) iorBitmap(value2 *bitmapContainer) container {
func (bc *bitmapContainer) lazyIORArray(value2 *arrayContainer) container {
answer := bc
c := value2.getCardinality()
for k := 0; k < c; k++ {
for k := 0; k+3 < c; k += 4 {
content := (*[4]uint16)(unsafe.Pointer(&value2.content[k]))
vc0 := content[0]
i0 := uint(vc0) >> 6
answer.bitmap[i0] = answer.bitmap[i0] | (uint64(1) << (vc0 % 64))
vc1 := content[1]
i1 := uint(vc1) >> 6
answer.bitmap[i1] = answer.bitmap[i1] | (uint64(1) << (vc1 % 64))
vc2 := content[2]
i2 := uint(vc2) >> 6
answer.bitmap[i2] = answer.bitmap[i2] | (uint64(1) << (vc2 % 64))
vc3 := content[3]
i3 := uint(vc3) >> 6
answer.bitmap[i3] = answer.bitmap[i3] | (uint64(1) << (vc3 % 64))
}
for k := c &^ 3; k < c; k++ {
vc := value2.content[k]
i := uint(vc) >> 6
answer.bitmap[i] = answer.bitmap[i] | (uint64(1) << (vc % 64))
}
answer.cardinality = invalidCardinality
return answer
}
@ -789,8 +834,6 @@ func (bc *bitmapContainer) andNotRun16(rc *runContainer16) container {
}
func (bc *bitmapContainer) iandNot(a container) container {
//p("bitmapContainer.iandNot() starting")
switch x := a.(type) {
case *arrayContainer:
return bc.iandNotArray(x)
@ -844,12 +887,15 @@ func (bc *bitmapContainer) andNotBitmap(value2 *bitmapContainer) container {
return ac
}
func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) *bitmapContainer {
func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) container {
newCardinality := int(popcntMaskSlice(bc.bitmap, value2.bitmap))
for k := 0; k < len(bc.bitmap); k++ {
bc.bitmap[k] = bc.bitmap[k] &^ value2.bitmap[k]
}
bc.cardinality = newCardinality
if bc.getCardinality() <= arrayDefaultMaxSize {
return bc.toArrayContainer()
}
return bc
}
@ -917,6 +963,32 @@ func (bc *bitmapContainer) NextSetBit(i int) int {
return -1
}
func (bc *bitmapContainer) PrevSetBit(i int) int {
if i < 0 {
return -1
}
x := i / 64
if x >= len(bc.bitmap) {
return -1
}
w := bc.bitmap[x]
b := i % 64
w = w << uint(63-b)
if w != 0 {
return i - countLeadingZeros(w)
}
x--
for ; x >= 0; x-- {
if bc.bitmap[x] != 0 {
return (x * 64) + 63 - countLeadingZeros(bc.bitmap[x])
}
}
return -1
}
// reference the java implementation
// https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/BitmapContainer.java#L875-L892
//
@ -980,3 +1052,35 @@ func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer {
func (bc *bitmapContainer) containerType() contype {
return bitmapContype
}
func (bc *bitmapContainer) addOffset(x uint16) []container {
low := newBitmapContainer()
high := newBitmapContainer()
b := uint32(x) >> 6
i := uint32(x) % 64
end := uint32(1024) - b
if i == 0 {
copy(low.bitmap[b:], bc.bitmap[:end])
copy(high.bitmap[:b], bc.bitmap[end:])
} else {
low.bitmap[b] = bc.bitmap[0] << i
for k := uint32(1); k < end; k++ {
newval := bc.bitmap[k] << i
if newval == 0 {
newval = bc.bitmap[k-1] >> (64 - i)
}
low.bitmap[b+k] = newval
}
for k := end; k < 1024; k++ {
newval := bc.bitmap[k] << i
if newval == 0 {
newval = bc.bitmap[k-1] >> (64 - i)
}
high.bitmap[k-end] = newval
}
high.bitmap[b] = bc.bitmap[1023] >> (64 - i)
}
low.computeCardinality()
high.computeCardinality()
return []container{low, high}
}

View file

@ -6,7 +6,7 @@ package roaring
import "github.com/tinylib/msgp/msgp"
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -54,7 +54,7 @@ func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2
// write "cardinality"
@ -84,7 +84,7 @@ func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 2
@ -100,7 +100,7 @@ func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -149,13 +149,13 @@ func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *bitmapContainer) Msgsize() (s int) {
s = 1 + 12 + msgp.IntSize + 7 + msgp.ArrayHeaderSize + (len(z.bitmap) * (msgp.Uint64Size))
return
}
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -239,7 +239,7 @@ func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2
// write "ptr"
@ -291,7 +291,7 @@ func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 2
@ -317,7 +317,7 @@ func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -402,7 +402,7 @@ func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err e
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *bitmapContainerShortIterator) Msgsize() (s int) {
s = 1 + 4
if z.ptr == nil {

161
vendor/github.com/RoaringBitmap/roaring/byte_input.go generated vendored Normal file
View file

@ -0,0 +1,161 @@
package roaring
import (
"encoding/binary"
"io"
)
type byteInput interface {
// next returns a slice containing the next n bytes from the buffer,
// advancing the buffer as if the bytes had been returned by Read.
next(n int) ([]byte, error)
// readUInt32 reads uint32 with LittleEndian order
readUInt32() (uint32, error)
// readUInt16 reads uint16 with LittleEndian order
readUInt16() (uint16, error)
// getReadBytes returns read bytes
getReadBytes() int64
// skipBytes skips exactly n bytes
skipBytes(n int) error
}
func newByteInputFromReader(reader io.Reader) byteInput {
return &byteInputAdapter{
r: reader,
readBytes: 0,
}
}
func newByteInput(buf []byte) byteInput {
return &byteBuffer{
buf: buf,
off: 0,
}
}
type byteBuffer struct {
buf []byte
off int
}
// next returns a slice containing the next n bytes from the reader
// If there are fewer bytes than the given n, io.ErrUnexpectedEOF will be returned
func (b *byteBuffer) next(n int) ([]byte, error) {
m := len(b.buf) - b.off
if n > m {
return nil, io.ErrUnexpectedEOF
}
data := b.buf[b.off : b.off+n]
b.off += n
return data, nil
}
// readUInt32 reads uint32 with LittleEndian order
func (b *byteBuffer) readUInt32() (uint32, error) {
if len(b.buf)-b.off < 4 {
return 0, io.ErrUnexpectedEOF
}
v := binary.LittleEndian.Uint32(b.buf[b.off:])
b.off += 4
return v, nil
}
// readUInt16 reads uint16 with LittleEndian order
func (b *byteBuffer) readUInt16() (uint16, error) {
if len(b.buf)-b.off < 2 {
return 0, io.ErrUnexpectedEOF
}
v := binary.LittleEndian.Uint16(b.buf[b.off:])
b.off += 2
return v, nil
}
// getReadBytes returns read bytes
func (b *byteBuffer) getReadBytes() int64 {
return int64(b.off)
}
// skipBytes skips exactly n bytes
func (b *byteBuffer) skipBytes(n int) error {
m := len(b.buf) - b.off
if n > m {
return io.ErrUnexpectedEOF
}
b.off += n
return nil
}
// reset resets the given buffer with a new byte slice
func (b *byteBuffer) reset(buf []byte) {
b.buf = buf
b.off = 0
}
type byteInputAdapter struct {
r io.Reader
readBytes int
}
// next returns a slice containing the next n bytes from the buffer,
// advancing the buffer as if the bytes had been returned by Read.
func (b *byteInputAdapter) next(n int) ([]byte, error) {
buf := make([]byte, n)
m, err := io.ReadAtLeast(b.r, buf, n)
b.readBytes += m
if err != nil {
return nil, err
}
return buf, nil
}
// readUInt32 reads uint32 with LittleEndian order
func (b *byteInputAdapter) readUInt32() (uint32, error) {
buf, err := b.next(4)
if err != nil {
return 0, err
}
return binary.LittleEndian.Uint32(buf), nil
}
// readUInt16 reads uint16 with LittleEndian order
func (b *byteInputAdapter) readUInt16() (uint16, error) {
buf, err := b.next(2)
if err != nil {
return 0, err
}
return binary.LittleEndian.Uint16(buf), nil
}
// getReadBytes returns read bytes
func (b *byteInputAdapter) getReadBytes() int64 {
return int64(b.readBytes)
}
// skipBytes skips exactly n bytes
func (b *byteInputAdapter) skipBytes(n int) error {
_, err := b.next(n)
return err
}
// reset resets the given buffer with a new stream
func (b *byteInputAdapter) reset(stream io.Reader) {
b.r = stream
b.readBytes = 0
}

11
vendor/github.com/RoaringBitmap/roaring/clz.go generated vendored Normal file
View file

@ -0,0 +1,11 @@
// +build go1.9
// "go1.9", from Go version 1.9 onward
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
package roaring
import "math/bits"
func countLeadingZeros(x uint64) int {
return bits.LeadingZeros64(x)
}

36
vendor/github.com/RoaringBitmap/roaring/clz_compat.go generated vendored Normal file
View file

@ -0,0 +1,36 @@
// +build !go1.9
package roaring
// LeadingZeroBits returns the number of consecutive most significant zero
// bits of x.
func countLeadingZeros(i uint64) int {
if i == 0 {
return 64
}
n := 1
x := uint32(i >> 32)
if x == 0 {
n += 32
x = uint32(i)
}
if (x >> 16) == 0 {
n += 16
x <<= 16
}
if (x >> 24) == 0 {
n += 8
x <<= 8
}
if x>>28 == 0 {
n += 4
x <<= 4
}
if x>>30 == 0 {
n += 2
x <<= 2
}
n -= int(x >> 31)
return n
}

16
vendor/github.com/RoaringBitmap/roaring/go.mod generated vendored Normal file
View file

@ -0,0 +1,16 @@
module github.com/RoaringBitmap/roaring
go 1.12
require (
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 // indirect
github.com/golang/snappy v0.0.1 // indirect
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 // indirect
github.com/jtolds/gls v4.20.0+incompatible // indirect
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae
github.com/philhofer/fwd v1.0.0 // indirect
github.com/stretchr/testify v1.4.0
github.com/tinylib/msgp v1.1.0
github.com/willf/bitset v1.1.10
)

30
vendor/github.com/RoaringBitmap/roaring/go.sum generated vendored Normal file
View file

@ -0,0 +1,30 @@
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 h1:Ujru1hufTHVb++eG6OuNDKMxZnGIvF6o/u8q/8h2+I4=
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8=
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw=
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae h1:VeRdUYdCw49yizlSbMEn2SZ+gT+3IUKx8BqxyQdz+BY=
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae/go.mod h1:qAyveg+e4CE+eKJXWVjKXM4ck2QobLqTDytGJbLLhJg=
github.com/philhofer/fwd v1.0.0 h1:UbZqGr5Y38ApvM/V/jEljVxwocdweyH+vmYvRPBnbqQ=
github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU=
github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

View file

@ -4,12 +4,7 @@ type manyIterable interface {
nextMany(hs uint32, buf []uint32) int
}
type manyIterator struct {
slice []uint16
loc int
}
func (si *manyIterator) nextMany(hs uint32, buf []uint32) int {
func (si *shortIterator) nextMany(hs uint32, buf []uint32) int {
n := 0
l := si.loc
s := si.slice

View file

@ -143,8 +143,8 @@ func toBitmapContainer(c container) container {
func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) {
expectedKeys := -1
appendedKeys := 0
keys := make([]uint16, 0)
containers := make([]container, 0)
var keys []uint16
var containers []container
for appendedKeys != expectedKeys {
select {
case item := <-resultChan:
@ -337,7 +337,7 @@ func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap {
// (if it is set to 0, a default number of workers is chosen)
func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
var lKey uint16 = MaxUint16
var hKey uint16 = 0
var hKey uint16
bitmapsFiltered := bitmaps[:0]
for _, b := range bitmaps {

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,163 +0,0 @@
package roaring
import (
"fmt"
)
// common to rle32.go and rle16.go
// rleVerbose controls whether p() prints show up.
// The testing package sets this based on
// testing.Verbose().
var rleVerbose bool
// p is a shorthand for fmt.Printf with beginning and
// trailing newlines. p() makes it easy
// to add diagnostic print statements.
func p(format string, args ...interface{}) {
if rleVerbose {
fmt.Printf("\n"+format+"\n", args...)
}
}
// MaxUint32 is the largest uint32 value.
const MaxUint32 = 4294967295
// MaxUint16 is the largest 16 bit unsigned int.
// This is the largest value an interval16 can store.
const MaxUint16 = 65535
// searchOptions allows us to accelerate runContainer32.search with
// prior knowledge of (mostly lower) bounds. This is used by Union
// and Intersect.
type searchOptions struct {
// start here instead of at 0
startIndex int64
// upper bound instead of len(rc.iv);
// endxIndex == 0 means ignore the bound and use
// endxIndex == n ==len(rc.iv) which is also
// naturally the default for search()
// when opt = nil.
endxIndex int64
}
// And finds the intersection of rc and b.
func (rc *runContainer32) And(b *Bitmap) *Bitmap {
out := NewBitmap()
for _, p := range rc.iv {
for i := p.start; i <= p.last; i++ {
if b.Contains(i) {
out.Add(i)
}
}
}
return out
}
// Xor returns the exclusive-or of rc and b.
func (rc *runContainer32) Xor(b *Bitmap) *Bitmap {
out := b.Clone()
for _, p := range rc.iv {
for v := p.start; v <= p.last; v++ {
if out.Contains(v) {
out.RemoveRange(uint64(v), uint64(v+1))
} else {
out.Add(v)
}
}
}
return out
}
// Or returns the union of rc and b.
func (rc *runContainer32) Or(b *Bitmap) *Bitmap {
out := b.Clone()
for _, p := range rc.iv {
for v := p.start; v <= p.last; v++ {
out.Add(v)
}
}
return out
}
// trial is used in the randomized testing of runContainers
type trial struct {
n int
percentFill float64
ntrial int
// only in the union test
// only subtract test
percentDelete float64
// only in 067 randomized operations
// we do this + 1 passes
numRandomOpsPass int
// allow sampling range control
// only recent tests respect this.
srang *interval16
}
// And finds the intersection of rc and b.
func (rc *runContainer16) And(b *Bitmap) *Bitmap {
out := NewBitmap()
for _, p := range rc.iv {
plast := p.last()
for i := p.start; i <= plast; i++ {
if b.Contains(uint32(i)) {
out.Add(uint32(i))
}
}
}
return out
}
// Xor returns the exclusive-or of rc and b.
func (rc *runContainer16) Xor(b *Bitmap) *Bitmap {
out := b.Clone()
for _, p := range rc.iv {
plast := p.last()
for v := p.start; v <= plast; v++ {
w := uint32(v)
if out.Contains(w) {
out.RemoveRange(uint64(w), uint64(w+1))
} else {
out.Add(w)
}
}
}
return out
}
// Or returns the union of rc and b.
func (rc *runContainer16) Or(b *Bitmap) *Bitmap {
out := b.Clone()
for _, p := range rc.iv {
plast := p.last()
for v := p.start; v <= plast; v++ {
out.Add(uint32(v))
}
}
return out
}
//func (rc *runContainer32) and(container) container {
// panic("TODO. not yet implemented")
//}
// serializedSizeInBytes returns the number of bytes of memory
// required by this runContainer16. This is for the
// Roaring format, as specified https://github.com/RoaringBitmap/RoaringFormatSpec/
func (rc *runContainer16) serializedSizeInBytes() int {
// number of runs in one uint16, then each run
// needs two more uint16
return 2 + len(rc.iv)*4
}
// serializedSizeInBytes returns the number of bytes of memory
// required by this runContainer32.
func (rc *runContainer32) serializedSizeInBytes() int {
return 4 + len(rc.iv)*8
}

View file

@ -1,695 +0,0 @@
package roaring
///////////////////////////////////////////////////
//
// container interface methods for runContainer16
//
///////////////////////////////////////////////////
import (
"fmt"
)
// compile time verify we meet interface requirements
var _ container = &runContainer16{}
func (rc *runContainer16) clone() container {
return newRunContainer16CopyIv(rc.iv)
}
func (rc *runContainer16) minimum() uint16 {
return rc.iv[0].start // assume not empty
}
func (rc *runContainer16) maximum() uint16 {
return rc.iv[len(rc.iv)-1].last() // assume not empty
}
func (rc *runContainer16) isFull() bool {
return (len(rc.iv) == 1) && ((rc.iv[0].start == 0) && (rc.iv[0].last() == MaxUint16))
}
func (rc *runContainer16) and(a container) container {
if rc.isFull() {
return a.clone()
}
switch c := a.(type) {
case *runContainer16:
return rc.intersect(c)
case *arrayContainer:
return rc.andArray(c)
case *bitmapContainer:
return rc.andBitmapContainer(c)
}
panic("unsupported container type")
}
func (rc *runContainer16) andCardinality(a container) int {
switch c := a.(type) {
case *runContainer16:
return int(rc.intersectCardinality(c))
case *arrayContainer:
return rc.andArrayCardinality(c)
case *bitmapContainer:
return rc.andBitmapContainerCardinality(c)
}
panic("unsupported container type")
}
// andBitmapContainer finds the intersection of rc and b.
func (rc *runContainer16) andBitmapContainer(bc *bitmapContainer) container {
bc2 := newBitmapContainerFromRun(rc)
return bc2.andBitmap(bc)
}
func (rc *runContainer16) andArrayCardinality(ac *arrayContainer) int {
pos := 0
answer := 0
maxpos := ac.getCardinality()
if maxpos == 0 {
return 0 // won't happen in actual code
}
v := ac.content[pos]
mainloop:
for _, p := range rc.iv {
for v < p.start {
pos++
if pos == maxpos {
break mainloop
}
v = ac.content[pos]
}
for v <= p.last() {
answer++
pos++
if pos == maxpos {
break mainloop
}
v = ac.content[pos]
}
}
return answer
}
func (rc *runContainer16) iand(a container) container {
if rc.isFull() {
return a.clone()
}
switch c := a.(type) {
case *runContainer16:
return rc.inplaceIntersect(c)
case *arrayContainer:
return rc.andArray(c)
case *bitmapContainer:
return rc.iandBitmapContainer(c)
}
panic("unsupported container type")
}
func (rc *runContainer16) inplaceIntersect(rc2 *runContainer16) container {
// TODO: optimize by doing less allocation, possibly?
// sect will be new
sect := rc.intersect(rc2)
*rc = *sect
return rc
}
func (rc *runContainer16) iandBitmapContainer(bc *bitmapContainer) container {
isect := rc.andBitmapContainer(bc)
*rc = *newRunContainer16FromContainer(isect)
return rc
}
func (rc *runContainer16) andArray(ac *arrayContainer) container {
if len(rc.iv) == 0 {
return newArrayContainer()
}
acCardinality := ac.getCardinality()
c := newArrayContainerCapacity(acCardinality)
for rlePos, arrayPos := 0, 0; arrayPos < acCardinality; {
iv := rc.iv[rlePos]
arrayVal := ac.content[arrayPos]
for iv.last() < arrayVal {
rlePos++
if rlePos == len(rc.iv) {
return c
}
iv = rc.iv[rlePos]
}
if iv.start > arrayVal {
arrayPos = advanceUntil(ac.content, arrayPos, len(ac.content), iv.start)
} else {
c.content = append(c.content, arrayVal)
arrayPos++
}
}
return c
}
func (rc *runContainer16) andNot(a container) container {
switch c := a.(type) {
case *arrayContainer:
return rc.andNotArray(c)
case *bitmapContainer:
return rc.andNotBitmap(c)
case *runContainer16:
return rc.andNotRunContainer16(c)
}
panic("unsupported container type")
}
func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uint32) {
k := 0
var val int64
for _, p := range rc.iv {
n := p.runlen()
for j := int64(0); j < n; j++ {
val = int64(p.start) + j
x[k+i] = uint32(val) | mask
k++
}
}
}
func (rc *runContainer16) getShortIterator() shortIterable {
return rc.newRunIterator16()
}
func (rc *runContainer16) getManyIterator() manyIterable {
return rc.newManyRunIterator16()
}
// add the values in the range [firstOfRange, endx). endx
// is still abe to express 2^16 because it is an int not an uint16.
func (rc *runContainer16) iaddRange(firstOfRange, endx int) container {
if firstOfRange >= endx {
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange", endx))
}
addme := newRunContainer16TakeOwnership([]interval16{
{
start: uint16(firstOfRange),
length: uint16(endx - 1 - firstOfRange),
},
})
*rc = *rc.union(addme)
return rc
}
// remove the values in the range [firstOfRange,endx)
func (rc *runContainer16) iremoveRange(firstOfRange, endx int) container {
if firstOfRange >= endx {
panic(fmt.Sprintf("request to iremove empty set [%v, %v),"+
" nothing to do.", firstOfRange, endx))
//return rc
}
x := newInterval16Range(uint16(firstOfRange), uint16(endx-1))
rc.isubtract(x)
return rc
}
// not flip the values in the range [firstOfRange,endx)
func (rc *runContainer16) not(firstOfRange, endx int) container {
if firstOfRange >= endx {
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange))
}
return rc.Not(firstOfRange, endx)
}
// Not flips the values in the range [firstOfRange,endx).
// This is not inplace. Only the returned value has the flipped bits.
//
// Currently implemented as (!A intersect B) union (A minus B),
// where A is rc, and B is the supplied [firstOfRange, endx) interval.
//
// TODO(time optimization): convert this to a single pass
// algorithm by copying AndNotRunContainer16() and modifying it.
// Current routine is correct but
// makes 2 more passes through the arrays than should be
// strictly necessary. Measure both ways though--this may not matter.
//
func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 {
if firstOfRange >= endx {
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange == %v", endx, firstOfRange))
}
if firstOfRange >= endx {
return rc.Clone()
}
a := rc
// algo:
// (!A intersect B) union (A minus B)
nota := a.invert()
bs := []interval16{newInterval16Range(uint16(firstOfRange), uint16(endx-1))}
b := newRunContainer16TakeOwnership(bs)
notAintersectB := nota.intersect(b)
aMinusB := a.AndNotRunContainer16(b)
rc2 := notAintersectB.union(aMinusB)
return rc2
}
// equals is now logical equals; it does not require the
// same underlying container type.
func (rc *runContainer16) equals(o container) bool {
srb, ok := o.(*runContainer16)
if !ok {
// maybe value instead of pointer
val, valok := o.(*runContainer16)
if valok {
srb = val
ok = true
}
}
if ok {
// Check if the containers are the same object.
if rc == srb {
return true
}
if len(srb.iv) != len(rc.iv) {
return false
}
for i, v := range rc.iv {
if v != srb.iv[i] {
return false
}
}
return true
}
// use generic comparison
if o.getCardinality() != rc.getCardinality() {
return false
}
rit := rc.getShortIterator()
bit := o.getShortIterator()
//k := 0
for rit.hasNext() {
if bit.next() != rit.next() {
return false
}
//k++
}
return true
}
func (rc *runContainer16) iaddReturnMinimized(x uint16) container {
rc.Add(x)
return rc
}
func (rc *runContainer16) iadd(x uint16) (wasNew bool) {
return rc.Add(x)
}
func (rc *runContainer16) iremoveReturnMinimized(x uint16) container {
rc.removeKey(x)
return rc
}
func (rc *runContainer16) iremove(x uint16) bool {
return rc.removeKey(x)
}
func (rc *runContainer16) or(a container) container {
if rc.isFull() {
return rc.clone()
}
switch c := a.(type) {
case *runContainer16:
return rc.union(c)
case *arrayContainer:
return rc.orArray(c)
case *bitmapContainer:
return rc.orBitmapContainer(c)
}
panic("unsupported container type")
}
func (rc *runContainer16) orCardinality(a container) int {
switch c := a.(type) {
case *runContainer16:
return int(rc.unionCardinality(c))
case *arrayContainer:
return rc.orArrayCardinality(c)
case *bitmapContainer:
return rc.orBitmapContainerCardinality(c)
}
panic("unsupported container type")
}
// orBitmapContainer finds the union of rc and bc.
func (rc *runContainer16) orBitmapContainer(bc *bitmapContainer) container {
bc2 := newBitmapContainerFromRun(rc)
return bc2.iorBitmap(bc)
}
func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int {
answer := 0
for i := range rc.iv {
answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1)
}
//bc.computeCardinality()
return answer
}
func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int {
return rc.getCardinality() + bc.getCardinality() - rc.andBitmapContainerCardinality(bc)
}
// orArray finds the union of rc and ac.
func (rc *runContainer16) orArray(ac *arrayContainer) container {
bc1 := newBitmapContainerFromRun(rc)
bc2 := ac.toBitmapContainer()
return bc1.orBitmap(bc2)
}
// orArray finds the union of rc and ac.
func (rc *runContainer16) orArrayCardinality(ac *arrayContainer) int {
return ac.getCardinality() + rc.getCardinality() - rc.andArrayCardinality(ac)
}
func (rc *runContainer16) ior(a container) container {
if rc.isFull() {
return rc
}
switch c := a.(type) {
case *runContainer16:
return rc.inplaceUnion(c)
case *arrayContainer:
return rc.iorArray(c)
case *bitmapContainer:
return rc.iorBitmapContainer(c)
}
panic("unsupported container type")
}
func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container {
p("rc.inplaceUnion with len(rc2.iv)=%v", len(rc2.iv))
for _, p := range rc2.iv {
last := int64(p.last())
for i := int64(p.start); i <= last; i++ {
rc.Add(uint16(i))
}
}
return rc
}
func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container {
it := bc.getShortIterator()
for it.hasNext() {
rc.Add(it.next())
}
return rc
}
func (rc *runContainer16) iorArray(ac *arrayContainer) container {
it := ac.getShortIterator()
for it.hasNext() {
rc.Add(it.next())
}
return rc
}
// lazyIOR is described (not yet implemented) in
// this nice note from @lemire on
// https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737
//
// Description of lazyOR and lazyIOR from @lemire:
//
// Lazy functions are optional and can be simply
// wrapper around non-lazy functions.
//
// The idea of "laziness" is as follows. It is
// inspired by the concept of lazy evaluation
// you might be familiar with (functional programming
// and all that). So a roaring bitmap is
// such that all its containers are, in some
// sense, chosen to use as little memory as
// possible. This is nice. Also, all bitsets
// are "cardinality aware" so that you can do
// fast rank/select queries, or query the
// cardinality of the whole bitmap... very fast,
// without latency.
//
// However, imagine that you are aggregating 100
// bitmaps together. So you OR the first two, then OR
// that with the third one and so forth. Clearly,
// intermediate bitmaps don't need to be as
// compressed as possible, right? They can be
// in a "dirty state". You only need the end
// result to be in a nice state... which you
// can achieve by calling repairAfterLazy at the end.
//
// The Java/C code does something special for
// the in-place lazy OR runs. The idea is that
// instead of taking two run containers and
// generating a new one, we actually try to
// do the computation in-place through a
// technique invented by @gssiyankai (pinging him!).
// What you do is you check whether the host
// run container has lots of extra capacity.
// If it does, you move its data at the end of
// the backing array, and then you write
// the answer at the beginning. What this
// trick does is minimize memory allocations.
//
func (rc *runContainer16) lazyIOR(a container) container {
// not lazy at the moment
// TODO: make it lazy
return rc.ior(a)
/*
switch c := a.(type) {
case *arrayContainer:
return rc.lazyIorArray(c)
case *bitmapContainer:
return rc.lazyIorBitmap(c)
case *runContainer16:
return rc.lazyIorRun16(c)
}
panic("unsupported container type")
*/
}
// lazyOR is described above in lazyIOR.
func (rc *runContainer16) lazyOR(a container) container {
// not lazy at the moment
// TODO: make it lazy
return rc.or(a)
/*
switch c := a.(type) {
case *arrayContainer:
return rc.lazyOrArray(c)
case *bitmapContainer:
return rc.lazyOrBitmap(c)
case *runContainer16:
return rc.lazyOrRunContainer16(c)
}
panic("unsupported container type")
*/
}
func (rc *runContainer16) intersects(a container) bool {
// TODO: optimize by doing inplace/less allocation, possibly?
isect := rc.and(a)
return isect.getCardinality() > 0
}
func (rc *runContainer16) xor(a container) container {
switch c := a.(type) {
case *arrayContainer:
return rc.xorArray(c)
case *bitmapContainer:
return rc.xorBitmap(c)
case *runContainer16:
return rc.xorRunContainer16(c)
}
panic("unsupported container type")
}
func (rc *runContainer16) iandNot(a container) container {
switch c := a.(type) {
case *arrayContainer:
return rc.iandNotArray(c)
case *bitmapContainer:
return rc.iandNotBitmap(c)
case *runContainer16:
return rc.iandNotRunContainer16(c)
}
panic("unsupported container type")
}
// flip the values in the range [firstOfRange,endx)
func (rc *runContainer16) inot(firstOfRange, endx int) container {
if firstOfRange >= endx {
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange))
}
// TODO: minimize copies, do it all inplace; not() makes a copy.
rc = rc.Not(firstOfRange, endx)
return rc
}
func (rc *runContainer16) getCardinality() int {
return int(rc.cardinality())
}
func (rc *runContainer16) rank(x uint16) int {
n := int64(len(rc.iv))
xx := int64(x)
w, already, _ := rc.search(xx, nil)
if w < 0 {
return 0
}
if !already && w == n-1 {
return rc.getCardinality()
}
var rnk int64
if !already {
for i := int64(0); i <= w; i++ {
rnk += rc.iv[i].runlen()
}
return int(rnk)
}
for i := int64(0); i < w; i++ {
rnk += rc.iv[i].runlen()
}
rnk += int64(x-rc.iv[w].start) + 1
return int(rnk)
}
func (rc *runContainer16) selectInt(x uint16) int {
return rc.selectInt16(x)
}
func (rc *runContainer16) andNotRunContainer16(b *runContainer16) container {
return rc.AndNotRunContainer16(b)
}
func (rc *runContainer16) andNotArray(ac *arrayContainer) container {
rcb := rc.toBitmapContainer()
acb := ac.toBitmapContainer()
return rcb.andNotBitmap(acb)
}
func (rc *runContainer16) andNotBitmap(bc *bitmapContainer) container {
rcb := rc.toBitmapContainer()
return rcb.andNotBitmap(bc)
}
func (rc *runContainer16) toBitmapContainer() *bitmapContainer {
p("run16 toBitmap starting; rc has %v ranges", len(rc.iv))
bc := newBitmapContainer()
for i := range rc.iv {
bc.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1)
}
bc.computeCardinality()
return bc
}
func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container {
rcb := rc.toBitmapContainer()
x2b := x2.toBitmapContainer()
rcb.iandNotBitmapSurely(x2b)
// TODO: check size and optimize the return value
// TODO: is inplace modification really required? If not, elide the copy.
rc2 := newRunContainer16FromBitmapContainer(rcb)
*rc = *rc2
return rc
}
func (rc *runContainer16) iandNotArray(ac *arrayContainer) container {
rcb := rc.toBitmapContainer()
acb := ac.toBitmapContainer()
rcb.iandNotBitmapSurely(acb)
// TODO: check size and optimize the return value
// TODO: is inplace modification really required? If not, elide the copy.
rc2 := newRunContainer16FromBitmapContainer(rcb)
*rc = *rc2
return rc
}
func (rc *runContainer16) iandNotBitmap(bc *bitmapContainer) container {
rcb := rc.toBitmapContainer()
rcb.iandNotBitmapSurely(bc)
// TODO: check size and optimize the return value
// TODO: is inplace modification really required? If not, elide the copy.
rc2 := newRunContainer16FromBitmapContainer(rcb)
*rc = *rc2
return rc
}
func (rc *runContainer16) xorRunContainer16(x2 *runContainer16) container {
rcb := rc.toBitmapContainer()
x2b := x2.toBitmapContainer()
return rcb.xorBitmap(x2b)
}
func (rc *runContainer16) xorArray(ac *arrayContainer) container {
rcb := rc.toBitmapContainer()
acb := ac.toBitmapContainer()
return rcb.xorBitmap(acb)
}
func (rc *runContainer16) xorBitmap(bc *bitmapContainer) container {
rcb := rc.toBitmapContainer()
return rcb.xorBitmap(bc)
}
// convert to bitmap or array *if needed*
func (rc *runContainer16) toEfficientContainer() container {
// runContainer16SerializedSizeInBytes(numRuns)
sizeAsRunContainer := rc.getSizeInBytes()
sizeAsBitmapContainer := bitmapContainerSizeInBytes()
card := int(rc.cardinality())
sizeAsArrayContainer := arrayContainerSizeInBytes(card)
if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
return rc
}
if card <= arrayDefaultMaxSize {
return rc.toArrayContainer()
}
bc := newBitmapContainerFromRun(rc)
return bc
}
func (rc *runContainer16) toArrayContainer() *arrayContainer {
ac := newArrayContainer()
for i := range rc.iv {
ac.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1)
}
return ac
}
func newRunContainer16FromContainer(c container) *runContainer16 {
switch x := c.(type) {
case *runContainer16:
return x.Clone()
case *arrayContainer:
return newRunContainer16FromArray(x)
case *bitmapContainer:
return newRunContainer16FromBitmapContainer(x)
}
panic("unsupported container type")
}

View file

@ -6,12 +6,12 @@
package roaring
import (
"bufio"
"bytes"
"encoding/base64"
"fmt"
"io"
"strconv"
"sync"
)
// Bitmap represents a compressed bitmap where you can add integers.
@ -52,7 +52,7 @@ func (rb *Bitmap) ToBytes() ([]byte, error) {
return rb.highlowcontainer.toBytes()
}
// WriteToMsgpack writes a msgpack2/snappy-streaming compressed serialized
// Deprecated: WriteToMsgpack writes a msgpack2/snappy-streaming compressed serialized
// version of this bitmap to stream. The format is not
// compatible with the WriteTo() format, and is
// experimental: it may produce smaller on disk
@ -67,8 +67,14 @@ func (rb *Bitmap) WriteToMsgpack(stream io.Writer) (int64, error) {
// The format is compatible with other RoaringBitmap
// implementations (Java, C) and is documented here:
// https://github.com/RoaringBitmap/RoaringFormatSpec
func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) {
return rb.highlowcontainer.readFrom(stream)
func (rb *Bitmap) ReadFrom(reader io.Reader) (p int64, err error) {
stream := byteInputAdapterPool.Get().(*byteInputAdapter)
stream.reset(reader)
p, err = rb.highlowcontainer.readFrom(stream)
byteInputAdapterPool.Put(stream)
return
}
// FromBuffer creates a bitmap from its serialized version stored in buffer
@ -87,10 +93,36 @@ func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) {
// You should *not* change the copy-on-write status of the resulting
// bitmaps (SetCopyOnWrite).
//
func (rb *Bitmap) FromBuffer(buf []byte) (int64, error) {
return rb.highlowcontainer.fromBuffer(buf)
// If buf becomes unavailable, then a bitmap created with
// FromBuffer would be effectively broken. Furthermore, any
// bitmap derived from this bitmap (e.g., via Or, And) might
// also be broken. Thus, before making buf unavailable, you should
// call CloneCopyOnWriteContainers on all such bitmaps.
//
func (rb *Bitmap) FromBuffer(buf []byte) (p int64, err error) {
stream := byteBufferPool.Get().(*byteBuffer)
stream.reset(buf)
p, err = rb.highlowcontainer.readFrom(stream)
byteBufferPool.Put(stream)
return
}
var (
byteBufferPool = sync.Pool{
New: func() interface{} {
return &byteBuffer{}
},
}
byteInputAdapterPool = sync.Pool{
New: func() interface{} {
return &byteInputAdapter{}
},
}
)
// RunOptimize attempts to further compress the runs of consecutive values found in the bitmap
func (rb *Bitmap) RunOptimize() {
rb.highlowcontainer.runOptimize()
@ -101,7 +133,7 @@ func (rb *Bitmap) HasRunCompression() bool {
return rb.highlowcontainer.hasRunCompression()
}
// ReadFromMsgpack reads a msgpack2/snappy-streaming serialized
// Deprecated: ReadFromMsgpack reads a msgpack2/snappy-streaming serialized
// version of this bitmap from stream. The format is
// expected is that written by the WriteToMsgpack()
// call; see additional notes there.
@ -110,29 +142,15 @@ func (rb *Bitmap) ReadFromMsgpack(stream io.Reader) (int64, error) {
}
// MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap
// (same as ToBytes)
func (rb *Bitmap) MarshalBinary() ([]byte, error) {
var buf bytes.Buffer
writer := bufio.NewWriter(&buf)
_, err := rb.WriteTo(writer)
if err != nil {
return nil, err
}
err = writer.Flush()
if err != nil {
return nil, err
}
return buf.Bytes(), nil
return rb.ToBytes()
}
// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface for the bitmap
func (rb *Bitmap) UnmarshalBinary(data []byte) error {
var buf bytes.Buffer
_, err := buf.Write(data)
if err != nil {
return err
}
reader := bufio.NewReader(&buf)
_, err = rb.ReadFrom(reader)
r := bytes.NewReader(data)
_, err := rb.ReadFrom(r)
return err
}
@ -215,10 +233,20 @@ type IntIterable interface {
Next() uint32
}
// IntPeekable allows you to look at the next value without advancing and
// advance as long as the next value is smaller than minval
type IntPeekable interface {
IntIterable
// PeekNext peeks the next value without advancing the iterator
PeekNext() uint32
// AdvanceIfNeeded advances as long as the next value is smaller than minval
AdvanceIfNeeded(minval uint32)
}
type intIterator struct {
pos int
hs uint32
iter shortIterable
iter shortPeekable
highlowcontainer *roaringArray
}
@ -244,6 +272,30 @@ func (ii *intIterator) Next() uint32 {
return x
}
// PeekNext peeks the next value without advancing the iterator
func (ii *intIterator) PeekNext() uint32 {
return uint32(ii.iter.peekNext()&maxLowBit) | ii.hs
}
// AdvanceIfNeeded advances as long as the next value is smaller than minval
func (ii *intIterator) AdvanceIfNeeded(minval uint32) {
to := minval >> 16
for ii.HasNext() && (ii.hs>>16) < to {
ii.pos++
ii.init()
}
if ii.HasNext() && (ii.hs>>16) == to {
ii.iter.advanceIfNeeded(lowbits(minval))
if !ii.iter.hasNext() {
ii.pos++
ii.init()
}
}
}
func newIntIterator(a *Bitmap) *intIterator {
p := new(intIterator)
p.pos = 0
@ -252,6 +304,45 @@ func newIntIterator(a *Bitmap) *intIterator {
return p
}
type intReverseIterator struct {
pos int
hs uint32
iter shortIterable
highlowcontainer *roaringArray
}
// HasNext returns true if there are more integers to iterate over
func (ii *intReverseIterator) HasNext() bool {
return ii.pos >= 0
}
func (ii *intReverseIterator) init() {
if ii.pos >= 0 {
ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getReverseIterator()
ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16
} else {
ii.iter = nil
}
}
// Next returns the next integer
func (ii *intReverseIterator) Next() uint32 {
x := uint32(ii.iter.next()) | ii.hs
if !ii.iter.hasNext() {
ii.pos = ii.pos - 1
ii.init()
}
return x
}
func newIntReverseIterator(a *Bitmap) *intReverseIterator {
p := new(intReverseIterator)
p.highlowcontainer = &a.highlowcontainer
p.pos = a.highlowcontainer.size() - 1
p.init()
return p
}
// ManyIntIterable allows you to iterate over the values in a Bitmap
type ManyIntIterable interface {
// pass in a buffer to fill up with values, returns how many values were returned
@ -325,12 +416,20 @@ func (rb *Bitmap) String() string {
return buffer.String()
}
// Iterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order
func (rb *Bitmap) Iterator() IntIterable {
// Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order;
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
func (rb *Bitmap) Iterator() IntPeekable {
return newIntIterator(rb)
}
// Iterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order
// ReverseIterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order;
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
func (rb *Bitmap) ReverseIterator() IntIterable {
return newIntReverseIterator(rb)
}
// ManyIterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order;
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
func (rb *Bitmap) ManyIterator() ManyIntIterable {
return newManyIntIterator(rb)
}
@ -374,6 +473,46 @@ func (rb *Bitmap) Equals(o interface{}) bool {
return false
}
// AddOffset adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process
func AddOffset(x *Bitmap, offset uint32) (answer *Bitmap) {
containerOffset := highbits(offset)
inOffset := lowbits(offset)
if inOffset == 0 {
answer = x.Clone()
for pos := 0; pos < answer.highlowcontainer.size(); pos++ {
key := answer.highlowcontainer.getKeyAtIndex(pos)
key += containerOffset
answer.highlowcontainer.keys[pos] = key
}
} else {
answer = New()
for pos := 0; pos < x.highlowcontainer.size(); pos++ {
key := x.highlowcontainer.getKeyAtIndex(pos)
key += containerOffset
c := x.highlowcontainer.getContainerAtIndex(pos)
offsetted := c.addOffset(inOffset)
if offsetted[0].getCardinality() > 0 {
curSize := answer.highlowcontainer.size()
lastkey := uint16(0)
if curSize > 0 {
lastkey = answer.highlowcontainer.getKeyAtIndex(curSize - 1)
}
if curSize > 0 && lastkey == key {
prev := answer.highlowcontainer.getContainerAtIndex(curSize - 1)
orrseult := prev.ior(offsetted[0])
answer.highlowcontainer.setContainerAtIndex(curSize-1, orrseult)
} else {
answer.highlowcontainer.appendContainer(key, offsetted[0], false)
}
}
if offsetted[1].getCardinality() > 0 {
answer.highlowcontainer.appendContainer(key+1, offsetted[1], false)
}
}
}
return answer
}
// Add the integer x to the bitmap
func (rb *Bitmap) Add(x uint32) {
hb := highbits(x)
@ -794,11 +933,6 @@ main:
}
}
/*func (rb *Bitmap) Or(x2 *Bitmap) {
results := Or(rb, x2) // Todo: could be computed in-place for reduced memory usage
rb.highlowcontainer = results.highlowcontainer
}*/
// AndNot computes the difference between two bitmaps and stores the result in the current bitmap
func (rb *Bitmap) AndNot(x2 *Bitmap) {
pos1 := 0
@ -1086,10 +1220,10 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
return
}
hbStart := highbits(uint32(rangeStart))
lbStart := lowbits(uint32(rangeStart))
hbLast := highbits(uint32(rangeEnd - 1))
lbLast := lowbits(uint32(rangeEnd - 1))
hbStart := uint32(highbits(uint32(rangeStart)))
lbStart := uint32(lowbits(uint32(rangeStart)))
hbLast := uint32(highbits(uint32(rangeEnd - 1)))
lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
var max uint32 = maxLowBit
for hb := hbStart; hb <= hbLast; hb++ {
@ -1102,7 +1236,7 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
containerLast = uint32(lbLast)
}
i := rb.highlowcontainer.getIndex(hb)
i := rb.highlowcontainer.getIndex(uint16(hb))
if i >= 0 {
c := rb.highlowcontainer.getWritableContainerAtIndex(i).inot(int(containerStart), int(containerLast)+1)
@ -1113,7 +1247,7 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
}
} else { // *think* the range of ones must never be
// empty.
rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, rangeOfOnes(int(containerStart), int(containerLast)))
rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast)))
}
}
}
@ -1139,24 +1273,24 @@ func (rb *Bitmap) AddRange(rangeStart, rangeEnd uint64) {
lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
var max uint32 = maxLowBit
for hb := uint16(hbStart); hb <= uint16(hbLast); hb++ {
for hb := hbStart; hb <= hbLast; hb++ {
containerStart := uint32(0)
if hb == uint16(hbStart) {
if hb == hbStart {
containerStart = lbStart
}
containerLast := max
if hb == uint16(hbLast) {
if hb == hbLast {
containerLast = lbLast
}
i := rb.highlowcontainer.getIndex(hb)
i := rb.highlowcontainer.getIndex(uint16(hb))
if i >= 0 {
c := rb.highlowcontainer.getWritableContainerAtIndex(i).iaddRange(int(containerStart), int(containerLast)+1)
rb.highlowcontainer.setContainerAtIndex(i, c)
} else { // *think* the range of ones must never be
// empty.
rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, rangeOfOnes(int(containerStart), int(containerLast)))
rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast)))
}
}
}
@ -1243,13 +1377,13 @@ func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap {
}
answer := NewBitmap()
hbStart := highbits(uint32(rangeStart))
lbStart := lowbits(uint32(rangeStart))
hbLast := highbits(uint32(rangeEnd - 1))
lbLast := lowbits(uint32(rangeEnd - 1))
hbStart := uint32(highbits(uint32(rangeStart)))
lbStart := uint32(lowbits(uint32(rangeStart)))
hbLast := uint32(highbits(uint32(rangeEnd - 1)))
lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
// copy the containers before the active area
answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, hbStart)
answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, uint16(hbStart))
var max uint32 = maxLowBit
for hb := hbStart; hb <= hbLast; hb++ {
@ -1262,23 +1396,23 @@ func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap {
containerLast = uint32(lbLast)
}
i := bm.highlowcontainer.getIndex(hb)
j := answer.highlowcontainer.getIndex(hb)
i := bm.highlowcontainer.getIndex(uint16(hb))
j := answer.highlowcontainer.getIndex(uint16(hb))
if i >= 0 {
c := bm.highlowcontainer.getContainerAtIndex(i).not(int(containerStart), int(containerLast)+1)
if c.getCardinality() > 0 {
answer.highlowcontainer.insertNewKeyValueAt(-j-1, hb, c)
answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb), c)
}
} else { // *think* the range of ones must never be
// empty.
answer.highlowcontainer.insertNewKeyValueAt(-j-1, hb,
answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb),
rangeOfOnes(int(containerStart), int(containerLast)))
}
}
// copy the containers after the active area.
answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, hbLast)
answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, uint16(hbLast))
return answer
}
@ -1296,6 +1430,21 @@ func (rb *Bitmap) GetCopyOnWrite() (val bool) {
return rb.highlowcontainer.copyOnWrite
}
// CloneCopyOnWriteContainers clones all containers which have
// needCopyOnWrite set to true.
// This can be used to make sure it is safe to munmap a []byte
// that the roaring array may still have a reference to, after
// calling FromBuffer.
// More generally this function is useful if you call FromBuffer
// to construct a bitmap with a backing array buf
// and then later discard the buf array. Note that you should call
// CloneCopyOnWriteContainers on all bitmaps that were derived
// from the 'FromBuffer' bitmap since they map have dependencies
// on the buf array as well.
func (rb *Bitmap) CloneCopyOnWriteContainers() {
rb.highlowcontainer.cloneCopyOnWriteContainers()
}
// FlipInt calls Flip after casting the parameters (convenience method)
func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap {
return Flip(bm, uint64(rangeStart), uint64(rangeEnd))

View file

@ -4,16 +4,16 @@ import (
"bytes"
"encoding/binary"
"fmt"
"io"
"io/ioutil"
snappy "github.com/glycerine/go-unsnap-stream"
"github.com/tinylib/msgp/msgp"
"io"
)
//go:generate msgp -unexported
type container interface {
addOffset(uint16) []container
clone() container
and(container) container
andCardinality(container) int
@ -37,7 +37,8 @@ type container interface {
not(start, final int) container // range is [firstOfRange,lastOfRange)
inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
xor(r container) container
getShortIterator() shortIterable
getShortIterator() shortPeekable
getReverseIterator() shortIterable
getManyIterator() manyIterable
contains(i uint16) bool
maximum() uint16
@ -61,7 +62,6 @@ type container interface {
iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
selectInt(x uint16) int // selectInt returns the xth integer in the container
serializedSizeInBytes() int
readFrom(io.Reader) (int, error)
writeTo(io.Writer) (int, error)
numberOfRuns() int
@ -280,6 +280,18 @@ func (ra *roaringArray) clone() *roaringArray {
return &sa
}
// clone all containers which have needCopyOnWrite set to true
// This can be used to make sure it is safe to munmap a []byte
// that the roaring array may still have a reference to.
func (ra *roaringArray) cloneCopyOnWriteContainers() {
for i, needCopyOnWrite := range ra.needCopyOnWrite {
if needCopyOnWrite {
ra.containers[i] = ra.containers[i].clone()
ra.needCopyOnWrite[i] = false
}
}
}
// unused function:
//func (ra *roaringArray) containsKey(x uint16) bool {
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
@ -456,8 +468,7 @@ func (ra *roaringArray) serializedSizeInBytes() uint64 {
//
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
//
func (ra *roaringArray) toBytes() ([]byte, error) {
stream := &bytes.Buffer{}
func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
hasRun := ra.hasRunCompression()
isRunSizeInBytes := 0
cookieSize := 8
@ -522,79 +533,77 @@ func (ra *roaringArray) toBytes() ([]byte, error) {
}
}
_, err := stream.Write(buf[:nw])
written, err := w.Write(buf[:nw])
if err != nil {
return nil, err
return n, err
}
for i, c := range ra.containers {
_ = i
_, err := c.writeTo(stream)
n += int64(written)
for _, c := range ra.containers {
written, err := c.writeTo(w)
if err != nil {
return nil, err
return n, err
}
n += int64(written)
}
return stream.Bytes(), nil
return n, nil
}
//
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
//
func (ra *roaringArray) writeTo(out io.Writer) (int64, error) {
by, err := ra.toBytes()
if err != nil {
return 0, err
}
n, err := out.Write(by)
if err == nil && n < len(by) {
err = io.ErrShortWrite
}
return int64(n), err
func (ra *roaringArray) toBytes() ([]byte, error) {
var buf bytes.Buffer
_, err := ra.writeTo(&buf)
return buf.Bytes(), err
}
func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
pos := 0
if len(buf) < 8 {
return 0, fmt.Errorf("buffer too small, expecting at least 8 bytes, was %d", len(buf))
func (ra *roaringArray) readFrom(stream byteInput) (int64, error) {
cookie, err := stream.readUInt32()
if err != nil {
return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
}
cookie := binary.LittleEndian.Uint32(buf)
pos += 4
var size uint32 // number of containers
haveRunContainers := false
var size uint32
var isRunBitmap []byte
// cookie header
if cookie&0x0000FFFF == serialCookie {
haveRunContainers = true
size = uint32(uint16(cookie>>16) + 1) // number of containers
size = uint32(uint16(cookie>>16) + 1)
// create is-run-container bitmap
isRunBitmapSize := (int(size) + 7) / 8
if pos+isRunBitmapSize > len(buf) {
return 0, fmt.Errorf("malformed bitmap, is-run bitmap overruns buffer at %d", pos+isRunBitmapSize)
isRunBitmap, err = stream.next(isRunBitmapSize)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err)
}
isRunBitmap = buf[pos : pos+isRunBitmapSize]
pos += isRunBitmapSize
} else if cookie == serialCookieNoRunContainer {
size = binary.LittleEndian.Uint32(buf[pos:])
pos += 4
} else {
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
}
if size > (1 << 16) {
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
}
// descriptive header
// keycard - is {key, cardinality} tuple slice
if pos+2*2*int(size) > len(buf) {
return 0, fmt.Errorf("malfomred bitmap, key-cardinality slice overruns buffer at %d", pos+2*2*int(size))
}
keycard := byteSliceAsUint16Slice(buf[pos : pos+2*2*int(size)])
pos += 2 * 2 * int(size)
size, err = stream.readUInt32()
if !haveRunContainers || size >= noOffsetThreshold {
pos += 4 * int(size)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err)
}
} else {
return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
}
if size > (1 << 16) {
return stream.getReadBytes(), fmt.Errorf("it is logically impossible to have more than (1<<16) containers")
}
// descriptive header
buf, err := stream.next(2 * 2 * int(size))
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err)
}
keycard := byteSliceAsUint16Slice(buf)
if isRunBitmap == nil || size >= noOffsetThreshold {
if err := stream.skipBytes(int(size) * 4); err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to skip bytes: %s", err)
}
}
// Allocate slices upfront as number of containers is known
@ -603,11 +612,13 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
} else {
ra.containers = make([]container, size)
}
if cap(ra.keys) >= int(size) {
ra.keys = ra.keys[:size]
} else {
ra.keys = make([]uint16, size)
}
if cap(ra.needCopyOnWrite) >= int(size) {
ra.needCopyOnWrite = ra.needCopyOnWrite[:size]
} else {
@ -615,129 +626,62 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
}
for i := uint32(0); i < size; i++ {
key := uint16(keycard[2*i])
key := keycard[2*i]
card := int(keycard[2*i+1]) + 1
ra.keys[i] = key
ra.needCopyOnWrite[i] = true
if haveRunContainers && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
// run container
nr := binary.LittleEndian.Uint16(buf[pos:])
pos += 2
if pos+int(nr)*4 > len(buf) {
return 0, fmt.Errorf("malformed bitmap, a run container overruns buffer at %d:%d", pos, pos+int(nr)*4)
nr, err := stream.readUInt16()
if err != nil {
return 0, fmt.Errorf("failed to read runtime container size: %s", err)
}
buf, err := stream.next(int(nr) * 4)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err)
}
nb := runContainer16{
iv: byteSliceAsInterval16Slice(buf[pos : pos+int(nr)*4]),
iv: byteSliceAsInterval16Slice(buf),
card: int64(card),
}
pos += int(nr) * 4
ra.containers[i] = &nb
} else if card > arrayDefaultMaxSize {
// bitmap container
buf, err := stream.next(arrayDefaultMaxSize * 2)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err)
}
nb := bitmapContainer{
cardinality: card,
bitmap: byteSliceAsUint64Slice(buf[pos : pos+arrayDefaultMaxSize*2]),
bitmap: byteSliceAsUint64Slice(buf),
}
pos += arrayDefaultMaxSize * 2
ra.containers[i] = &nb
} else {
// array container
nb := arrayContainer{
byteSliceAsUint16Slice(buf[pos : pos+card*2]),
buf, err := stream.next(card * 2)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read array container: %s", err)
}
pos += card * 2
nb := arrayContainer{
byteSliceAsUint16Slice(buf),
}
ra.containers[i] = &nb
}
}
return int64(pos), nil
}
func (ra *roaringArray) readFrom(stream io.Reader) (int64, error) {
pos := 0
var cookie uint32
err := binary.Read(stream, binary.LittleEndian, &cookie)
if err != nil {
return 0, fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
}
pos += 4
var size uint32
haveRunContainers := false
var isRun *bitmapContainer
if cookie&0x0000FFFF == serialCookie {
haveRunContainers = true
size = uint32(uint16(cookie>>16) + 1)
bytesToRead := (int(size) + 7) / 8
numwords := (bytesToRead + 7) / 8
by := make([]byte, bytesToRead, numwords*8)
nr, err := io.ReadFull(stream, by)
if err != nil {
return 8 + int64(nr), fmt.Errorf("error in readFrom: could not read the "+
"runContainer bit flags of length %v bytes: %v", bytesToRead, err)
}
pos += bytesToRead
by = by[:cap(by)]
isRun = newBitmapContainer()
for i := 0; i < numwords; i++ {
isRun.bitmap[i] = binary.LittleEndian.Uint64(by)
by = by[8:]
}
} else if cookie == serialCookieNoRunContainer {
err = binary.Read(stream, binary.LittleEndian, &size)
if err != nil {
return 0, fmt.Errorf("error in roaringArray.readFrom: when reading size, got: %s", err)
}
pos += 4
} else {
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
}
if size > (1 << 16) {
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
}
// descriptive header
keycard := make([]uint16, 2*size, 2*size)
err = binary.Read(stream, binary.LittleEndian, keycard)
if err != nil {
return 0, err
}
pos += 2 * 2 * int(size)
// offset header
if !haveRunContainers || size >= noOffsetThreshold {
io.CopyN(ioutil.Discard, stream, 4*int64(size)) // we never skip ahead so this data can be ignored
pos += 4 * int(size)
}
for i := uint32(0); i < size; i++ {
key := int(keycard[2*i])
card := int(keycard[2*i+1]) + 1
if haveRunContainers && isRun.contains(uint16(i)) {
nb := newRunContainer16()
nr, err := nb.readFrom(stream)
if err != nil {
return 0, err
}
pos += nr
ra.appendContainer(uint16(key), nb, false)
} else if card > arrayDefaultMaxSize {
nb := newBitmapContainer()
nr, err := nb.readFrom(stream)
if err != nil {
return 0, err
}
nb.cardinality = card
pos += nr
ra.appendContainer(keycard[2*i], nb, false)
} else {
nb := newArrayContainerSize(card)
nr, err := nb.readFrom(stream)
if err != nil {
return 0, err
}
pos += nr
ra.appendContainer(keycard[2*i], nb, false)
}
}
return int64(pos), nil
return stream.getReadBytes(), nil
}
func (ra *roaringArray) hasRunCompression() bool {

View file

@ -8,7 +8,7 @@ import (
"github.com/tinylib/msgp/msgp"
)
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -48,7 +48,7 @@ func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2
// write "t"
@ -72,7 +72,7 @@ func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 2
@ -88,7 +88,7 @@ func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -129,13 +129,13 @@ func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *containerSerz) Msgsize() (s int) {
s = 1 + 2 + msgp.Uint8Size + 2 + z.r.Msgsize()
return
}
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) {
{
var zajw uint8
@ -148,7 +148,7 @@ func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z contype) EncodeMsg(en *msgp.Writer) (err error) {
err = en.WriteUint8(uint8(z))
if err != nil {
@ -157,14 +157,14 @@ func (z contype) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z contype) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
o = msgp.AppendUint8(o, uint8(z))
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) {
{
var zwht uint8
@ -178,13 +178,13 @@ func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z contype) Msgsize() (s int) {
s = msgp.Uint8Size
return
}
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -295,7 +295,7 @@ func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 4
// write "keys"
@ -370,7 +370,7 @@ func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 4
@ -407,7 +407,7 @@ func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -519,7 +519,7 @@ func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *roaringArray) Msgsize() (s int) {
s = 1 + 5 + msgp.ArrayHeaderSize + (len(z.keys) * (msgp.Uint16Size)) + 16 + msgp.ArrayHeaderSize + (len(z.needCopyOnWrite) * (msgp.BoolSize)) + 12 + msgp.BoolSize + 8 + msgp.ArrayHeaderSize
for zxhx := range z.conserz {

View file

@ -6,7 +6,7 @@ package roaring
import "github.com/tinylib/msgp/msgp"
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *addHelper16) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -169,7 +169,7 @@ func (z *addHelper16) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *addHelper16) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 5
// write "runstart"
@ -284,7 +284,7 @@ func (z *addHelper16) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *addHelper16) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 5
@ -334,7 +334,7 @@ func (z *addHelper16) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *addHelper16) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -498,7 +498,7 @@ func (z *addHelper16) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *addHelper16) Msgsize() (s int) {
s = 1 + 9 + msgp.Uint16Size + 7 + msgp.Uint16Size + 14 + msgp.Uint16Size + 2 + msgp.ArrayHeaderSize + (len(z.m) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 3
if z.rc == nil {
@ -509,7 +509,7 @@ func (z *addHelper16) Msgsize() (s int) {
return
}
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *interval16) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -546,7 +546,7 @@ func (z *interval16) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z interval16) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2
// write "start"
@ -570,7 +570,7 @@ func (z interval16) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z interval16) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 2
@ -583,7 +583,7 @@ func (z interval16) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *interval16) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -621,13 +621,13 @@ func (z *interval16) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z interval16) Msgsize() (s int) {
s = 1 + 6 + msgp.Uint16Size + 5 + msgp.Uint16Size
return
}
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *runContainer16) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -701,7 +701,7 @@ func (z *runContainer16) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *runContainer16) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2
// write "iv"
@ -746,7 +746,7 @@ func (z *runContainer16) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *runContainer16) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 2
@ -768,7 +768,7 @@ func (z *runContainer16) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *runContainer16) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -843,13 +843,13 @@ func (z *runContainer16) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *runContainer16) Msgsize() (s int) {
s = 1 + 3 + msgp.ArrayHeaderSize + (len(z.iv) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 5 + msgp.Int64Size
return
}
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -891,11 +891,6 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) {
if err != nil {
return
}
case "curSeq":
z.curSeq, err = dc.ReadInt64()
if err != nil {
return
}
default:
err = dc.Skip()
if err != nil {
@ -906,11 +901,11 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 4
// map header, size 3
// write "rc"
err = en.Append(0x84, 0xa2, 0x72, 0x63)
err = en.Append(0x83, 0xa2, 0x72, 0x63)
if err != nil {
return err
}
@ -943,24 +938,15 @@ func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) {
if err != nil {
return
}
// write "curSeq"
err = en.Append(0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71)
if err != nil {
return err
}
err = en.WriteInt64(z.curSeq)
if err != nil {
return
}
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 4
// map header, size 3
// string "rc"
o = append(o, 0x84, 0xa2, 0x72, 0x63)
o = append(o, 0x83, 0xa2, 0x72, 0x63)
if z.rc == nil {
o = msgp.AppendNil(o)
} else {
@ -975,13 +961,10 @@ func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) {
// string "curPosInIndex"
o = append(o, 0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78)
o = msgp.AppendUint16(o, z.curPosInIndex)
// string "curSeq"
o = append(o, 0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71)
o = msgp.AppendInt64(o, z.curSeq)
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -1023,11 +1006,6 @@ func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) {
if err != nil {
return
}
case "curSeq":
z.curSeq, bts, err = msgp.ReadInt64Bytes(bts)
if err != nil {
return
}
default:
bts, err = msgp.Skip(bts)
if err != nil {
@ -1039,7 +1017,7 @@ func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *runIterator16) Msgsize() (s int) {
s = 1 + 3
if z.rc == nil {
@ -1047,11 +1025,11 @@ func (z *runIterator16) Msgsize() (s int) {
} else {
s += z.rc.Msgsize()
}
s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size + 7 + msgp.Int64Size
s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size
return
}
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *uint16Slice) DecodeMsg(dc *msgp.Reader) (err error) {
var zjpj uint32
zjpj, err = dc.ReadArrayHeader()
@ -1072,7 +1050,7 @@ func (z *uint16Slice) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z uint16Slice) EncodeMsg(en *msgp.Writer) (err error) {
err = en.WriteArrayHeader(uint32(len(z)))
if err != nil {
@ -1087,7 +1065,7 @@ func (z uint16Slice) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z uint16Slice) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
o = msgp.AppendArrayHeader(o, uint32(len(z)))
@ -1097,7 +1075,7 @@ func (z uint16Slice) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *uint16Slice) UnmarshalMsg(bts []byte) (o []byte, err error) {
var zgmo uint32
zgmo, bts, err = msgp.ReadArrayHeaderBytes(bts)
@ -1119,7 +1097,7 @@ func (z *uint16Slice) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z uint16Slice) Msgsize() (s int) {
s = msgp.ArrayHeaderSize + (len(z) * (msgp.Uint16Size))
return

View file

@ -2,8 +2,6 @@ package roaring
import (
"encoding/binary"
"errors"
"fmt"
"io"
"github.com/tinylib/msgp/msgp"
@ -22,14 +20,6 @@ func (b *runContainer16) writeTo(stream io.Writer) (int, error) {
return stream.Write(buf)
}
func (b *runContainer32) writeToMsgpack(stream io.Writer) (int, error) {
bts, err := b.MarshalMsg(nil)
if err != nil {
return 0, err
}
return stream.Write(bts)
}
func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) {
bts, err := b.MarshalMsg(nil)
if err != nil {
@ -38,46 +28,7 @@ func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) {
return stream.Write(bts)
}
func (b *runContainer32) readFromMsgpack(stream io.Reader) (int, error) {
err := msgp.Decode(stream, b)
return 0, err
}
func (b *runContainer16) readFromMsgpack(stream io.Reader) (int, error) {
err := msgp.Decode(stream, b)
return 0, err
}
var errCorruptedStream = errors.New("insufficient/odd number of stored bytes, corrupted stream detected")
func (b *runContainer16) readFrom(stream io.Reader) (int, error) {
b.iv = b.iv[:0]
b.card = 0
var numRuns uint16
err := binary.Read(stream, binary.LittleEndian, &numRuns)
if err != nil {
return 0, err
}
nr := int(numRuns)
encRun := make([]uint16, 2*nr)
by := make([]byte, 4*nr)
err = binary.Read(stream, binary.LittleEndian, &by)
if err != nil {
return 0, err
}
for i := range encRun {
if len(by) < 2 {
return 0, errCorruptedStream
}
encRun[i] = binary.LittleEndian.Uint16(by)
by = by[2:]
}
for i := 0; i < nr; i++ {
if i > 0 && b.iv[i-1].last() >= encRun[i*2] {
return 0, fmt.Errorf("error: stored runContainer had runs that were not in sorted order!! (b.iv[i-1=%v].last = %v >= encRun[i=%v] = %v)", i-1, b.iv[i-1].last(), i, encRun[i*2])
}
b.iv = append(b.iv, interval16{start: encRun[i*2], length: encRun[i*2+1]})
b.card += int64(encRun[i*2+1]) + 1
}
return 0, err
}

View file

@ -4,6 +4,7 @@ package roaring
import (
"encoding/binary"
"errors"
"io"
)
@ -26,6 +27,10 @@ func (b *arrayContainer) readFrom(stream io.Reader) (int, error) {
}
func (b *bitmapContainer) writeTo(stream io.Writer) (int, error) {
if b.cardinality <= arrayDefaultMaxSize {
return 0, errors.New("refusing to write bitmap container with cardinality of array container")
}
// Write set
buf := make([]byte, 8*len(b.bitmap))
for i, v := range b.bitmap {
@ -69,6 +74,16 @@ func uint64SliceAsByteSlice(slice []uint64) []byte {
return by
}
func uint16SliceAsByteSlice(slice []uint16) []byte {
by := make([]byte, len(slice)*2)
for i, v := range slice {
binary.LittleEndian.PutUint16(by[i*2:], v)
}
return by
}
func byteSliceAsUint16Slice(slice []byte) []uint16 {
if len(slice)%2 != 0 {
panic("Slice size should be divisible by 2")

View file

@ -3,8 +3,10 @@
package roaring
import (
"errors"
"io"
"reflect"
"runtime"
"unsafe"
)
@ -14,26 +16,13 @@ func (ac *arrayContainer) writeTo(stream io.Writer) (int, error) {
}
func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) {
if bc.cardinality <= arrayDefaultMaxSize {
return 0, errors.New("refusing to write bitmap container with cardinality of array container")
}
buf := uint64SliceAsByteSlice(bc.bitmap)
return stream.Write(buf)
}
// readFrom reads an arrayContainer from stream.
// PRE-REQUISITE: you must size the arrayContainer correctly (allocate b.content)
// *before* you call readFrom. We can't guess the size in the stream
// by this point.
func (ac *arrayContainer) readFrom(stream io.Reader) (int, error) {
buf := uint16SliceAsByteSlice(ac.content)
return io.ReadFull(stream, buf)
}
func (bc *bitmapContainer) readFrom(stream io.Reader) (int, error) {
buf := uint64SliceAsByteSlice(bc.bitmap)
n, err := io.ReadFull(stream, buf)
bc.computeCardinality()
return n, err
}
func uint64SliceAsByteSlice(slice []uint64) []byte {
// make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
@ -42,8 +31,12 @@ func uint64SliceAsByteSlice(slice []uint64) []byte {
header.Len *= 8
header.Cap *= 8
// instantiate result and use KeepAlive so data isn't unmapped.
result := *(*[]byte)(unsafe.Pointer(&header))
runtime.KeepAlive(&slice)
// return it
return *(*[]byte)(unsafe.Pointer(&header))
return result
}
func uint16SliceAsByteSlice(slice []uint16) []byte {
@ -54,8 +47,12 @@ func uint16SliceAsByteSlice(slice []uint16) []byte {
header.Len *= 2
header.Cap *= 2
// instantiate result and use KeepAlive so data isn't unmapped.
result := *(*[]byte)(unsafe.Pointer(&header))
runtime.KeepAlive(&slice)
// return it
return *(*[]byte)(unsafe.Pointer(&header))
return result
}
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
@ -64,50 +61,74 @@ func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
// Deserialization code follows
func byteSliceAsUint16Slice(slice []byte) []uint16 {
////
// These methods (byteSliceAsUint16Slice,...) do not make copies,
// they are pointer-based (unsafe). The caller is responsible to
// ensure that the input slice does not get garbage collected, deleted
// or modified while you hold the returned slince.
////
func byteSliceAsUint16Slice(slice []byte) (result []uint16) { // here we create a new slice holder
if len(slice)%2 != 0 {
panic("Slice size should be divisible by 2")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// update its capacity and length
header.Len /= 2
header.Cap /= 2
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / 2
rHeader.Cap = bHeader.Cap / 2
// return it
return *(*[]uint16)(unsafe.Pointer(&header))
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
}
func byteSliceAsUint64Slice(slice []byte) []uint64 {
func byteSliceAsUint64Slice(slice []byte) (result []uint64) {
if len(slice)%8 != 0 {
panic("Slice size should be divisible by 8")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// update its capacity and length
header.Len /= 8
header.Cap /= 8
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / 8
rHeader.Cap = bHeader.Cap / 8
// return it
return *(*[]uint64)(unsafe.Pointer(&header))
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
}
func byteSliceAsInterval16Slice(slice []byte) []interval16 {
func byteSliceAsInterval16Slice(slice []byte) (result []interval16) {
if len(slice)%4 != 0 {
panic("Slice size should be divisible by 4")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// update its capacity and length
header.Len /= 4
header.Cap /= 4
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / 4
rHeader.Cap = bHeader.Cap / 4
// return it
return *(*[]interval16)(unsafe.Pointer(&header))
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
}

View file

@ -5,6 +5,12 @@ type shortIterable interface {
next() uint16
}
type shortPeekable interface {
shortIterable
peekNext() uint16
advanceIfNeeded(minval uint16)
}
type shortIterator struct {
slice []uint16
loc int
@ -19,3 +25,28 @@ func (si *shortIterator) next() uint16 {
si.loc++
return a
}
func (si *shortIterator) peekNext() uint16 {
return si.slice[si.loc]
}
func (si *shortIterator) advanceIfNeeded(minval uint16) {
if si.hasNext() && si.peekNext() < minval {
si.loc = advanceUntil(si.slice, si.loc, len(si.slice), minval)
}
}
type reverseIterator struct {
slice []uint16
loc int
}
func (si *reverseIterator) hasNext() bool {
return si.loc >= 0
}
func (si *reverseIterator) next() uint16 {
a := si.slice[si.loc]
si.loc--
return a
}

View file

@ -14,6 +14,17 @@ const (
serialCookie = 12347 // runs, arrays, and bitmaps
noOffsetThreshold = 4
// MaxUint32 is the largest uint32 value.
MaxUint32 = 4294967295
// MaxRange is One more than the maximum allowed bitmap bit index. For use as an upper
// bound for ranges.
MaxRange uint64 = MaxUint32 + 1
// MaxUint16 is the largest 16 bit unsigned int.
// This is the largest value an interval16 can store.
MaxUint16 = 65535
// Compute wordSizeInBytes, the size of a word in bytes.
_m = ^uint64(0)
_logS = _m>>8&1 + _m>>16&1 + _m>>32&1
@ -114,7 +125,6 @@ func flipBitmapRange(bitmap []uint64, start int, end int) {
endword := (end - 1) / 64
bitmap[firstword] ^= ^(^uint64(0) << uint(start%64))
for i := firstword; i < endword; i++ {
//p("flipBitmapRange on i=%v", i)
bitmap[i] = ^bitmap[i]
}
bitmap[endword] ^= ^uint64(0) >> (uint(-end) % 64)
@ -292,24 +302,3 @@ func minOfUint16(a, b uint16) uint16 {
}
return b
}
func maxInt(a, b int) int {
if a > b {
return a
}
return b
}
func maxUint16(a, b uint16) uint16 {
if a > b {
return a
}
return b
}
func minUint16(a, b uint16) uint16 {
if a < b {
return a
}
return b
}