blob: GetBlobContent: reduce allocations (#8223)
See #8222 for context.

## git.Blob.NewTruncatedReader

This introduces a new `NewTruncatedReader` method that returns a blob reader which silently truncates when the limit is reached (`io.EOF` is returned at that point). Since the actual size is also returned, `GetBlobContent` can pre-allocate a `[]byte` of the final size (the minimum of the requested limit and the actual size) and call `io.ReadFull(rc, buf)` instead of `util.ReadWithLimit(dataRc, int(limit))`, which was convoluted and not used anywhere else.

### Tests

- I added test coverage for Go changes...
  - [x] in their respective `*_test.go` for unit tests.

### Documentation

- [x] I did not document these changes and I do not expect someone else to do it.

### Release notes

- [x] I do not want this change to show in the release notes.

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/8223
Reviewed-by: Lucas <sclu1034@noreply.codeberg.org>
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Co-authored-by: oliverpool <git@olivier.pfad.fr>
Co-committed-by: oliverpool <git@olivier.pfad.fr>
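As a quick illustration of the calling pattern described above, here is a minimal sketch of a caller that pre-allocates `min(limit, fullSize)` bytes and fills them with `io.ReadFull`. The wrapper function `readTruncated` and its package are hypothetical, and the `forgejo.org/modules/git` import path is assumed from the other `forgejo.org/modules/*` imports in the diff; only `NewTruncatedReader` itself comes from this commit.

```go
package example

import (
	"io"

	"forgejo.org/modules/git" // assumed import path
)

// readTruncated is a hypothetical helper: it returns at most limit bytes of the
// blob's content plus a flag telling the caller whether the content was cut off.
// limit is assumed to be positive (GetBlobContent guards limit <= 0 separately).
func readTruncated(b *git.Blob, limit int64) (data []byte, truncated bool, err error) {
	rc, fullSize, err := b.NewTruncatedReader(limit)
	if err != nil {
		return nil, false, err
	}
	// Closing discards any unread bytes so the underlying cat-file batch
	// reader stays usable for the next request.
	defer rc.Close()

	// Pre-allocate exactly what will be read: the smaller of the requested
	// limit and the actual blob size, so no intermediate buffers are needed.
	buf := make([]byte, min(limit, fullSize))
	if _, err := io.ReadFull(rc, buf); err != nil {
		return nil, false, err
	}
	return buf, fullSize > limit, nil
}
```

Compared with the removed `util.ReadWithLimit`, this performs a single allocation of the final size instead of growing a `bytes.Buffer` batch by batch.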
This commit is contained in:

parent 913eaffb8a
commit 31ad7c9353

4 changed files with 95 additions and 135 deletions
@@ -12,7 +12,6 @@ import (
 	"forgejo.org/modules/log"
 	"forgejo.org/modules/typesniffer"
-	"forgejo.org/modules/util"
 )
 
 // Blob represents a Git object.
@@ -25,42 +24,25 @@ type Blob struct {
 	repo    *Repository
 }
 
-// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
-// Calling the Close function on the result will discard all unread output.
-func (b *Blob) DataAsync() (io.ReadCloser, error) {
+func (b *Blob) newReader() (*bufio.Reader, int64, func(), error) {
 	wr, rd, cancel, err := b.repo.CatFileBatch(b.repo.Ctx)
 	if err != nil {
-		return nil, err
+		return nil, 0, nil, err
 	}
 
 	_, err = wr.Write([]byte(b.ID.String() + "\n"))
 	if err != nil {
 		cancel()
-		return nil, err
+		return nil, 0, nil, err
 	}
 	_, _, size, err := ReadBatchLine(rd)
 	if err != nil {
 		cancel()
-		return nil, err
+		return nil, 0, nil, err
 	}
 	b.gotSize = true
 	b.size = size
 
-	if size < 4096 {
-		bs, err := io.ReadAll(io.LimitReader(rd, size))
-		defer cancel()
-		if err != nil {
-			return nil, err
-		}
-		_, err = rd.Discard(1)
-		return io.NopCloser(bytes.NewReader(bs)), err
-	}
-
-	return &blobReader{
-		rd:     rd,
-		n:      size,
-		cancel: cancel,
-	}, nil
+	return rd, size, cancel, err
 }
 
 // Size returns the uncompressed size of the blob
@@ -91,10 +73,36 @@ func (b *Blob) Size() int64 {
 	return b.size
 }
 
+// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
+// Calling the Close function on the result will discard all unread output.
+func (b *Blob) DataAsync() (io.ReadCloser, error) {
+	rd, size, cancel, err := b.newReader()
+	if err != nil {
+		return nil, err
+	}
+
+	if size < 4096 {
+		bs, err := io.ReadAll(io.LimitReader(rd, size))
+		defer cancel()
+		if err != nil {
+			return nil, err
+		}
+		_, err = rd.Discard(1)
+		return io.NopCloser(bytes.NewReader(bs)), err
+	}
+
+	return &blobReader{
+		rd:     rd,
+		n:      size,
+		cancel: cancel,
+	}, nil
+}
+
 type blobReader struct {
-	rd     *bufio.Reader
-	n      int64
-	cancel func()
+	rd                *bufio.Reader
+	n                 int64 // number of bytes to read
+	additionalDiscard int64 // additional number of bytes to discard
+	cancel            func()
 }
 
 func (b *blobReader) Read(p []byte) (n int, err error) {
@@ -117,7 +125,8 @@ func (b *blobReader) Close() error {
 
 	defer b.cancel()
 
-	if err := DiscardFull(b.rd, b.n+1); err != nil {
+	// discard the unread bytes, the truncated bytes and the trailing newline
+	if err := DiscardFull(b.rd, b.n+b.additionalDiscard+1); err != nil {
 		return err
 	}
 
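To make the new discard arithmetic concrete, a small worked example (the numbers are illustrative, and it assumes `Read` decrements `n` as bytes are consumed, which is what the pre-existing `b.n+1` discard already relied on): for a 10-byte blob opened with `NewTruncatedReader(4)`, `n` starts at 4 and `additionalDiscard` is 6. If the caller reads all 4 bytes and then closes, `n` has dropped to 0, so Close discards 0 + 6 + 1 = 7 bytes: the 6 truncated bytes plus the trailing newline that `git cat-file --batch` writes after each object. Closing without reading anything discards 4 + 6 + 1 = 11 bytes instead, leaving the batch reader positioned for the next request.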
@@ -131,17 +140,35 @@ func (b *Blob) Name() string {
 	return b.name
 }
 
-// GetBlobContent Gets the limited content of the blob as raw text
+// NewTruncatedReader return a blob-reader which silently truncates when the limit is reached (io.EOF will be returned)
+func (b *Blob) NewTruncatedReader(limit int64) (rc io.ReadCloser, fullSize int64, err error) {
+	r, fullSize, cancel, err := b.newReader()
+	if err != nil {
+		return nil, fullSize, err
+	}
+
+	limit = min(limit, fullSize)
+	return &blobReader{
+		rd:                r,
+		n:                 limit,
+		additionalDiscard: fullSize - limit,
+		cancel:            cancel,
+	}, fullSize, nil
+}
+
+// GetBlobContent Gets the truncated content of the blob as raw text
 func (b *Blob) GetBlobContent(limit int64) (string, error) {
 	if limit <= 0 {
 		return "", nil
 	}
-	dataRc, err := b.DataAsync()
+	rc, fullSize, err := b.NewTruncatedReader(limit)
 	if err != nil {
 		return "", err
 	}
-	defer dataRc.Close()
-	buf, err := util.ReadWithLimit(dataRc, int(limit))
+	defer rc.Close()
+
+	buf := make([]byte, min(fullSize, limit))
+	_, err = io.ReadFull(rc, buf)
 	return string(buf), err
 }
 
@@ -35,6 +35,43 @@ func TestBlob_Data(t *testing.T) {
 	assert.Equal(t, output, string(data))
 }
 
+func TestBlob_GetBlobContent(t *testing.T) {
+	bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")
+	repo, err := openRepositoryWithDefaultContext(bareRepo1Path)
+	require.NoError(t, err)
+
+	defer repo.Close()
+
+	testBlob, err := repo.GetBlob("6c493ff740f9380390d5c9ddef4af18697ac9375")
+	require.NoError(t, err)
+
+	r, err := testBlob.GetBlobContent(100)
+	require.NoError(t, err)
+	require.Equal(t, "file2\n", r)
+
+	r, err = testBlob.GetBlobContent(-1)
+	require.NoError(t, err)
+	require.Empty(t, r)
+
+	r, err = testBlob.GetBlobContent(4)
+	require.NoError(t, err)
+	require.Equal(t, "file", r)
+
+	r, err = testBlob.GetBlobContent(6)
+	require.NoError(t, err)
+	require.Equal(t, "file2\n", r)
+
+	t.Run("non-existing blob", func(t *testing.T) {
+		inexistingBlob, err := repo.GetBlob("00003ff740f9380390d5c9ddef4af18690000000")
+		require.NoError(t, err)
+
+		r, err := inexistingBlob.GetBlobContent(100)
+		require.Error(t, err)
+		require.IsType(t, ErrNotExist{}, err)
+		require.Empty(t, r)
+	})
+}
+
 func Benchmark_Blob_Data(b *testing.B) {
 	bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")
 	repo, err := openRepositoryWithDefaultContext(bareRepo1Path)
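For completeness, a hypothetical test sketch (not part of this commit) showing the truncation contract at the reader level, written in the style of the tests above and reusing the same `repo1_bare` fixture and blob ID; it assumes the file's existing imports and helpers (`io`, `path/filepath`, `require`, `openRepositoryWithDefaultContext`).

```go
func TestBlob_NewTruncatedReader(t *testing.T) {
	// Hypothetical sketch: same fixture and blob as TestBlob_GetBlobContent above.
	repo, err := openRepositoryWithDefaultContext(filepath.Join(testReposDir, "repo1_bare"))
	require.NoError(t, err)
	defer repo.Close()

	blob, err := repo.GetBlob("6c493ff740f9380390d5c9ddef4af18697ac9375")
	require.NoError(t, err)

	// The blob content is "file2\n" (6 bytes); ask for at most 4 of them.
	rc, fullSize, err := blob.NewTruncatedReader(4)
	require.NoError(t, err)
	defer rc.Close()
	require.EqualValues(t, 6, fullSize)

	// The reader truncates silently: io.ReadAll sees a plain io.EOF, no error.
	data, err := io.ReadAll(rc)
	require.NoError(t, err)
	require.Equal(t, "file", string(data))
}
```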
@@ -4,7 +4,6 @@
 package util
 
 import (
-	"bytes"
 	"errors"
 	"io"
 )
@@ -20,42 +19,6 @@ func ReadAtMost(r io.Reader, buf []byte) (n int, err error) {
 	return n, err
 }
 
-// ReadWithLimit reads at most "limit" bytes from r into buf.
-// If EOF or ErrUnexpectedEOF occurs while reading, err will be nil.
-func ReadWithLimit(r io.Reader, n int) (buf []byte, err error) {
-	return readWithLimit(r, 1024, n)
-}
-
-func readWithLimit(r io.Reader, batch, limit int) ([]byte, error) {
-	if limit <= batch {
-		buf := make([]byte, limit)
-		n, err := ReadAtMost(r, buf)
-		if err != nil {
-			return nil, err
-		}
-		return buf[:n], nil
-	}
-	res := bytes.NewBuffer(make([]byte, 0, batch))
-	bufFix := make([]byte, batch)
-	eof := false
-	for res.Len() < limit && !eof {
-		bufTmp := bufFix
-		if res.Len()+batch > limit {
-			bufTmp = bufFix[:limit-res.Len()]
-		}
-		n, err := io.ReadFull(r, bufTmp)
-		if err == io.EOF || err == io.ErrUnexpectedEOF {
-			eof = true
-		} else if err != nil {
-			return nil, err
-		}
-		if _, err = res.Write(bufTmp[:n]); err != nil {
-			return nil, err
-		}
-	}
-	return res.Bytes(), nil
-}
-
 // ErrNotEmpty is an error reported when there is a non-empty reader
 var ErrNotEmpty = errors.New("not-empty")
 
@@ -1,67 +0,0 @@
-// Copyright 2023 The Gitea Authors. All rights reserved.
-// SPDX-License-Identifier: MIT
-
-package util
-
-import (
-	"bytes"
-	"errors"
-	"testing"
-
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-)
-
-type readerWithError struct {
-	buf *bytes.Buffer
-}
-
-func (r *readerWithError) Read(p []byte) (n int, err error) {
-	if r.buf.Len() < 2 {
-		return 0, errors.New("test error")
-	}
-	return r.buf.Read(p)
-}
-
-func TestReadWithLimit(t *testing.T) {
-	bs := []byte("0123456789abcdef")
-
-	// normal test
-	buf, err := readWithLimit(bytes.NewBuffer(bs), 5, 2)
-	require.NoError(t, err)
-	assert.Equal(t, []byte("01"), buf)
-
-	buf, err = readWithLimit(bytes.NewBuffer(bs), 5, 5)
-	require.NoError(t, err)
-	assert.Equal(t, []byte("01234"), buf)
-
-	buf, err = readWithLimit(bytes.NewBuffer(bs), 5, 6)
-	require.NoError(t, err)
-	assert.Equal(t, []byte("012345"), buf)
-
-	buf, err = readWithLimit(bytes.NewBuffer(bs), 5, len(bs))
-	require.NoError(t, err)
-	assert.Equal(t, []byte("0123456789abcdef"), buf)
-
-	buf, err = readWithLimit(bytes.NewBuffer(bs), 5, 100)
-	require.NoError(t, err)
-	assert.Equal(t, []byte("0123456789abcdef"), buf)
-
-	// test with error
-	buf, err = readWithLimit(&readerWithError{bytes.NewBuffer(bs)}, 5, 10)
-	require.NoError(t, err)
-	assert.Equal(t, []byte("0123456789"), buf)
-
-	buf, err = readWithLimit(&readerWithError{bytes.NewBuffer(bs)}, 5, 100)
-	require.ErrorContains(t, err, "test error")
-	assert.Empty(t, buf)
-
-	// test public function
-	buf, err = ReadWithLimit(bytes.NewBuffer(bs), 2)
-	require.NoError(t, err)
-	assert.Equal(t, []byte("01"), buf)
-
-	buf, err = ReadWithLimit(bytes.NewBuffer(bs), 9999999)
-	require.NoError(t, err)
-	assert.Equal(t, []byte("0123456789abcdef"), buf)
-}