Improve listing performance by using go-git (#6478)

* Use go-git for tree reading and commit info lookup.

Signed-off-by: Filip Navara <navara@emclient.com>

* Use TreeEntry.IsRegular() instead of ObjectType that was removed.

Signed-off-by: Filip Navara <navara@emclient.com>

* Use the treePath to optimize commit info search.

Signed-off-by: Filip Navara <navara@emclient.com>

* Extract the latest commit at treePath along with the other commits.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix listing commit info for a directory that was created in one commit and never modified after.

Signed-off-by: Filip Navara <navara@emclient.com>

* Avoid nearly all external 'git' invocations when doing directory listing (.editorconfig code path is still hit).

Signed-off-by: Filip Navara <navara@emclient.com>

* Use go-git for reading blobs.

Signed-off-by: Filip Navara <navara@emclient.com>

* Make SHA1 type alias for plumbing.Hash in go-git.

Signed-off-by: Filip Navara <navara@emclient.com>

* Make Signature type alias for object.Signature in go-git.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix GetCommitsInfo for repository with only one commit.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix PGP signature verification.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix issues with walking commit graph across merges.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix typo in condition.

Signed-off-by: Filip Navara <navara@emclient.com>

* Speed up loading branch list by keeping the repository reference (and thus all the loaded packfile indexes).

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix listing submodules.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix build

Signed-off-by: Filip Navara <navara@emclient.com>

* Add back commit cache because of name-rev

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix tests

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix code style

* Fix spelling

* Address PR feedback

Signed-off-by: Filip Navara <navara@emclient.com>

* Update vendor module list

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix getting trees by commit id

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix remaining unit test failures

* Fix GetTreeBySHA

* Avoid running `git name-rev` if not necessary

Signed-off-by: Filip Navara <navara@emclient.com>

* Move Branch code to git module

* Clean up GPG signature verification and fix it for tagged commits

* Address PR feedback (import formatting, copyright headers)

* Make blob lookup by SHA working

* Update tests to use public API

* Allow getting content from any type of object through the blob interface

* Change test to actually expect the object content that is in the GIT repository

* Change one more test to actually expect the object content that is in the GIT repository

* Add comments
This commit is contained in:
Filip Navara 2019-04-19 14:17:27 +02:00 committed by Lunny Xiao
parent 19ec2606e9
commit 2af67f6044
44 changed files with 759 additions and 783 deletions

View file

@@ -1,76 +1,40 @@
// Copyright 2015 The Gogs Authors. All rights reserved.
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package git
import (
"bytes"
"encoding/base64"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"gopkg.in/src-d/go-git.v4/plumbing"
)
// Blob represents a Git object.
// It pairs the owning repository with the tree entry it is associated with.
type Blob struct {
// repo is the repository the blob belongs to; its Path is the directory
// that git is run in by DataPipeline and DataAsync.
repo *Repository
// Embedded tree entry. Presumably the source of the ID that the Blob
// methods read via b.ID — confirm against the TreeEntry definition.
*TreeEntry
}
// NOTE(review): the line below sits outside the struct braces. It looks like a
// field of a revised Blob definition that was interleaved here — confirm.
ID SHA1
// Data loads the complete blob content into memory and hands it back as an
// io.Reader. Because everything is buffered before returning, this can be
// very slow and memory consuming for huge content.
func (b *Blob) Data() (io.Reader, error) {
	var (
		outBuf = new(bytes.Buffer)
		errBuf = new(bytes.Buffer)
	)

	// Reserve the expected size (plus some slack) up front; preallocating
	// saves ~50% memory usage on big files.
	outBuf.Grow(int(b.Size() + 2048))

	err := b.DataPipeline(outBuf, errBuf)
	if err != nil {
		// Attach whatever was written to the error stream to the error.
		return nil, concatenateError(err, errBuf.String())
	}

	return outBuf, nil
}
// DataPipeline runs "show" for this blob's ID in the repository path,
// writing the content to stdout and any error output to stderr.
func (b *Blob) DataPipeline(stdout, stderr io.Writer) error {
	showCmd := NewCommand("show", b.ID.String())
	return showCmd.RunInDirPipeline(b.repo.Path, stdout, stderr)
}
// cmdReadCloser couples a command with a reader for its output so the pair
// can be used as a single io.ReadCloser (see the Read and Close methods).
type cmdReadCloser struct {
// cmd is the command whose completion Close waits for.
cmd *exec.Cmd
// stdout is the stream that Read serves from and Close drains.
stdout io.Reader
}
// Read fills p from the wrapped stdout stream and reports the byte count and
// error exactly as the underlying reader returned them.
func (c cmdReadCloser) Read(p []byte) (int, error) {
	n, err := c.stdout.Read(p)
	return n, err
}
// Close discards whatever is still unread on stdout and then waits for the
// command, returning the result of Wait.
func (c cmdReadCloser) Close() error {
// Drain the remaining output; the error from the copy itself is ignored.
io.Copy(ioutil.Discard, c.stdout)
return c.cmd.Wait()
// NOTE(review): the two lines below are shaped like struct fields, not
// statements. They look like fields of a revised Blob definition
// (gogitEncodedObj and name are read as Blob fields elsewhere in this file)
// that were interleaved into this function — confirm.
gogitEncodedObj plumbing.EncodedObject
name string
}
// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
// Calling the Close function on the result will discard all unread output.
// NOTE(review): the body below mixes two approaches, an external "git show"
// command and the encoded object's own reader. It looks like two revisions
// of this function interleaved — confirm which lines are current.
func (b *Blob) DataAsync() (io.ReadCloser, error) {
// Set up "git show <id>" to run in the repository path, with the command's
// stderr forwarded to this process's stderr.
cmd := exec.Command("git", "show", b.ID.String())
cmd.Dir = b.repo.Path
cmd.Stderr = os.Stderr
// Return the reader supplied by the encoded object. As written, cmd is
// configured above but is not started before this return.
return b.gogitEncodedObj.Reader()
}
stdout, err := cmd.StdoutPipe()
if err != nil {
return nil, fmt.Errorf("StdoutPipe: %v", err)
}
// Size returns the uncompressed size of the blob, as reported by the
// underlying encoded object.
func (b *Blob) Size() int64 {
return b.gogitEncodedObj.Size()
}
if err = cmd.Start(); err != nil {
return nil, fmt.Errorf("Start: %v", err)
}
return cmdReadCloser{stdout: stdout, cmd: cmd}, nil
// Name returns the name of the tree entry this blob object was created from
// (or an empty string). It is the value held in the blob's name field.
func (b *Blob) Name() string {
return b.name
}
// GetBlobContentBase64 Reads the content of the blob with a base64 encode and returns the encoded string