[PORT] Refactor the usage of batch catfile (gitea#31754)

When opening a repository, it will call `ensureValidRepository` and also
`CatFileBatch`. But sometimes these will not be used until repository
closed. So it's a waste of CPU to invoke 3 times git command for every
open repository.

This PR removed all of these from `OpenRepository` but only kept
checking whether the folder exists. When a batch is necessary, the
necessary functions will be invoked.

---
Conflict resolution: Because of the removal of go-git in (#4941)
`_nogogit.go` files were either renamed or merged into the 'common'
file. Git does handle the renames correctly, but for those that were
merged has to be manually copied pasted over. The patch looks the same,
201 additions 90 deletions as the original patch.

(cherry picked from commit c03baab678ba5b2e9d974aea147e660417f5d3f7)
This commit is contained in:
Lunny Xiao 2024-08-21 01:04:57 +08:00 committed by Gusted
parent cbedd7e5be
commit 0ca13c5eae
No known key found for this signature in database
GPG key ID: FD821B732837125F
15 changed files with 201 additions and 90 deletions

View file

@ -15,11 +15,11 @@ import (
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/indexer/code/internal"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"
@ -154,17 +154,19 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elasti
func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error {
reqs := make([]elastic.BulkableRequest, 0)
if len(changes.Updates) > 0 {
// Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first!
if err := git.EnsureValidGitRepository(ctx, repo.RepoPath()); err != nil {
log.Error("Unable to open git repo: %s for %-v: %v", repo.RepoPath(), repo, err)
r, err := gitrepo.OpenRepository(ctx, repo)
if err != nil {
return err
}
batchWriter, batchReader, cancel := git.CatFileBatch(ctx, repo.RepoPath())
defer cancel()
defer r.Close()
batch, err := r.NewBatch(ctx)
if err != nil {
return err
}
defer batch.Close()
for _, update := range changes.Updates {
updateReqs, err := b.addUpdate(ctx, batchWriter, batchReader, sha, update, repo)
updateReqs, err := b.addUpdate(ctx, batch.Writer, batch.Reader, sha, update, repo)
if err != nil {
return err
}
@ -172,7 +174,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st
reqs = append(reqs, updateReqs...)
}
}
cancel()
batch.Close()
}
for _, filename := range changes.RemovedFilenames {