Add cron method to gc LFS MetaObjects (#22385)
This PR adds a task to the cron service to allow garbage collection of LFS meta objects. As repositories may have a large number of LFSMetaObjects, an updated column is added to this table and it is used to perform a generational GC to attempt to reduce the amount of work. (There may need to be a bit more work here but this is probably enough for the moment.) Fix #7045 Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
		
					parent
					
						
							
								04c97aa364
							
						
					
				
			
			
				commit
				
					
						2cc3a6381c
					
				
			
		
					 9 changed files with 255 additions and 35 deletions
				
			
		| 
						 | 
				
			
			@ -2213,6 +2213,28 @@ ROUTER = console
 | 
			
		|||
;SCHEDULE = @every 168h
 | 
			
		||||
;OLDER_THAN = 8760h
 | 
			
		||||
 | 
			
		||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 | 
			
		||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 | 
			
		||||
;; Garbage collect LFS pointers in repositories
 | 
			
		||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 | 
			
		||||
;[cron.gc_lfs]
 | 
			
		||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 | 
			
		||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 | 
			
		||||
;ENABLED = false
 | 
			
		||||
;; Run the LFS garbage collection at start up time, if ENABLED (default false)
 | 
			
		||||
;RUN_AT_START = false
 | 
			
		||||
;; Interval as a duration between each gc run (default every 24h)
 | 
			
		||||
;SCHEDULE = @every 24h
 | 
			
		||||
;; Only attempt to garbage collect LFSMetaObjects older than this (default 7 days)
 | 
			
		||||
;OLDER_THAN = 168h
 | 
			
		||||
;; Only attempt to garbage collect LFSMetaObjects that have not been attempted to be garbage collected for this long (default 3 days)
 | 
			
		||||
;LAST_UPDATED_MORE_THAN_AGO = 72h
 | 
			
		||||
;; Minimum number of stale LFSMetaObjects to check per repo. Set to `0` to always check all.
 | 
			
		||||
;NUMBER_TO_CHECK_PER_REPO = 100
 | 
			
		||||
;; Check at least this proportion of LFSMetaObjects per repo. (This may cause all stale LFSMetaObjects to be checked.)
 | 
			
		||||
;PROPORTION_TO_CHECK_PER_REPO = 0.6
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 | 
			
		||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 | 
			
		||||
;; Git Operation timeout in seconds
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1039,6 +1039,16 @@ Default templates for project boards:
 | 
			
		|||
- `SCHEDULE`: **@every 168h**: Cron syntax to set how often to check.
 | 
			
		||||
- `OLDER_THAN`: **8760h**: any system notice older than this expression will be deleted from the database.
 | 
			
		||||
 | 
			
		||||
#### Cron - Garbage collect LFS pointers in repositories ('cron.gc_lfs')
 | 
			
		||||
 | 
			
		||||
- `ENABLED`: **false**: Enable service.
 | 
			
		||||
- `RUN_AT_START`: **false**: Run tasks at start up time (if ENABLED).
 | 
			
		||||
- `SCHEDULE`: **@every 24h**: Cron syntax to set how often to check.
 | 
			
		||||
- `OLDER_THAN`: **168h**: Only attempt to garbage collect LFSMetaObjects older than this (default 7 days)
 | 
			
		||||
- `LAST_UPDATED_MORE_THAN_AGO`: **72h**: Only attempt to garbage collect LFSMetaObjects that have not been attempted to be garbage collected for this long (default 3 days)
 | 
			
		||||
- `NUMBER_TO_CHECK_PER_REPO`: **100**: Minimum number of stale LFSMetaObjects to check per repo. Set to `0` to always check all.
 | 
			
		||||
- `PROPORTION_TO_CHECK_PER_REPO`: **0.6**: Check at least this proportion of LFSMetaObjects per repo. (This may cause all stale LFSMetaObjects to be checked.)
 | 
			
		||||
 | 
			
		||||
## Git (`git`)
 | 
			
		||||
 | 
			
		||||
- `PATH`: **""**: The path of Git executable. If empty, Gitea searches through the PATH environment.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -115,6 +115,7 @@ type LFSMetaObject struct {
 | 
			
		|||
	RepositoryID int64              `xorm:"UNIQUE(s) INDEX NOT NULL"`
 | 
			
		||||
	Existing     bool               `xorm:"-"`
 | 
			
		||||
	CreatedUnix  timeutil.TimeStamp `xorm:"created"`
 | 
			
		||||
	UpdatedUnix  timeutil.TimeStamp `xorm:"INDEX updated"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func init() {
 | 
			
		||||
| 
						 | 
				
			
			@ -334,8 +335,45 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) {
 | 
			
		|||
	return lfsSize, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// IterateRepositoryIDsWithLFSMetaObjects iterates across the repositories that have LFSMetaObjects
 | 
			
		||||
func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx context.Context, repoID, count int64) error) error {
 | 
			
		||||
	batchSize := setting.Database.IterateBufferSize
 | 
			
		||||
	sess := db.GetEngine(ctx)
 | 
			
		||||
	id := int64(0)
 | 
			
		||||
	type RepositoryCount struct {
 | 
			
		||||
		RepositoryID int64
 | 
			
		||||
		Count        int64
 | 
			
		||||
	}
 | 
			
		||||
	for {
 | 
			
		||||
		counts := make([]*RepositoryCount, 0, batchSize)
 | 
			
		||||
		sess.Select("repository_id, COUNT(id) AS count").
 | 
			
		||||
			Table("lfs_meta_object").
 | 
			
		||||
			Where("repository_id > ?", id).
 | 
			
		||||
			GroupBy("repository_id").
 | 
			
		||||
			OrderBy("repository_id ASC")
 | 
			
		||||
 | 
			
		||||
		if err := sess.Limit(batchSize, 0).Find(&counts); err != nil {
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
		if len(counts) == 0 {
 | 
			
		||||
			return nil
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		for _, count := range counts {
 | 
			
		||||
			if err := f(ctx, count.RepositoryID, count.Count); err != nil {
 | 
			
		||||
				return err
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		id = counts[len(counts)-1].RepositoryID
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// IterateLFSMetaObjectsForRepoOptions provides options for IterateLFSMetaObjectsForRepo
 | 
			
		||||
type IterateLFSMetaObjectsForRepoOptions struct {
 | 
			
		||||
	OlderThan time.Time
 | 
			
		||||
	OlderThan                 time.Time
 | 
			
		||||
	UpdatedLessRecentlyThan   time.Time
 | 
			
		||||
	OrderByUpdated            bool
 | 
			
		||||
	LoopFunctionAlwaysUpdates bool
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// IterateLFSMetaObjectsForRepo provides an iterator for LFSMetaObjects per Repo
 | 
			
		||||
| 
						 | 
				
			
			@ -348,28 +386,53 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont
 | 
			
		|||
		LFSMetaObject
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	id := int64(0)
 | 
			
		||||
 | 
			
		||||
	for {
 | 
			
		||||
		beans := make([]*CountLFSMetaObject, 0, batchSize)
 | 
			
		||||
		// SELECT `lfs_meta_object`.*, COUNT(`l1`.id) as `count` FROM lfs_meta_object INNER JOIN lfs_meta_object AS l1 ON l1.oid = lfs_meta_object.oid WHERE lfs_meta_object.repository_id = ? GROUP BY lfs_meta_object.id
 | 
			
		||||
		sess := engine.Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`").
 | 
			
		||||
			Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid").
 | 
			
		||||
			Where("`lfs_meta_object`.repository_id = ?", repoID)
 | 
			
		||||
		if !opts.OlderThan.IsZero() {
 | 
			
		||||
			sess.And("`lfs_meta_object`.created_unix < ?", opts.OlderThan)
 | 
			
		||||
		}
 | 
			
		||||
		if !opts.UpdatedLessRecentlyThan.IsZero() {
 | 
			
		||||
			sess.And("`lfs_meta_object`.updated_unix < ?", opts.UpdatedLessRecentlyThan)
 | 
			
		||||
		}
 | 
			
		||||
		sess.GroupBy("`lfs_meta_object`.id")
 | 
			
		||||
		if opts.OrderByUpdated {
 | 
			
		||||
			sess.OrderBy("`lfs_meta_object`.updated_unix ASC")
 | 
			
		||||
		} else {
 | 
			
		||||
			sess.And("`lfs_meta_object`.id > ?", id)
 | 
			
		||||
			sess.OrderBy("`lfs_meta_object`.id ASC")
 | 
			
		||||
		}
 | 
			
		||||
		if err := sess.Limit(batchSize, start).Find(&beans); err != nil {
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
		if len(beans) == 0 {
 | 
			
		||||
			return nil
 | 
			
		||||
		}
 | 
			
		||||
		start += len(beans)
 | 
			
		||||
		if !opts.LoopFunctionAlwaysUpdates {
 | 
			
		||||
			start += len(beans)
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		for _, bean := range beans {
 | 
			
		||||
			if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil {
 | 
			
		||||
				return err
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		id = beans[len(beans)-1].ID
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// MarkLFSMetaObject updates the updated time for the provided LFSMetaObject
 | 
			
		||||
func MarkLFSMetaObject(ctx context.Context, id int64) error {
 | 
			
		||||
	obj := &LFSMetaObject{
 | 
			
		||||
		UpdatedUnix: timeutil.TimeStampNow(),
 | 
			
		||||
	}
 | 
			
		||||
	count, err := db.GetEngine(ctx).ID(id).Update(obj)
 | 
			
		||||
	if count != 1 {
 | 
			
		||||
		log.Error("Unexpectedly updated %d LFSMetaObjects with ID: %d", count, id)
 | 
			
		||||
	}
 | 
			
		||||
	return err
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -432,6 +432,9 @@ var migrations = []Migration{
 | 
			
		|||
	NewMigration("Update counts of all open milestones", v1_18.UpdateOpenMilestoneCounts),
 | 
			
		||||
	// v230 -> v231
 | 
			
		||||
	NewMigration("Add ConfidentialClient column (default true) to OAuth2Application table", v1_18.AddConfidentialClientColumnToOAuth2ApplicationTable),
 | 
			
		||||
 | 
			
		||||
	// Gitea 1.18.0 ends at v231
 | 
			
		||||
 | 
			
		||||
	// v231 -> v232
 | 
			
		||||
	NewMigration("Add index for hook_task", v1_19.AddIndexForHookTask),
 | 
			
		||||
	// v232 -> v233
 | 
			
		||||
| 
						 | 
				
			
			@ -446,6 +449,8 @@ var migrations = []Migration{
 | 
			
		|||
	NewMigration("Create secrets table", v1_19.CreateSecretsTable),
 | 
			
		||||
	// v237 -> v238
 | 
			
		||||
	NewMigration("Drop ForeignReference table", v1_19.DropForeignReferenceTable),
 | 
			
		||||
	// v238 -> v239
 | 
			
		||||
	NewMigration("Add updated unix to LFSMetaObject", v1_19.AddUpdatedUnixToLFSMetaObject),
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// GetCurrentDBVersion returns the current db version
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										27
									
								
								models/migrations/v1_19/v238.go
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								models/migrations/v1_19/v238.go
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,27 @@
 | 
			
		|||
// Copyright 2022 The Gitea Authors. All rights reserved.
 | 
			
		||||
// SPDX-License-Identifier: MIT
 | 
			
		||||
 | 
			
		||||
package v1_19 //nolint
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"code.gitea.io/gitea/modules/timeutil"
 | 
			
		||||
 | 
			
		||||
	"xorm.io/xorm"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// AddUpdatedUnixToLFSMetaObject adds an updated column to the LFSMetaObject to allow for garbage collection
 | 
			
		||||
func AddUpdatedUnixToLFSMetaObject(x *xorm.Engine) error {
 | 
			
		||||
	// Sync the local LFSMetaObject definition below so that the new, indexed
	// `updated_unix` column is added to the existing `lfs_meta_object` table.
 | 
			
		||||
	// LFSMetaObject stores metadata for LFS tracked files.
 | 
			
		||||
	type LFSMetaObject struct {
 | 
			
		||||
		ID           int64              `xorm:"pk autoincr"`
 | 
			
		||||
		Oid          string             `json:"oid" xorm:"UNIQUE(s) INDEX NOT NULL"`
 | 
			
		||||
		Size         int64              `json:"size" xorm:"NOT NULL"`
 | 
			
		||||
		RepositoryID int64              `xorm:"UNIQUE(s) INDEX NOT NULL"`
 | 
			
		||||
		CreatedUnix  timeutil.TimeStamp `xorm:"created"`
 | 
			
		||||
		UpdatedUnix  timeutil.TimeStamp `xorm:"INDEX updated"`
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return x.Sync(new(LFSMetaObject))
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -6,6 +6,7 @@ package doctor
 | 
			
		|||
import (
 | 
			
		||||
	"context"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	"code.gitea.io/gitea/modules/log"
 | 
			
		||||
	"code.gitea.io/gitea/modules/setting"
 | 
			
		||||
| 
						 | 
				
			
			@ -29,7 +30,20 @@ func garbageCollectLFSCheck(ctx context.Context, logger log.Logger, autofix bool
 | 
			
		|||
		return fmt.Errorf("LFS support is disabled")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if err := repository.GarbageCollectLFSMetaObjects(ctx, logger, autofix); err != nil {
 | 
			
		||||
	if err := repository.GarbageCollectLFSMetaObjects(ctx, repository.GarbageCollectLFSMetaObjectsOptions{
 | 
			
		||||
		Logger:  logger,
 | 
			
		||||
		AutoFix: autofix,
 | 
			
		||||
		// Only attempt to garbage collect lfs meta objects older than a week as the order of git lfs upload
 | 
			
		||||
		// and git object upload is not necessarily guaranteed. It's possible to imagine a situation whereby
 | 
			
		||||
		// an LFS object is uploaded but the git branch is not uploaded immediately, or there are some rapid
 | 
			
		||||
		// changes in new branches that might lead to lfs objects becoming temporarily unassociated with git
 | 
			
		||||
		// objects.
 | 
			
		||||
		//
 | 
			
		||||
		// It is likely that a week is potentially excessive but it should definitely be enough that any
 | 
			
		||||
		// unassociated LFS object is genuinely unassociated.
 | 
			
		||||
		OlderThan: time.Now().Add(-24 * time.Hour * 7),
 | 
			
		||||
		// We don't set the UpdatedLessRecentlyThan because we want to do a full GC
 | 
			
		||||
	}); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2554,6 +2554,7 @@ dashboard.delete_old_actions = Delete all old actions from database
 | 
			
		|||
dashboard.delete_old_actions.started = Delete all old actions from database started.
 | 
			
		||||
dashboard.update_checker = Update checker
 | 
			
		||||
dashboard.delete_old_system_notices = Delete all old system notices from database
 | 
			
		||||
dashboard.gc_lfs = Garbage collect LFS meta objects
 | 
			
		||||
 | 
			
		||||
users.user_manage_panel = User Account Management
 | 
			
		||||
users.new_account = Create User Account
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -175,6 +175,48 @@ func registerDeleteOldSystemNotices() {
 | 
			
		|||
	})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func registerGCLFS() {
 | 
			
		||||
	if !setting.LFS.StartServer {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
	type GCLFSConfig struct {
 | 
			
		||||
		OlderThanConfig
 | 
			
		||||
		LastUpdatedMoreThanAgo   time.Duration
 | 
			
		||||
		NumberToCheckPerRepo     int64
 | 
			
		||||
		ProportionToCheckPerRepo float64
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	RegisterTaskFatal("gc_lfs", &GCLFSConfig{
 | 
			
		||||
		OlderThanConfig: OlderThanConfig{
 | 
			
		||||
			BaseConfig: BaseConfig{
 | 
			
		||||
				Enabled:    false,
 | 
			
		||||
				RunAtStart: false,
 | 
			
		||||
				Schedule:   "@every 24h",
 | 
			
		||||
			},
 | 
			
		||||
			// Only attempt to garbage collect lfs meta objects older than a week as the order of git lfs upload
 | 
			
		||||
			// and git object upload is not necessarily guaranteed. It's possible to imagine a situation whereby
 | 
			
		||||
			// an LFS object is uploaded but the git branch is not uploaded immediately, or there are some rapid
 | 
			
		||||
			// changes in new branches that might lead to lfs objects becoming temporarily unassociated with git
 | 
			
		||||
			// objects.
 | 
			
		||||
			//
 | 
			
		||||
			// It is likely that a week is potentially excessive but it should definitely be enough that any
 | 
			
		||||
			// unassociated LFS object is genuinely unassociated.
 | 
			
		||||
			OlderThan: 24 * time.Hour * 7,
 | 
			
		||||
		},
 | 
			
		||||
		// Only GC things that haven't been looked at in the past 3 days
 | 
			
		||||
		LastUpdatedMoreThanAgo:   24 * time.Hour * 3,
 | 
			
		||||
		NumberToCheckPerRepo:     100,
 | 
			
		||||
		ProportionToCheckPerRepo: 0.6,
 | 
			
		||||
	}, func(ctx context.Context, _ *user_model.User, config Config) error {
 | 
			
		||||
		gcLFSConfig := config.(*GCLFSConfig)
 | 
			
		||||
		return repo_service.GarbageCollectLFSMetaObjects(ctx, repo_service.GarbageCollectLFSMetaObjectsOptions{
 | 
			
		||||
			AutoFix:                 true,
 | 
			
		||||
			OlderThan:               time.Now().Add(-gcLFSConfig.OlderThan),
 | 
			
		||||
			UpdatedLessRecentlyThan: time.Now().Add(-gcLFSConfig.LastUpdatedMoreThanAgo),
 | 
			
		||||
		})
 | 
			
		||||
	})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func initExtendedTasks() {
 | 
			
		||||
	registerDeleteInactiveUsers()
 | 
			
		||||
	registerDeleteRepositoryArchives()
 | 
			
		||||
| 
						 | 
				
			
			@ -188,4 +230,5 @@ func initExtendedTasks() {
 | 
			
		|||
	registerDeleteOldActions()
 | 
			
		||||
	registerUpdateGiteaChecker()
 | 
			
		||||
	registerDeleteOldSystemNotices()
 | 
			
		||||
	registerGCLFS()
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,49 +5,67 @@ package repository
 | 
			
		|||
 | 
			
		||||
import (
 | 
			
		||||
	"context"
 | 
			
		||||
	"errors"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	"code.gitea.io/gitea/models/db"
 | 
			
		||||
	git_model "code.gitea.io/gitea/models/git"
 | 
			
		||||
	repo_model "code.gitea.io/gitea/models/repo"
 | 
			
		||||
	"code.gitea.io/gitea/modules/git"
 | 
			
		||||
	"code.gitea.io/gitea/modules/lfs"
 | 
			
		||||
	"code.gitea.io/gitea/modules/log"
 | 
			
		||||
 | 
			
		||||
	"xorm.io/builder"
 | 
			
		||||
	"code.gitea.io/gitea/modules/setting"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func GarbageCollectLFSMetaObjects(ctx context.Context, logger log.Logger, autofix bool) error {
 | 
			
		||||
	log.Trace("Doing: GarbageCollectLFSMetaObjects")
 | 
			
		||||
 | 
			
		||||
	if err := db.Iterate(
 | 
			
		||||
		ctx,
 | 
			
		||||
		builder.And(builder.Gt{"id": 0}),
 | 
			
		||||
		func(ctx context.Context, repo *repo_model.Repository) error {
 | 
			
		||||
			return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, logger, autofix)
 | 
			
		||||
		},
 | 
			
		||||
	); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	log.Trace("Finished: GarbageCollectLFSMetaObjects")
 | 
			
		||||
	return nil
 | 
			
		||||
// GarbageCollectLFSMetaObjectsOptions provides options for GarbageCollectLFSMetaObjects function
 | 
			
		||||
type GarbageCollectLFSMetaObjectsOptions struct {
 | 
			
		||||
	Logger                   log.Logger
 | 
			
		||||
	AutoFix                  bool
 | 
			
		||||
	OlderThan                time.Time
 | 
			
		||||
	UpdatedLessRecentlyThan  time.Time
 | 
			
		||||
	NumberToCheckPerRepo     int64
 | 
			
		||||
	ProportionToCheckPerRepo float64
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, logger log.Logger, autofix bool) error {
 | 
			
		||||
	if logger != nil {
 | 
			
		||||
		logger.Info("Checking %-v", repo)
 | 
			
		||||
// GarbageCollectLFSMetaObjects garbage collects LFS objects for all repositories
 | 
			
		||||
func GarbageCollectLFSMetaObjects(ctx context.Context, opts GarbageCollectLFSMetaObjectsOptions) error {
 | 
			
		||||
	log.Trace("Doing: GarbageCollectLFSMetaObjects")
 | 
			
		||||
	defer log.Trace("Finished: GarbageCollectLFSMetaObjects")
 | 
			
		||||
 | 
			
		||||
	if !setting.LFS.StartServer {
 | 
			
		||||
		if opts.Logger != nil {
 | 
			
		||||
			opts.Logger.Info("LFS support is disabled")
 | 
			
		||||
		}
 | 
			
		||||
		return nil
 | 
			
		||||
	}
 | 
			
		||||
	total, orphaned, collected, deleted := 0, 0, 0, 0
 | 
			
		||||
	if logger != nil {
 | 
			
		||||
 | 
			
		||||
	return git_model.IterateRepositoryIDsWithLFSMetaObjects(ctx, func(ctx context.Context, repoID, count int64) error {
 | 
			
		||||
		repo, err := repo_model.GetRepositoryByID(ctx, repoID)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if newMinimum := int64(float64(count) * opts.ProportionToCheckPerRepo); newMinimum > opts.NumberToCheckPerRepo && opts.NumberToCheckPerRepo != 0 {
 | 
			
		||||
			opts.NumberToCheckPerRepo = newMinimum
 | 
			
		||||
		}
 | 
			
		||||
		return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts)
 | 
			
		||||
	})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// GarbageCollectLFSMetaObjectsForRepo garbage collects LFS objects for a specific repository
 | 
			
		||||
func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, opts GarbageCollectLFSMetaObjectsOptions) error {
 | 
			
		||||
	if opts.Logger != nil {
 | 
			
		||||
		opts.Logger.Info("Checking %-v", repo)
 | 
			
		||||
	}
 | 
			
		||||
	total, orphaned, collected, deleted := int64(0), 0, 0, 0
 | 
			
		||||
	if opts.Logger != nil {
 | 
			
		||||
		defer func() {
 | 
			
		||||
			if orphaned == 0 {
 | 
			
		||||
				logger.Info("Found %d total LFSMetaObjects in %-v", total, repo)
 | 
			
		||||
			} else if !autofix {
 | 
			
		||||
				logger.Info("Found %d/%d orphaned LFSMetaObjects in %-v", orphaned, total, repo)
 | 
			
		||||
				opts.Logger.Info("Found %d total LFSMetaObjects in %-v", total, repo)
 | 
			
		||||
			} else if !opts.AutoFix {
 | 
			
		||||
				opts.Logger.Info("Found %d/%d orphaned LFSMetaObjects in %-v", orphaned, total, repo)
 | 
			
		||||
			} else {
 | 
			
		||||
				logger.Info("Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. %d removed from storage.", collected, orphaned, total, repo, deleted)
 | 
			
		||||
				opts.Logger.Info("Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. %d removed from storage.", collected, orphaned, total, repo, deleted)
 | 
			
		||||
			}
 | 
			
		||||
		}()
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -60,17 +78,21 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
 | 
			
		|||
	defer gitRepo.Close()
 | 
			
		||||
 | 
			
		||||
	store := lfs.NewContentStore()
 | 
			
		||||
	errStop := errors.New("STOPERR")
 | 
			
		||||
 | 
			
		||||
	return git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
 | 
			
		||||
	err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
 | 
			
		||||
		if opts.NumberToCheckPerRepo > 0 && total > opts.NumberToCheckPerRepo {
 | 
			
		||||
			return errStop
 | 
			
		||||
		}
 | 
			
		||||
		total++
 | 
			
		||||
		pointerSha := git.ComputeBlobHash([]byte(metaObject.Pointer.StringContent()))
 | 
			
		||||
 | 
			
		||||
		if gitRepo.IsObjectExist(pointerSha.String()) {
 | 
			
		||||
			return nil
 | 
			
		||||
			return git_model.MarkLFSMetaObject(ctx, metaObject.ID)
 | 
			
		||||
		}
 | 
			
		||||
		orphaned++
 | 
			
		||||
 | 
			
		||||
		if !autofix {
 | 
			
		||||
		if !opts.AutoFix {
 | 
			
		||||
			return nil
 | 
			
		||||
		}
 | 
			
		||||
		// Non-existent pointer file
 | 
			
		||||
| 
						 | 
				
			
			@ -100,6 +122,19 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
 | 
			
		|||
		//
 | 
			
		||||
		// It is likely that a week is potentially excessive but it should definitely be enough that any
 | 
			
		||||
		// unassociated LFS object is genuinely unassociated.
 | 
			
		||||
		OlderThan: time.Now().Add(-24 * 7 * time.Hour),
 | 
			
		||||
		OlderThan:                 opts.OlderThan,
 | 
			
		||||
		UpdatedLessRecentlyThan:   opts.UpdatedLessRecentlyThan,
 | 
			
		||||
		OrderByUpdated:            true,
 | 
			
		||||
		LoopFunctionAlwaysUpdates: true,
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
	if err == errStop {
 | 
			
		||||
		if opts.Logger != nil {
 | 
			
		||||
			opts.Logger.Info("Processing stopped at %d total LFSMetaObjects in %-v", total, repo)
 | 
			
		||||
		}
 | 
			
		||||
		return nil
 | 
			
		||||
	} else if err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue