Help to recover from corrupted levelqueue (#24912)
gitea.com experienced the corrupted LevelQueue bug again. I think the problem is clear now: if the keys in LevelDB went out-of-sync, the LevelQueue itself doesn't have the ability to recover, eg: * LevelQueue.Len() reports 100 * LevelQueue.LPop() reports ErrNotFound = errors.New("no key found") So it needs to dive into the LevelDB to remove all keys to recover the corrupted LevelQueue. More comments are in TestCorruptedLevelQueue.
This commit is contained in:
parent
8faf9465b3
commit
84c8ab9fd1
5 changed files with 162 additions and 55 deletions
|
@ -5,16 +5,21 @@ package queue
|
|||
|
||||
import (
|
||||
"context"
|
||||
"sync/atomic"
|
||||
|
||||
"code.gitea.io/gitea/modules/nosql"
|
||||
"code.gitea.io/gitea/modules/queue/lqinternal"
|
||||
|
||||
"gitea.com/lunny/levelqueue"
|
||||
"github.com/syndtr/goleveldb/leveldb"
|
||||
)
|
||||
|
||||
type baseLevelQueue struct {
|
||||
internal *levelqueue.Queue
|
||||
conn string
|
||||
cfg *BaseConfig
|
||||
internal atomic.Pointer[levelqueue.Queue]
|
||||
|
||||
conn string
|
||||
cfg *BaseConfig
|
||||
db *leveldb.DB
|
||||
}
|
||||
|
||||
var _ baseQueue = (*baseLevelQueue)(nil)
|
||||
|
@ -31,21 +36,23 @@ func newBaseLevelQueueSimple(cfg *BaseConfig) (baseQueue, error) {
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
q := &baseLevelQueue{conn: conn, cfg: cfg}
|
||||
q.internal, err = levelqueue.NewQueue(db, []byte(cfg.QueueFullName), false)
|
||||
q := &baseLevelQueue{conn: conn, cfg: cfg, db: db}
|
||||
lq, err := levelqueue.NewQueue(db, []byte(cfg.QueueFullName), false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
q.internal.Store(lq)
|
||||
return q, nil
|
||||
}
|
||||
|
||||
func (q *baseLevelQueue) PushItem(ctx context.Context, data []byte) error {
|
||||
return baseLevelQueueCommon(q.cfg, q.internal, nil).PushItem(ctx, data)
|
||||
c := baseLevelQueueCommon(q.cfg, nil, func() baseLevelQueuePushPoper { return q.internal.Load() })
|
||||
return c.PushItem(ctx, data)
|
||||
}
|
||||
|
||||
func (q *baseLevelQueue) PopItem(ctx context.Context) ([]byte, error) {
|
||||
return baseLevelQueueCommon(q.cfg, q.internal, nil).PopItem(ctx)
|
||||
c := baseLevelQueueCommon(q.cfg, nil, func() baseLevelQueuePushPoper { return q.internal.Load() })
|
||||
return c.PopItem(ctx)
|
||||
}
|
||||
|
||||
func (q *baseLevelQueue) HasItem(ctx context.Context, data []byte) (bool, error) {
|
||||
|
@ -53,20 +60,24 @@ func (q *baseLevelQueue) HasItem(ctx context.Context, data []byte) (bool, error)
|
|||
}
|
||||
|
||||
func (q *baseLevelQueue) Len(ctx context.Context) (int, error) {
|
||||
return int(q.internal.Len()), nil
|
||||
return int(q.internal.Load().Len()), nil
|
||||
}
|
||||
|
||||
func (q *baseLevelQueue) Close() error {
|
||||
err := q.internal.Close()
|
||||
err := q.internal.Load().Close()
|
||||
_ = nosql.GetManager().CloseLevelDB(q.conn)
|
||||
q.db = nil // the db is not managed by us, it's managed by the nosql manager
|
||||
return err
|
||||
}
|
||||
|
||||
func (q *baseLevelQueue) RemoveAll(ctx context.Context) error {
|
||||
for q.internal.Len() > 0 {
|
||||
if _, err := q.internal.LPop(); err != nil {
|
||||
return err
|
||||
}
|
||||
lqinternal.RemoveLevelQueueKeys(q.db, []byte(q.cfg.QueueFullName))
|
||||
lq, err := levelqueue.NewQueue(q.db, []byte(q.cfg.QueueFullName), false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
old := q.internal.Load()
|
||||
q.internal.Store(lq)
|
||||
_ = old.Close() // Not ideal for concurrency. Luckily, the levelqueue only sets its db=nil because it doesn't manage the db, so far so good
|
||||
return nil
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue