feat: filepath filter for code search (#6143)
Added support for searching content in a specific directory or file. Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/6143 Reviewed-by: Gusted <gusted@noreply.codeberg.org> Reviewed-by: 0ko <0ko@noreply.codeberg.org> Co-authored-by: Shiny Nematoda <snematoda.751k2@aleeas.com> Co-committed-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
This commit is contained in:
parent
bb88e1daf8
commit
ee214cb886
19 changed files with 342 additions and 61 deletions
69
modules/indexer/code/bleve/tokenizer/hierarchy/hierarchy.go
Normal file
69
modules/indexer/code/bleve/tokenizer/hierarchy/hierarchy.go
Normal file
|
@ -0,0 +1,69 @@
|
|||
// Copyright 2024 The Forgejo Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package hierarchy
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "path_hierarchy"
|
||||
|
||||
type PathHierarchyTokenizer struct{}
|
||||
|
||||
// Similar to elastic's path_hierarchy tokenizer
|
||||
// This tokenizes a given path into all the possible hierarchies
|
||||
// For example,
|
||||
// modules/indexer/code/search.go =>
|
||||
//
|
||||
// modules/
|
||||
// modules/indexer
|
||||
// modules/indexer/code
|
||||
// modules/indexer/code/search.go
|
||||
func (t *PathHierarchyTokenizer) Tokenize(input []byte) analysis.TokenStream {
|
||||
// trim any extra slashes
|
||||
input = bytes.Trim(input, "/")
|
||||
|
||||
// zero allocations until the nested directories exceed a depth of 8 (which is unlikely)
|
||||
rv := make(analysis.TokenStream, 0, 8)
|
||||
count, off := 1, 0
|
||||
|
||||
// iterate till all directory seperators
|
||||
for i := bytes.IndexRune(input[off:], '/'); i != -1; i = bytes.IndexRune(input[off:], '/') {
|
||||
// the index is relative to input[offest...]
|
||||
// add this index to the accumlated offset to get the index of the current seperator in input[0...]
|
||||
off += i
|
||||
rv = append(rv, &analysis.Token{
|
||||
Term: input[:off], // take the slice, input[0...index of seperator]
|
||||
Start: 0,
|
||||
End: off,
|
||||
Position: count,
|
||||
Type: analysis.AlphaNumeric,
|
||||
})
|
||||
// increment the offset after considering the seperator
|
||||
off++
|
||||
count++
|
||||
}
|
||||
|
||||
// the entire file path should always be the last token
|
||||
rv = append(rv, &analysis.Token{
|
||||
Term: input,
|
||||
Start: 0,
|
||||
End: len(input),
|
||||
Position: count,
|
||||
Type: analysis.AlphaNumeric,
|
||||
})
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func TokenizerConstructor(config map[string]any, cache *registry.Cache) (analysis.Tokenizer, error) {
|
||||
return &PathHierarchyTokenizer{}, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenizer(Name, TokenizerConstructor)
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue