feat: filepath filter for code search (#6143)

Added support for searching content in a specific directory or file.

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/6143
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
Reviewed-by: 0ko <0ko@noreply.codeberg.org>
Co-authored-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
Co-committed-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
This commit is contained in:
Shiny Nematoda 2024-12-22 12:24:29 +00:00 committed by 0ko
parent bb88e1daf8
commit ee214cb886
19 changed files with 342 additions and 61 deletions

View file

@ -30,7 +30,7 @@ import (
)
const (
esRepoIndexerLatestVersion = 1
esRepoIndexerLatestVersion = 2
// multi-match-types, currently only 2 types are used
// Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
esMultiMatchTypeBestFields = "best_fields"
@ -57,6 +57,21 @@ func NewIndexer(url, indexerName string) *Indexer {
const (
defaultMapping = `{
"settings": {
"analysis": {
"analyzer": {
"custom_path_tree": {
"tokenizer": "custom_hierarchy"
}
},
"tokenizer": {
"custom_hierarchy": {
"type": "path_hierarchy",
"delimiter": "/"
}
}
}
},
"mappings": {
"properties": {
"repo_id": {
@ -72,6 +87,15 @@ const (
"type": "keyword",
"index": true
},
"filename": {
"type": "text",
"fields": {
"tree": {
"type": "text",
"analyzer": "custom_path_tree"
}
}
},
"language": {
"type": "keyword",
"index": true
@ -138,6 +162,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
"repo_id": repo.ID,
"content": string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})),
"commit_id": sha,
"filename": update.Filename,
"language": analyze.GetCodeLanguage(update.Filename, fileContents),
"updated_at": timeutil.TimeStampNow(),
}),
@ -267,7 +292,6 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int)
panic(fmt.Sprintf("2===%#v", hit.Highlight))
}
repoID, fileName := internal.ParseIndexerID(hit.Id)
res := make(map[string]any)
if err := json.Unmarshal(hit.Source, &res); err != nil {
return 0, nil, nil, err
@ -276,8 +300,8 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int)
language := res["language"].(string)
hits = append(hits, &internal.SearchResult{
RepoID: repoID,
Filename: fileName,
RepoID: int64(res["repo_id"].(float64)),
Filename: res["filename"].(string),
CommitID: res["commit_id"].(string),
Content: res["content"].(string),
UpdatedUnix: timeutil.TimeStamp(res["updated_at"].(float64)),
@ -326,6 +350,9 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...)
query = query.Must(repoQuery)
}
if len(opts.Filename) > 0 {
query = query.Filter(elastic.NewTermsQuery("filename.tree", opts.Filename))
}
var (
start, pageSize = opts.GetSkipTake()