feat: filepath filter for code search (#6143)
Added support for searching content in a specific directory or file. Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/6143 Reviewed-by: Gusted <gusted@noreply.codeberg.org> Reviewed-by: 0ko <0ko@noreply.codeberg.org> Co-authored-by: Shiny Nematoda <snematoda.751k2@aleeas.com> Co-committed-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
This commit is contained in:
parent
bb88e1daf8
commit
ee214cb886
19 changed files with 342 additions and 61 deletions
|
@ -30,7 +30,7 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
esRepoIndexerLatestVersion = 1
|
||||
esRepoIndexerLatestVersion = 2
|
||||
// multi-match-types, currently only 2 types are used
|
||||
// Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
|
||||
esMultiMatchTypeBestFields = "best_fields"
|
||||
|
@ -57,6 +57,21 @@ func NewIndexer(url, indexerName string) *Indexer {
|
|||
|
||||
const (
|
||||
defaultMapping = `{
|
||||
"settings": {
|
||||
"analysis": {
|
||||
"analyzer": {
|
||||
"custom_path_tree": {
|
||||
"tokenizer": "custom_hierarchy"
|
||||
}
|
||||
},
|
||||
"tokenizer": {
|
||||
"custom_hierarchy": {
|
||||
"type": "path_hierarchy",
|
||||
"delimiter": "/"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"repo_id": {
|
||||
|
@ -72,6 +87,15 @@ const (
|
|||
"type": "keyword",
|
||||
"index": true
|
||||
},
|
||||
"filename": {
|
||||
"type": "text",
|
||||
"fields": {
|
||||
"tree": {
|
||||
"type": "text",
|
||||
"analyzer": "custom_path_tree"
|
||||
}
|
||||
}
|
||||
},
|
||||
"language": {
|
||||
"type": "keyword",
|
||||
"index": true
|
||||
|
@ -138,6 +162,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
|
|||
"repo_id": repo.ID,
|
||||
"content": string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})),
|
||||
"commit_id": sha,
|
||||
"filename": update.Filename,
|
||||
"language": analyze.GetCodeLanguage(update.Filename, fileContents),
|
||||
"updated_at": timeutil.TimeStampNow(),
|
||||
}),
|
||||
|
@ -267,7 +292,6 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int)
|
|||
panic(fmt.Sprintf("2===%#v", hit.Highlight))
|
||||
}
|
||||
|
||||
repoID, fileName := internal.ParseIndexerID(hit.Id)
|
||||
res := make(map[string]any)
|
||||
if err := json.Unmarshal(hit.Source, &res); err != nil {
|
||||
return 0, nil, nil, err
|
||||
|
@ -276,8 +300,8 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int)
|
|||
language := res["language"].(string)
|
||||
|
||||
hits = append(hits, &internal.SearchResult{
|
||||
RepoID: repoID,
|
||||
Filename: fileName,
|
||||
RepoID: int64(res["repo_id"].(float64)),
|
||||
Filename: res["filename"].(string),
|
||||
CommitID: res["commit_id"].(string),
|
||||
Content: res["content"].(string),
|
||||
UpdatedUnix: timeutil.TimeStamp(res["updated_at"].(float64)),
|
||||
|
@ -326,6 +350,9 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
|
|||
repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...)
|
||||
query = query.Must(repoQuery)
|
||||
}
|
||||
if len(opts.Filename) > 0 {
|
||||
query = query.Filter(elastic.NewTermsQuery("filename.tree", opts.Filename))
|
||||
}
|
||||
|
||||
var (
|
||||
start, pageSize = opts.GetSkipTake()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue