fix: skip empty tokens in SearchOptions.Tokens() (#8261)
Query string tokenizer could return a list containing empty tokens when the query string was `\` or `"` (probably in other scenarios as well). This seems undesirable and is what triggered #8260, but I'm posting this separately from that fix in case I'm wrong. Feel free to reject if so. The actual change in behavior is that now searching for `\` or `"` behaves the same as if the query were empty (the bleve/elastic code checks that the tokenizer actually returned anything, rather than just the query being non-empty). ### Tests - I added test coverage for Go changes... - [x] in their respective `*_test.go` for unit tests. ### Documentation - [ ] I created a pull request [to the documentation](https://codeberg.org/forgejo/docs) to explain to Forgejo users how to use this change. - [x] I did not document these changes and I do not expect someone else to do it. ### Release notes - [x] I do not want this change to show in the release notes. - [ ] I want the title to show in the release notes with a link to this pull request. - [ ] I want the content of the `release-notes/<pull request number>.md` to be used for the release notes instead of the title. Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/8261 Reviewed-by: Shiny Nematoda <snematoda@noreply.codeberg.org> Co-authored-by: Danko Aleksejevs <danko@very.lv> Co-committed-by: Danko Aleksejevs <danko@very.lv>
This commit is contained in:
		
					parent
					
						
							
								bc2e4942fc
							
						
					
				
			
			
				commit
				
					
						4935e6e1a3
					
				
			
		
					 5 changed files with 146 additions and 19 deletions
				
			
		| 
						 | 
				
			
			@ -156,11 +156,12 @@ func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
 | 
			
		|||
func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
 | 
			
		||||
	var queries []query.Query
 | 
			
		||||
 | 
			
		||||
	if options.Keyword != "" {
 | 
			
		||||
	tokens, err := options.Tokens()
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if len(tokens) > 0 {
 | 
			
		||||
		q := bleve.NewBooleanQuery()
 | 
			
		||||
		for _, token := range tokens {
 | 
			
		||||
			innerQ := bleve.NewDisjunctionQuery(
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -149,12 +149,13 @@ func (b *Indexer) Delete(ctx context.Context, ids ...int64) error {
 | 
			
		|||
func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
 | 
			
		||||
	query := elastic.NewBoolQuery()
 | 
			
		||||
 | 
			
		||||
	if options.Keyword != "" {
 | 
			
		||||
		q := elastic.NewBoolQuery()
 | 
			
		||||
	tokens, err := options.Tokens()
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if len(tokens) > 0 {
 | 
			
		||||
		q := elastic.NewBoolQuery()
 | 
			
		||||
		for _, token := range tokens {
 | 
			
		||||
			innerQ := elastic.NewMultiMatchQuery(token.Term, "content", "comments").FieldWithBoost("title", 2.0).TieBreaker(0.5)
 | 
			
		||||
			if token.Fuzzy {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -45,13 +45,10 @@ func (t *Tokenizer) next() (tk Token, err error) {
 | 
			
		|||
 | 
			
		||||
	// skip all leading white space
 | 
			
		||||
	for {
 | 
			
		||||
		if r, _, err = t.in.ReadRune(); err == nil && r == ' ' {
 | 
			
		||||
			//nolint:staticcheck,wastedassign // SA4006 the variable is used after the loop
 | 
			
		||||
			r, _, err = t.in.ReadRune()
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		if r, _, err = t.in.ReadRune(); err != nil || r != ' ' {
 | 
			
		||||
			break
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return tk, err
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -107,12 +104,18 @@ nextEnd:
 | 
			
		|||
 | 
			
		||||
// Tokenize the keyword
 | 
			
		||||
func (o *SearchOptions) Tokens() (tokens []Token, err error) {
 | 
			
		||||
	if o.Keyword == "" {
 | 
			
		||||
		return nil, nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	in := strings.NewReader(o.Keyword)
 | 
			
		||||
	it := Tokenizer{in: in}
 | 
			
		||||
 | 
			
		||||
	for token, err := it.next(); err == nil; token, err = it.next() {
 | 
			
		||||
		if token.Term != "" {
 | 
			
		||||
			tokens = append(tokens, token)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if err != nil && err != io.EOF {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -41,6 +41,36 @@ var testOpts = []testIssueQueryStringOpt{
 | 
			
		|||
			},
 | 
			
		||||
		},
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		Keyword: "Hello  World",
 | 
			
		||||
		Results: []Token{
 | 
			
		||||
			{
 | 
			
		||||
				Term:  "Hello",
 | 
			
		||||
				Fuzzy: true,
 | 
			
		||||
				Kind:  BoolOptShould,
 | 
			
		||||
			},
 | 
			
		||||
			{
 | 
			
		||||
				Term:  "World",
 | 
			
		||||
				Fuzzy: true,
 | 
			
		||||
				Kind:  BoolOptShould,
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		Keyword: " Hello World ",
 | 
			
		||||
		Results: []Token{
 | 
			
		||||
			{
 | 
			
		||||
				Term:  "Hello",
 | 
			
		||||
				Fuzzy: true,
 | 
			
		||||
				Kind:  BoolOptShould,
 | 
			
		||||
			},
 | 
			
		||||
			{
 | 
			
		||||
				Term:  "World",
 | 
			
		||||
				Fuzzy: true,
 | 
			
		||||
				Kind:  BoolOptShould,
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		Keyword: "+Hello +World",
 | 
			
		||||
		Results: []Token{
 | 
			
		||||
| 
						 | 
				
			
			@ -156,6 +186,68 @@ var testOpts = []testIssueQueryStringOpt{
 | 
			
		|||
			},
 | 
			
		||||
		},
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		Keyword: "\\",
 | 
			
		||||
		Results: nil,
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		Keyword: "\"",
 | 
			
		||||
		Results: nil,
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		Keyword: "Hello \\",
 | 
			
		||||
		Results: []Token{
 | 
			
		||||
			{
 | 
			
		||||
				Term:  "Hello",
 | 
			
		||||
				Fuzzy: true,
 | 
			
		||||
				Kind:  BoolOptShould,
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		Keyword: "\"\"",
 | 
			
		||||
		Results: nil,
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		Keyword: "\" World \"",
 | 
			
		||||
		Results: []Token{
 | 
			
		||||
			{
 | 
			
		||||
				Term:  " World ",
 | 
			
		||||
				Fuzzy: false,
 | 
			
		||||
				Kind:  BoolOptShould,
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		Keyword: "\"\" World \"\"",
 | 
			
		||||
		Results: []Token{
 | 
			
		||||
			{
 | 
			
		||||
				Term:  "World",
 | 
			
		||||
				Fuzzy: true,
 | 
			
		||||
				Kind:  BoolOptShould,
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		Keyword: "Best \"Hello World\" Ever",
 | 
			
		||||
		Results: []Token{
 | 
			
		||||
			{
 | 
			
		||||
				Term:  "Best",
 | 
			
		||||
				Fuzzy: true,
 | 
			
		||||
				Kind:  BoolOptShould,
 | 
			
		||||
			},
 | 
			
		||||
			{
 | 
			
		||||
				Term:  "Hello World",
 | 
			
		||||
				Fuzzy: false,
 | 
			
		||||
				Kind:  BoolOptShould,
 | 
			
		||||
			},
 | 
			
		||||
			{
 | 
			
		||||
				Term:  "Ever",
 | 
			
		||||
				Fuzzy: true,
 | 
			
		||||
				Kind:  BoolOptShould,
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
	},
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestIssueQueryString(t *testing.T) {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -87,14 +87,44 @@ func TestIndexer(t *testing.T, indexer internal.Indexer) {
 | 
			
		|||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func allResults(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
 | 
			
		||||
	assert.Len(t, result.Hits, len(data))
 | 
			
		||||
	assert.Equal(t, len(data), int(result.Total))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
var cases = []*testIndexerCase{
 | 
			
		||||
	{
 | 
			
		||||
		Name:          "default",
 | 
			
		||||
		SearchOptions: &internal.SearchOptions{},
 | 
			
		||||
		Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) {
 | 
			
		||||
			assert.Len(t, result.Hits, len(data))
 | 
			
		||||
			assert.Equal(t, len(data), int(result.Total))
 | 
			
		||||
		Expected:      allResults,
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		Name: "empty keyword",
 | 
			
		||||
		SearchOptions: &internal.SearchOptions{
 | 
			
		||||
			Keyword: "",
 | 
			
		||||
		},
 | 
			
		||||
		Expected: allResults,
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		Name: "whitespace keyword",
 | 
			
		||||
		SearchOptions: &internal.SearchOptions{
 | 
			
		||||
			Keyword: "    ",
 | 
			
		||||
		},
 | 
			
		||||
		Expected: allResults,
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		Name: "dangling slash in keyword",
 | 
			
		||||
		SearchOptions: &internal.SearchOptions{
 | 
			
		||||
			Keyword: "\\",
 | 
			
		||||
		},
 | 
			
		||||
		Expected: allResults,
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		Name: "dangling quote in keyword",
 | 
			
		||||
		SearchOptions: &internal.SearchOptions{
 | 
			
		||||
			Keyword: "\"",
 | 
			
		||||
		},
 | 
			
		||||
		Expected: allResults,
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		Name: "empty",
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue