Added support for searching content in a specific directory or file.

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/6143
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
Reviewed-by: 0ko <0ko@noreply.codeberg.org>
Co-authored-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
Co-committed-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
// Copyright 2024 The Forgejo Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package hierarchy

import (
	"bytes"

	"github.com/blevesearch/bleve/v2/analysis"
	"github.com/blevesearch/bleve/v2/registry"
)

const Name = "path_hierarchy"

type PathHierarchyTokenizer struct{}

// Similar to Elasticsearch's path_hierarchy tokenizer.
// This tokenizes a given path into all of its possible hierarchies.
// For example,
// modules/indexer/code/search.go =>
//
//	modules
//	modules/indexer
//	modules/indexer/code
//	modules/indexer/code/search.go
func (t *PathHierarchyTokenizer) Tokenize(input []byte) analysis.TokenStream {
	// trim any extra slashes
	input = bytes.Trim(input, "/")

	// pre-allocate for a depth of 8; append only reallocates once the
	// nested directories exceed that depth (which is unlikely)
	rv := make(analysis.TokenStream, 0, 8)
	count, off := 1, 0

	// iterate over all directory separators
	for i := bytes.IndexRune(input[off:], '/'); i != -1; i = bytes.IndexRune(input[off:], '/') {
		// the index is relative to input[off:];
		// add it to the accumulated offset to get the index of the current separator in input
		off += i
		rv = append(rv, &analysis.Token{
			Term:     input[:off], // the prefix input[0:off], up to the current separator
			Start:    0,
			End:      off,
			Position: count,
			Type:     analysis.AlphaNumeric,
		})
		// advance the offset past the separator
		off++
		count++
	}

	// the entire file path should always be the last token
	rv = append(rv, &analysis.Token{
		Term:     input,
		Start:    0,
		End:      len(input),
		Position: count,
		Type:     analysis.AlphaNumeric,
	})

	return rv
}

func TokenizerConstructor(config map[string]any, cache *registry.Cache) (analysis.Tokenizer, error) {
	return &PathHierarchyTokenizer{}, nil
}

func init() {
	registry.RegisterTokenizer(Name, TokenizerConstructor)
}
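
Since the doc comment above only sketches the expected hierarchy, here is a minimal, hypothetical test sketch of how the tokenizer behaves, including the leading/trailing slash trimming. The test name and the input path are illustrative assumptions, not taken from the repository:

// Hypothetical test sketch (not part of the file above): exercises Tokenize
// and checks the prefixes and positions it emits for an example path.
package hierarchy

import "testing"

func TestPathHierarchyTokenizer(t *testing.T) {
	expected := []string{
		"modules",
		"modules/indexer",
		"modules/indexer/code",
		"modules/indexer/code/search.go",
	}
	// extra slashes are trimmed before tokenizing
	tokens := (&PathHierarchyTokenizer{}).Tokenize([]byte("/modules/indexer/code/search.go/"))
	if len(tokens) != len(expected) {
		t.Fatalf("expected %d tokens, got %d", len(expected), len(tokens))
	}
	for i, want := range expected {
		if got := string(tokens[i].Term); got != want {
			t.Errorf("token %d: expected term %q, got %q", i, want, got)
		}
		if tokens[i].Position != i+1 {
			t.Errorf("token %d: expected position %d, got %d", i, i+1, tokens[i].Position)
		}
	}
}

Emitting every ancestor prefix as its own token means a directory can be matched with an exact term lookup in the index, which is presumably how the directory-scoped code search described in the commit message above is implemented.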