mirror of
				https://codeberg.org/forgejo/forgejo.git
				synced 2025-10-25 11:33:11 +00:00 
			
		
		
		
	This PR contains the following updates:
| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/blevesearch/bleve/v2](https://github.com/blevesearch/bleve) | require | minor | `v2.4.4` -> `v2.5.0` |
---
### Release Notes
<details>
<summary>blevesearch/bleve (github.com/blevesearch/bleve/v2)</summary>
### [`v2.5.0`](https://github.com/blevesearch/bleve/releases/tag/v2.5.0)
[Compare Source](https://github.com/blevesearch/bleve/compare/v2.4.4...v2.5.0)
##### Bug Fixes
-   Exact hits to score higher than fuzzy hits, with https://github.com/blevesearch/bleve/pull/2056
-   Fix boosting during hybrid search that involves text + nearest neighbor, with https://github.com/blevesearch/bleve/pull/2127
-   Addressed bug in IP field handling while highlighting, with https://github.com/blevesearch/bleve/pull/2142
-   Graceful error handling within registry, with https://github.com/blevesearch/bleve/pull/2151
-   `http/` package (meant for demo purposes) removed from repository to remove vulnerability - [CVE-2022-31022](https://github.com/blevesearch/bleve/security/advisories/GHSA-9w9f-6mg8-jp7w), relocated to within https://github.com/blevesearch/bleve-explorer
-   Geo radius queries will now advertise distances (within sort values) in readable format, with https://github.com/blevesearch/bleve/pull/2137
##### Improvements
-   Vector search requires `faiss` dynamic library to be built from [blevesearch/faiss@352484e](https://github.com/blevesearch/faiss/commit/352484e0fc) which is a modified version of [v1.10.0](https://github.com/facebookresearch/faiss/releases/tag/v1.10.0)
-   Support for **BM25 scoring**, see: [scoring.md](https://github.com/blevesearch/bleve/blob/v2.5.0/docs/scoring.md#bm25)
-   Support for **synonyms' search**, see: [synonyms.md](https://github.com/blevesearch/bleve/blob/v2.5.0/docs/synonyms.md)
-   **Significant performance improvements in pre-filtered vector search**, with https://github.com/blevesearch/bleve/pull/2169 + dependent changes
-   `auto` fuzziness detection with https://github.com/blevesearch/bleve/pull/2060
-   Ability to affect ingestion/drain rate by tuning persister workers with https://github.com/blevesearch/bleve/pull/2100
-   Additional config in merge policy for improved merger behavior, with https://github.com/blevesearch/bleve/pull/2134
-   Geo improvements: footprint reduction for polygons, better validation and graceful error handling, with https://github.com/blevesearch/bleve/pull/2162 + https://github.com/blevesearch/bleve/pull/2158 + https://github.com/blevesearch/bleve/pull/2165
-   Upgrade to RoaringBitmap/roaring@v2.4.5, etcd.io/bbolt@v1.4.0
-   More metrics
##### Milestone
-   [v2.5.0](https://github.com/blevesearch/bleve/milestone/24)
</details>
---
### Configuration
📅 **Schedule**: Branch creation - "* 0-3 * * *" (UTC), Automerge - "* 0-3 * * *" (UTC).
🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied.
♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox.
🔕 **Ignore**: Close this PR and you won't be reminded about this update again.
---
 - [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check this box
---
This PR has been generated by [Renovate Bot](https://github.com/renovatebot/renovate).
<!--renovate-debug:eyJjcmVhdGVkSW5WZXIiOiIzOS4yMjIuMSIsInVwZGF0ZWRJblZlciI6IjM5LjIyMi4xIiwidGFyZ2V0QnJhbmNoIjoiZm9yZ2VqbyIsImxhYmVscyI6WyJkZXBlbmRlbmN5LXVwZ3JhZGUiLCJ0ZXN0L25vdC1uZWVkZWQiXX0=-->
Co-authored-by: Gusted <postmaster@gusted.xyz>
Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/7468
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
Reviewed-by: Shiny Nematoda <snematoda@noreply.codeberg.org>
Co-authored-by: Renovate Bot <forgejo-renovate-action@forgejo.org>
Co-committed-by: Renovate Bot <forgejo-renovate-action@forgejo.org>
		
	
			
		
			
				
	
	
		
			71 lines
		
	
	
	
		
			1.9 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			71 lines
		
	
	
	
		
			1.9 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright 2024 The Forgejo Authors. All rights reserved.
 | |
| // SPDX-License-Identifier: MIT
 | |
| 
 | |
| package hierarchy
 | |
| 
 | |
| import (
 | |
| 	"bytes"
 | |
| 
 | |
| 	"github.com/blevesearch/bleve/v2/analysis"
 | |
| 	"github.com/blevesearch/bleve/v2/registry"
 | |
| )
 | |
| 
 | |
// Name is the identifier under which this package registers its tokenizer
// constructor.
const Name = "path_hierarchy"

// PathHierarchyTokenizer breaks a slash-separated path into its ancestor
// prefixes. It has no fields, so the zero value is ready to use.
type PathHierarchyTokenizer struct{}
 | |
| 
 | |
| // Similar to elastic's path_hierarchy tokenizer
 | |
| // This tokenizes a given path into all the possible hierarchies
 | |
| // For example,
 | |
| // modules/indexer/code/search.go =>
 | |
| //
 | |
| //	modules/
 | |
| //	modules/indexer
 | |
| //	modules/indexer/code
 | |
| //	modules/indexer/code/search.go
 | |
| func (t *PathHierarchyTokenizer) Tokenize(input []byte) analysis.TokenStream {
 | |
| 	// trim any extra slashes
 | |
| 	input = bytes.Trim(input, "/")
 | |
| 
 | |
| 	// zero allocations until the nested directories exceed a depth of 8 (which is unlikely)
 | |
| 	rv := make(analysis.TokenStream, 0, 8)
 | |
| 	count, off := 1, 0
 | |
| 
 | |
| 	// iterate till all directory separators
 | |
| 	for i := bytes.IndexRune(input[off:], '/'); i != -1; i = bytes.IndexRune(input[off:], '/') {
 | |
| 		// the index is relative to input[offset...]
 | |
| 		// add this index to the accumulated offset to get the index of the current separator in input[0...]
 | |
| 		off += i
 | |
| 		rv = append(rv, &analysis.Token{
 | |
| 			Term:     input[:off], // take the slice, input[0...index of separator]
 | |
| 			Start:    0,
 | |
| 			End:      off,
 | |
| 			Position: count,
 | |
| 			Type:     analysis.AlphaNumeric,
 | |
| 		})
 | |
| 		// increment the offset after considering the separator
 | |
| 		off++
 | |
| 		count++
 | |
| 	}
 | |
| 
 | |
| 	// the entire file path should always be the last token
 | |
| 	rv = append(rv, &analysis.Token{
 | |
| 		Term:     input,
 | |
| 		Start:    0,
 | |
| 		End:      len(input),
 | |
| 		Position: count,
 | |
| 		Type:     analysis.AlphaNumeric,
 | |
| 	})
 | |
| 
 | |
| 	return rv
 | |
| }
 | |
| 
 | |
| func TokenizerConstructor(config map[string]any, cache *registry.Cache) (analysis.Tokenizer, error) {
 | |
| 	return &PathHierarchyTokenizer{}, nil
 | |
| }
 | |
| 
 | |
| func init() {
 | |
| 	if err := registry.RegisterTokenizer(Name, TokenizerConstructor); err != nil {
 | |
| 		panic(err)
 | |
| 	}
 | |
| }
 |