mirror of
				https://codeberg.org/forgejo/forgejo.git
				synced 2025-10-23 02:22:36 +00:00 
			
		
		
		
	* update bleve to master b17287a86f6cac923a5d886e10618df994eeb54b6724eac2e3b8dde89cfbe3a2 * remove unused pkg from dep file * change bleve from master to recent revision
		
			
				
	
	
		
			300 lines
		
	
	
	
		
			7.1 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			300 lines
		
	
	
	
		
			7.1 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| //  Copyright (c) 2017 Couchbase, Inc.
 | |
| //
 | |
| // Licensed under the Apache License, Version 2.0 (the "License");
 | |
| // you may not use this file except in compliance with the License.
 | |
| // You may obtain a copy of the License at
 | |
| //
 | |
| // 		http://www.apache.org/licenses/LICENSE-2.0
 | |
| //
 | |
| // Unless required by applicable law or agreed to in writing, software
 | |
| // distributed under the License is distributed on an "AS IS" BASIS,
 | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // See the License for the specific language governing permissions and
 | |
| // limitations under the License.
 | |
| 
 | |
| package vellum
 | |
| 
 | |
| import (
 | |
| 	"io"
 | |
| 
 | |
| 	"github.com/willf/bitset"
 | |
| )
 | |
| 
 | |
| // FST is an in-memory representation of a finite state transducer,
 | |
| // capable of returning the uint64 value associated with
 | |
| // each []byte key stored, as well as enumerating all of the keys
 | |
| // in order.
 | |
| type FST struct {
 | |
| 	f       io.Closer
 | |
| 	ver     int
 | |
| 	len     int
 | |
| 	typ     int
 | |
| 	data    []byte
 | |
| 	decoder decoder
 | |
| }
 | |
| 
 | |
| func new(data []byte, f io.Closer) (rv *FST, err error) {
 | |
| 	rv = &FST{
 | |
| 		data: data,
 | |
| 		f:    f,
 | |
| 	}
 | |
| 
 | |
| 	rv.ver, rv.typ, err = decodeHeader(data)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	rv.decoder, err = loadDecoder(rv.ver, rv.data)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	rv.len = rv.decoder.getLen()
 | |
| 
 | |
| 	return rv, nil
 | |
| }
 | |
| 
 | |
| // Contains returns true if this FST contains the specified key.
 | |
| func (f *FST) Contains(val []byte) (bool, error) {
 | |
| 	_, exists, err := f.Get(val)
 | |
| 	return exists, err
 | |
| }
 | |
| 
 | |
| // Get returns the value associated with the key.  NOTE: a value of zero
 | |
| // does not imply the key does not exist, you must consult the second
 | |
| // return value as well.
 | |
| func (f *FST) Get(input []byte) (uint64, bool, error) {
 | |
| 	return f.get(input, nil)
 | |
| }
 | |
| 
 | |
| func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) {
 | |
| 	var total uint64
 | |
| 	curr := f.decoder.getRoot()
 | |
| 	state, err := f.decoder.stateAt(curr, prealloc)
 | |
| 	if err != nil {
 | |
| 		return 0, false, err
 | |
| 	}
 | |
| 	for _, c := range input {
 | |
| 		_, curr, output := state.TransitionFor(c)
 | |
| 		if curr == noneAddr {
 | |
| 			return 0, false, nil
 | |
| 		}
 | |
| 
 | |
| 		state, err = f.decoder.stateAt(curr, state)
 | |
| 		if err != nil {
 | |
| 			return 0, false, err
 | |
| 		}
 | |
| 
 | |
| 		total += output
 | |
| 	}
 | |
| 
 | |
| 	if state.Final() {
 | |
| 		total += state.FinalOutput()
 | |
| 		return total, true, nil
 | |
| 	}
 | |
| 	return 0, false, nil
 | |
| }
 | |
| 
 | |
| // Version returns the encoding version used by this FST instance.
 | |
| func (f *FST) Version() int {
 | |
| 	return f.ver
 | |
| }
 | |
| 
 | |
| // Len returns the number of entries in this FST instance.
 | |
| func (f *FST) Len() int {
 | |
| 	return f.len
 | |
| }
 | |
| 
 | |
| // Type returns the type of this FST instance.
 | |
| func (f *FST) Type() int {
 | |
| 	return f.typ
 | |
| }
 | |
| 
 | |
| // Close will unmap any mmap'd data (if managed by vellum) and it will close
 | |
| // the backing file (if managed by vellum).  You MUST call Close() for any
 | |
| // FST instance that is created.
 | |
| func (f *FST) Close() error {
 | |
| 	if f.f != nil {
 | |
| 		err := f.f.Close()
 | |
| 		if err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 	f.data = nil
 | |
| 	f.decoder = nil
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // Start returns the start state of this Automaton
 | |
| func (f *FST) Start() int {
 | |
| 	return f.decoder.getRoot()
 | |
| }
 | |
| 
 | |
| // IsMatch returns if this state is a matching state in this Automaton
 | |
| func (f *FST) IsMatch(addr int) bool {
 | |
| 	match, _ := f.IsMatchWithVal(addr)
 | |
| 	return match
 | |
| }
 | |
| 
 | |
| // CanMatch returns if this state can ever transition to a matching state
 | |
| // in this Automaton
 | |
| func (f *FST) CanMatch(addr int) bool {
 | |
| 	if addr == noneAddr {
 | |
| 		return false
 | |
| 	}
 | |
| 	return true
 | |
| }
 | |
| 
 | |
| // WillAlwaysMatch returns if from this state the Automaton will always
 | |
| // be in a matching state
 | |
| func (f *FST) WillAlwaysMatch(int) bool {
 | |
| 	return false
 | |
| }
 | |
| 
 | |
| // Accept returns the next state for this Automaton on input of byte b
 | |
| func (f *FST) Accept(addr int, b byte) int {
 | |
| 	next, _ := f.AcceptWithVal(addr, b)
 | |
| 	return next
 | |
| }
 | |
| 
 | |
| // IsMatchWithVal returns if this state is a matching state in this Automaton
 | |
| // and also returns the final output value for this state
 | |
| func (f *FST) IsMatchWithVal(addr int) (bool, uint64) {
 | |
| 	s, err := f.decoder.stateAt(addr, nil)
 | |
| 	if err != nil {
 | |
| 		return false, 0
 | |
| 	}
 | |
| 	return s.Final(), s.FinalOutput()
 | |
| }
 | |
| 
 | |
| // AcceptWithVal returns the next state for this Automaton on input of byte b
 | |
| // and also returns the output value for the transition
 | |
| func (f *FST) AcceptWithVal(addr int, b byte) (int, uint64) {
 | |
| 	s, err := f.decoder.stateAt(addr, nil)
 | |
| 	if err != nil {
 | |
| 		return noneAddr, 0
 | |
| 	}
 | |
| 	_, next, output := s.TransitionFor(b)
 | |
| 	return next, output
 | |
| }
 | |
| 
 | |
| // Iterator returns a new Iterator capable of enumerating the key/value pairs
 | |
| // between the provided startKeyInclusive and endKeyExclusive.
 | |
| func (f *FST) Iterator(startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
 | |
| 	return newIterator(f, startKeyInclusive, endKeyExclusive, nil)
 | |
| }
 | |
| 
 | |
| // Search returns a new Iterator capable of enumerating the key/value pairs
 | |
| // between the provided startKeyInclusive and endKeyExclusive that also
 | |
| // satisfy the provided automaton.
 | |
| func (f *FST) Search(aut Automaton, startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
 | |
| 	return newIterator(f, startKeyInclusive, endKeyExclusive, aut)
 | |
| }
 | |
| 
 | |
| // Debug is only intended for debug purposes, it simply asks the underlying
 | |
| // decoder visit each state, and pass it to the provided callback.
 | |
| func (f *FST) Debug(callback func(int, interface{}) error) error {
 | |
| 
 | |
| 	addr := f.decoder.getRoot()
 | |
| 	set := bitset.New(uint(addr))
 | |
| 	stack := addrStack{addr}
 | |
| 
 | |
| 	stateNumber := 0
 | |
| 	stack, addr = stack[:len(stack)-1], stack[len(stack)-1]
 | |
| 	for addr != noneAddr {
 | |
| 		if set.Test(uint(addr)) {
 | |
| 			stack, addr = stack.Pop()
 | |
| 			continue
 | |
| 		}
 | |
| 		set.Set(uint(addr))
 | |
| 		state, err := f.decoder.stateAt(addr, nil)
 | |
| 		if err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 		err = callback(stateNumber, state)
 | |
| 		if err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 		for i := 0; i < state.NumTransitions(); i++ {
 | |
| 			tchar := state.TransitionAt(i)
 | |
| 			_, dest, _ := state.TransitionFor(tchar)
 | |
| 			stack = append(stack, dest)
 | |
| 		}
 | |
| 		stateNumber++
 | |
| 		stack, addr = stack.Pop()
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| type addrStack []int
 | |
| 
 | |
| func (a addrStack) Pop() (addrStack, int) {
 | |
| 	l := len(a)
 | |
| 	if l < 1 {
 | |
| 		return a, noneAddr
 | |
| 	}
 | |
| 	return a[:l-1], a[l-1]
 | |
| }
 | |
| 
 | |
| // Reader() returns a Reader instance that a single thread may use to
 | |
| // retrieve data from the FST
 | |
| func (f *FST) Reader() (*Reader, error) {
 | |
| 	return &Reader{f: f}, nil
 | |
| }
 | |
| 
 | |
| func (f *FST) GetMinKey() ([]byte, error) {
 | |
| 	var rv []byte
 | |
| 
 | |
| 	curr := f.decoder.getRoot()
 | |
| 	state, err := f.decoder.stateAt(curr, nil)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	for !state.Final() {
 | |
| 		nextTrans := state.TransitionAt(0)
 | |
| 		_, curr, _ = state.TransitionFor(nextTrans)
 | |
| 		state, err = f.decoder.stateAt(curr, state)
 | |
| 		if err != nil {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 
 | |
| 		rv = append(rv, nextTrans)
 | |
| 	}
 | |
| 
 | |
| 	return rv, nil
 | |
| }
 | |
| 
 | |
| func (f *FST) GetMaxKey() ([]byte, error) {
 | |
| 	var rv []byte
 | |
| 
 | |
| 	curr := f.decoder.getRoot()
 | |
| 	state, err := f.decoder.stateAt(curr, nil)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	for state.NumTransitions() > 0 {
 | |
| 		nextTrans := state.TransitionAt(state.NumTransitions() - 1)
 | |
| 		_, curr, _ = state.TransitionFor(nextTrans)
 | |
| 		state, err = f.decoder.stateAt(curr, state)
 | |
| 		if err != nil {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 
 | |
| 		rv = append(rv, nextTrans)
 | |
| 	}
 | |
| 
 | |
| 	return rv, nil
 | |
| }
 | |
| 
 | |
| // A Reader is meant for a single threaded use
 | |
| type Reader struct {
 | |
| 	f        *FST
 | |
| 	prealloc fstStateV1
 | |
| }
 | |
| 
 | |
| func (r *Reader) Get(input []byte) (uint64, bool, error) {
 | |
| 	return r.f.get(input, &r.prealloc)
 | |
| }
 |