mirror of
				https://codeberg.org/forgejo/forgejo.git
				synced 2025-10-24 19:12:24 +00:00 
			
		
		
		
	* Dump: Use mholt/archive/v3 to support tar including many compressions Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Allow dump output to stdout Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Fixed bug present since #6677 where SessionConfig.Provider is never "file" Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never pack RepoRootPath, LFS.ContentPath and LogRootPath when they are below AppDataPath Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: also dump LFS (fixes #10058) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never dump CustomPath if CustomPath is a subdir of or equal to AppDataPath (fixes #10365) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Use log.Info instead of fmt.Fprintf Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * import ordering * make fmt Co-authored-by: zeripath <art27@cantab.net> Co-authored-by: techknowlogick <techknowlogick@gitea.io> Co-authored-by: Matti R <matti@mdranta.net>
		
			
				
	
	
		
			344 lines
		
	
	
	
		
			9.2 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
			
		
		
	
	
			344 lines
		
	
	
	
		
			9.2 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
| package brotli
 | |
| 
 | |
| import (
 | |
| 	"encoding/binary"
 | |
| 	"fmt"
 | |
| )
 | |
| 
 | |
| type hasherCommon struct {
 | |
| 	params           hasherParams
 | |
| 	is_prepared_     bool
 | |
| 	dict_num_lookups uint
 | |
| 	dict_num_matches uint
 | |
| }
 | |
| 
 | |
| func (h *hasherCommon) Common() *hasherCommon {
 | |
| 	return h
 | |
| }
 | |
| 
 | |
| type hasherHandle interface {
 | |
| 	Common() *hasherCommon
 | |
| 	Initialize(params *encoderParams)
 | |
| 	Prepare(one_shot bool, input_size uint, data []byte)
 | |
| 	StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint)
 | |
| 	HashTypeLength() uint
 | |
| 	StoreLookahead() uint
 | |
| 	PrepareDistanceCache(distance_cache []int)
 | |
| 	FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult)
 | |
| 	StoreRange(data []byte, mask uint, ix_start uint, ix_end uint)
 | |
| 	Store(data []byte, mask uint, ix uint)
 | |
| }
 | |
| 
 | |
// score_t is an unsigned score value.
type score_t uint

const (
	// kCutoffTransformsCount is the number of entries packed into
	// kCutoffTransforms.
	kCutoffTransformsCount uint32 = 10

	// kCutoffTransforms packs ten 6-bit values, entry 0 in the lowest bits.
	// For a cut of i bytes the transform id is (i << 2) plus entry i, which
	// gives the ids in the first row below; the second row shows the
	// "4*i + entry" decomposition.
	/*   0,  12,   27,    23,    42,    63,    56,    48,    59,    64 */
	/* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
	kCutoffTransforms uint64 = 0x071B520ADA2D3200
)
 | |
| 
 | |
// hasherSearchResult describes the best match found so far by a search.
type hasherSearchResult struct {
	// len is the number of matched bytes.
	len uint

	// distance is the backward distance of the match.
	distance uint

	// score is the heuristic score of the match; testStaticDictionaryItem
	// rejects candidates that score lower than the value already stored here.
	score uint

	// len_code_delta is, for static-dictionary matches, the dictionary word
	// length minus the matched length.
	len_code_delta int
}
 | |
| 
 | |
/* Hash multipliers.

   kHashMul32 has these properties:
   * The multiplier must be odd. Otherwise we may lose the highest bit.
   * No long streaks of ones or zeros.
   * There is no effort to ensure that it is a prime, the oddity is enough
     for this use.
   * The number has been tuned heuristically against compression benchmarks. */
const (
	kHashMul32 uint32 = 0x1E35A7BD

	// kHashMul64 is kHashMul32 repeated in both 32-bit halves.
	kHashMul64 uint64 = 0x1E35A7BD1E35A7BD

	// kHashMul64Long is a separate 64-bit multiplier; it is not used in this
	// part of the file.
	kHashMul64Long uint64 = 0x1FE35A7BD3579BD3
)
 | |
| 
 | |
| func hash14(data []byte) uint32 {
 | |
| 	var h uint32 = binary.LittleEndian.Uint32(data) * kHashMul32
 | |
| 
 | |
| 	/* The higher bits contain more mixture from the multiplication,
 | |
| 	   so we take our results from there. */
 | |
| 	return h >> (32 - 14)
 | |
| }
 | |
| 
 | |
// prepareDistanceCache fills the derived slots of distance_cache.
//
// When num_distances > 4, slots 4..9 receive distance_cache[0] -1, +1, -2,
// +2, -3, +3. When num_distances > 10, slots 10..15 receive the same offsets
// applied to distance_cache[1]. Slots 0..3 are never written. The slice must
// be long enough for the slots being filled.
func prepareDistanceCache(distance_cache []int, num_distances int) {
	if num_distances <= 4 {
		return
	}

	last := distance_cache[0]
	for delta := 1; delta <= 3; delta++ {
		slot := 2 + 2*delta // 4, 6, 8
		distance_cache[slot] = last - delta
		distance_cache[slot+1] = last + delta
	}

	if num_distances <= 10 {
		return
	}

	secondLast := distance_cache[1]
	for delta := 1; delta <= 3; delta++ {
		slot := 8 + 2*delta // 10, 12, 14
		distance_cache[slot] = secondLast - delta
		distance_cache[slot+1] = secondLast + delta
	}
}
 | |
| 
 | |
const (
	// literalByteScore is the score credited for every byte covered by a
	// backward reference.
	literalByteScore = 135

	// distanceBitPenalty is the score deducted per bit of
	// log2(backward distance).
	distanceBitPenalty = 30

	/* Score must be positive after applying maximal penalty. */
	scoreBase = distanceBitPenalty * 8 * 8
)
 | |
| 
 | |
| /* Usually, we always choose the longest backward reference. This function
 | |
|    allows for the exception of that rule.
 | |
| 
 | |
|    If we choose a backward reference that is further away, it will
 | |
|    usually be coded with more bits. We approximate this by assuming
 | |
|    log2(distance). If the distance can be expressed in terms of the
 | |
|    last four distances, we use some heuristic constants to estimate
 | |
|    the bits cost. For the first up to four literals we use the bit
 | |
|    cost of the literals from the literal cost model, after that we
 | |
|    use the average bit cost of the cost model.
 | |
| 
 | |
|    This function is used to sometimes discard a longer backward reference
 | |
|    when it is not much longer and the bit cost for encoding it is more
 | |
|    than the saved literals.
 | |
| 
 | |
|    backward_reference_offset MUST be positive. */
 | |
| func backwardReferenceScore(copy_length uint, backward_reference_offset uint) uint {
 | |
| 	return scoreBase + literalByteScore*uint(copy_length) - distanceBitPenalty*uint(log2FloorNonZero(backward_reference_offset))
 | |
| }
 | |
| 
 | |
| func backwardReferenceScoreUsingLastDistance(copy_length uint) uint {
 | |
| 	return literalByteScore*uint(copy_length) + scoreBase + 15
 | |
| }
 | |
| 
 | |
// backwardReferencePenaltyUsingLastDistance returns the score penalty for a
// distance short code. The penalty is 39 plus an even value in 0..14 read
// from a packed table; codes 2k and 2k+1 share an entry because only bits
// 1..3 of the code select the shift.
func backwardReferencePenaltyUsingLastDistance(distance_short_code uint) uint {
	const (
		basePenalty  uint = 39
		packedExtras uint = 0x1CA10
		evenNibble   uint = 0xE
	)
	shift := distance_short_code & evenNibble
	extra := (packedExtras >> shift) & evenNibble
	return basePenalty + extra
}
 | |
| 
 | |
| func testStaticDictionaryItem(dictionary *encoderDictionary, item uint, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult) bool {
 | |
| 	var len uint
 | |
| 	var word_idx uint
 | |
| 	var offset uint
 | |
| 	var matchlen uint
 | |
| 	var backward uint
 | |
| 	var score uint
 | |
| 	len = item & 0x1F
 | |
| 	word_idx = item >> 5
 | |
| 	offset = uint(dictionary.words.offsets_by_length[len]) + len*word_idx
 | |
| 	if len > max_length {
 | |
| 		return false
 | |
| 	}
 | |
| 
 | |
| 	matchlen = findMatchLengthWithLimit(data, dictionary.words.data[offset:], uint(len))
 | |
| 	if matchlen+uint(dictionary.cutoffTransformsCount) <= len || matchlen == 0 {
 | |
| 		return false
 | |
| 	}
 | |
| 	{
 | |
| 		var cut uint = len - matchlen
 | |
| 		var transform_id uint = (cut << 2) + uint((dictionary.cutoffTransforms>>(cut*6))&0x3F)
 | |
| 		backward = max_backward + 1 + word_idx + (transform_id << dictionary.words.size_bits_by_length[len])
 | |
| 	}
 | |
| 
 | |
| 	if backward > max_distance {
 | |
| 		return false
 | |
| 	}
 | |
| 
 | |
| 	score = backwardReferenceScore(matchlen, backward)
 | |
| 	if score < out.score {
 | |
| 		return false
 | |
| 	}
 | |
| 
 | |
| 	out.len = matchlen
 | |
| 	out.len_code_delta = int(len) - int(matchlen)
 | |
| 	out.distance = backward
 | |
| 	out.score = score
 | |
| 	return true
 | |
| }
 | |
| 
 | |
| func searchInStaticDictionary(dictionary *encoderDictionary, handle hasherHandle, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult, shallow bool) {
 | |
| 	var key uint
 | |
| 	var i uint
 | |
| 	var self *hasherCommon = handle.Common()
 | |
| 	if self.dict_num_matches < self.dict_num_lookups>>7 {
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	key = uint(hash14(data) << 1)
 | |
| 	for i = 0; ; (func() { i++; key++ })() {
 | |
| 		var tmp uint
 | |
| 		if shallow {
 | |
| 			tmp = 1
 | |
| 		} else {
 | |
| 			tmp = 2
 | |
| 		}
 | |
| 		if i >= tmp {
 | |
| 			break
 | |
| 		}
 | |
| 		var item uint = uint(dictionary.hash_table[key])
 | |
| 		self.dict_num_lookups++
 | |
| 		if item != 0 {
 | |
| 			var item_matches bool = testStaticDictionaryItem(dictionary, item, data, max_length, max_backward, max_distance, out)
 | |
| 			if item_matches {
 | |
| 				self.dict_num_matches++
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
// backwardMatch is a compact record of one match candidate.
type backwardMatch struct {
	// distance is the backward distance of the match.
	distance uint32

	// length_and_code holds the match length in the bits above bit 4 and the
	// length code in the low 5 bits; a length code of 0 means "same as the
	// length".
	length_and_code uint32
}
 | |
| 
 | |
| func initBackwardMatch(self *backwardMatch, dist uint, len uint) {
 | |
| 	self.distance = uint32(dist)
 | |
| 	self.length_and_code = uint32(len << 5)
 | |
| }
 | |
| 
 | |
| func initDictionaryBackwardMatch(self *backwardMatch, dist uint, len uint, len_code uint) {
 | |
| 	self.distance = uint32(dist)
 | |
| 	var tmp uint
 | |
| 	if len == len_code {
 | |
| 		tmp = 0
 | |
| 	} else {
 | |
| 		tmp = len_code
 | |
| 	}
 | |
| 	self.length_and_code = uint32(len<<5 | tmp)
 | |
| }
 | |
| 
 | |
| func backwardMatchLength(self *backwardMatch) uint {
 | |
| 	return uint(self.length_and_code >> 5)
 | |
| }
 | |
| 
 | |
| func backwardMatchLengthCode(self *backwardMatch) uint {
 | |
| 	var code uint = uint(self.length_and_code) & 31
 | |
| 	if code != 0 {
 | |
| 		return code
 | |
| 	} else {
 | |
| 		return backwardMatchLength(self)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func hasherReset(handle hasherHandle) {
 | |
| 	if handle == nil {
 | |
| 		return
 | |
| 	}
 | |
| 	handle.Common().is_prepared_ = false
 | |
| }
 | |
| 
 | |
| func newHasher(typ int) hasherHandle {
 | |
| 	switch typ {
 | |
| 	case 2:
 | |
| 		return &hashLongestMatchQuickly{
 | |
| 			bucketBits:    16,
 | |
| 			bucketSweep:   1,
 | |
| 			hashLen:       5,
 | |
| 			useDictionary: true,
 | |
| 		}
 | |
| 	case 3:
 | |
| 		return &hashLongestMatchQuickly{
 | |
| 			bucketBits:    16,
 | |
| 			bucketSweep:   2,
 | |
| 			hashLen:       5,
 | |
| 			useDictionary: false,
 | |
| 		}
 | |
| 	case 4:
 | |
| 		return &hashLongestMatchQuickly{
 | |
| 			bucketBits:    17,
 | |
| 			bucketSweep:   4,
 | |
| 			hashLen:       5,
 | |
| 			useDictionary: true,
 | |
| 		}
 | |
| 	case 5:
 | |
| 		return new(h5)
 | |
| 	case 6:
 | |
| 		return new(h6)
 | |
| 	case 10:
 | |
| 		return new(h10)
 | |
| 	case 35:
 | |
| 		return &hashComposite{
 | |
| 			ha: newHasher(3),
 | |
| 			hb: &hashRolling{jump: 4},
 | |
| 		}
 | |
| 	case 40:
 | |
| 		return &hashForgetfulChain{
 | |
| 			bucketBits:              15,
 | |
| 			numBanks:                1,
 | |
| 			bankBits:                16,
 | |
| 			numLastDistancesToCheck: 4,
 | |
| 		}
 | |
| 	case 41:
 | |
| 		return &hashForgetfulChain{
 | |
| 			bucketBits:              15,
 | |
| 			numBanks:                1,
 | |
| 			bankBits:                16,
 | |
| 			numLastDistancesToCheck: 10,
 | |
| 		}
 | |
| 	case 42:
 | |
| 		return &hashForgetfulChain{
 | |
| 			bucketBits:              15,
 | |
| 			numBanks:                512,
 | |
| 			bankBits:                9,
 | |
| 			numLastDistancesToCheck: 16,
 | |
| 		}
 | |
| 	case 54:
 | |
| 		return &hashLongestMatchQuickly{
 | |
| 			bucketBits:    20,
 | |
| 			bucketSweep:   4,
 | |
| 			hashLen:       7,
 | |
| 			useDictionary: false,
 | |
| 		}
 | |
| 	case 55:
 | |
| 		return &hashComposite{
 | |
| 			ha: newHasher(54),
 | |
| 			hb: &hashRolling{jump: 4},
 | |
| 		}
 | |
| 	case 65:
 | |
| 		return &hashComposite{
 | |
| 			ha: newHasher(6),
 | |
| 			hb: &hashRolling{jump: 1},
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	panic(fmt.Sprintf("unknown hasher type: %d", typ))
 | |
| }
 | |
| 
 | |
| func hasherSetup(handle *hasherHandle, params *encoderParams, data []byte, position uint, input_size uint, is_last bool) {
 | |
| 	var self hasherHandle = nil
 | |
| 	var common *hasherCommon = nil
 | |
| 	var one_shot bool = (position == 0 && is_last)
 | |
| 	if *handle == nil {
 | |
| 		chooseHasher(params, ¶ms.hasher)
 | |
| 		self = newHasher(params.hasher.type_)
 | |
| 
 | |
| 		*handle = self
 | |
| 		common = self.Common()
 | |
| 		common.params = params.hasher
 | |
| 		self.Initialize(params)
 | |
| 	}
 | |
| 
 | |
| 	self = *handle
 | |
| 	common = self.Common()
 | |
| 	if !common.is_prepared_ {
 | |
| 		self.Prepare(one_shot, input_size, data)
 | |
| 
 | |
| 		if position == 0 {
 | |
| 			common.dict_num_lookups = 0
 | |
| 			common.dict_num_matches = 0
 | |
| 		}
 | |
| 
 | |
| 		common.is_prepared_ = true
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func initOrStitchToPreviousBlock(handle *hasherHandle, data []byte, mask uint, params *encoderParams, position uint, input_size uint, is_last bool) {
 | |
| 	var self hasherHandle
 | |
| 	hasherSetup(handle, params, data, position, input_size, is_last)
 | |
| 	self = *handle
 | |
| 	self.StitchToPreviousBlock(input_size, position, data, mask)
 | |
| }
 |