mirror of
				https://codeberg.org/forgejo/forgejo.git
				synced 2025-11-04 08:21:11 +00:00 
			
		
		
		
	* Dump: Use mholt/archive/v3 to support tar including many compressions Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Allow dump output to stdout Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Fixed bug present since #6677 where SessionConfig.Provider is never "file" Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never pack RepoRootPath, LFS.ContentPath and LogRootPath when they are below AppDataPath Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: also dump LFS (fixes #10058) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never dump CustomPath if CustomPath is a subdir of or equal to AppDataPath (fixes #10365) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Use log.Info instead of fmt.Fprintf Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * import ordering * make fmt Co-authored-by: zeripath <art27@cantab.net> Co-authored-by: techknowlogick <techknowlogick@gitea.io> Co-authored-by: Matti R <matti@mdranta.net>
		
			
				
	
	
		
			374 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
			
		
		
	
	
			374 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
// Copyright 2015, Joe Tsai. All rights reserved.
 | 
						|
// Use of this source code is governed by a BSD-style
 | 
						|
// license that can be found in the LICENSE.md file.
 | 
						|
 | 
						|
package bzip2
 | 
						|
 | 
						|
import (
 | 
						|
	"io"
 | 
						|
 | 
						|
	"github.com/dsnet/compress/internal"
 | 
						|
	"github.com/dsnet/compress/internal/errors"
 | 
						|
	"github.com/dsnet/compress/internal/prefix"
 | 
						|
)
 | 
						|
 | 
						|
// Bounds on the number of prefix trees.
const (
	minNumTrees = 2
	maxNumTrees = 6
)

// Size limits that apply to the prefix codes themselves.
const (
	// maxPrefixBits is the widest bit-length a single prefix code may have.
	maxPrefixBits = 20

	// maxNumSyms is the largest number of symbols the alphabet may contain.
	maxNumSyms = 256 + 2

	// numBlockSyms is the size of a block.
	// NOTE(review): the previous comment called this a byte count while the
	// name says symbols; it is not used in this part of the file, so confirm
	// the unit at the use site.
	numBlockSyms = 50
)
 | 
						|
 | 
						|
// encSel and decSel are used to handle the prefix encoding for tree selectors.
 | 
						|
// The prefix encoding is as follows:
 | 
						|
//
 | 
						|
//	Code         TreeIdx
 | 
						|
//	0        <=> 0
 | 
						|
//	10       <=> 1
 | 
						|
//	110      <=> 2
 | 
						|
//	1110     <=> 3
 | 
						|
//	11110    <=> 4
 | 
						|
//	111110   <=> 5
 | 
						|
//	111111   <=> 6	Invalid tree index, so should fail
 | 
						|
//
 | 
						|
var encSel, decSel = func() (e prefix.Encoder, d prefix.Decoder) {
 | 
						|
	var selCodes [maxNumTrees + 1]prefix.PrefixCode
 | 
						|
	for i := range selCodes {
 | 
						|
		selCodes[i] = prefix.PrefixCode{Sym: uint32(i), Len: uint32(i + 1)}
 | 
						|
	}
 | 
						|
	selCodes[maxNumTrees] = prefix.PrefixCode{Sym: maxNumTrees, Len: maxNumTrees}
 | 
						|
	prefix.GeneratePrefixes(selCodes[:])
 | 
						|
	e.Init(selCodes[:])
 | 
						|
	d.Init(selCodes[:])
 | 
						|
	return
 | 
						|
}()
 | 
						|
 | 
						|
// prefixReader embeds prefix.Reader and layers the package's own reading
// helpers (Init, ReadBitsBE64, ReadPrefixCodes) on top of it.
type prefixReader struct {
	prefix.Reader
}
 | 
						|
 | 
						|
func (pr *prefixReader) Init(r io.Reader) {
 | 
						|
	pr.Reader.Init(r, true)
 | 
						|
}
 | 
						|
 | 
						|
func (pr *prefixReader) ReadBitsBE64(nb uint) uint64 {
 | 
						|
	if nb <= 32 {
 | 
						|
		v := uint32(pr.ReadBits(nb))
 | 
						|
		return uint64(internal.ReverseUint32N(v, nb))
 | 
						|
	}
 | 
						|
	v0 := internal.ReverseUint32(uint32(pr.ReadBits(32)))
 | 
						|
	v1 := internal.ReverseUint32(uint32(pr.ReadBits(nb - 32)))
 | 
						|
	v := uint64(v0)<<32 | uint64(v1)
 | 
						|
	return v >> (64 - nb)
 | 
						|
}
 | 
						|
 | 
						|
func (pr *prefixReader) ReadPrefixCodes(codes []prefix.PrefixCodes, trees []prefix.Decoder) {
 | 
						|
	for i, pc := range codes {
 | 
						|
		clen := int(pr.ReadBitsBE64(5))
 | 
						|
		sum := 1 << maxPrefixBits
 | 
						|
		for sym := range pc {
 | 
						|
			for {
 | 
						|
				if clen < 1 || clen > maxPrefixBits {
 | 
						|
					panicf(errors.Corrupted, "invalid prefix bit-length: %d", clen)
 | 
						|
				}
 | 
						|
 | 
						|
				b, ok := pr.TryReadBits(1)
 | 
						|
				if !ok {
 | 
						|
					b = pr.ReadBits(1)
 | 
						|
				}
 | 
						|
				if b == 0 {
 | 
						|
					break
 | 
						|
				}
 | 
						|
 | 
						|
				b, ok = pr.TryReadBits(1)
 | 
						|
				if !ok {
 | 
						|
					b = pr.ReadBits(1)
 | 
						|
				}
 | 
						|
				clen -= int(b*2) - 1 // +1 or -1
 | 
						|
			}
 | 
						|
			pc[sym] = prefix.PrefixCode{Sym: uint32(sym), Len: uint32(clen)}
 | 
						|
			sum -= (1 << maxPrefixBits) >> uint(clen)
 | 
						|
		}
 | 
						|
 | 
						|
		if sum == 0 {
 | 
						|
			// Fast path, but only handles complete trees.
 | 
						|
			if err := prefix.GeneratePrefixes(pc); err != nil {
 | 
						|
				errors.Panic(err) // Using complete trees; should never fail
 | 
						|
			}
 | 
						|
		} else {
 | 
						|
			// Slow path, but handles anything.
 | 
						|
			pc = handleDegenerateCodes(pc) // Never fails, but may fail later
 | 
						|
			codes[i] = pc
 | 
						|
		}
 | 
						|
		trees[i].Init(pc)
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// prefixWriter embeds prefix.Writer and layers the package's own writing
// helpers (Init, WriteBitsBE64, WritePrefixCodes) on top of it.
type prefixWriter struct {
	prefix.Writer
}
 | 
						|
 | 
						|
func (pw *prefixWriter) Init(w io.Writer) {
 | 
						|
	pw.Writer.Init(w, true)
 | 
						|
}
 | 
						|
 | 
						|
func (pw *prefixWriter) WriteBitsBE64(v uint64, nb uint) {
 | 
						|
	if nb <= 32 {
 | 
						|
		v := internal.ReverseUint32N(uint32(v), nb)
 | 
						|
		pw.WriteBits(uint(v), nb)
 | 
						|
		return
 | 
						|
	}
 | 
						|
	v <<= (64 - nb)
 | 
						|
	v0 := internal.ReverseUint32(uint32(v >> 32))
 | 
						|
	v1 := internal.ReverseUint32(uint32(v))
 | 
						|
	pw.WriteBits(uint(v0), 32)
 | 
						|
	pw.WriteBits(uint(v1), nb-32)
 | 
						|
	return
 | 
						|
}
 | 
						|
 | 
						|
func (pw *prefixWriter) WritePrefixCodes(codes []prefix.PrefixCodes, trees []prefix.Encoder) {
 | 
						|
	for i, pc := range codes {
 | 
						|
		if err := prefix.GeneratePrefixes(pc); err != nil {
 | 
						|
			errors.Panic(err) // Using complete trees; should never fail
 | 
						|
		}
 | 
						|
		trees[i].Init(pc)
 | 
						|
 | 
						|
		clen := int(pc[0].Len)
 | 
						|
		pw.WriteBitsBE64(uint64(clen), 5)
 | 
						|
		for _, c := range pc {
 | 
						|
			for int(c.Len) < clen {
 | 
						|
				pw.WriteBits(3, 2) // 11
 | 
						|
				clen--
 | 
						|
			}
 | 
						|
			for int(c.Len) > clen {
 | 
						|
				pw.WriteBits(1, 2) // 10
 | 
						|
				clen++
 | 
						|
			}
 | 
						|
			pw.WriteBits(0, 1)
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// handleDegenerateCodes converts a degenerate tree into a canonical tree.
 | 
						|
//
 | 
						|
// For example, when the input is an under-subscribed tree:
 | 
						|
//	input:  []PrefixCode{
 | 
						|
//		{Sym: 0, Len: 3},
 | 
						|
//		{Sym: 1, Len: 4},
 | 
						|
//		{Sym: 2, Len: 3},
 | 
						|
//	}
 | 
						|
//	output: []PrefixCode{
 | 
						|
//		{Sym:   0, Len: 3, Val:  0}, //  000
 | 
						|
//		{Sym:   1, Len: 4, Val:  2}, // 0010
 | 
						|
//		{Sym:   2, Len: 3, Val:  4}, //  100
 | 
						|
//		{Sym: 258, Len: 4, Val: 10}, // 1010
 | 
						|
//		{Sym: 259, Len: 3, Val:  6}, //  110
 | 
						|
//		{Sym: 260, Len: 1, Val:  1}, //    1
 | 
						|
//	}
 | 
						|
//
 | 
						|
// For example, when the input is an over-subscribed tree:
 | 
						|
//	input:  []PrefixCode{
 | 
						|
//		{Sym: 0, Len: 1},
 | 
						|
//		{Sym: 1, Len: 3},
 | 
						|
//		{Sym: 2, Len: 4},
 | 
						|
//		{Sym: 3, Len: 3},
 | 
						|
//		{Sym: 4, Len: 2},
 | 
						|
//	}
 | 
						|
//	output: []PrefixCode{
 | 
						|
//		{Sym: 0, Len: 1, Val: 0}, //   0
 | 
						|
//		{Sym: 1, Len: 3, Val: 3}, // 011
 | 
						|
//		{Sym: 3, Len: 3, Val: 7}, // 111
 | 
						|
//		{Sym: 4, Len: 2, Val: 1}, //  01
 | 
						|
//	}
 | 
						|
func handleDegenerateCodes(codes prefix.PrefixCodes) prefix.PrefixCodes {
 | 
						|
	// Since there is no formal definition for the BZip2 format, there is no
 | 
						|
	// specification that says that the code lengths must form a complete
 | 
						|
	// prefix tree (IE: it is neither over-subscribed nor under-subscribed).
 | 
						|
	// Thus, the original C implementation becomes the reference for how prefix
 | 
						|
	// decoding is done in these edge cases. Unfortunately, the C version does
 | 
						|
	// not error when an invalid tree is used, but rather allows decoding to
 | 
						|
	// continue and only errors if some bit pattern happens to cause an error.
 | 
						|
	// Thus, it is possible for an invalid tree to end up decoding an input
 | 
						|
	// "properly" so long as invalid bit patterns are not present. In order to
 | 
						|
	// replicate this non-specified behavior, we use a ported version of the
 | 
						|
	// C code to generate the codes as a valid canonical tree by substituting
 | 
						|
	// invalid nodes with invalid symbols.
 | 
						|
	//
 | 
						|
	// ====================================================
 | 
						|
	// This program, "bzip2", the associated library "libbzip2", and all
 | 
						|
	// documentation, are copyright (C) 1996-2010 Julian R Seward.  All
 | 
						|
	// rights reserved.
 | 
						|
	//
 | 
						|
	// Redistribution and use in source and binary forms, with or without
 | 
						|
	// modification, are permitted provided that the following conditions
 | 
						|
	// are met:
 | 
						|
	//
 | 
						|
	// 1. Redistributions of source code must retain the above copyright
 | 
						|
	//    notice, this list of conditions and the following disclaimer.
 | 
						|
	//
 | 
						|
	// 2. The origin of this software must not be misrepresented; you must
 | 
						|
	//    not claim that you wrote the original software.  If you use this
 | 
						|
	//    software in a product, an acknowledgment in the product
 | 
						|
	//    documentation would be appreciated but is not required.
 | 
						|
	//
 | 
						|
	// 3. Altered source versions must be plainly marked as such, and must
 | 
						|
	//    not be misrepresented as being the original software.
 | 
						|
	//
 | 
						|
	// 4. The name of the author may not be used to endorse or promote
 | 
						|
	//    products derived from this software without specific prior written
 | 
						|
	//    permission.
 | 
						|
	//
 | 
						|
	// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 | 
						|
	// OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 | 
						|
	// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
						|
	// ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 | 
						|
	// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
						|
	// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 | 
						|
	// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 | 
						|
	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 | 
						|
	// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 | 
						|
	// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 | 
						|
	// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
						|
	//
 | 
						|
	// Julian Seward, jseward@bzip.org
 | 
						|
	// bzip2/libbzip2 version 1.0.6 of 6 September 2010
 | 
						|
	// ====================================================
 | 
						|
	var (
 | 
						|
		limits [maxPrefixBits + 2]int32
 | 
						|
		bases  [maxPrefixBits + 2]int32
 | 
						|
		perms  [maxNumSyms]int32
 | 
						|
 | 
						|
		minLen = uint32(maxPrefixBits)
 | 
						|
		maxLen = uint32(0)
 | 
						|
	)
 | 
						|
 | 
						|
	const (
 | 
						|
		statusOkay = iota
 | 
						|
		statusInvalid
 | 
						|
		statusNeedBits
 | 
						|
		statusMaxBits
 | 
						|
	)
 | 
						|
 | 
						|
	// createTables is the BZ2_hbCreateDecodeTables function from the C code.
 | 
						|
	createTables := func(codes []prefix.PrefixCode) {
 | 
						|
		for _, c := range codes {
 | 
						|
			if c.Len > maxLen {
 | 
						|
				maxLen = c.Len
 | 
						|
			}
 | 
						|
			if c.Len < minLen {
 | 
						|
				minLen = c.Len
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		var pp int
 | 
						|
		for i := minLen; i <= maxLen; i++ {
 | 
						|
			for j, c := range codes {
 | 
						|
				if c.Len == i {
 | 
						|
					perms[pp] = int32(j)
 | 
						|
					pp++
 | 
						|
				}
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		var vec int32
 | 
						|
		for _, c := range codes {
 | 
						|
			bases[c.Len+1]++
 | 
						|
		}
 | 
						|
		for i := 1; i < len(bases); i++ {
 | 
						|
			bases[i] += bases[i-1]
 | 
						|
		}
 | 
						|
		for i := minLen; i <= maxLen; i++ {
 | 
						|
			vec += bases[i+1] - bases[i]
 | 
						|
			limits[i] = vec - 1
 | 
						|
			vec <<= 1
 | 
						|
		}
 | 
						|
		for i := minLen + 1; i <= maxLen; i++ {
 | 
						|
			bases[i] = ((limits[i-1] + 1) << 1) - bases[i]
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	// getSymbol is the GET_MTF_VAL macro from the C code.
 | 
						|
	getSymbol := func(c prefix.PrefixCode) (uint32, int) {
 | 
						|
		v := internal.ReverseUint32(c.Val)
 | 
						|
		n := c.Len
 | 
						|
 | 
						|
		zn := minLen
 | 
						|
		if zn > n {
 | 
						|
			return 0, statusNeedBits
 | 
						|
		}
 | 
						|
		zvec := int32(v >> (32 - zn))
 | 
						|
		v <<= zn
 | 
						|
		for {
 | 
						|
			if zn > maxLen {
 | 
						|
				return 0, statusMaxBits
 | 
						|
			}
 | 
						|
			if zvec <= limits[zn] {
 | 
						|
				break
 | 
						|
			}
 | 
						|
			zn++
 | 
						|
			if zn > n {
 | 
						|
				return 0, statusNeedBits
 | 
						|
			}
 | 
						|
			zvec = (zvec << 1) | int32(v>>31)
 | 
						|
			v <<= 1
 | 
						|
		}
 | 
						|
		if zvec-bases[zn] < 0 || zvec-bases[zn] >= maxNumSyms {
 | 
						|
			return 0, statusInvalid
 | 
						|
		}
 | 
						|
		return uint32(perms[zvec-bases[zn]]), statusOkay
 | 
						|
	}
 | 
						|
 | 
						|
	// Step 1: Create the prefix trees using the C algorithm.
 | 
						|
	createTables(codes)
 | 
						|
 | 
						|
	// Step 2: Starting with the shortest bit pattern, explore the whole tree.
 | 
						|
	// If tree is under-subscribed, the worst-case runtime is O(1<<maxLen).
 | 
						|
	// If tree is over-subscribed, the worst-case runtime is O(maxNumSyms).
 | 
						|
	var pcodesArr [2 * maxNumSyms]prefix.PrefixCode
 | 
						|
	pcodes := pcodesArr[:maxNumSyms]
 | 
						|
	var exploreCode func(prefix.PrefixCode) bool
 | 
						|
	exploreCode = func(c prefix.PrefixCode) (term bool) {
 | 
						|
		sym, status := getSymbol(c)
 | 
						|
		switch status {
 | 
						|
		case statusOkay:
 | 
						|
			// This code is valid, so insert it.
 | 
						|
			c.Sym = sym
 | 
						|
			pcodes[sym] = c
 | 
						|
			term = true
 | 
						|
		case statusInvalid:
 | 
						|
			// This code is invalid, so insert an invalid symbol.
 | 
						|
			c.Sym = uint32(len(pcodes))
 | 
						|
			pcodes = append(pcodes, c)
 | 
						|
			term = true
 | 
						|
		case statusNeedBits:
 | 
						|
			// This code is too short, so explore both children.
 | 
						|
			c.Len++
 | 
						|
			c0, c1 := c, c
 | 
						|
			c1.Val |= 1 << (c.Len - 1)
 | 
						|
 | 
						|
			b0 := exploreCode(c0)
 | 
						|
			b1 := exploreCode(c1)
 | 
						|
			switch {
 | 
						|
			case !b0 && b1:
 | 
						|
				c0.Sym = uint32(len(pcodes))
 | 
						|
				pcodes = append(pcodes, c0)
 | 
						|
			case !b1 && b0:
 | 
						|
				c1.Sym = uint32(len(pcodes))
 | 
						|
				pcodes = append(pcodes, c1)
 | 
						|
			}
 | 
						|
			term = b0 || b1
 | 
						|
		case statusMaxBits:
 | 
						|
			// This code is too long, so report it upstream.
 | 
						|
			term = false
 | 
						|
		}
 | 
						|
		return term // Did this code terminate?
 | 
						|
	}
 | 
						|
	exploreCode(prefix.PrefixCode{})
 | 
						|
 | 
						|
	// Step 3: Copy new sparse codes to old output codes.
 | 
						|
	codes = codes[:0]
 | 
						|
	for _, c := range pcodes {
 | 
						|
		if c.Len > 0 {
 | 
						|
			codes = append(codes, c)
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return codes
 | 
						|
}
 |