mirror of
				https://codeberg.org/forgejo/forgejo.git
				synced 2025-10-27 12:31:02 +00:00 
			
		
		
		
	- In Go 1.21 the crypto/sha256 package [got a massive improvement](https://go.dev/doc/go1.21#crypto/sha256) by utilizing the SHA instructions for AMD64 CPUs, which sha256-simd was already doing. The performance is now on par, and I think it's preferable to use the standard library rather than a package when possible.

	  ```
	  cpu: AMD Ryzen 5 3600X 6-Core Processor
	                  │  simd.txt   │               go.txt                │
	                  │   sec/op    │   sec/op     vs base                │
	  Hash/8Bytes-12    63.25n ± 1%   73.38n ± 1%  +16.02% (p=0.002 n=6)
	  Hash/64Bytes-12   98.73n ± 1%  105.30n ± 1%   +6.65% (p=0.002 n=6)
	  Hash/1K-12        567.2n ± 1%   572.8n ± 1%   +0.99% (p=0.002 n=6)
	  Hash/8K-12        4.062µ ± 1%   4.062µ ± 1%        ~ (p=0.396 n=6)
	  Hash/1M-12        512.1µ ± 0%   510.6µ ± 1%        ~ (p=0.485 n=6)
	  Hash/5M-12        2.556m ± 1%   2.564m ± 0%        ~ (p=0.093 n=6)
	  Hash/10M-12       5.112m ± 0%   5.127m ± 0%        ~ (p=0.093 n=6)
	  geomean           13.82µ        14.27µ        +3.28%

	                  │   simd.txt   │               go.txt                 │
	                  │     B/s      │     B/s       vs base                │
	  Hash/8Bytes-12    120.6Mi ± 1%   104.0Mi ± 1%  -13.81% (p=0.002 n=6)
	  Hash/64Bytes-12   618.2Mi ± 1%   579.8Mi ± 1%   -6.22% (p=0.002 n=6)
	  Hash/1K-12        1.682Gi ± 1%   1.665Gi ± 1%   -0.98% (p=0.002 n=6)
	  Hash/8K-12        1.878Gi ± 1%   1.878Gi ± 1%        ~ (p=0.310 n=6)
	  Hash/1M-12        1.907Gi ± 0%   1.913Gi ± 1%        ~ (p=0.485 n=6)
	  Hash/5M-12        1.911Gi ± 1%   1.904Gi ± 0%        ~ (p=0.093 n=6)
	  Hash/10M-12       1.910Gi ± 0%   1.905Gi ± 0%        ~ (p=0.093 n=6)
	  geomean           1.066Gi        1.032Gi        -3.18%
	  ```

	  (cherry picked from commit abd94ff5b5)
	  (cherry picked from commit 15e81637ab)

	  Conflicts:
	  	go.mod
	  	https://codeberg.org/forgejo/forgejo/pulls/1581
		
			
				
	
	
		
			163 lines
		
	
	
	
		
			4.6 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			163 lines
		
	
	
	
		
			4.6 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright 2020 The Gitea Authors. All rights reserved.
 | |
| // SPDX-License-Identifier: MIT
 | |
| 
 | |
| package lfs
 | |
| 
 | |
| import (
 | |
| 	"crypto/sha256"
 | |
| 	"encoding/hex"
 | |
| 	"errors"
 | |
| 	"hash"
 | |
| 	"io"
 | |
| 	"os"
 | |
| 
 | |
| 	"code.gitea.io/gitea/modules/log"
 | |
| 	"code.gitea.io/gitea/modules/storage"
 | |
| )
 | |
| 
 | |
| var (
 | |
| 	// ErrHashMismatch is reported when the SHA-256 digest of the content
 | |
| 	// does not match the expected OID.
 | |
| 	ErrHashMismatch = errors.New("content hash does not match OID")
 | |
| 	// ErrSizeMismatch is reported when the number of bytes read or written
 | |
| 	// differs from the expected content size.
 | |
| 	ErrSizeMismatch = errors.New("content size does not match")
 | |
| )
 | |
| 
 | |
| // ContentStore stores and retrieves LFS object content through the embedded
 | |
| // storage.ObjectStorage backend, whose methods it exposes directly.
 | |
| type ContentStore struct {
 | |
| 	storage.ObjectStorage
 | |
| }
 | |
| 
 | |
| // NewContentStore creates the default ContentStore
 | |
| func NewContentStore() *ContentStore {
 | |
| 	contentStore := &ContentStore{ObjectStorage: storage.LFS}
 | |
| 	return contentStore
 | |
| }
 | |
| 
 | |
| // Get takes a Meta object and retrieves the content from the store, returning
 | |
| // it as an io.ReadSeekCloser.
 | |
| func (s *ContentStore) Get(pointer Pointer) (storage.Object, error) {
 | |
| 	f, err := s.Open(pointer.RelativePath())
 | |
| 	if err != nil {
 | |
| 		log.Error("Whilst trying to read LFS OID[%s]: Unable to open Error: %v", pointer.Oid, err)
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	return f, err
 | |
| }
 | |
| 
 | |
| // Put takes a Meta object and an io.Reader and writes the content to the store.
 | |
| func (s *ContentStore) Put(pointer Pointer, r io.Reader) error {
 | |
| 	p := pointer.RelativePath()
 | |
| 
 | |
| 	// Wrap the provided reader with an inline hashing and size checker
 | |
| 	wrappedRd := newHashingReader(pointer.Size, pointer.Oid, r)
 | |
| 
 | |
| 	// now pass the wrapped reader to Save - if there is a size mismatch or hash mismatch then
 | |
| 	// the errors returned by the newHashingReader should percolate up to here
 | |
| 	written, err := s.Save(p, wrappedRd, pointer.Size)
 | |
| 	if err != nil {
 | |
| 		log.Error("Whilst putting LFS OID[%s]: Failed to copy to tmpPath: %s Error: %v", pointer.Oid, p, err)
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	// check again whether there is any error during the Save operation
 | |
| 	// because some errors might be ignored by the Reader's caller
 | |
| 	if wrappedRd.lastError != nil && !errors.Is(wrappedRd.lastError, io.EOF) {
 | |
| 		err = wrappedRd.lastError
 | |
| 	} else if written != pointer.Size {
 | |
| 		err = ErrSizeMismatch
 | |
| 	}
 | |
| 
 | |
| 	// if the upload failed, try to delete the file
 | |
| 	if err != nil {
 | |
| 		if errDel := s.Delete(p); errDel != nil {
 | |
| 			log.Error("Cleaning the LFS OID[%s] failed: %v", pointer.Oid, errDel)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return err
 | |
| }
 | |
| 
 | |
| // Exists returns true if the object exists in the content store.
 | |
| func (s *ContentStore) Exists(pointer Pointer) (bool, error) {
 | |
| 	_, err := s.ObjectStorage.Stat(pointer.RelativePath())
 | |
| 	if err != nil {
 | |
| 		if os.IsNotExist(err) {
 | |
| 			return false, nil
 | |
| 		}
 | |
| 		return false, err
 | |
| 	}
 | |
| 	return true, nil
 | |
| }
 | |
| 
 | |
| // Verify returns true if the object exists in the content store and size is correct.
 | |
| func (s *ContentStore) Verify(pointer Pointer) (bool, error) {
 | |
| 	p := pointer.RelativePath()
 | |
| 	fi, err := s.ObjectStorage.Stat(p)
 | |
| 	if os.IsNotExist(err) || (err == nil && fi.Size() != pointer.Size) {
 | |
| 		return false, nil
 | |
| 	} else if err != nil {
 | |
| 		log.Error("Unable stat file: %s for LFS OID[%s] Error: %v", p, pointer.Oid, err)
 | |
| 		return false, err
 | |
| 	}
 | |
| 
 | |
| 	return true, nil
 | |
| }
 | |
| 
 | |
| // ReadMetaObject will read a git_model.LFSMetaObject and return a reader
 | |
| func ReadMetaObject(pointer Pointer) (io.ReadSeekCloser, error) {
 | |
| 	contentStore := NewContentStore()
 | |
| 	return contentStore.Get(pointer)
 | |
| }
 | |
| 
 | |
| type hashingReader struct {
 | |
| 	internal     io.Reader
 | |
| 	currentSize  int64
 | |
| 	expectedSize int64
 | |
| 	hash         hash.Hash
 | |
| 	expectedHash string
 | |
| 	lastError    error
 | |
| }
 | |
| 
 | |
| // recordError records the last error during the Save operation
 | |
| // Some callers of the Reader doesn't respect the returned "err"
 | |
| // For example, MinIO's Put will ignore errors if the written size could equal to expected size
 | |
| // So we must remember the error by ourselves,
 | |
| // and later check again whether ErrSizeMismatch or ErrHashMismatch occurs during the Save operation
 | |
| func (r *hashingReader) recordError(err error) error {
 | |
| 	r.lastError = err
 | |
| 	return err
 | |
| }
 | |
| 
 | |
| func (r *hashingReader) Read(b []byte) (int, error) {
 | |
| 	n, err := r.internal.Read(b)
 | |
| 
 | |
| 	if n > 0 {
 | |
| 		r.currentSize += int64(n)
 | |
| 		wn, werr := r.hash.Write(b[:n])
 | |
| 		if wn != n || werr != nil {
 | |
| 			return n, r.recordError(werr)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if errors.Is(err, io.EOF) || r.currentSize >= r.expectedSize {
 | |
| 		if r.currentSize != r.expectedSize {
 | |
| 			return n, r.recordError(ErrSizeMismatch)
 | |
| 		}
 | |
| 
 | |
| 		shaStr := hex.EncodeToString(r.hash.Sum(nil))
 | |
| 		if shaStr != r.expectedHash {
 | |
| 			return n, r.recordError(ErrHashMismatch)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return n, r.recordError(err)
 | |
| }
 | |
| 
 | |
| func newHashingReader(expectedSize int64, expectedHash string, reader io.Reader) *hashingReader {
 | |
| 	return &hashingReader{
 | |
| 		internal:     reader,
 | |
| 		expectedSize: expectedSize,
 | |
| 		expectedHash: expectedHash,
 | |
| 		hash:         sha256.New(),
 | |
| 	}
 | |
| }
 |