mirror of
				https://codeberg.org/forgejo/forgejo.git
				synced 2025-10-31 14:31:02 +00:00 
			
		
		
		
	Speed up enry.IsVendor (#15213)
		
	`enry.IsVendor` is rather slow as it simply iterates across all regexps. This PR adjusts the regexps to combine them to make this process a little quicker. Related #15143 Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
		
					parent
					
						
							
								43fb4921e3
							
						
					
				
			
			
				commit
				
					
						ff460ca74d
					
				
			
		
					 6 changed files with 116 additions and 4 deletions
				
			
		
							
								
								
									
										70
									
								
								modules/analyze/vendor.go
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								modules/analyze/vendor.go
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,70 @@ | |||
| // Copyright 2021 The Gitea Authors. All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
| 
 | ||||
| package analyze | ||||
| 
 | ||||
| import ( | ||||
| 	"regexp" | ||||
| 	"sort" | ||||
| 	"strings" | ||||
| 
 | ||||
| 	"github.com/go-enry/go-enry/v2/data" | ||||
| ) | ||||
| 
 | ||||
| var isVendorRegExp *regexp.Regexp | ||||
| 
 | ||||
| func init() { | ||||
| 	matchers := data.VendorMatchers | ||||
| 
 | ||||
| 	caretStrings := make([]string, 0, 10) | ||||
| 	caretShareStrings := make([]string, 0, 10) | ||||
| 
 | ||||
| 	matcherStrings := make([]string, 0, len(matchers)) | ||||
| 	for _, matcher := range matchers { | ||||
| 		str := matcher.String() | ||||
| 		if str[0] == '^' { | ||||
| 			caretStrings = append(caretStrings, str[1:]) | ||||
| 		} else if str[0:5] == "(^|/)" { | ||||
| 			caretShareStrings = append(caretShareStrings, str[5:]) | ||||
| 		} else { | ||||
| 			matcherStrings = append(matcherStrings, str) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	sort.Strings(caretShareStrings) | ||||
| 	sort.Strings(caretStrings) | ||||
| 	sort.Strings(matcherStrings) | ||||
| 
 | ||||
| 	sb := &strings.Builder{} | ||||
| 	sb.WriteString("(?:^(?:") | ||||
| 	sb.WriteString(caretStrings[0]) | ||||
| 	for _, matcher := range caretStrings[1:] { | ||||
| 		sb.WriteString(")|(?:") | ||||
| 		sb.WriteString(matcher) | ||||
| 	} | ||||
| 	sb.WriteString("))") | ||||
| 	sb.WriteString("|") | ||||
| 	sb.WriteString("(?:(?:^|/)(?:") | ||||
| 	sb.WriteString(caretShareStrings[0]) | ||||
| 	for _, matcher := range caretShareStrings[1:] { | ||||
| 		sb.WriteString(")|(?:") | ||||
| 		sb.WriteString(matcher) | ||||
| 	} | ||||
| 	sb.WriteString("))") | ||||
| 	sb.WriteString("|") | ||||
| 	sb.WriteString("(?:") | ||||
| 	sb.WriteString(matcherStrings[0]) | ||||
| 	for _, matcher := range matcherStrings[1:] { | ||||
| 		sb.WriteString(")|(?:") | ||||
| 		sb.WriteString(matcher) | ||||
| 	} | ||||
| 	sb.WriteString(")") | ||||
| 	combined := sb.String() | ||||
| 	isVendorRegExp = regexp.MustCompile(combined) | ||||
| } | ||||
| 
 | ||||
| // IsVendor returns whether or not path is a vendor path. | ||||
| func IsVendor(path string) bool { | ||||
| 	return isVendorRegExp.MatchString(path) | ||||
| } | ||||
							
								
								
									
										42
									
								
								modules/analyze/vendor_test.go
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								modules/analyze/vendor_test.go
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,42 @@ | |||
| // Copyright 2021 The Gitea Authors. All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
| 
 | ||||
| package analyze | ||||
| 
 | ||||
| import "testing" | ||||
| 
 | ||||
| func TestIsVendor(t *testing.T) { | ||||
| 	tests := []struct { | ||||
| 		path string | ||||
| 		want bool | ||||
| 	}{ | ||||
| 		{"cache/", true}, | ||||
| 		{"random/cache/", true}, | ||||
| 		{"cache", false}, | ||||
| 		{"dependencies/", true}, | ||||
| 		{"Dependencies/", true}, | ||||
| 		{"dependency/", false}, | ||||
| 		{"dist/", true}, | ||||
| 		{"dist", false}, | ||||
| 		{"random/dist/", true}, | ||||
| 		{"random/dist", false}, | ||||
| 		{"deps/", true}, | ||||
| 		{"configure", true}, | ||||
| 		{"a/configure", true}, | ||||
| 		{"config.guess", true}, | ||||
| 		{"config.guess/", false}, | ||||
| 		{".vscode/", true}, | ||||
| 		{"doc/_build/", true}, | ||||
| 		{"a/docs/_build/", true}, | ||||
| 		{"a/dasdocs/_build-vsdoc.js", true}, | ||||
| 		{"a/dasdocs/_build-vsdoc.j", false}, | ||||
| 	} | ||||
| 	for _, tt := range tests { | ||||
| 		t.Run(tt.path, func(t *testing.T) { | ||||
| 			if got := IsVendor(tt.path); got != tt.want { | ||||
| 				t.Errorf("IsVendor() = %v, want %v", got, tt.want) | ||||
| 			} | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
|  | @ -43,7 +43,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err | |||
| 
 | ||||
| 	sizes := make(map[string]int64) | ||||
| 	err = tree.Files().ForEach(func(f *object.File) error { | ||||
| 		if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) || | ||||
| 		if f.Size == 0 || analyze.IsVendor(f.Name) || enry.IsDotFile(f.Name) || | ||||
| 			enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) { | ||||
| 			return nil | ||||
| 		} | ||||
|  |  | |||
|  | @ -67,7 +67,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err | |||
| 	for _, f := range entries { | ||||
| 		contentBuf.Reset() | ||||
| 		content = contentBuf.Bytes() | ||||
| 		if f.Size() == 0 || enry.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) || | ||||
| 		if f.Size() == 0 || analyze.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) || | ||||
| 			enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) { | ||||
| 			continue | ||||
| 		} | ||||
|  |  | |||
|  | @ -178,7 +178,7 @@ func NewBleveIndexer(indexDir string) (*BleveIndexer, bool, error) { | |||
| 
 | ||||
| func (b *BleveIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error { | ||||
| 	// Ignore vendored files in code search | ||||
| 	if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) { | ||||
| 	if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { | ||||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
|  | @ -177,7 +177,7 @@ func (b *ElasticSearchIndexer) init() (bool, error) { | |||
| 
 | ||||
| func (b *ElasticSearchIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) { | ||||
| 	// Ignore vendored files in code search | ||||
| 	if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) { | ||||
| 	if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { | ||||
| 		return nil, nil | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue