mirror of
				https://codeberg.org/forgejo/forgejo.git
				synced 2025-10-30 22:11:07 +00:00 
			
		
		
		
	Improved Linguist compatibility
Recognise the `linguist-documentation` and `linguist-detectable` attributes in `.gitattributes` files, and use them in `GetLanguageStats()` to make a decision whether to include a particular file in the stats or not. This allows one more control over which files in their repositories contribute toward the language statistics, so that for a project that is mostly documentation, the language stats can reflect that. Fixes #1672. Signed-off-by: Gergely Nagy <forgejo@gergo.csillger.hu>
This commit is contained in:
		
					parent
					
						
							
								fed50cf72e
							
						
					
				
			
			
				commit
				
					
						6d4e02fe5f
					
				
			
		
					 5 changed files with 341 additions and 28 deletions
				
			
		
							
								
								
									
										276
									
								
								tests/integration/repo_lang_stats_test.go
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										276
									
								
								tests/integration/repo_lang_stats_test.go
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,276 @@ | |||
| // Copyright 2024 The Forgejo Authors c/o Codeberg e.V.. All rights reserved. | ||||
| // SPDX-License-Identifier: MIT | ||||
| 
 | ||||
| package integration | ||||
| 
 | ||||
| import ( | ||||
| 	"context" | ||||
| 	"net/url" | ||||
| 	"strings" | ||||
| 	"testing" | ||||
| 	"time" | ||||
| 
 | ||||
| 	"code.gitea.io/gitea/models/db" | ||||
| 	repo_model "code.gitea.io/gitea/models/repo" | ||||
| 	"code.gitea.io/gitea/models/unittest" | ||||
| 	user_model "code.gitea.io/gitea/models/user" | ||||
| 	"code.gitea.io/gitea/modules/git" | ||||
| 	"code.gitea.io/gitea/modules/indexer/stats" | ||||
| 	"code.gitea.io/gitea/modules/queue" | ||||
| 	repo_service "code.gitea.io/gitea/services/repository" | ||||
| 	files_service "code.gitea.io/gitea/services/repository/files" | ||||
| 	"code.gitea.io/gitea/tests" | ||||
| 
 | ||||
| 	"github.com/stretchr/testify/assert" | ||||
| ) | ||||
| 
 | ||||
| func createLangStatTestRepo(t *testing.T) (*repo_model.Repository, func()) { | ||||
| 	t.Helper() | ||||
| 
 | ||||
| 	user2 := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2}) | ||||
| 
 | ||||
| 	// Create a new repository | ||||
| 	repo, err := repo_service.CreateRepository(db.DefaultContext, user2, user2, repo_service.CreateRepoOptions{ | ||||
| 		Name:          "lang-stat-test", | ||||
| 		Description:   "minimal repo for language stats testing", | ||||
| 		AutoInit:      true, | ||||
| 		Gitignores:    "Go", | ||||
| 		License:       "MIT", | ||||
| 		Readme:        "Default", | ||||
| 		DefaultBranch: "main", | ||||
| 		IsPrivate:     false, | ||||
| 	}) | ||||
| 	assert.NoError(t, err) | ||||
| 	assert.NotEmpty(t, repo) | ||||
| 
 | ||||
| 	return repo, func() { | ||||
| 		repo_service.DeleteRepository(db.DefaultContext, user2, repo, false) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func addLangStatTestFiles(t *testing.T, repo *repo_model.Repository, contents string) string { | ||||
| 	t.Helper() | ||||
| 
 | ||||
| 	owner := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: repo.OwnerID}) | ||||
| 
 | ||||
| 	addFilesResp, err := files_service.ChangeRepoFiles(git.DefaultContext, repo, owner, &files_service.ChangeRepoFilesOptions{ | ||||
| 		Files: []*files_service.ChangeRepoFile{ | ||||
| 			{ | ||||
| 				Operation:     "create", | ||||
| 				TreePath:      ".gitattributes", | ||||
| 				ContentReader: strings.NewReader(contents), | ||||
| 			}, | ||||
| 			{ | ||||
| 				Operation:     "create", | ||||
| 				TreePath:      "docs.md", | ||||
| 				ContentReader: strings.NewReader("This **is** a `markdown` file.\n"), | ||||
| 			}, | ||||
| 			{ | ||||
| 				Operation:     "create", | ||||
| 				TreePath:      "foo.c", | ||||
| 				ContentReader: strings.NewReader(`#include <stdio.h>\nint main() {\n  printf("Hello world!\n");\n  return 0;\n}\n`), | ||||
| 			}, | ||||
| 			{ | ||||
| 				Operation:     "create", | ||||
| 				TreePath:      "foo.nib", | ||||
| 				ContentReader: strings.NewReader("Pinky promise, this is not a generated file!\n"), | ||||
| 			}, | ||||
| 			{ | ||||
| 				Operation:     "create", | ||||
| 				TreePath:      ".dot.pas", | ||||
| 				ContentReader: strings.NewReader("program Hello;\nbegin\n  writeln('Hello, world.');\nend.\n"), | ||||
| 			}, | ||||
| 			{ | ||||
| 				Operation:     "create", | ||||
| 				TreePath:      "cpplint.py", | ||||
| 				ContentReader: strings.NewReader(`#! /usr/bin/env python\n\nprint("Hello world!")\n`), | ||||
| 			}, | ||||
| 			{ | ||||
| 				Operation:     "create", | ||||
| 				TreePath:      "some-file.xml", | ||||
| 				ContentReader: strings.NewReader(`<?xml version="1.0"?>\n<foo>\n <bar>Hello</bar>\n</foo>\n`), | ||||
| 			}, | ||||
| 		}, | ||||
| 		Message:   "add files", | ||||
| 		OldBranch: "main", | ||||
| 		NewBranch: "main", | ||||
| 		Author: &files_service.IdentityOptions{ | ||||
| 			Name:  owner.Name, | ||||
| 			Email: owner.Email, | ||||
| 		}, | ||||
| 		Committer: &files_service.IdentityOptions{ | ||||
| 			Name:  owner.Name, | ||||
| 			Email: owner.Email, | ||||
| 		}, | ||||
| 		Dates: &files_service.CommitDateOptions{ | ||||
| 			Author:    time.Now(), | ||||
| 			Committer: time.Now(), | ||||
| 		}, | ||||
| 	}) | ||||
| 	assert.NoError(t, err) | ||||
| 	assert.NotEmpty(t, addFilesResp) | ||||
| 
 | ||||
| 	return addFilesResp.Commit.SHA | ||||
| } | ||||
| 
 | ||||
| func TestRepoLangStats(t *testing.T) { | ||||
| 	onGiteaRun(t, func(t *testing.T, u *url.URL) { | ||||
| 		/****************** | ||||
| 		 ** Preparations ** | ||||
| 		 ******************/ | ||||
| 		prep := func(t *testing.T, attribs string) (*repo_model.Repository, string, func()) { | ||||
| 			t.Helper() | ||||
| 
 | ||||
| 			repo, f := createLangStatTestRepo(t) | ||||
| 			sha := addLangStatTestFiles(t, repo, attribs) | ||||
| 
 | ||||
| 			return repo, sha, f | ||||
| 		} | ||||
| 
 | ||||
| 		getFreshLanguageStats := func(t *testing.T, repo *repo_model.Repository, sha string) repo_model.LanguageStatList { | ||||
| 			t.Helper() | ||||
| 
 | ||||
| 			err := stats.UpdateRepoIndexer(repo) | ||||
| 			assert.NoError(t, err) | ||||
| 
 | ||||
| 			assert.NoError(t, queue.GetManager().FlushAll(context.Background(), 10*time.Second)) | ||||
| 
 | ||||
| 			status, err := repo_model.GetIndexerStatus(db.DefaultContext, repo, repo_model.RepoIndexerTypeStats) | ||||
| 			assert.NoError(t, err) | ||||
| 			assert.Equal(t, sha, status.CommitSha) | ||||
| 			langs, err := repo_model.GetTopLanguageStats(db.DefaultContext, repo, 5) | ||||
| 			assert.NoError(t, err) | ||||
| 
 | ||||
| 			return langs | ||||
| 		} | ||||
| 
 | ||||
| 		/*********** | ||||
| 		 ** Tests ** | ||||
| 		 ***********/ | ||||
| 
 | ||||
| 		// 1. By default, documentation is not indexed | ||||
| 		t.Run("default", func(t *testing.T) { | ||||
| 			defer tests.PrintCurrentTest(t)() | ||||
| 
 | ||||
| 			repo, sha, f := prep(t, "") | ||||
| 			defer f() | ||||
| 
 | ||||
| 			langs := getFreshLanguageStats(t, repo, sha) | ||||
| 
 | ||||
| 			// While this is a fairly short test, this exercises a number of | ||||
| 			// things: | ||||
| 			// | ||||
| 			// - `.gitattributes` is empty, so `isDetectable.IsFalse()`, | ||||
| 			//   `isVendored.IsTrue()`, and `isDocumentation.IsTrue()` will be | ||||
| 			//   false for every file, because these are only true if an | ||||
| 			//   attribute is explicitly set. | ||||
| 			// | ||||
| 			// - There is `.dot.pas`, which would be considered Pascal source, | ||||
| 			//   but it is a dotfile (thus, `enry.IsDotFile()` applies), and as | ||||
| 			//   such, is not considered. | ||||
| 			// | ||||
| 			// - `some-file.xml` will be skipped because Enry considers XML | ||||
| 			//   configuration, and `enry.IsConfiguration()` will catch it. | ||||
| 			// | ||||
| 			// - `!isVendored.IsFalse()` evaluates to true, so | ||||
| 			//   `analyze.isVendor()` will be called on `cpplint.py`, which will | ||||
| 			//   be considered vendored, even though both the filename and | ||||
| 			//   contents would otherwise make it Python. | ||||
| 			// | ||||
| 			// - `!isDocumentation.IsFalse()` evaluates to true, so | ||||
| 			//   `enry.IsDocumentation()` will be called for `docs.md`, and will | ||||
| 			//   be considered documentation, thus, skipped. | ||||
| 			// | ||||
| 			// Thus, this exercises all of the conditions in the first big if | ||||
| 			// that is supposed to filter out files early. With two short asserts! | ||||
| 
 | ||||
| 			assert.Len(t, langs, 1) | ||||
| 			assert.Equal(t, "C", langs[0].Language) | ||||
| 		}) | ||||
| 
 | ||||
| 		// 2. Marking foo.c as non-detectable | ||||
| 		t.Run("foo.c non-detectable", func(t *testing.T) { | ||||
| 			defer tests.PrintCurrentTest(t)() | ||||
| 
 | ||||
| 			repo, sha, f := prep(t, "foo.c linguist-detectable=false\n") | ||||
| 			defer f() | ||||
| 
 | ||||
| 			langs := getFreshLanguageStats(t, repo, sha) | ||||
| 			assert.Empty(t, langs) | ||||
| 		}) | ||||
| 
 | ||||
| 		// 3. Marking Markdown detectable | ||||
| 		t.Run("detectable markdown", func(t *testing.T) { | ||||
| 			defer tests.PrintCurrentTest(t)() | ||||
| 
 | ||||
| 			repo, sha, f := prep(t, "*.md linguist-detectable\n") | ||||
| 			defer f() | ||||
| 
 | ||||
| 			langs := getFreshLanguageStats(t, repo, sha) | ||||
| 			assert.Len(t, langs, 2) | ||||
| 			assert.Equal(t, "C", langs[0].Language) | ||||
| 			assert.Equal(t, "Markdown", langs[1].Language) | ||||
| 		}) | ||||
| 
 | ||||
| 		// 4. Marking foo.c as documentation | ||||
| 		t.Run("foo.c as documentation", func(t *testing.T) { | ||||
| 			defer tests.PrintCurrentTest(t)() | ||||
| 
 | ||||
| 			repo, sha, f := prep(t, "foo.c linguist-documentation\n") | ||||
| 			defer f() | ||||
| 
 | ||||
| 			langs := getFreshLanguageStats(t, repo, sha) | ||||
| 			assert.Empty(t, langs) | ||||
| 		}) | ||||
| 
 | ||||
| 		// 5. Overriding a generated file | ||||
| 		t.Run("linguist-generated=false", func(t *testing.T) { | ||||
| 			defer tests.PrintCurrentTest(t)() | ||||
| 
 | ||||
| 			repo, sha, f := prep(t, "foo.nib linguist-generated=false\nfoo.nib linguist-language=Perl\n") | ||||
| 			defer f() | ||||
| 
 | ||||
| 			langs := getFreshLanguageStats(t, repo, sha) | ||||
| 			assert.Len(t, langs, 2) | ||||
| 			assert.Equal(t, "C", langs[0].Language) | ||||
| 			assert.Equal(t, "Perl", langs[1].Language) | ||||
| 		}) | ||||
| 
 | ||||
| 		// 6. Disabling vendoring for a file | ||||
| 		t.Run("linguist-vendored=false", func(t *testing.T) { | ||||
| 			defer tests.PrintCurrentTest(t)() | ||||
| 
 | ||||
| 			repo, sha, f := prep(t, "cpplint.py linguist-vendored=false\n") | ||||
| 			defer f() | ||||
| 
 | ||||
| 			langs := getFreshLanguageStats(t, repo, sha) | ||||
| 			assert.Len(t, langs, 2) | ||||
| 			assert.Equal(t, "C", langs[0].Language) | ||||
| 			assert.Equal(t, "Python", langs[1].Language) | ||||
| 		}) | ||||
| 
 | ||||
| 		// 7. Disabling vendoring for a file, with -linguist-vendored | ||||
| 		t.Run("-linguist-vendored", func(t *testing.T) { | ||||
| 			defer tests.PrintCurrentTest(t)() | ||||
| 
 | ||||
| 			repo, sha, f := prep(t, "cpplint.py -linguist-vendored\n") | ||||
| 			defer f() | ||||
| 
 | ||||
| 			langs := getFreshLanguageStats(t, repo, sha) | ||||
| 			assert.Len(t, langs, 2) | ||||
| 			assert.Equal(t, "C", langs[0].Language) | ||||
| 			assert.Equal(t, "Python", langs[1].Language) | ||||
| 		}) | ||||
| 
 | ||||
| 		// 8. Marking foo.c as vendored | ||||
| 		t.Run("foo.c as vendored", func(t *testing.T) { | ||||
| 			defer tests.PrintCurrentTest(t)() | ||||
| 
 | ||||
| 			repo, sha, f := prep(t, "foo.c linguist-vendored\n") | ||||
| 			defer f() | ||||
| 
 | ||||
| 			langs := getFreshLanguageStats(t, repo, sha) | ||||
| 			assert.Empty(t, langs) | ||||
| 		}) | ||||
| 	}) | ||||
| } | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue