Add .gitattribute assisted language detection to blame, diff and render (#17590)

Use check attribute code to check the assigned language of a file and send that in to
chroma as a hint for the language of the file.

Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
zeripath 2021-11-17 20:37:00 +00:00 committed by GitHub
parent 81a4fc7528
commit 3c4724d70e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 223 additions and 97 deletions

View file

@ -31,7 +31,6 @@ import (
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
"github.com/sergi/go-diff/diffmatchpatch"
stdcharset "golang.org/x/net/html/charset"
@ -178,6 +177,7 @@ func getLineContent(content string) string {
// DiffSection represents a section of a DiffFile.
type DiffSection struct {
file *DiffFile
FileName string
Name string
Lines []*DiffLine
@ -546,6 +546,11 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) tem
diff2 string
)
language := ""
if diffSection.file != nil {
language = diffSection.file.Language
}
// try to find equivalent diff line. ignore, otherwise
switch diffLine.Type {
case DiffLineSection:
@ -553,25 +558,25 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) tem
case DiffLineAdd:
compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx)
if compareDiffLine == nil {
return template.HTML(highlight.Code(diffSection.FileName, diffLine.Content[1:]))
return template.HTML(highlight.Code(diffSection.FileName, language, diffLine.Content[1:]))
}
diff1 = compareDiffLine.Content
diff2 = diffLine.Content
case DiffLineDel:
compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx)
if compareDiffLine == nil {
return template.HTML(highlight.Code(diffSection.FileName, diffLine.Content[1:]))
return template.HTML(highlight.Code(diffSection.FileName, language, diffLine.Content[1:]))
}
diff1 = diffLine.Content
diff2 = compareDiffLine.Content
default:
if strings.IndexByte(" +-", diffLine.Content[0]) > -1 {
return template.HTML(highlight.Code(diffSection.FileName, diffLine.Content[1:]))
return template.HTML(highlight.Code(diffSection.FileName, language, diffLine.Content[1:]))
}
return template.HTML(highlight.Code(diffSection.FileName, diffLine.Content))
return template.HTML(highlight.Code(diffSection.FileName, language, diffLine.Content))
}
diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, diff1[1:]), highlight.Code(diffSection.FileName, diff2[1:]), true)
diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, language, diff1[1:]), highlight.Code(diffSection.FileName, language, diff2[1:]), true)
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type)
@ -597,6 +602,7 @@ type DiffFile struct {
IsProtected bool
IsGenerated bool
IsVendored bool
Language string
}
// GetType returns type of diff file.
@ -1008,7 +1014,7 @@ func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio
line := sb.String()
// Create a new section to represent this hunk
curSection = &DiffSection{}
curSection = &DiffSection{file: curFile}
lastLeftIdx = -1
curFile.Sections = append(curFile.Sections, curSection)
@ -1048,7 +1054,7 @@ func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio
rightLine++
if curSection == nil {
// Create a new section to represent this hunk
curSection = &DiffSection{}
curSection = &DiffSection{file: curFile}
curFile.Sections = append(curFile.Sections, curSection)
lastLeftIdx = -1
}
@ -1074,7 +1080,7 @@ func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio
}
if curSection == nil {
// Create a new section to represent this hunk
curSection = &DiffSection{}
curSection = &DiffSection{file: curFile}
curFile.Sections = append(curFile.Sections, curSection)
lastLeftIdx = -1
}
@ -1094,7 +1100,7 @@ func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio
lastLeftIdx = -1
if curSection == nil {
// Create a new section to represent this hunk
curSection = &DiffSection{}
curSection = &DiffSection{file: curFile}
curFile.Sections = append(curFile.Sections, curSection)
}
curSection.Lines = append(curSection.Lines, diffLine)
@ -1302,23 +1308,15 @@ func GetDiffRangeWithWhitespaceBehavior(gitRepo *git.Repository, beforeCommitID,
var checker *git.CheckAttributeReader
if git.CheckGitVersionAtLeast("1.7.8") == nil {
indexFilename, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(afterCommitID)
indexFilename, worktree, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(afterCommitID)
if err == nil {
defer deleteTemporaryFile()
workdir, err := os.MkdirTemp("", "empty-work-dir")
if err != nil {
log.Error("Unable to create temporary directory: %v", err)
return nil, err
}
defer func() {
_ = util.RemoveAll(workdir)
}()
checker = &git.CheckAttributeReader{
Attributes: []string{"linguist-vendored", "linguist-generated"},
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
Repo: gitRepo,
IndexFile: indexFilename,
WorkTree: workdir,
WorkTree: worktree,
}
ctx, cancel := context.WithCancel(git.DefaultContext)
if err := checker.Init(ctx); err != nil {
@ -1361,6 +1359,11 @@ func GetDiffRangeWithWhitespaceBehavior(gitRepo *git.Repository, beforeCommitID,
gotGenerated = generated == "false"
}
}
if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
diffFile.Language = language
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
diffFile.Language = language
}
} else {
log.Error("Unexpected error: %v", err)
}