fix: more permissive markup commit hash detection (#6784)

This allows many more variants of commit hashes to be detected and interpreted as links if they are enclosed by up to two different non-word/non-digit characters. I also had RTL languages in mind, where the question mark and similar symbols appear to the left of the commit hash.

Resolves #6771

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/6784
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
Co-authored-by: Robert Wolff <mahlzahn@posteo.de>
Co-committed-by: Robert Wolff <mahlzahn@posteo.de>
This commit is contained in:
Robert Wolff 2025-02-05 09:04:19 +00:00 committed by Gusted
parent 499497c959
commit 519169ee7b
3 changed files with 17 additions and 5 deletions

View file

@ -1,4 +1,5 @@
// Copyright 2018 The Gitea Authors. All rights reserved.
// Copyright 2025 The Forgejo Authors.
// SPDX-License-Identifier: MIT
package markup
@ -391,7 +392,7 @@ func TestRender_FullIssueURLs(t *testing.T) {
`<a href="http://localhost:3000/testOrg/testOrgRepo/pulls/2/commits" class="ref-issue">testOrg/testOrgRepo#2/commits</a>`)
}
func TestRegExp_sha1CurrentPattern(t *testing.T) {
func TestRegExp_hashCurrentPattern(t *testing.T) {
trueTestCases := []string{
"d8a994ef243349f321568f9e36d5c3f444b99cae",
"abcdefabcdefabcdefabcdefabcdefabcdefabcd",
@ -399,6 +400,13 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) {
"[abcdefabcdefabcdefabcdefabcdefabcdefabcd]",
"abcdefabcdefabcdefabcdefabcdefabcdefabcd.",
"abcdefabcdefabcdefabcdefabcdefabcdefabcd:",
"d8a994ef243349f321568f9e36d5c3f444b99cae12424fa123391042fbae2319",
"abcdefd?",
"abcdefd!",
"!abcd3ef",
":abcd3ef",
".abcd3ef",
" (abcd3ef). ",
}
falseTestCases := []string{
"test",
@ -406,6 +414,8 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) {
"e59ff077-2d03-4e6b-964d-63fbaea81f",
"abcdefghijklmnopqrstuvwxyzabcdefghijklmn",
"abcdefghijklmnopqrstuvwxyzabcdefghijklmO",
"commit/abcdefd",
"abcd3ef...defabcd",
}
for _, testCase := range trueTestCases {