Fix broken spans in diffs (#14678)

Gitea runs diff on highlighted code fragment for each line in order to provide
code highlight diffs. Unfortunately this diff algorithm is not aware that span tags
and entities are atomic and cannot be split.

The current fixup code makes some attempt to fix these broken tags however, it cannot
handle situations where a tag is split over multiple blocks.

This PR provides a more algorithmic fixup mechanism whereby spans and entities are
completely coalesced into their respective blocks.

This may result in a incompletely reduced diff but - it will definitely prevent the
broken entities and spans that are currently possible.

As a result of this fixup several inconsistencies were discovered in our testcases
and these were also fixed.

Fix #14231

Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
zeripath 2021-02-14 14:51:00 +00:00 committed by GitHub
parent f3847c9d82
commit beb2058186
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 228 additions and 6 deletions

View file

@ -15,6 +15,7 @@ import (
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/highlight"
"code.gitea.io/gitea/modules/setting"
dmp "github.com/sergi/go-diff/diffmatchpatch"
"github.com/stretchr/testify/assert"
@ -23,7 +24,7 @@ import (
func assertEqual(t *testing.T, s1 string, s2 template.HTML) {
if s1 != string(s2) {
t.Errorf("%s should be equal %s", s2, s1)
t.Errorf("Did not receive expected results:\nExpected: %s\nActual: %s", s1, s2)
}
}
@ -61,7 +62,7 @@ func TestDiffToHTML(t *testing.T) {
{Type: dmp.DiffEqual, Text: "</span><span class=\"p\">)</span>"},
}, DiffLineDel))
assertEqual(t, "<span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nf\">WrapperRenderer</span><span class=\"p\">(</span><span class=\"nx\">w</span><span class=\"p\">,</span> <span class=\"removed-code\"><span class=\"nx\">language</span></span><span class=\"removed-code\"><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
assertEqual(t, "<span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nf\">WrapperRenderer</span><span class=\"p\">(</span><span class=\"nx\">w</span><span class=\"p\">,</span> <span class=\"removed-code\"><span class=\"nx\">language</span><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
{Type: dmp.DiffEqual, Text: "<span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nf\">WrapperRenderer</span><span class=\"p\">(</span><span class=\"nx\">w</span><span class=\"p\">,</span> <span class=\"nx\">"},
{Type: dmp.DiffDelete, Text: "language</span><span "},
{Type: dmp.DiffEqual, Text: "c"},
@ -69,14 +70,14 @@ func TestDiffToHTML(t *testing.T) {
{Type: dmp.DiffEqual, Text: "</span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>"},
}, DiffLineDel))
assertEqual(t, "<span class=\"added-code\">language</span></span><span class=\"added-code\"><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
assertEqual(t, "<span class=\"added-code\">language</span><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
{Type: dmp.DiffInsert, Text: "language</span><span "},
{Type: dmp.DiffEqual, Text: "c"},
{Type: dmp.DiffInsert, Text: "lass=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs"},
{Type: dmp.DiffEqual, Text: "</span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>"},
}, DiffLineAdd))
assertEqual(t, "<span class=\"k\">print</span><span class=\"added-code\"></span><span class=\"added-code\"><span class=\"p\">(</span></span><span class=\"sa\"></span><span class=\"s2\">&#34;</span><span class=\"s2\">// </span><span class=\"s2\">&#34;</span><span class=\"p\">,</span> <span class=\"n\">sys</span><span class=\"o\">.</span><span class=\"n\">argv</span><span class=\"added-code\"><span class=\"p\">)</span></span>", diffToHTML("", []dmp.Diff{
assertEqual(t, "<span class=\"k\">print</span><span class=\"added-code\"><span class=\"p\">(</span></span><span class=\"sa\"></span><span class=\"s2\">&#34;</span><span class=\"s2\">// </span><span class=\"s2\">&#34;</span><span class=\"p\">,</span> <span class=\"n\">sys</span><span class=\"o\">.</span><span class=\"n\">argv</span><span class=\"added-code\"><span class=\"p\">)</span></span>", diffToHTML("", []dmp.Diff{
{Type: dmp.DiffEqual, Text: "<span class=\"k\">print</span>"},
{Type: dmp.DiffInsert, Text: "<span"},
{Type: dmp.DiffEqual, Text: " "},
@ -85,14 +86,14 @@ func TestDiffToHTML(t *testing.T) {
{Type: dmp.DiffInsert, Text: "<span class=\"p\">)</span>"},
}, DiffLineAdd))
assertEqual(t, "sh <span class=\"added-code\">&#39;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins</span>&#39;", diffToHTML("", []dmp.Diff{
assertEqual(t, "sh <span class=\"added-code\">&#39;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins&#39;</span>", diffToHTML("", []dmp.Diff{
{Type: dmp.DiffEqual, Text: "sh &#3"},
{Type: dmp.DiffDelete, Text: "4;useradd -u 111 jenkins&#34"},
{Type: dmp.DiffInsert, Text: "9;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins&#39"},
{Type: dmp.DiffEqual, Text: ";"},
}, DiffLineAdd))
assertEqual(t, "<span class=\"x\"> &lt;h<span class=\"added-code\">4 class=</span><span class=\"added-code\">&#34;release-list-title df ac&#34;</span>&gt;</span>", diffToHTML("", []dmp.Diff{
assertEqual(t, "<span class=\"x\"> &lt;h<span class=\"added-code\">4 class=&#34;release-list-title df ac&#34;</span>&gt;</span>", diffToHTML("", []dmp.Diff{
{Type: dmp.DiffEqual, Text: "<span class=\"x\"> &lt;h"},
{Type: dmp.DiffInsert, Text: "4 class=&#"},
{Type: dmp.DiffEqual, Text: "3"},
@ -462,3 +463,14 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) {
}
}
}
func TestDiffToHTML_14231(t *testing.T) {
setting.Cfg = ini.Empty()
diffRecord := diffMatchPatch.DiffMain(highlight.Code("main.v", " run()\n"), highlight.Code("main.v", " run(db)\n"), true)
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
expected := ` <span class="n">run</span><span class="added-code"><span class="o">(</span><span class="n">db</span></span><span class="o">)</span>`
output := diffToHTML("main.v", diffRecord, DiffLineAdd)
assertEqual(t, expected, output)
}