Add warning for BIDI characters in page renders and in diffs (#17562)
Fix #17514. Given the comments, I've adjusted this somewhat. The set of characters detected has been widened and now includes things like the use of the combining accent U+0300 to make à (a followed by U+0300) instead of the precomposed à (U+00E0), and non-breaking spaces. There is a button which can be used to show the content in escaped form. Signed-off-by: Andrew Thornton <art27@cantab.net> Co-authored-by: Gwyneth Morgan <gwymor@tilde.club> Co-authored-by: silverwind <me@silverwind.io> Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
forgejo
parent
ee60f27aec
commit
21ed4fd8da
@ -0,0 +1,230 @@
|
||||
// Copyright 2021 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package charset
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/unicode/bidi"
|
||||
)
|
||||
|
||||
// EscapeStatus summarises what the unicode escaper found in a piece of text.
// The zero value means nothing noteworthy was seen.
type EscapeStatus struct {
	// Escaped is set when at least one code point or byte sequence was
	// rewritten in the output. HasError is set when reading the input or
	// writing the output failed.
	Escaped, HasError bool

	// Kinds of suspicious content that were seen: bytes that are not valid
	// UTF-8, code points in Unicode category C, white space other than
	// "\n", "\r", "\t" and " ", and code points in Unicode category M.
	HasBadRunes, HasControls, HasSpaces, HasMarks bool

	// HasBIDI is set when a BIDI control character was seen. BadBIDI is set
	// when some line contained one together with left-to-right script but no
	// right-to-left script.
	HasBIDI, BadBIDI bool

	// Script directions seen anywhere in the text.
	HasRTLScript, HasLTRScript bool
}

// Or returns the field-wise logical OR of status and other: a flag is set in
// the result when it is set in either operand. Neither operand is modified.
func (status EscapeStatus) Or(other EscapeStatus) EscapeStatus {
	return EscapeStatus{
		Escaped:      status.Escaped || other.Escaped,
		HasError:     status.HasError || other.HasError,
		HasBadRunes:  status.HasBadRunes || other.HasBadRunes,
		HasControls:  status.HasControls || other.HasControls,
		HasSpaces:    status.HasSpaces || other.HasSpaces,
		HasMarks:     status.HasMarks || other.HasMarks,
		HasBIDI:      status.HasBIDI || other.HasBIDI,
		BadBIDI:      status.BadBIDI || other.BadBIDI,
		HasRTLScript: status.HasRTLScript || other.HasRTLScript,
		HasLTRScript: status.HasLTRScript || other.HasLTRScript,
	}
}
|
||||
|
||||
// EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string
|
||||
func EscapeControlString(text string) (EscapeStatus, string) {
|
||||
sb := &strings.Builder{}
|
||||
escaped, _ := EscapeControlReader(strings.NewReader(text), sb)
|
||||
return escaped, sb.String()
|
||||
}
|
||||
|
||||
// EscapeControlBytes escapes the unicode control sequences a provided []byte and returns the findings as an EscapeStatus and the escaped []byte
|
||||
func EscapeControlBytes(text []byte) (EscapeStatus, []byte) {
|
||||
buf := &bytes.Buffer{}
|
||||
escaped, _ := EscapeControlReader(bytes.NewReader(text), buf)
|
||||
return escaped, buf.Bytes()
|
||||
}
|
||||
|
||||
// EscapeControlReader escapes the unicode control sequences a provided Reader writing the escaped output to the output and returns the findings as an EscapeStatus and an error
|
||||
func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) {
|
||||
buf := make([]byte, 4096)
|
||||
readStart := 0
|
||||
var n int
|
||||
var writePos int
|
||||
|
||||
lineHasBIDI := false
|
||||
lineHasRTLScript := false
|
||||
lineHasLTRScript := false
|
||||
|
||||
readingloop:
|
||||
for err == nil {
|
||||
n, err = text.Read(buf[readStart:])
|
||||
bs := buf[:n+readStart]
|
||||
i := 0
|
||||
|
||||
for i < len(bs) {
|
||||
r, size := utf8.DecodeRune(bs[i:])
|
||||
// Now handle the codepoints
|
||||
switch {
|
||||
case r == utf8.RuneError:
|
||||
if writePos < i {
|
||||
if _, err = output.Write(bs[writePos:i]); err != nil {
|
||||
escaped.HasError = true
|
||||
return
|
||||
}
|
||||
writePos = i
|
||||
}
|
||||
// runes can be at most 4 bytes - so...
|
||||
if len(bs)-i <= 3 {
|
||||
// if not request more data
|
||||
copy(buf, bs[i:])
|
||||
readStart = n - i
|
||||
writePos = 0
|
||||
continue readingloop
|
||||
}
|
||||
// this is a real broken rune
|
||||
escaped.HasBadRunes = true
|
||||
escaped.Escaped = true
|
||||
if err = writeBroken(output, bs[i:i+size]); err != nil {
|
||||
escaped.HasError = true
|
||||
return
|
||||
}
|
||||
writePos += size
|
||||
case r == '\n':
|
||||
if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript {
|
||||
escaped.BadBIDI = true
|
||||
}
|
||||
lineHasBIDI = false
|
||||
lineHasRTLScript = false
|
||||
lineHasLTRScript = false
|
||||
|
||||
case r == '\r' || r == '\t' || r == ' ':
|
||||
// These are acceptable control characters and space characters
|
||||
case unicode.IsSpace(r):
|
||||
escaped.HasSpaces = true
|
||||
escaped.Escaped = true
|
||||
if writePos < i {
|
||||
if _, err = output.Write(bs[writePos:i]); err != nil {
|
||||
escaped.HasError = true
|
||||
return
|
||||
}
|
||||
}
|
||||
if err = writeEscaped(output, r); err != nil {
|
||||
escaped.HasError = true
|
||||
return
|
||||
}
|
||||
writePos = i + size
|
||||
case unicode.Is(unicode.Bidi_Control, r):
|
||||
escaped.Escaped = true
|
||||
escaped.HasBIDI = true
|
||||
if writePos < i {
|
||||
if _, err = output.Write(bs[writePos:i]); err != nil {
|
||||
escaped.HasError = true
|
||||
return
|
||||
}
|
||||
}
|
||||
lineHasBIDI = true
|
||||
if err = writeEscaped(output, r); err != nil {
|
||||
escaped.HasError = true
|
||||
return
|
||||
}
|
||||
writePos = i + size
|
||||
case unicode.Is(unicode.C, r):
|
||||
escaped.Escaped = true
|
||||
escaped.HasControls = true
|
||||
if writePos < i {
|
||||
if _, err = output.Write(bs[writePos:i]); err != nil {
|
||||
escaped.HasError = true
|
||||
return
|
||||
}
|
||||
}
|
||||
if err = writeEscaped(output, r); err != nil {
|
||||
escaped.HasError = true
|
||||
return
|
||||
}
|
||||
writePos = i + size
|
||||
case unicode.Is(unicode.M, r):
|
||||
escaped.Escaped = true
|
||||
escaped.HasMarks = true
|
||||
if writePos < i {
|
||||
if _, err = output.Write(bs[writePos:i]); err != nil {
|
||||
escaped.HasError = true
|
||||
return
|
||||
}
|
||||
}
|
||||
if err = writeEscaped(output, r); err != nil {
|
||||
escaped.HasError = true
|
||||
return
|
||||
}
|
||||
writePos = i + size
|
||||
default:
|
||||
p, _ := bidi.Lookup(bs[i : i+size])
|
||||
c := p.Class()
|
||||
if c == bidi.R || c == bidi.AL {
|
||||
lineHasRTLScript = true
|
||||
escaped.HasRTLScript = true
|
||||
} else if c == bidi.L {
|
||||
lineHasLTRScript = true
|
||||
escaped.HasLTRScript = true
|
||||
}
|
||||
}
|
||||
i += size
|
||||
}
|
||||
if n > 0 {
|
||||
// we read something...
|
||||
// write everything unwritten
|
||||
if writePos < i {
|
||||
if _, err = output.Write(bs[writePos:i]); err != nil {
|
||||
escaped.HasError = true
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// reset the starting positions for the next read
|
||||
readStart = 0
|
||||
writePos = 0
|
||||
}
|
||||
}
|
||||
if readStart > 0 {
|
||||
// this means that there is an incomplete or broken rune at 0-readStart and we read nothing on the last go round
|
||||
escaped.Escaped = true
|
||||
escaped.HasBadRunes = true
|
||||
if err = writeBroken(output, buf[:readStart]); err != nil {
|
||||
escaped.HasError = true
|
||||
return
|
||||
}
|
||||
}
|
||||
if err == io.EOF {
|
||||
if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript {
|
||||
escaped.BadBIDI = true
|
||||
}
|
||||
err = nil
|
||||
return
|
||||
}
|
||||
escaped.HasError = true
|
||||
return
|
||||
}
|
||||
|
||||
// writeBroken writes bs to output as upper-case hexadecimal between angle
// brackets, wrapped in a span with class "broken-code-point". It returns the
// error from the underlying write, if any.
//
// The angle brackets are emitted as the entities &lt; and &gt; because the
// output is HTML: raw brackets would be parsed as an unknown tag and the hex
// dump would not be displayed.
func writeBroken(output io.Writer, bs []byte) (err error) {
	_, err = fmt.Fprintf(output, `<span class="broken-code-point">&lt;%X&gt;</span>`, bs)
	return err
}
|
||||
|
||||
// writeEscaped writes r to output wrapped in a span with class
// "escaped-code-point". The span's data-escaped attribute holds the code
// point as "[U+XXXX]" (upper-case hex, at least four digits) and an inner
// span with class "char" holds the character itself. It returns the error
// from the underlying write, if any.
func writeEscaped(output io.Writer, r rune) (err error) {
	const format = `<span class="escaped-code-point" data-escaped="[U+%04X]"><span class="char">%c</span></span>`
	if _, err = fmt.Fprintf(output, format, r, r); err != nil {
		return err
	}
	return nil
}
|
@ -0,0 +1,17 @@
|
||||
{{- /* Dismissible banner about suspicious Unicode: an error message when .EscapeStatus.BadBIDI is set, otherwise a warning message when .EscapeStatus.Escaped is set; nothing is rendered when neither is set. */ -}}
{{if .EscapeStatus.BadBIDI}}
|
||||
<div class="ui error message unicode-escape-prompt">
|
||||
<span class="close icon hide-panel button" data-panel-closest=".message">{{svg "octicon-x" 16 "close inside"}}</span>
|
||||
<div class="header">
|
||||
{{$.root.i18n.Tr "repo.bidi_bad_header"}}
|
||||
</div>
|
||||
<p>{{$.root.i18n.Tr "repo.bidi_bad_description" | Str2html}}</p>
|
||||
</div>
|
||||
{{else if .EscapeStatus.Escaped}}
|
||||
<div class="ui warning message unicode-escape-prompt">
|
||||
<span class="close icon hide-panel button" data-panel-closest=".message">{{svg "octicon-x" 16 "close inside"}}</span>
|
||||
<div class="header">
|
||||
{{$.root.i18n.Tr "repo.unicode_header"}}
|
||||
</div>
|
||||
<p>{{$.root.i18n.Tr "repo.unicode_description" | Str2html}}</p>
|
||||
</div>
|
||||
{{end}}
|
@ -0,0 +1,28 @@
|
||||
/**
 * Registers the click handlers for the buttons that switch a file view
 * between its normal rendering and the rendering with the `unicode-escaped`
 * class applied.
 *
 * The handlers are delegated from `document`, so they also apply to buttons
 * added to the page later. Three kinds of anchors are handled:
 * `a.escape-button` (add the class), `a.unescape-button` (remove it) and
 * `a.toggle-escape-button` (flip it). In every case the affected views are
 * the `.file-code` / `.file-view` elements inside the button's
 * `.file-content` or `.non-diff-file-content` ancestor.
 */
export function initUnicodeEscapeButton() {
  const containerSelector = '.file-content, .non-diff-file-content';
  const viewSelector = '.file-code, .file-view';

  $(document).on('click', 'a.escape-button', (e) => {
    e.preventDefault();
    // For a delegated handler, currentTarget is the anchor that matched the
    // selector, whereas target may be an element nested inside that anchor.
    const $button = $(e.currentTarget);
    $button.parents(containerSelector).find(viewSelector).addClass('unicode-escaped');
    $button.hide();
    $button.siblings('a.unescape-button').show();
  });

  $(document).on('click', 'a.unescape-button', (e) => {
    e.preventDefault();
    const $button = $(e.currentTarget);
    $button.parents(containerSelector).find(viewSelector).removeClass('unicode-escaped');
    $button.hide();
    $button.siblings('a.escape-button').show();
  });

  $(document).on('click', 'a.toggle-escape-button', (e) => {
    e.preventDefault();
    const $fileContent = $(e.currentTarget).parents(containerSelector);
    const $fileView = $fileContent.find(viewSelector);
    if ($fileView.hasClass('unicode-escaped')) {
      $fileView.removeClass('unicode-escaped');
      $fileContent.find('a.unescape-button').hide();
      $fileContent.find('a.escape-button').show();
    } else {
      $fileView.addClass('unicode-escaped');
      $fileContent.find('a.unescape-button').show();
      $fileContent.find('a.escape-button').hide();
    }
  });
}
|
Loading…
Reference in New Issue