Convert files to utf-8 for indexing (#7814)

* Convert files to utf-8 for indexing

* Move utf8 functions to modules/base

* Bump repoIndexerLatestVersion to 3

* Add tests for base/encoding.go

* Changes to pass gosimple

* Move UTF8 funcs into new modules/charset package
This commit is contained in:
guillep2k
2019-08-15 09:07:28 -03:00
committed by Lunny Xiao
parent c2c35d169c
commit 5a44be627c
13 changed files with 371 additions and 166 deletions

View File

@@ -11,11 +11,11 @@ import (
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/templates"
)
const (
@@ -251,7 +251,7 @@ func Diff(ctx *context.Context) {
note := &git.Note{}
err = git.GetNote(ctx.Repo.GitRepo, commitID, note)
if err == nil {
ctx.Data["Note"] = string(templates.ToUTF8WithFallback(note.Message))
ctx.Data["Note"] = string(charset.ToUTF8WithFallback(note.Message))
ctx.Data["NoteCommit"] = note.Commit
ctx.Data["NoteAuthor"] = models.ValidateCommitWithEmail(note.Commit)
}

View File

@@ -14,12 +14,12 @@ import (
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/auth"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/repofiles"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/templates"
"code.gitea.io/gitea/modules/upload"
"code.gitea.io/gitea/modules/util"
)
@@ -118,7 +118,7 @@ func editFile(ctx *context.Context, isNewFile bool) {
d, _ := ioutil.ReadAll(dataRc)
buf = append(buf, d...)
if content, err := templates.ToUTF8WithErr(buf); err != nil {
if content, err := charset.ToUTF8WithErr(buf); err != nil {
log.Error("ToUTF8WithErr: %v", err)
ctx.Data["FileContent"] = string(buf)
} else {

View File

@@ -16,6 +16,7 @@ import (
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/highlight"
@@ -23,7 +24,6 @@ import (
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/templates"
)
const (
@@ -160,7 +160,7 @@ func renderDirectory(ctx *context.Context, treeLink string) {
ctx.Data["FileSize"] = fileSize
} else {
d, _ := ioutil.ReadAll(dataRc)
buf = templates.ToUTF8WithFallback(append(buf, d...))
buf = charset.ToUTF8WithFallback(append(buf, d...))
if markup.Type(readmeFile.Name()) != "" {
ctx.Data["IsMarkup"] = true
@@ -278,7 +278,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
}
d, _ := ioutil.ReadAll(dataRc)
buf = templates.ToUTF8WithFallback(append(buf, d...))
buf = charset.ToUTF8WithFallback(append(buf, d...))
readmeExist := markup.IsReadmeFile(blob.Name())
ctx.Data["ReadmeExist"] = readmeExist
@@ -293,7 +293,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
} else {
// Building code view blocks with line number on server side.
var fileContent string
if content, err := templates.ToUTF8WithErr(buf); err != nil {
if content, err := charset.ToUTF8WithErr(buf); err != nil {
log.Error("ToUTF8WithErr: %v", err)
fileContent = string(buf)
} else {