mirror of
https://github.com/go-gitea/gitea.git
synced 2025-06-21 05:28:00 +02:00
Convert files to utf-8 for indexing (#7814)
* Convert files to UTF-8 for indexing
* Move UTF-8 functions to modules/base
* Bump repoIndexerLatestVersion to 3
* Add tests for base/encoding.go
* Changes to pass gosimple
* Move UTF-8 funcs into new modules/charset package
This commit is contained in:
@ -19,7 +19,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/base"
|
||||
"code.gitea.io/gitea/modules/charset"
|
||||
"code.gitea.io/gitea/modules/git"
|
||||
"code.gitea.io/gitea/modules/highlight"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
@ -27,7 +27,7 @@ import (
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"github.com/Unknwon/com"
|
||||
"github.com/sergi/go-diff/diffmatchpatch"
|
||||
"golang.org/x/net/html/charset"
|
||||
stdcharset "golang.org/x/net/html/charset"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
@ -641,9 +641,9 @@ func ParsePatch(maxLines, maxLineCharacters, maxFiles int, reader io.Reader) (*D
|
||||
buf.WriteString("\n")
|
||||
}
|
||||
}
|
||||
charsetLabel, err := base.DetectEncoding(buf.Bytes())
|
||||
charsetLabel, err := charset.DetectEncoding(buf.Bytes())
|
||||
if charsetLabel != "UTF-8" && err == nil {
|
||||
encoding, _ := charset.Lookup(charsetLabel)
|
||||
encoding, _ := stdcharset.Lookup(charsetLabel)
|
||||
if encoding != nil {
|
||||
d := encoding.NewDecoder()
|
||||
for _, sec := range f.Sections {
|
||||
|
@ -10,6 +10,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/base"
|
||||
"code.gitea.io/gitea/modules/charset"
|
||||
"code.gitea.io/gitea/modules/git"
|
||||
"code.gitea.io/gitea/modules/indexer"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
@ -207,6 +208,7 @@ func addUpdate(update fileUpdate, repo *Repository, batch rupture.FlushingBatch)
|
||||
if err != nil {
|
||||
return err
|
||||
} else if !base.IsTextFile(fileContents) {
|
||||
// FIXME: UTF-16 files will probably fail here
|
||||
return nil
|
||||
}
|
||||
indexerUpdate := indexer.RepoIndexerUpdate{
|
||||
@ -214,7 +216,7 @@ func addUpdate(update fileUpdate, repo *Repository, batch rupture.FlushingBatch)
|
||||
Op: indexer.RepoIndexerOpUpdate,
|
||||
Data: &indexer.RepoIndexerData{
|
||||
RepoID: repo.ID,
|
||||
Content: string(fileContents),
|
||||
Content: string(charset.ToUTF8DropErrors(fileContents)),
|
||||
},
|
||||
}
|
||||
return indexerUpdate.AddToFlushingBatch(batch)
|
||||
|
Reference in New Issue
Block a user