Add Gist code search (#194)

This commit is contained in:
Thomas Miceli
2024-01-04 03:38:15 +01:00
parent 4cb7dc2d30
commit 87a6113cc7
25 changed files with 734 additions and 87 deletions

View File

@ -5,6 +5,7 @@ import (
"github.com/thomiceli/opengist/internal/config"
"github.com/thomiceli/opengist/internal/db"
"github.com/thomiceli/opengist/internal/git"
"github.com/thomiceli/opengist/internal/index"
"os"
"path/filepath"
"strings"
@ -21,6 +22,7 @@ const (
GitGcRepos = iota
SyncGistPreviews = iota
ResetHooks = iota
IndexGists = iota
)
var (
@ -69,6 +71,8 @@ func Run(actionType int) {
functionToRun = syncGistPreviews
case ResetHooks:
functionToRun = resetHooks
case IndexGists:
functionToRun = indexGists
default:
panic("unhandled default case")
}
@ -88,7 +92,6 @@ func syncReposFromFS() {
if _, err := os.Stat(git.RepositoryPath(gist.User.Username, gist.Uuid)); err != nil && !os.IsExist(err) {
if err2 := gist.Delete(); err2 != nil {
log.Error().Err(err2).Msgf("Cannot delete gist %d", gist.ID)
return
}
}
}
@ -109,7 +112,6 @@ func syncReposFromDB() {
if gist.ID == 0 {
if err := git.DeleteRepository(path[len(path)-2], path[len(path)-1]); err != nil {
log.Error().Err(err).Msgf("Cannot delete repository %s/%s", path[len(path)-2], path[len(path)-1])
return
}
}
}
@ -133,7 +135,6 @@ func syncGistPreviews() {
for _, gist := range gists {
if err = gist.UpdatePreviewAndCount(false); err != nil {
log.Error().Err(err).Msgf("Cannot update preview and count for gist %d", gist.ID)
return
}
}
}
@ -150,7 +151,27 @@ func resetHooks() {
path := strings.Split(e, string(os.PathSeparator))
if err := git.CreateDotGitFiles(path[len(path)-2], path[len(path)-1]); err != nil {
log.Error().Err(err).Msgf("Cannot reset hooks for repository %s/%s", path[len(path)-2], path[len(path)-1])
return
}
}
}
func indexGists() {
log.Info().Msg("Indexing all Gists...")
gists, err := db.GetAllGistsRows()
if err != nil {
log.Error().Err(err).Msg("Cannot get gists")
return
}
for _, gist := range gists {
log.Info().Msgf("Indexing gist %d", gist.ID)
indexedGist, err := gist.ToIndexedGist()
if err != nil {
log.Error().Err(err).Msgf("Cannot convert gist %d to indexed gist", gist.ID)
continue
}
if err = index.AddInIndex(indexedGist); err != nil {
log.Error().Err(err).Msgf("Cannot index gist %d", gist.ID)
}
}
}

View File

@ -28,6 +28,8 @@ type config struct {
ExternalUrl string `yaml:"external-url" env:"OG_EXTERNAL_URL"`
OpengistHome string `yaml:"opengist-home" env:"OG_OPENGIST_HOME"`
DBFilename string `yaml:"db-filename" env:"OG_DB_FILENAME"`
IndexEnabled bool `yaml:"index.enabled" env:"OG_INDEX_ENABLED"`
IndexDirname string `yaml:"index.dirname" env:"OG_INDEX_DIRNAME"`
GitDefaultBranch string `yaml:"git.default-branch" env:"OG_GIT_DEFAULT_BRANCH"`
@ -66,6 +68,8 @@ func configWithDefaults() (*config, error) {
c.LogOutput = "stdout,file"
c.OpengistHome = ""
c.DBFilename = "opengist.db"
c.IndexEnabled = true
c.IndexDirname = "opengist.index"
c.SqliteJournalMode = "WAL"

View File

@ -2,9 +2,14 @@ package db
import (
"fmt"
"github.com/alecthomas/chroma/v2"
"github.com/alecthomas/chroma/v2/lexers"
"github.com/dustin/go-humanize"
"github.com/labstack/echo/v4"
"github.com/rs/zerolog/log"
"github.com/thomiceli/opengist/internal/index"
"os/exec"
"path/filepath"
"strings"
"time"
@ -213,6 +218,25 @@ func GetAllGistsRows() ([]*Gist, error) {
return gists, err
}
func GetAllGistsVisibleByUser(userId uint) ([]uint, error) {
var gists []uint
err := db.Table("gists").
Where("gists.private = 0 or gists.user_id = ?", userId).
Pluck("gists.id", &gists).Error
return gists, err
}
func GetAllGistsByIds(ids []uint) ([]*Gist, error) {
var gists []*Gist
err := db.Preload("User").Preload("Forked.User").
Where("id in ?", ids).
Find(&gists).Error
return gists, err
}
func (gist *Gist) Create() error {
// avoids foreign key constraint error because the default value in the struct is 0
return db.Omit("forked_id").Create(&gist).Error
@ -361,6 +385,10 @@ func (gist *Gist) File(revision string, filename string, truncate bool) (*git.Fi
}, err
}
func (gist *Gist) FileNames(revision string) ([]string, error) {
return git.GetFilesOfRepository(gist.User.Username, gist.Uuid, revision)
}
func (gist *Gist) Log(skip int) ([]*git.Commit, error) {
return git.GetLog(gist.User.Username, gist.Uuid, skip)
}
@ -475,6 +503,30 @@ func (gist *Gist) Identifier() string {
return gist.Uuid
}
func (gist *Gist) GetLanguagesFromFiles() ([]string, error) {
files, err := gist.Files("HEAD", true)
if err != nil {
return nil, err
}
languages := make([]string, 0, len(files))
for _, file := range files {
var lexer chroma.Lexer
if lexer = lexers.Get(file.Filename); lexer == nil {
lexer = lexers.Fallback
}
fileType := lexer.Config().Name
if lexer.Config().Name == "fallback" || lexer.Config().Name == "plaintext" {
fileType = "Text"
}
languages = append(languages, fileType)
}
return languages, nil
}
// -- DTO -- //
type GistDTO struct {
@ -507,3 +559,74 @@ func (dto *GistDTO) ToExistingGist(gist *Gist) *Gist {
gist.URL = dto.URL
return gist
}
// -- Index -- //
func (gist *Gist) ToIndexedGist() (*index.Gist, error) {
files, err := gist.Files("HEAD", true)
if err != nil {
return nil, err
}
exts := make([]string, 0, len(files))
wholeContent := ""
for _, file := range files {
wholeContent += file.Content
exts = append(exts, filepath.Ext(file.Filename))
}
fileNames, err := gist.FileNames("HEAD")
if err != nil {
return nil, err
}
langs, err := gist.GetLanguagesFromFiles()
if err != nil {
return nil, err
}
indexedGist := &index.Gist{
GistID: gist.ID,
Username: gist.User.Username,
Title: gist.Title,
Content: wholeContent,
Filenames: fileNames,
Extensions: exts,
Languages: langs,
CreatedAt: gist.CreatedAt,
UpdatedAt: gist.UpdatedAt,
}
return indexedGist, nil
}
func (gist *Gist) AddInIndex() {
if !index.Enabled() {
return
}
go func() {
indexedGist, err := gist.ToIndexedGist()
if err != nil {
log.Error().Err(err).Msgf("Cannot convert gist %d to indexed gist", gist.ID)
return
}
err = index.AddInIndex(indexedGist)
if err != nil {
log.Error().Err(err).Msgf("Error adding gist %d to index", gist.ID)
}
}()
}
func (gist *Gist) RemoveFromIndex() {
if !index.Enabled() {
return
}
go func() {
err := index.RemoveFromIndex(gist.ID)
if err != nil {
log.Error().Err(err).Msgf("Error remove gist %d from index", gist.ID)
}
}()
}

View File

@ -68,6 +68,14 @@ gist.list.files: files
gist.list.last-active: Last active
gist.list.no-gists: No gists
gist.search.found: gists found
gist.search.no-results: No gists found
gist.search.help.user: gists created by user
gist.search.help.title: gists with given title
gist.search.help.filename: gists having files with given name
gist.search.help.extension: gists having files with given extension
gist.search.help.language: gists having files with given language
gist.forks: Forks
gist.forks.view: View fork
gist.forks.no: No public forks
@ -164,6 +172,7 @@ admin.actions.sync-db: Synchronize gists from database
admin.actions.git-gc: Garbage collect all git repositories
admin.actions.sync-previews: Synchronize all gists previews
admin.actions.reset-hooks: Reset Git server hooks for all repositories
admin.actions.index-gists: Index all gists
admin.id: ID
admin.user: User
admin.delete: Delete

157
internal/index/bleve.go Normal file
View File

@ -0,0 +1,157 @@
package index
import (
"errors"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/search/query"
"github.com/thomiceli/opengist/internal/config"
"strconv"
)
var bleveIndex bleve.Index
func Enabled() bool {
return config.C.IndexEnabled
}
func Open(indexFilename string) error {
var err error
bleveIndex, err = bleve.Open(indexFilename)
if err == nil {
return nil
}
if !errors.Is(err, bleve.ErrorIndexPathDoesNotExist) {
return err
}
docMapping := bleve.NewDocumentMapping()
docMapping.AddFieldMappingsAt("GistID", bleve.NewNumericFieldMapping())
docMapping.AddFieldMappingsAt("Content", bleve.NewTextFieldMapping())
mapping := bleve.NewIndexMapping()
if err = mapping.AddCustomTokenFilter("unicodeNormalize", map[string]any{
"type": unicodenorm.Name,
"form": unicodenorm.NFC,
}); err != nil {
return err
}
if err = mapping.AddCustomAnalyzer("gistAnalyser", map[string]interface{}{
"type": custom.Name,
"char_filters": []string{},
"tokenizer": unicode.Name,
"token_filters": []string{"unicodeNormalize", camelcase.Name, lowercase.Name},
}); err != nil {
return err
}
docMapping.DefaultAnalyzer = "gistAnalyser"
bleveIndex, err = bleve.New(indexFilename, mapping)
return err
}
func Close() error {
return bleveIndex.Close()
}
func AddInIndex(gist *Gist) error {
if !Enabled() {
return nil
}
if gist == nil {
return errors.New("failed to add nil gist to index")
}
return bleveIndex.Index(strconv.Itoa(int(gist.GistID)), gist)
}
func RemoveFromIndex(gistID uint) error {
if !Enabled() {
return nil
}
return bleveIndex.Delete(strconv.Itoa(int(gistID)))
}
func SearchGists(queryStr string, queryMetadata SearchGistMetadata, gistsIds []uint, page int) ([]uint, uint64, map[string]int, error) {
if !Enabled() {
return nil, 0, nil, nil
}
var err error
var indexerQuery query.Query
if queryStr != "" {
contentQuery := bleve.NewMatchPhraseQuery(queryStr)
contentQuery.FieldVal = "Content"
indexerQuery = contentQuery
} else {
contentQuery := bleve.NewMatchAllQuery()
indexerQuery = contentQuery
}
if len(gistsIds) > 0 {
repoQueries := make([]query.Query, 0, len(gistsIds))
truee := true
for _, id := range gistsIds {
f := float64(id)
qq := bleve.NewNumericRangeInclusiveQuery(&f, &f, &truee, &truee)
qq.SetField("GistID")
repoQueries = append(repoQueries, qq)
}
indexerQuery = bleve.NewConjunctionQuery(bleve.NewDisjunctionQuery(repoQueries...), indexerQuery)
}
addQuery := func(field, value string) {
if value != "" && value != "." {
q := bleve.NewMatchPhraseQuery(value)
q.FieldVal = field
indexerQuery = bleve.NewConjunctionQuery(indexerQuery, q)
}
}
addQuery("Username", queryMetadata.Username)
addQuery("Title", queryMetadata.Title)
addQuery("Extensions", "."+queryMetadata.Extension)
addQuery("Filenames", queryMetadata.Filename)
addQuery("Languages", queryMetadata.Language)
languageFacet := bleve.NewFacetRequest("Languages", 10)
perPage := 10
offset := (page - 1) * perPage
s := bleve.NewSearchRequestOptions(indexerQuery, perPage, offset, false)
s.AddFacet("languageFacet", languageFacet)
s.Fields = []string{"GistID"}
s.IncludeLocations = false
results, err := bleveIndex.Search(s)
if err != nil {
return nil, 0, nil, err
}
gistIds := make([]uint, 0, len(results.Hits))
for _, hit := range results.Hits {
gistIds = append(gistIds, uint(hit.Fields["GistID"].(float64)))
}
languageCounts := make(map[string]int)
if facets, found := results.Facets["languageFacet"]; found {
for _, term := range facets.Terms.Terms() {
languageCounts[term.Term] = term.Count
}
}
return gistIds, results.Total, languageCounts, nil
}

21
internal/index/gist.go Normal file
View File

@ -0,0 +1,21 @@
package index
type Gist struct {
GistID uint
Username string
Title string
Content string
Filenames []string
Extensions []string
Languages []string
CreatedAt int64
UpdatedAt int64
}
type SearchGistMetadata struct {
Username string
Title string
Filename string
Extension string
Language string
}

View File

@ -95,6 +95,7 @@ func runGitCommand(ch ssh.Channel, gitCmd string, key string, ip string) error {
if verb == "receive-pack" {
_ = gist.SetLastActiveNow()
_ = gist.UpdatePreviewAndCount(false)
gist.AddInIndex()
}
return nil

View File

@ -46,6 +46,7 @@ func adminIndex(ctx echo.Context) error {
setData(ctx, "gitGcRepos", actions.IsRunning(actions.GitGcRepos))
setData(ctx, "syncGistPreviews", actions.IsRunning(actions.SyncGistPreviews))
setData(ctx, "resetHooks", actions.IsRunning(actions.ResetHooks))
setData(ctx, "indexGists", actions.IsRunning(actions.IndexGists))
return html(ctx, "admin_index.html")
}
@ -116,6 +117,8 @@ func adminGistDelete(ctx echo.Context) error {
return errorRes(500, "Cannot delete this gist", err)
}
gist.RemoveFromIndex()
addFlash(ctx, "Gist has been deleted", "success")
return redirect(ctx, "/admin-panel/gists")
}
@ -150,6 +153,12 @@ func adminResetHooks(ctx echo.Context) error {
return redirect(ctx, "/admin-panel")
}
func adminIndexGists(ctx echo.Context) error {
addFlash(ctx, "Indexing all gists...", "success")
go actions.Run(actions.IndexGists)
return redirect(ctx, "/admin-panel")
}
func adminConfig(ctx echo.Context) error {
setData(ctx, "title", "Configuration")
setData(ctx, "htmlTitle", "Configuration - Admin panel")

View File

@ -8,6 +8,7 @@ import (
"fmt"
"github.com/rs/zerolog/log"
"github.com/thomiceli/opengist/internal/git"
"github.com/thomiceli/opengist/internal/index"
"github.com/thomiceli/opengist/internal/render"
"html/template"
"net/url"
@ -252,20 +253,20 @@ func allGists(ctx echo.Context) error {
}
}
renderedFiles := make([]*render.RenderedGist, 0, len(gists))
renderedGists := make([]*render.RenderedGist, 0, len(gists))
for _, gist := range gists {
rendered, err := render.HighlightGistPreview(gist)
if err != nil {
log.Warn().Err(err).Msg("Error rendering gist preview for " + gist.Identifier() + " - " + gist.PreviewFilename)
log.Error().Err(err).Msg("Error rendering gist preview for " + gist.Identifier() + " - " + gist.PreviewFilename)
}
renderedFiles = append(renderedFiles, &rendered)
renderedGists = append(renderedGists, &rendered)
}
if err != nil {
return errorRes(500, "Error fetching gists", err)
}
if err = paginate(ctx, renderedFiles, pageInt, 10, "gists", fromUserStr, 2, "&sort="+sort+"&order="+order); err != nil {
if err = paginate(ctx, renderedGists, pageInt, 10, "gists", fromUserStr, 2, "&sort="+sort+"&order="+order); err != nil {
return errorRes(404, "Page not found", nil)
}
@ -273,6 +274,69 @@ func allGists(ctx echo.Context) error {
return html(ctx, "all.html")
}
func search(ctx echo.Context) error {
var err error
content, meta := parseSearchQueryStr(ctx.QueryParam("q"))
pageInt := getPage(ctx)
var currentUserId uint
userLogged := getUserLogged(ctx)
if userLogged != nil {
currentUserId = userLogged.ID
} else {
currentUserId = 0
}
var visibleGistsIds []uint
visibleGistsIds, err = db.GetAllGistsVisibleByUser(currentUserId)
if err != nil {
return errorRes(500, "Error fetching gists", err)
}
gistsIds, nbHits, langs, err := index.SearchGists(content, index.SearchGistMetadata{
Username: meta["user"],
Title: meta["title"],
Filename: meta["filename"],
Extension: meta["extension"],
Language: meta["language"],
}, visibleGistsIds, pageInt)
if err != nil {
return errorRes(500, "Error searching gists", err)
}
gists, err := db.GetAllGistsByIds(gistsIds)
if err != nil {
return errorRes(500, "Error fetching gists", err)
}
renderedGists := make([]*render.RenderedGist, 0, len(gists))
for _, gist := range gists {
rendered, err := render.HighlightGistPreview(gist)
if err != nil {
log.Error().Err(err).Msg("Error rendering gist preview for " + gist.Identifier() + " - " + gist.PreviewFilename)
}
renderedGists = append(renderedGists, &rendered)
}
if pageInt > 1 && len(renderedGists) != 0 {
setData(ctx, "prevPage", pageInt-1)
}
if 10*pageInt < int(nbHits) {
setData(ctx, "nextPage", pageInt+1)
}
setData(ctx, "prevLabel", tr(ctx, "pagination.previous"))
setData(ctx, "nextLabel", tr(ctx, "pagination.next"))
setData(ctx, "urlPage", "search")
setData(ctx, "urlParams", template.URL("&q="+ctx.QueryParam("q")))
setData(ctx, "htmlTitle", "Search results")
setData(ctx, "nbHits", nbHits)
setData(ctx, "gists", renderedGists)
setData(ctx, "langs", langs)
setData(ctx, "searchQuery", ctx.QueryParam("q"))
return html(ctx, "search.html")
}
func gistIndex(ctx echo.Context) error {
if getData(ctx, "gistpage") == "js" {
return gistJs(ctx)
@ -545,6 +609,8 @@ func processCreate(ctx echo.Context) error {
}
}
gist.AddInIndex()
return redirect(ctx, "/"+user.Username+"/"+gist.Identifier())
}
@ -566,6 +632,7 @@ func deleteGist(ctx echo.Context) error {
if err := gist.Delete(); err != nil {
return errorRes(500, "Error deleting this gist", err)
}
gist.RemoveFromIndex()
addFlash(ctx, "Gist has been deleted", "success")
return redirect(ctx, "/")

View File

@ -218,6 +218,7 @@ func pack(ctx echo.Context, serviceType string) error {
_ = gist.SetLastActiveNow()
_ = gist.UpdatePreviewAndCount(false)
gist.AddInIndex()
}
return nil
}

View File

@ -3,7 +3,9 @@ package web
import (
"context"
"encoding/json"
"errors"
"fmt"
"github.com/thomiceli/opengist/internal/index"
htmlpkg "html"
"html/template"
"io"
@ -115,6 +117,22 @@ var (
"safe": func(s string) template.HTML {
return template.HTML(s)
},
"dict": func(values ...interface{}) (map[string]interface{}, error) {
if len(values)%2 != 0 {
return nil, errors.New("invalid dict call")
}
dict := make(map[string]interface{})
for i := 0; i < len(values); i += 2 {
key, ok := values[i].(string)
if !ok {
return nil, errors.New("dict keys must be strings")
}
dict[key] = values[i+1]
}
return dict, nil
},
"addMetadataToSearchQuery": addMetadataToSearchQuery,
"indexEnabled": index.Enabled,
}
)
@ -223,7 +241,6 @@ func NewServer(isDev bool) *Server {
g1.DELETE("/settings/ssh-keys/:id", sshKeysDelete, logged)
g1.PUT("/settings/password", passwordProcess, logged)
g1.PUT("/settings/username", usernameProcess, logged)
g2 := g1.Group("/admin-panel")
{
g2.Use(adminPermission)
@ -237,6 +254,7 @@ func NewServer(isDev bool) *Server {
g2.POST("/gc-repos", adminGcRepos)
g2.POST("/sync-previews", adminSyncGistPreviews)
g2.POST("/reset-hooks", adminResetHooks)
g2.POST("/index-gists", adminIndexGists)
g2.GET("/configuration", adminConfig)
g2.PUT("/set-config", adminSetConfig)
}
@ -246,7 +264,13 @@ func NewServer(isDev bool) *Server {
}
g1.GET("/all", allGists, checkRequireLogin)
g1.GET("/search", allGists, checkRequireLogin)
if index.Enabled() {
g1.GET("/search", search, checkRequireLogin)
} else {
g1.GET("/search", allGists, checkRequireLogin)
}
g1.GET("/:user", allGists, checkRequireLogin)
g1.GET("/:user/liked", allGists, checkRequireLogin)
g1.GET("/:user/forked", allGists, checkRequireLogin)

View File

@ -1,11 +1,10 @@
package test
import (
"testing"
"github.com/stretchr/testify/require"
"github.com/thomiceli/opengist/internal/db"
"github.com/thomiceli/opengist/internal/git"
"testing"
)
func TestGists(t *testing.T) {

View File

@ -133,14 +133,13 @@ func setup(t *testing.T) {
git.ReposDirectory = path.Join("tests")
config.C.IndexEnabled = false
config.C.LogLevel = "debug"
config.InitLog()
homePath := config.GetHomeDir()
log.Info().Msg("Data directory: " + homePath)
err = os.MkdirAll(filepath.Join(homePath, "repos"), 0755)
require.NoError(t, err, "Could not create repos directory")
err = os.MkdirAll(filepath.Join(homePath, "tmp", "repos"), 0755)
require.NoError(t, err, "Could not create tmp repos directory")
@ -149,6 +148,9 @@ func setup(t *testing.T) {
err = memdb.Setup()
require.NoError(t, err, "Could not initialize in memory database")
// err = index.Open(filepath.Join(homePath, "testsindex", "opengist.index"))
// require.NoError(t, err, "Could not open index")
}
func teardown(t *testing.T, s *testServer) {
@ -159,4 +161,10 @@ func teardown(t *testing.T, s *testServer) {
err = os.RemoveAll(path.Join(config.C.OpengistHome, "tests"))
require.NoError(t, err, "Could not remove repos directory")
// err = os.RemoveAll(path.Join(config.C.OpengistHome, "testsindex"))
// require.NoError(t, err, "Could not remove repos directory")
// err = index.Close()
// require.NoError(t, err, "Could not close index")
}

View File

@ -248,6 +248,46 @@ func tr(ctx echo.Context, key string) template.HTML {
return l.Tr(key)
}
func parseSearchQueryStr(query string) (string, map[string]string) {
words := strings.Fields(query)
metadata := make(map[string]string)
var contentBuilder strings.Builder
for _, word := range words {
if strings.Contains(word, ":") {
keyValue := strings.SplitN(word, ":", 2)
if len(keyValue) == 2 {
key := keyValue[0]
value := keyValue[1]
metadata[key] = value
}
} else {
contentBuilder.WriteString(word + " ")
}
}
content := strings.TrimSpace(contentBuilder.String())
return content, metadata
}
func addMetadataToSearchQuery(input, key, value string) string {
content, metadata := parseSearchQueryStr(input)
metadata[key] = value
var resultBuilder strings.Builder
resultBuilder.WriteString(content)
for k, v := range metadata {
resultBuilder.WriteString(" ")
resultBuilder.WriteString(k)
resultBuilder.WriteString(":")
resultBuilder.WriteString(v)
}
return strings.TrimSpace(resultBuilder.String())
}
type Argon2ID struct {
format string
version int