Initial commit

This commit is contained in:
2023-03-26 16:11:00 +03:00
commit 92469fa3a2
47 changed files with 5610 additions and 0 deletions

View File

@@ -0,0 +1,128 @@
package badger
import (
"errors"
"fmt"
"os"
"path"
"github.com/dgraph-io/badger/v4"
"github.com/dgraph-io/badger/v4/options"
"go.uber.org/zap"
)
const (
backupStartPath = "backup_start.db"
backupStopPath = "backup_stop.db"
)
type BackupType uint8
const (
BackupStart BackupType = iota
BackupStop
)
var ErrDBClosed = fmt.Errorf("database is closed")
type logger struct {
*zap.SugaredLogger
}
func (l *logger) Warningf(s string, i ...interface{}) {
l.SugaredLogger.Warnf(s, i...)
}
func NewBadger(dir string, log *zap.Logger) (*badger.DB, error) {
opts := badger.DefaultOptions(dir)
opts.Logger = &logger{SugaredLogger: log.Sugar()}
opts.Compression = options.ZSTD
opts.ZSTDCompressionLevel = 6
db, err := badger.Open(opts)
if err != nil {
return nil, fmt.Errorf("open database: %w", err)
}
if err := Backup(db, BackupStart); err != nil {
log.Error("backup on start failed", zap.Error(err))
}
return db, nil
}
func Backup(db *badger.DB, bt BackupType) error {
dir := db.Opts().Dir
var backupPath string
switch bt {
case BackupStart:
backupPath = path.Join(dir, backupStartPath)
case BackupStop:
backupPath = path.Join(dir, backupStopPath)
}
file, err := os.OpenFile(backupPath, os.O_CREATE|os.O_WRONLY, os.FileMode(0600))
if err != nil {
return fmt.Errorf("open backup file %s: %w", backupPath, err)
}
defer func() {
_ = file.Close()
}()
_, err = db.Backup(file, 0)
if err != nil {
return fmt.Errorf("backup: %w", err)
}
return nil
}
func Restore(db *badger.DB) error {
dir := db.Opts().Dir
backupPathStart := path.Join(dir, backupStartPath)
backupPathStop := path.Join(dir, backupStopPath)
startStat, err := os.Stat(backupPathStart)
if err != nil && !errors.Is(err, os.ErrNotExist) {
return fmt.Errorf("stat file %s: %w", backupPathStart, err)
}
stopStat, err := os.Stat(backupPathStop)
if err != nil && !errors.Is(err, os.ErrNotExist) {
return fmt.Errorf("stat file %s: %w", backupPathStop, err)
}
var backupFile string
switch {
case stopStat != nil && startStat != nil:
if stopStat.ModTime().After(startStat.ModTime()) {
backupFile = backupPathStop
} else {
backupFile = backupPathStart
}
case stopStat != nil:
backupFile = backupPathStart
case startStat != nil:
backupFile = backupPathStop
}
file, err := os.OpenFile(backupFile, os.O_RDONLY, os.FileMode(0600))
if err != nil {
return fmt.Errorf("open backup file %s: %w", backupFile, err)
}
defer func() {
_ = file.Close()
}()
if err := db.Load(file, 20); err != nil {
return fmt.Errorf("load backup: %w", err)
}
return nil
}

View File

@@ -0,0 +1,40 @@
package badger
import (
"context"
"fmt"
"github.com/dgraph-io/badger/v4"
"github.com/derfenix/webarchive/entity"
)
func NewFile(db *badger.DB) *File {
return &File{db: db, prefix: []byte("file:")}
}
type File struct {
db *badger.DB
prefix []byte
}
func (f *File) SaveTx(_ context.Context, txn *badger.Txn, file *entity.File) error {
if f.db.IsClosed() {
return ErrDBClosed
}
marshaled, err := marshal(file)
if err != nil {
return fmt.Errorf("marshal data: %w", err)
}
if err := txn.Set(f.key(file), marshaled); err != nil {
return fmt.Errorf("put data: %w", err)
}
return nil
}
func (f *File) key(file *entity.File) []byte {
return append(f.prefix, []byte(file.ID.String())...)
}

View File

@@ -0,0 +1,13 @@
package badger
import (
"github.com/vmihailenco/msgpack/v5"
)
func marshal(v interface{}) ([]byte, error) {
return msgpack.Marshal(v)
}
func unmarshal(b []byte, v interface{}) error {
return msgpack.Unmarshal(b, v)
}

View File

@@ -0,0 +1,142 @@
package badger
import (
"context"
"fmt"
"sort"
"github.com/dgraph-io/badger/v4"
"github.com/google/uuid"
"github.com/derfenix/webarchive/entity"
)
func NewPage(db *badger.DB, file *File) (*Page, error) {
return &Page{
db: db,
prefix: []byte("page:"),
file: file,
}, nil
}
type Page struct {
db *badger.DB
prefix []byte
file *File
}
func (p *Page) Save(ctx context.Context, site *entity.Page) error {
if p.db.IsClosed() {
return ErrDBClosed
}
marshaled, err := marshal(site)
if err != nil {
return fmt.Errorf("marshal data: %w", err)
}
if err := p.db.Update(func(txn *badger.Txn) error {
if err := txn.Set(p.key(site), marshaled); err != nil {
return fmt.Errorf("put data: %w", err)
}
for i, result := range site.Results.Results() {
for j, file := range result.Files {
if err := p.file.SaveTx(ctx, txn, &file); err != nil {
return fmt.Errorf("save file %d (%s) for result %d: %w", j, file.ID.String(), i, err)
}
}
}
return nil
}); err != nil {
return fmt.Errorf("update db: %w", err)
}
return nil
}
func (p *Page) Get(_ context.Context, id uuid.UUID) (*entity.Page, error) {
site := entity.Page{ID: id}
err := p.db.View(func(txn *badger.Txn) error {
data, err := txn.Get(p.key(&site))
if err != nil {
return fmt.Errorf("get data: %w", err)
}
err = data.Value(func(val []byte) error {
if err := unmarshal(val, &site); err != nil {
return fmt.Errorf("unmarshal data: %w", err)
}
return nil
})
if err != nil {
return fmt.Errorf("get value: %w", err)
}
return nil
})
if err != nil {
return nil, fmt.Errorf("view: %w", err)
}
return &site, nil
}
func (p *Page) ListAll(ctx context.Context) ([]*entity.Page, error) {
pages := make([]*entity.Page, 0, 100)
err := p.db.View(func(txn *badger.Txn) error {
iterator := txn.NewIterator(badger.DefaultIteratorOptions)
defer iterator.Close()
for iterator.Seek(p.prefix); iterator.ValidForPrefix(p.prefix); iterator.Next() {
if err := ctx.Err(); err != nil {
return fmt.Errorf("context canceled: %w", err)
}
var page entity.Page
err := iterator.Item().Value(func(val []byte) error {
if err := unmarshal(val, &page); err != nil {
return fmt.Errorf("unmarshal: %w", err)
}
return nil
})
if err != nil {
return fmt.Errorf("get item: %w", err)
}
pages = append(pages, &entity.Page{
ID: page.ID,
URL: page.URL,
Description: page.Description,
Created: page.Created,
Formats: page.Formats,
Version: page.Version,
Status: page.Status,
})
}
return nil
})
if err != nil {
return nil, fmt.Errorf("view: %w", err)
}
sort.Slice(pages, func(i, j int) bool {
return pages[i].Created.After(pages[j].Created)
})
return pages, nil
}
func (p *Page) key(site *entity.Page) []byte {
return append(p.prefix, []byte(site.ID.String())...)
}

View File

@@ -0,0 +1,60 @@
package badger
import (
"context"
"os"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/zap/zaptest"
"github.com/derfenix/webarchive/entity"
)
func TestSite(t *testing.T) {
t.Parallel()
if testing.Short() {
t.Skip("skip db test")
}
ctx := context.Background()
tempDir, err := os.MkdirTemp(os.TempDir(), "badger_test")
require.NoError(t, err)
t.Cleanup(func() {
assert.NoError(t, os.RemoveAll(tempDir))
})
log := zaptest.NewLogger(t)
db, err := NewBadger(tempDir, log.Named("db"))
require.NoError(t, err)
siteRepo, err := NewPage(db, nil)
require.NoError(t, err)
t.Run("base path", func(t *testing.T) {
t.Parallel()
site := entity.NewPage("https://google.com", "Save all google", entity.FormatPDF, entity.FormatSingleFile)
site.Created = site.Created.Truncate(time.Microsecond)
err := siteRepo.Save(ctx, site)
require.NoError(t, err)
storedSite, err := siteRepo.Get(ctx, site.ID)
require.NoError(t, err)
assert.Equal(t, site, storedSite)
all, err := siteRepo.ListAll(ctx)
require.NoError(t, err)
require.Len(t, all, 1)
assert.Equal(t, site, all[0])
})
}