Complete page-to-PDF loading and the base API

This commit is contained in:
2023-03-27 22:09:54 +03:00
parent 92469fa3a2
commit 91d8f676ae
24 changed files with 864 additions and 95 deletions

View File

@@ -29,8 +29,9 @@ func (P *PDF) Process(_ context.Context, url string) ([]entity.File, error) {
gen.Title.Set(url)
page := wkhtmltopdf.NewPage(url)
page.PrintMediaType.Set(true)
page.JavascriptDelay.Set(200)
page.LoadMediaErrorHandling.Set("abort")
page.LoadMediaErrorHandling.Set("ignore")
page.FooterRight.Set("[page]")
page.HeaderLeft.Set(url)
page.HeaderRight.Set(time.Now().Format(time.DateOnly))

View File

@@ -69,7 +69,7 @@ func (p *Processors) Process(ctx context.Context, format entity.Format, url stri
proc, ok := p.processors[format]
if !ok {
result.Err = fmt.Errorf("no processor registered for format %v", format)
result.Err = fmt.Errorf("no processor registered")
return result
}
@@ -86,7 +86,7 @@ func (p *Processors) Process(ctx context.Context, format entity.Format, url stri
return result
}
func (p *Processors) Override(format entity.Format, proc processor) error {
func (p *Processors) OverrideProcessor(format entity.Format, proc processor) error {
p.processors[format] = proc
return nil

Binary file not shown.

View File

@@ -1,40 +0,0 @@
package badger
import (
"context"
"fmt"
"github.com/dgraph-io/badger/v4"
"github.com/derfenix/webarchive/entity"
)
// NewFile returns a File repository backed by db whose storage keys start
// with "file:".
func NewFile(db *badger.DB) *File {
	repo := new(File)
	repo.db = db
	repo.prefix = []byte("file:")

	return repo
}
// File stores entity.File records in a badger database.
type File struct {
// db is the badger handle; SaveTx checks it for closure before writing.
db *badger.DB
// prefix is prepended to a file's ID to form its storage key.
prefix []byte
}
// SaveTx marshals file and writes it under its storage key using the
// caller-supplied transaction txn. The context argument is not used.
//
// It returns ErrDBClosed when the underlying database is closed, and a wrapped
// error when marshaling or the write into the transaction fails.
func (f *File) SaveTx(_ context.Context, txn *badger.Txn, file *entity.File) error {
	if f.db.IsClosed() {
		return ErrDBClosed
	}

	payload, marshalErr := marshal(file)
	if marshalErr != nil {
		return fmt.Errorf("marshal data: %w", marshalErr)
	}

	putErr := txn.Set(f.key(file), payload)
	if putErr != nil {
		return fmt.Errorf("put data: %w", putErr)
	}

	return nil
}
// key returns the storage key for file: the repository prefix followed by the
// string form of the file's ID.
//
// The key is assembled in a freshly allocated buffer rather than appended
// directly onto f.prefix. Appending onto the shared prefix would write into
// its backing array whenever that slice has spare capacity, letting keys
// produced by different calls overwrite one another.
func (f *File) key(file *entity.File) []byte {
	id := file.ID.String()

	key := make([]byte, 0, len(f.prefix)+len(id))
	key = append(key, f.prefix...)

	return append(key, id...)
}

View File

@@ -11,21 +11,60 @@ import (
"github.com/derfenix/webarchive/entity"
)
func NewPage(db *badger.DB, file *File) (*Page, error) {
func NewPage(db *badger.DB) (*Page, error) {
return &Page{
db: db,
prefix: []byte("page:"),
file: file,
}, nil
}
type Page struct {
db *badger.DB
prefix []byte
file *File
}
func (p *Page) Save(ctx context.Context, site *entity.Page) error {
// GetFile loads the page stored under pageID and returns the file with ID
// fileID from that page's results. The context argument is not used.
//
// A wrapped error is returned when the page cannot be read from the database
// or its stored value cannot be unmarshaled. When the page is found but none
// of its results contains a file with fileID, GetFile returns (nil, nil), so
// callers must check the returned pointer before dereferencing it.
func (p *Page) GetFile(_ context.Context, pageID, fileID uuid.UUID) (*entity.File, error) {
	// Only the ID is needed to build the lookup key; the remaining fields are
	// filled in by unmarshal below.
	page := entity.Page{ID: pageID}

	var file *entity.File

	err := p.db.View(func(txn *badger.Txn) error {
		item, err := txn.Get(p.key(&page))
		if err != nil {
			return fmt.Errorf("get data: %w", err)
		}

		err = item.Value(func(val []byte) error {
			if err := unmarshal(val, &page); err != nil {
				return fmt.Errorf("unmarshal data: %w", err)
			}

			return nil
		})
		if err != nil {
			return fmt.Errorf("get value: %w", err)
		}

		// Fetch the results once instead of re-evaluating the accessor in
		// every loop header and body.
		results := page.Results.Results()

		for i := range results {
			files := results[i].Files

			for j := range files {
				if files[j].ID == fileID {
					file = &files[j]

					// Stop at the first match; there is nothing left to look for.
					return nil
				}
			}
		}

		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("view: %w", err)
	}

	return file, nil
}
func (p *Page) Save(_ context.Context, site *entity.Page) error {
if p.db.IsClosed() {
return ErrDBClosed
}
@@ -40,14 +79,6 @@ func (p *Page) Save(ctx context.Context, site *entity.Page) error {
return fmt.Errorf("put data: %w", err)
}
for i, result := range site.Results.Results() {
for j, file := range result.Files {
if err := p.file.SaveTx(ctx, txn, &file); err != nil {
return fmt.Errorf("save file %d (%s) for result %d: %w", j, file.ID.String(), i, err)
}
}
}
return nil
}); err != nil {
return fmt.Errorf("update db: %w", err)