mirror of
https://github.com/derfenix/webarchive.git
synced 2026-03-11 12:41:54 +03:00
Improved single_file processor, refactoring
Reduce inlined image size; fetch page metadata before saving the page and putting it into the processing queue
This commit is contained in:
@@ -3,6 +3,7 @@ package entity
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"runtime/debug"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -80,16 +81,18 @@ func (p *Page) SetProcessing() {
|
||||
p.Status = StatusProcessing
|
||||
}
|
||||
|
||||
func (p *Page) Process(ctx context.Context, processor Processor) {
|
||||
innerWG := sync.WaitGroup{}
|
||||
innerWG.Add(len(p.Formats))
|
||||
|
||||
func (p *Page) Prepare(ctx context.Context, processor Processor) {
|
||||
meta, err := processor.GetMeta(ctx, p.URL, p.cache)
|
||||
if err != nil {
|
||||
p.Meta.Error = err.Error()
|
||||
} else {
|
||||
p.Meta = meta
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Page) Process(ctx context.Context, processor Processor) {
|
||||
innerWG := sync.WaitGroup{}
|
||||
innerWG.Add(len(p.Formats))
|
||||
|
||||
results := Results{}
|
||||
|
||||
@@ -99,7 +102,7 @@ func (p *Page) Process(ctx context.Context, processor Processor) {
|
||||
|
||||
defer func() {
|
||||
if err := recover(); err != nil {
|
||||
results.Add(Result{Format: format, Err: fmt.Errorf("recovered from panic: %v", err)})
|
||||
results.Add(Result{Format: format, Err: fmt.Errorf("recovered from panic: %v (%s)", err, string(debug.Stack()))})
|
||||
}
|
||||
}()
|
||||
|
||||
|
||||
@@ -66,6 +66,16 @@ func (w *Worker) Start(ctx context.Context, wg *sync.WaitGroup) {
|
||||
func (w *Worker) do(ctx context.Context, wg *sync.WaitGroup, page *Page, log *zap.Logger) {
|
||||
defer wg.Done()
|
||||
|
||||
page.SetProcessing()
|
||||
if err := w.pages.Save(ctx, page); err != nil {
|
||||
w.log.Error(
|
||||
"failed to save processing page",
|
||||
zap.String("page_id", page.ID.String()),
|
||||
zap.String("page_url", page.URL),
|
||||
zap.Error(err),
|
||||
)
|
||||
}
|
||||
|
||||
page.Process(ctx, w.processor)
|
||||
|
||||
log.Debug("page processed")
|
||||
|
||||
Reference in New Issue
Block a user