Improved single_file processor, refactoring

Reduce inlined image size; fetch page metadata before saving the page and putting it into the processing queue
This commit is contained in:
2023-11-24 14:25:17 +03:00
parent 7e53519ca0
commit 870f13f7bf
14 changed files with 325 additions and 96 deletions

View File

@@ -3,6 +3,7 @@ package entity
import (
"context"
"fmt"
"runtime/debug"
"sync"
"time"
@@ -80,16 +81,18 @@ func (p *Page) SetProcessing() {
p.Status = StatusProcessing
}
func (p *Page) Process(ctx context.Context, processor Processor) {
innerWG := sync.WaitGroup{}
innerWG.Add(len(p.Formats))
// Prepare asks the processor for the page's metadata, passing the page URL
// and the page's cache value. If the lookup fails, the error text is recorded
// in p.Meta.Error and the rest of p.Meta is left as it was; on success p.Meta
// is replaced with the returned metadata.
func (p *Page) Prepare(ctx context.Context, processor Processor) {
	fetched, fetchErr := processor.GetMeta(ctx, p.URL, p.cache)
	if fetchErr != nil {
		// Keep the failure visible on the page itself instead of returning it.
		p.Meta.Error = fetchErr.Error()
		return
	}

	p.Meta = fetched
}
func (p *Page) Process(ctx context.Context, processor Processor) {
innerWG := sync.WaitGroup{}
innerWG.Add(len(p.Formats))
results := Results{}
@@ -99,7 +102,7 @@ func (p *Page) Process(ctx context.Context, processor Processor) {
defer func() {
if err := recover(); err != nil {
results.Add(Result{Format: format, Err: fmt.Errorf("recovered from panic: %v", err)})
results.Add(Result{Format: format, Err: fmt.Errorf("recovered from panic: %v (%s)", err, string(debug.Stack()))})
}
}()

View File

@@ -66,6 +66,16 @@ func (w *Worker) Start(ctx context.Context, wg *sync.WaitGroup) {
func (w *Worker) do(ctx context.Context, wg *sync.WaitGroup, page *Page, log *zap.Logger) {
defer wg.Done()
page.SetProcessing()
if err := w.pages.Save(ctx, page); err != nil {
w.log.Error(
"failed to save processing page",
zap.String("page_id", page.ID.String()),
zap.String("page_url", page.URL),
zap.Error(err),
)
}
page.Process(ctx, w.processor)
log.Debug("page processed")