Improved single_file processor, refactoring

Reduce inlined image size, get page metadata before save and put into processing queue
This commit is contained in:
2023-11-24 14:25:17 +03:00
parent 7e53519ca0
commit 870f13f7bf
14 changed files with 325 additions and 96 deletions

2
.gitignore vendored
View File

@@ -43,3 +43,5 @@ fabric.properties
go.work go.work
test.http test.http
db db
http-client.env.json
http-client.private.env.json

6
.idea/swagger-settings.xml generated Normal file
View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="SwaggerSettings">
<option name="defaultPreviewType" value="SWAGGER_UI" />
</component>
</project>

View File

@@ -0,0 +1,255 @@
package internal
import (
"bytes"
"context"
"encoding/base64"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"github.com/disintegration/imaging"
"github.com/gabriel-vasile/mimetype"
"go.uber.org/zap"
"golang.org/x/net/html"
)
// MediaInline walks a parsed HTML document and replaces references to
// external resources with base64 data URIs built from the fetched content.
type MediaInline struct {
// log receives errors for resources that fail to load, decode or resize.
log *zap.Logger
// getter is called with the absolute resource URL and returns the HTTP
// response whose body is inlined.
getter func(context.Context, string) (*http.Response, error)
}
// NewMediaInline returns a MediaInline that reports through log and downloads
// resources by calling getter.
func NewMediaInline(log *zap.Logger, getter func(context.Context, string) (*http.Response, error)) *MediaInline {
	inliner := new(MediaInline)
	inliner.log = log
	inliner.getter = getter

	return inliner
}
// Inline parses the HTML document read from reader, visits every node of the
// resulting tree and rewrites resource references using pageURL as the base
// for relative URLs.
//
// It returns the root of the modified tree. An error is returned only when
// the document or pageURL cannot be parsed; failures on individual nodes are
// logged by the tree walk and do not abort it.
func (m *MediaInline) Inline(ctx context.Context, reader io.Reader, pageURL string) (*html.Node, error) {
	root, parseErr := html.Parse(reader)
	if parseErr != nil {
		return nil, fmt.Errorf("parse response body: %w", parseErr)
	}

	base, urlErr := url.Parse(pageURL)
	if urlErr != nil {
		return nil, fmt.Errorf("parse page url: %w", urlErr)
	}

	m.visit(ctx, root, m.processorFunc, base)

	return root, nil
}
// processorFunc is the per-node callback used by the tree walk. It dispatches
// element nodes by tag name:
//
//   - link:        the href target is inlined (see processHref);
//   - script, img: the src target is inlined (see processSrc);
//   - a:           the href is made absolute (see processAHref).
//
// Every other node is left untouched. The returned error wraps the failure of
// the selected handler together with the tag name and the node's attributes.
func (m *MediaInline) processorFunc(ctx context.Context, node *html.Node, baseURL *url.URL) error {
	// Text and comment nodes store their content in Data as well, so a text
	// node containing just "a" or "img" must not be mistaken for an element.
	if node.Type != html.ElementNode {
		return nil
	}

	switch node.Data {
	case "link":
		if err := m.processHref(ctx, node.Attr, baseURL); err != nil {
			return fmt.Errorf("process link %s: %w", node.Attr, err)
		}

	case "script", "img":
		// Report the actual tag name so img failures are not labelled "script".
		if err := m.processSrc(ctx, node.Attr, baseURL); err != nil {
			return fmt.Errorf("process %s %s: %w", node.Data, node.Attr, err)
		}

	case "a":
		if err := m.processAHref(node.Attr, baseURL); err != nil {
			return fmt.Errorf("process a href %s: %w", node.Attr, err)
		}
	}

	return nil
}
// processAHref rewrites, in place, the value of every href attribute in attrs
// with the result of normalizeURL against baseURL. It always returns nil.
func (m *MediaInline) processAHref(attrs []html.Attribute, baseURL *url.URL) error {
	for i := range attrs {
		if attrs[i].Key != "href" {
			continue
		}

		attrs[i].Val = normalizeURL(attrs[i].Val, baseURL)
	}

	return nil
}
// processHref inlines the resource referenced by a <link> element.
//
// The element is processed only when its rel attribute is exactly one of
// "stylesheet", "icon", "alternate icon", "shortcut icon" or "manifest" and
// it carries an href attribute. In that case the href value is replaced, in
// place, with the data URI produced by loadAndEncode.
//
// It returns the error reported by loadAndEncode, if any.
func (m *MediaInline) processHref(ctx context.Context, attrs []html.Attribute, baseURL *url.URL) error {
	var shouldProcess bool

	// -1 marks "no href seen". Defaulting to 0 would make a link without
	// href overwrite its first attribute (typically rel) with an empty value.
	hrefIdx := -1

	for idx, attr := range attrs {
		switch attr.Key {
		case "rel":
			switch attr.Val {
			case "stylesheet", "icon", "alternate icon", "shortcut icon", "manifest":
				shouldProcess = true
			}

		case "href":
			hrefIdx = idx
		}
	}

	if !shouldProcess || hrefIdx < 0 {
		return nil
	}

	encodedValue, err := m.loadAndEncode(ctx, baseURL, attrs[hrefIdx].Val)
	if err != nil {
		return err
	}

	attrs[hrefIdx].Val = encodedValue

	return nil
}
// processSrc inlines the resource referenced by a <script> or <img> element.
//
// The element is processed only when it has a src attribute; the result is
// always written back to that attribute. When a non-empty data-src attribute
// is also present its value is used as the URL to load instead of src,
// regardless of the order in which the two attributes appear.
//
// It returns the error reported by loadAndEncode, if any.
func (m *MediaInline) processSrc(ctx context.Context, attrs []html.Attribute, baseURL *url.URL) error {
	srcIdx := -1

	var lazySrc string

	for idx, attr := range attrs {
		switch attr.Key {
		case "src":
			srcIdx = idx

		case "data-src":
			lazySrc = attr.Val
		}
	}

	// Without a src attribute there is no slot to write the result into.
	if srcIdx < 0 {
		return nil
	}

	// An empty data-src must not blank out a usable src.
	value := attrs[srcIdx].Val
	if lazySrc != "" {
		value = lazySrc
	}

	encodedValue, err := m.loadAndEncode(ctx, baseURL, value)
	if err != nil {
		return err
	}

	attrs[srcIdx].Val = encodedValue

	return nil
}
// loadAndEncode downloads the resource that value points to and returns it as
// a base64 data URI.
//
// value is resolved against baseURL with normalizeURL. The original value is
// returned unchanged, with a nil error, when it is already a data: URI, when
// normalizeURL yields an empty string, or when the getter fails (the failure
// is logged). An empty value yields an empty result.
//
// The media type of the data URI comes from the response's Content-Type
// header, falling back to "text/plain"; encodeResource may override it when it
// re-encodes the payload. A non-nil error is returned only when reading or
// encoding the response body fails.
func (m *MediaInline) loadAndEncode(ctx context.Context, baseURL *url.URL, value string) (string, error) {
	if value == "" {
		return "", nil
	}

	// A data: URI is already inline; there is nothing to fetch.
	const dataScheme = "data:"
	if len(value) >= len(dataScheme) && strings.EqualFold(value[:len(dataScheme)], dataScheme) {
		return value, nil
	}

	normalizedURL := normalizeURL(value, baseURL)
	if normalizedURL == "" {
		return value, nil
	}

	response, err := m.getter(ctx, normalizedURL)
	if err != nil {
		// Best effort: keep the original reference when the resource is unavailable.
		m.log.Sugar().With(zap.Error(err)).Errorf("load %s", normalizedURL)

		return value, nil
	}

	defer func() {
		_ = response.Body.Close()
	}()

	mime := "text/plain"
	if ct := response.Header.Get("Content-Type"); ct != "" {
		mime = ct
	}

	encodedVal, err := m.encodeResource(response.Body, &mime)
	if err != nil {
		return value, fmt.Errorf("encode resource: %w", err)
	}

	// Everything from the first "+" on is dropped from the media type.
	// NOTE(review): this turns "image/svg+xml" into "image/svg" — confirm
	// that the consumers of the archive rely on this before changing it.
	mime, _, _ = strings.Cut(mime, "+")

	// No whitespace after the comma: RFC 2397 does not allow it and strict
	// data URI parsers reject it.
	return fmt.Sprintf("data:%s;base64,%s", mime, encodedVal), nil
}
// visit walks the tree in document order (a node, then its descendants, then
// its following siblings), starting at n, and calls proc on every node.
// Errors returned by proc are logged and do not stop the walk.
func (m *MediaInline) visit(ctx context.Context, n *html.Node, proc func(context.Context, *html.Node, *url.URL) error, baseURL *url.URL) {
	current := n

	for {
		if procErr := proc(ctx, current, baseURL); procErr != nil {
			m.log.Error("process error", zap.Error(procErr))
		}

		// Descend before moving sideways so children are handled ahead of
		// the following siblings.
		if child := current.FirstChild; child != nil {
			m.visit(ctx, child, proc, baseURL)
		}

		if current.NextSibling == nil {
			return
		}

		current = current.NextSibling
	}
}
// normalizeURL turns resourceURL into an absolute URL using base.
//
//   - "about:" URLs yield an empty string;
//   - protocol-relative URLs ("//host/path") inherit the scheme of base,
//     falling back to https when base has no scheme;
//   - everything else is resolved against base following RFC 3986.
//
// resourceURL is returned unchanged when it cannot be parsed or when base is
// nil.
func normalizeURL(resourceURL string, base *url.URL) string {
	if strings.HasPrefix(resourceURL, "about:") {
		return ""
	}

	if strings.HasPrefix(resourceURL, "//") {
		// A network-path reference takes the scheme of the document it
		// appears in; forcing https breaks resources of http-only sites.
		scheme := "https"
		if base != nil && base.Scheme != "" {
			scheme = base.Scheme
		}

		return scheme + ":" + resourceURL
	}

	parsedResourceURL, err := url.Parse(resourceURL)
	if err != nil || base == nil {
		return resourceURL
	}

	return base.ResolveReference(parsedResourceURL).String()
}
// encodeResource reads r to the end, passes the bytes through
// preprocessResource and returns the result encoded with standard base64.
//
// mime is handed to preprocessResource, which may overwrite the pointed-to
// media type. Read and preprocessing failures are returned wrapped.
func (m *MediaInline) encodeResource(r io.Reader, mime *string) (string, error) {
	raw, readErr := io.ReadAll(r)
	if readErr != nil {
		return "", fmt.Errorf("read data: %w", readErr)
	}

	processed, procErr := m.preprocessResource(raw, mime)
	if procErr != nil {
		return "", fmt.Errorf("preprocess resource: %w", procErr)
	}

	return base64.StdEncoding.EncodeToString(processed), nil
}
// preprocessResource shrinks oversized images before they are inlined.
//
// The content type is sniffed from data. Payloads that are not images, images
// that cannot be decoded and images no larger than 1024 pixels on either side
// are returned unchanged. Larger images are scaled down to fit within
// 1024x1024 while keeping their aspect ratio, re-encoded as JPEG (quality 90),
// and *mime is set to "image/jpeg".
//
// Decode and encode failures are logged and the original data is returned;
// the error result is always nil.
func (m *MediaInline) preprocessResource(data []byte, mime *string) ([]byte, error) {
	if !strings.HasPrefix(mimetype.Detect(data).String(), "image") {
		return data, nil
	}

	decodedImage, err := imaging.Decode(bytes.NewReader(data))
	if err != nil {
		m.log.Error("failed to decode image", zap.Error(err))

		return data, nil
	}

	const maxSide = 1024

	size := decodedImage.Bounds().Size()
	if size.X <= maxSide && size.Y <= maxSide {
		return data, nil
	}

	// Fit only scales down and preserves the aspect ratio. Thumbnail would
	// crop the picture to a square and may upscale its shorter side.
	resized := imaging.Fit(decodedImage, maxSide, maxSide, imaging.Lanczos)

	buf := bytes.NewBuffer(nil)
	if err := imaging.Encode(buf, resized, imaging.JPEG, imaging.JPEGQuality(90)); err != nil {
		m.log.Error("failed to create resized image", zap.Error(err))

		return data, nil
	}

	*mime = "image/jpeg"

	m.log.Info("Resized")

	return buf.Bytes(), nil
}

View File

@@ -40,9 +40,9 @@ func (p *PDF) Process(_ context.Context, url string, cache *entity.Cache) ([]ent
opts := wkhtmltopdf.NewPageOptions() opts := wkhtmltopdf.NewPageOptions()
opts.PrintMediaType.Set(p.cfg.MediaPrint) opts.PrintMediaType.Set(p.cfg.MediaPrint)
opts.JavascriptDelay.Set(200) opts.JavascriptDelay.Set(200)
opts.DisableJavascript.Set(true) opts.DisableJavascript.Set(false)
opts.LoadErrorHandling.Set("ignore") opts.LoadErrorHandling.Set("ignore")
opts.LoadMediaErrorHandling.Set("ignore") opts.LoadMediaErrorHandling.Set("skip")
opts.FooterRight.Set("[opts]") opts.FooterRight.Set("[opts]")
opts.HeaderLeft.Set(url) opts.HeaderLeft.Set(url)
opts.HeaderRight.Set(time.Now().Format(time.DateOnly)) opts.HeaderRight.Set(time.Now().Format(time.DateOnly))
@@ -50,9 +50,9 @@ func (p *PDF) Process(_ context.Context, url string, cache *entity.Cache) ([]ent
opts.Zoom.Set(p.cfg.Zoom) opts.Zoom.Set(p.cfg.Zoom)
opts.ViewportSize.Set(p.cfg.Viewport) opts.ViewportSize.Set(p.cfg.Viewport)
opts.NoBackground.Set(true) opts.NoBackground.Set(true)
opts.DisableLocalFileAccess.Set(true) opts.DisableLocalFileAccess.Set(false)
opts.DisableExternalLinks.Set(true) opts.DisableExternalLinks.Set(false)
opts.DisableInternalLinks.Set(true) opts.DisableInternalLinks.Set(false)
var page wkhtmltopdf.PageProvider var page wkhtmltopdf.PageProvider
if len(cache.Get()) > 0 { if len(cache.Get()) > 0 {

View File

@@ -10,6 +10,7 @@ import (
"strings" "strings"
"time" "time"
"go.uber.org/zap"
"golang.org/x/net/html" "golang.org/x/net/html"
"github.com/derfenix/webarchive/config" "github.com/derfenix/webarchive/config"
@@ -22,7 +23,7 @@ type processor interface {
Process(ctx context.Context, url string, cache *entity.Cache) ([]entity.File, error) Process(ctx context.Context, url string, cache *entity.Cache) ([]entity.File, error)
} }
func NewProcessors(cfg config.Config) (*Processors, error) { func NewProcessors(cfg config.Config, log *zap.Logger) (*Processors, error) {
jar, err := cookiejar.New(&cookiejar.Options{ jar, err := cookiejar.New(&cookiejar.Options{
PublicSuffixList: nil, PublicSuffixList: nil,
}) })
@@ -62,7 +63,7 @@ func NewProcessors(cfg config.Config) (*Processors, error) {
processors: map[entity.Format]processor{ processors: map[entity.Format]processor{
entity.FormatHeaders: NewHeaders(httpClient), entity.FormatHeaders: NewHeaders(httpClient),
entity.FormatPDF: NewPDF(cfg.PDF), entity.FormatPDF: NewPDF(cfg.PDF),
entity.FormatSingleFile: NewSingleFile(httpClient), entity.FormatSingleFile: NewSingleFile(httpClient, log),
}, },
} }

View File

@@ -6,6 +6,7 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"go.uber.org/zap/zaptest"
"github.com/derfenix/webarchive/config" "github.com/derfenix/webarchive/config"
"github.com/derfenix/webarchive/entity" "github.com/derfenix/webarchive/entity"
@@ -18,7 +19,7 @@ func TestProcessors_GetMeta(t *testing.T) {
cfg, err := config.NewConfig(ctx) cfg, err := config.NewConfig(ctx)
require.NoError(t, err) require.NoError(t, err)
procs, err := NewProcessors(cfg) procs, err := NewProcessors(cfg, zaptest.NewLogger(t))
require.NoError(t, err) require.NoError(t, err)
cache := entity.NewCache() cache := entity.NewCache()

View File

@@ -5,50 +5,46 @@ import (
"context" "context"
"fmt" "fmt"
"net/http" "net/http"
"net/url"
"go.uber.org/zap"
"golang.org/x/net/html" "golang.org/x/net/html"
"github.com/derfenix/webarchive/adapters/processors/internal"
"github.com/derfenix/webarchive/entity" "github.com/derfenix/webarchive/entity"
) )
func NewSingleFile(client *http.Client) *SingleFile { func NewSingleFile(client *http.Client, log *zap.Logger) *SingleFile {
return &SingleFile{client: client} return &SingleFile{client: client, log: log}
} }
type SingleFile struct { type SingleFile struct {
client *http.Client client *http.Client
log *zap.Logger
} }
func (s *SingleFile) Process(ctx context.Context, url string, cache *entity.Cache) ([]entity.File, error) { func (s *SingleFile) Process(ctx context.Context, pageURL string, cache *entity.Cache) ([]entity.File, error) {
reader := cache.Reader() reader := cache.Reader()
if reader == nil { if reader == nil {
response, err := s.get(ctx, url) response, err := s.get(ctx, pageURL)
if err != nil { if err != nil {
return nil, err return nil, err
} }
if response.Body != nil { defer func() {
defer func() { _ = response.Body.Close()
_ = response.Body.Close() }()
}()
}
reader = response.Body reader = response.Body
} }
htmlNode, err := html.Parse(reader) inlinedHTML, err := internal.NewMediaInline(s.log, s.get).Inline(ctx, reader, pageURL)
if err != nil { if err != nil {
return nil, fmt.Errorf("parse response body: %w", err) return nil, fmt.Errorf("inline media: %w", err)
}
if err := s.process(ctx, htmlNode, url); err != nil {
return nil, fmt.Errorf("process: %w", err)
} }
buf := bytes.NewBuffer(nil) buf := bytes.NewBuffer(nil)
if err := html.Render(buf, htmlNode); err != nil { if err := html.Render(buf, inlinedHTML); err != nil {
return nil, fmt.Errorf("render result html: %w", err) return nil, fmt.Errorf("render result html: %w", err)
} }
@@ -78,59 +74,3 @@ func (s *SingleFile) get(ctx context.Context, url string) (*http.Response, error
return response, nil return response, nil
} }
func (s *SingleFile) process(ctx context.Context, node *html.Node, pageURL string) error {
parsedURL, err := url.Parse(pageURL)
if err != nil {
return fmt.Errorf("parse page url: %w", err)
}
baseURL := fmt.Sprintf("%s://%s", parsedURL.Scheme, parsedURL.Host)
for child := node.FirstChild; child != nil; child = child.NextSibling {
var err error
switch child.Data {
case "head":
err = s.processHead(ctx, child, baseURL)
case "body":
err = s.processBody(ctx, child, baseURL)
}
if err != nil {
return err
}
}
return nil
}
func (s *SingleFile) processHead(ctx context.Context, node *html.Node, baseURL string) error {
for child := node.FirstChild; child != nil; child = child.NextSibling {
switch child.Data {
case "link":
if err := s.processHref(ctx, child.Attr, baseURL); err != nil {
return fmt.Errorf("process link %s: %w", child.Attr, err)
}
case "script":
if err := s.processSrc(ctx, child.Attr, baseURL); err != nil {
return fmt.Errorf("process script %s: %w", child.Attr, err)
}
}
}
return nil
}
func (s *SingleFile) processBody(ctx context.Context, child *html.Node, url string) error {
return nil
}
func (s *SingleFile) processHref(ctx context.Context, attrs []html.Attribute, baseURL string) error {
return nil
}
func (s *SingleFile) processSrc(ctx context.Context, attrs []html.Attribute, baseURL string) error {
return nil
}

View File

@@ -192,7 +192,7 @@ func (p *Page) ListUnprocessed(ctx context.Context) ([]entity.Page, error) {
return fmt.Errorf("get item: %w", err) return fmt.Errorf("get item: %w", err)
} }
if page.Status == entity.StatusNew { if page.Status == entity.StatusNew || page.Status == entity.StatusProcessing {
//goland:noinspection GoVetCopyLock //goland:noinspection GoVetCopyLock
pages = append(pages, page) //nolint:govet // didn't touch the lock here pages = append(pages, page) //nolint:govet // didn't touch the lock here
} }

View File

@@ -10,13 +10,14 @@ import (
"sync" "sync"
"time" "time"
"github.com/derfenix/webarchive/adapters/repository"
"github.com/dgraph-io/badger/v4" "github.com/dgraph-io/badger/v4"
"github.com/ogen-go/ogen/middleware" "github.com/ogen-go/ogen/middleware"
"go.uber.org/multierr" "go.uber.org/multierr"
"go.uber.org/zap" "go.uber.org/zap"
"go.uber.org/zap/zapcore" "go.uber.org/zap/zapcore"
"github.com/derfenix/webarchive/adapters/repository"
"github.com/derfenix/webarchive/adapters/processors" "github.com/derfenix/webarchive/adapters/processors"
badgerRepo "github.com/derfenix/webarchive/adapters/repository/badger" badgerRepo "github.com/derfenix/webarchive/adapters/repository/badger"
"github.com/derfenix/webarchive/api/openapi" "github.com/derfenix/webarchive/api/openapi"
@@ -41,7 +42,7 @@ func NewApplication(cfg config.Config) (Application, error) {
return Application{}, fmt.Errorf("new page repo: %w", err) return Application{}, fmt.Errorf("new page repo: %w", err)
} }
processor, err := processors.NewProcessors(cfg) processor, err := processors.NewProcessors(cfg, log.Named("processor"))
if err != nil { if err != nil {
return Application{}, fmt.Errorf("new processors: %w", err) return Application{}, fmt.Errorf("new processors: %w", err)
} }
@@ -50,7 +51,7 @@ func NewApplication(cfg config.Config) (Application, error) {
worker := entity.NewWorker(workerCh, pageRepo, processor, log.Named("worker")) worker := entity.NewWorker(workerCh, pageRepo, processor, log.Named("worker"))
server, err := openapi.NewServer( server, err := openapi.NewServer(
rest.NewService(pageRepo, workerCh), rest.NewService(pageRepo, workerCh, processor),
openapi.WithPathPrefix("/api/v1"), openapi.WithPathPrefix("/api/v1"),
openapi.WithMiddleware( openapi.WithMiddleware(
func(r middleware.Request, next middleware.Next) (middleware.Response, error) { func(r middleware.Request, next middleware.Next) (middleware.Response, error) {
@@ -190,6 +191,7 @@ func newLogger(cfg config.Logging) (*zap.Logger, error) {
logCfg.EncoderConfig.EncodeTime = zapcore.RFC3339TimeEncoder logCfg.EncoderConfig.EncodeTime = zapcore.RFC3339TimeEncoder
logCfg.EncoderConfig.EncodeDuration = zapcore.NanosDurationEncoder logCfg.EncoderConfig.EncodeDuration = zapcore.NanosDurationEncoder
logCfg.DisableCaller = true logCfg.DisableCaller = true
logCfg.DisableStacktrace = true
logCfg.Level = zap.NewAtomicLevelAt(zapcore.InfoLevel) logCfg.Level = zap.NewAtomicLevelAt(zapcore.InfoLevel)
if cfg.Debug { if cfg.Debug {

View File

@@ -3,6 +3,7 @@ package entity
import ( import (
"context" "context"
"fmt" "fmt"
"runtime/debug"
"sync" "sync"
"time" "time"
@@ -80,16 +81,18 @@ func (p *Page) SetProcessing() {
p.Status = StatusProcessing p.Status = StatusProcessing
} }
func (p *Page) Process(ctx context.Context, processor Processor) { func (p *Page) Prepare(ctx context.Context, processor Processor) {
innerWG := sync.WaitGroup{}
innerWG.Add(len(p.Formats))
meta, err := processor.GetMeta(ctx, p.URL, p.cache) meta, err := processor.GetMeta(ctx, p.URL, p.cache)
if err != nil { if err != nil {
p.Meta.Error = err.Error() p.Meta.Error = err.Error()
} else { } else {
p.Meta = meta p.Meta = meta
} }
}
func (p *Page) Process(ctx context.Context, processor Processor) {
innerWG := sync.WaitGroup{}
innerWG.Add(len(p.Formats))
results := Results{} results := Results{}
@@ -99,7 +102,7 @@ func (p *Page) Process(ctx context.Context, processor Processor) {
defer func() { defer func() {
if err := recover(); err != nil { if err := recover(); err != nil {
results.Add(Result{Format: format, Err: fmt.Errorf("recovered from panic: %v", err)}) results.Add(Result{Format: format, Err: fmt.Errorf("recovered from panic: %v (%s)", err, string(debug.Stack()))})
} }
}() }()

View File

@@ -66,6 +66,16 @@ func (w *Worker) Start(ctx context.Context, wg *sync.WaitGroup) {
func (w *Worker) do(ctx context.Context, wg *sync.WaitGroup, page *Page, log *zap.Logger) { func (w *Worker) do(ctx context.Context, wg *sync.WaitGroup, page *Page, log *zap.Logger) {
defer wg.Done() defer wg.Done()
page.SetProcessing()
if err := w.pages.Save(ctx, page); err != nil {
w.log.Error(
"failed to save processing page",
zap.String("page_id", page.ID.String()),
zap.String("page_url", page.URL),
zap.Error(err),
)
}
page.Process(ctx, w.processor) page.Process(ctx, w.processor)
log.Debug("page processed") log.Debug("page processed")

2
go.mod
View File

@@ -27,6 +27,7 @@ require (
github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dgraph-io/ristretto v0.1.1 // indirect github.com/dgraph-io/ristretto v0.1.1 // indirect
github.com/disintegration/imaging v1.6.2 // indirect
github.com/dlclark/regexp2 v1.10.0 // indirect github.com/dlclark/regexp2 v1.10.0 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect github.com/dustin/go-humanize v1.0.1 // indirect
github.com/fatih/color v1.15.0 // indirect github.com/fatih/color v1.15.0 // indirect
@@ -58,6 +59,7 @@ require (
go.opencensus.io v0.24.0 // indirect go.opencensus.io v0.24.0 // indirect
golang.org/x/crypto v0.14.0 // indirect golang.org/x/crypto v0.14.0 // indirect
golang.org/x/exp v0.0.0-20230725093048-515e97ebf090 // indirect golang.org/x/exp v0.0.0-20230725093048-515e97ebf090 // indirect
golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8 // indirect
golang.org/x/sync v0.4.0 // indirect golang.org/x/sync v0.4.0 // indirect
golang.org/x/sys v0.13.0 // indirect golang.org/x/sys v0.13.0 // indirect
golang.org/x/text v0.13.0 // indirect golang.org/x/text v0.13.0 // indirect

4
go.sum
View File

@@ -21,6 +21,8 @@ github.com/dgraph-io/ristretto v0.1.1 h1:6CWw5tJNgpegArSHpNHJKldNeq03FQCwYvfMVWa
github.com/dgraph-io/ristretto v0.1.1/go.mod h1:S1GPSBCYCIhmVNfcth17y2zZtQT6wzkzgwUve0VDWWA= github.com/dgraph-io/ristretto v0.1.1/go.mod h1:S1GPSBCYCIhmVNfcth17y2zZtQT6wzkzgwUve0VDWWA=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
github.com/disintegration/imaging v1.6.2 h1:w1LecBlG2Lnp8B3jk5zSuNqd7b4DXhcjwek1ei82L+c=
github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4=
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0= github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
@@ -169,6 +171,8 @@ golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20230725093048-515e97ebf090 h1:Di6/M8l0O2lCLc6VVRWhgCiApHV8MnQurBnFSHsQtNY= golang.org/x/exp v0.0.0-20230725093048-515e97ebf090 h1:Di6/M8l0O2lCLc6VVRWhgCiApHV8MnQurBnFSHsQtNY=
golang.org/x/exp v0.0.0-20230725093048-515e97ebf090/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc= golang.org/x/exp v0.0.0-20230725093048-515e97ebf090/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc=
golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8 h1:hVwzHzIUGRjiF7EcUjqNxk3NCfkPxbDKRdnNE1Rpg0U=
golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=

View File

@@ -20,17 +20,19 @@ type Pages interface {
GetFile(ctx context.Context, pageID, fileID uuid.UUID) (*entity.File, error) GetFile(ctx context.Context, pageID, fileID uuid.UUID) (*entity.File, error)
} }
func NewService(pages Pages, ch chan *entity.Page) *Service { func NewService(pages Pages, ch chan *entity.Page, processor entity.Processor) *Service {
return &Service{ return &Service{
pages: pages, pages: pages,
ch: ch, ch: ch,
processor: processor,
} }
} }
type Service struct { type Service struct {
openapi.UnimplementedHandler openapi.UnimplementedHandler
pages Pages processor entity.Processor
ch chan *entity.Page pages Pages
ch chan *entity.Page
} }
func (s *Service) GetPage(ctx context.Context, params openapi.GetPageParams) (openapi.GetPageRes, error) { func (s *Service) GetPage(ctx context.Context, params openapi.GetPageParams) (openapi.GetPageRes, error) {
@@ -79,7 +81,8 @@ func (s *Service) AddPage(ctx context.Context, req openapi.OptAddPageReq, params
} }
page := entity.NewPage(url, description, domainFormats...) page := entity.NewPage(url, description, domainFormats...)
page.Status = entity.StatusProcessing page.Status = entity.StatusNew
page.Prepare(ctx, s.processor)
if err := s.pages.Save(ctx, page); err != nil { if err := s.pages.Save(ctx, page); err != nil {
return nil, fmt.Errorf("save page: %w", err) return nil, fmt.Errorf("save page: %w", err)