mirror of
https://github.com/derfenix/webarchive.git
synced 2026-03-12 07:40:30 +03:00
Compare commits
5 Commits
v0.1.4
...
c2a5e04647
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c2a5e04647 | ||
| 8195a26aca | |||
|
|
b6393c7451 | ||
|
870f13f7bf
|
|||
|
7e53519ca0
|
2
.gitignore
vendored
2
.gitignore
vendored
@@ -43,3 +43,5 @@ fabric.properties
|
|||||||
go.work
|
go.work
|
||||||
test.http
|
test.http
|
||||||
db
|
db
|
||||||
|
http-client.env.json
|
||||||
|
http-client.private.env.json
|
||||||
|
|||||||
6
.idea/swagger-settings.xml
generated
Normal file
6
.idea/swagger-settings.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="SwaggerSettings">
|
||||||
|
<option name="defaultPreviewType" value="SWAGGER_UI" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
255
adapters/processors/internal/mediainline.go
Normal file
255
adapters/processors/internal/mediainline.go
Normal file
@@ -0,0 +1,255 @@
|
|||||||
|
package internal
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/base64"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/disintegration/imaging"
|
||||||
|
"github.com/gabriel-vasile/mimetype"
|
||||||
|
"go.uber.org/zap"
|
||||||
|
"golang.org/x/net/html"
|
||||||
|
)
|
||||||
|
|
||||||
|
type MediaInline struct {
|
||||||
|
log *zap.Logger
|
||||||
|
getter func(context.Context, string) (*http.Response, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewMediaInline(log *zap.Logger, getter func(context.Context, string) (*http.Response, error)) *MediaInline {
|
||||||
|
return &MediaInline{log: log, getter: getter}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MediaInline) Inline(ctx context.Context, reader io.Reader, pageURL string) (*html.Node, error) {
|
||||||
|
htmlNode, err := html.Parse(reader)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("parse response body: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
baseURL, err := url.Parse(pageURL)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("parse page url: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
m.visit(ctx, htmlNode, m.processorFunc, baseURL)
|
||||||
|
|
||||||
|
return htmlNode, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MediaInline) processorFunc(ctx context.Context, node *html.Node, baseURL *url.URL) error {
|
||||||
|
switch node.Data {
|
||||||
|
case "link":
|
||||||
|
if err := m.processHref(ctx, node.Attr, baseURL); err != nil {
|
||||||
|
return fmt.Errorf("process link %s: %w", node.Attr, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
case "script", "img":
|
||||||
|
if err := m.processSrc(ctx, node.Attr, baseURL); err != nil {
|
||||||
|
return fmt.Errorf("process script %s: %w", node.Attr, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
case "a":
|
||||||
|
if err := m.processAHref(node.Attr, baseURL); err != nil {
|
||||||
|
return fmt.Errorf("process a href %s: %w", node.Attr, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MediaInline) processAHref(attrs []html.Attribute, baseURL *url.URL) error {
|
||||||
|
for idx, attr := range attrs {
|
||||||
|
switch attr.Key {
|
||||||
|
case "href":
|
||||||
|
attrs[idx].Val = normalizeURL(attr.Val, baseURL)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MediaInline) processHref(ctx context.Context, attrs []html.Attribute, baseURL *url.URL) error {
|
||||||
|
var shouldProcess bool
|
||||||
|
var value string
|
||||||
|
var valueIdx int
|
||||||
|
|
||||||
|
for idx, attr := range attrs {
|
||||||
|
switch attr.Key {
|
||||||
|
case "rel":
|
||||||
|
switch attr.Val {
|
||||||
|
case "stylesheet", "icon", "alternate icon", "shortcut icon", "manifest":
|
||||||
|
shouldProcess = true
|
||||||
|
}
|
||||||
|
|
||||||
|
case "href":
|
||||||
|
value = attr.Val
|
||||||
|
valueIdx = idx
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !shouldProcess {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
encodedValue, err := m.loadAndEncode(ctx, baseURL, value)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
attrs[valueIdx].Val = encodedValue
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MediaInline) processSrc(ctx context.Context, attrs []html.Attribute, baseURL *url.URL) error {
|
||||||
|
var shouldProcess bool
|
||||||
|
var value string
|
||||||
|
var valueIdx int
|
||||||
|
|
||||||
|
for idx, attr := range attrs {
|
||||||
|
switch attr.Key {
|
||||||
|
case "src":
|
||||||
|
value = attr.Val
|
||||||
|
valueIdx = idx
|
||||||
|
shouldProcess = true
|
||||||
|
case "data-src":
|
||||||
|
value = attr.Val
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !shouldProcess {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
encodedValue, err := m.loadAndEncode(ctx, baseURL, value)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
attrs[valueIdx].Val = encodedValue
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MediaInline) loadAndEncode(ctx context.Context, baseURL *url.URL, value string) (string, error) {
|
||||||
|
mime := "text/plain"
|
||||||
|
|
||||||
|
if value == "" {
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
normalizedURL := normalizeURL(value, baseURL)
|
||||||
|
if normalizedURL == "" {
|
||||||
|
return value, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
response, err := m.getter(ctx, normalizedURL)
|
||||||
|
if err != nil {
|
||||||
|
m.log.Sugar().With(zap.Error(err)).Errorf("load %s", normalizedURL)
|
||||||
|
return value, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
_ = response.Body.Close()
|
||||||
|
}()
|
||||||
|
|
||||||
|
cleanMime := func(s string) string {
|
||||||
|
s, _, _ = strings.Cut(s, "+")
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
if ct := response.Header.Get("Content-Type"); ct != "" {
|
||||||
|
mime = ct
|
||||||
|
}
|
||||||
|
|
||||||
|
encodedVal, err := m.encodeResource(response.Body, &mime)
|
||||||
|
if err != nil {
|
||||||
|
return value, fmt.Errorf("encode resource: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return fmt.Sprintf("data:%s;base64, %s", cleanMime(mime), encodedVal), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MediaInline) visit(ctx context.Context, n *html.Node, proc func(context.Context, *html.Node, *url.URL) error, baseURL *url.URL) {
|
||||||
|
if err := proc(ctx, n, baseURL); err != nil {
|
||||||
|
m.log.Error("process error", zap.Error(err))
|
||||||
|
}
|
||||||
|
|
||||||
|
if n.FirstChild != nil {
|
||||||
|
m.visit(ctx, n.FirstChild, proc, baseURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
if n.NextSibling != nil {
|
||||||
|
m.visit(ctx, n.NextSibling, proc, baseURL)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeURL turns resourceURL, as found in a document loaded from base,
// into an absolute URL.
//
//   - Protocol-relative URLs ("//host/path") get the scheme of base, or https
//     when base has no scheme. Previously https was always used, which broke
//     pages served over plain http.
//   - "about:" URLs yield "", which callers treat as "leave the value alone".
//   - URLs that cannot be parsed are returned unchanged.
//   - Everything else is resolved against base.
func normalizeURL(resourceURL string, base *url.URL) string {
	if strings.HasPrefix(resourceURL, "//") {
		scheme := "https"
		if base != nil && base.Scheme != "" {
			scheme = base.Scheme
		}

		return scheme + ":" + resourceURL
	}

	if strings.HasPrefix(resourceURL, "about:") {
		return ""
	}

	parsedResourceURL, err := url.Parse(resourceURL)
	if err != nil {
		return resourceURL
	}

	return base.ResolveReference(parsedResourceURL).String()
}
|
||||||
|
|
||||||
|
func (m *MediaInline) encodeResource(r io.Reader, mime *string) (string, error) {
|
||||||
|
all, err := io.ReadAll(r)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("read data: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
all, err = m.preprocessResource(all, mime)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("preprocess resource: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return base64.StdEncoding.EncodeToString(all), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MediaInline) preprocessResource(data []byte, mime *string) ([]byte, error) {
|
||||||
|
detectedMime := mimetype.Detect(data)
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case strings.HasPrefix(detectedMime.String(), "image"):
|
||||||
|
decodedImage, err := imaging.Decode(bytes.NewBuffer(data))
|
||||||
|
if err != nil {
|
||||||
|
m.log.Error("failed to decode image", zap.Error(err))
|
||||||
|
|
||||||
|
return data, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if size := decodedImage.Bounds().Size(); size.X > 1024 || size.Y > 1024 {
|
||||||
|
thumbnail := imaging.Thumbnail(decodedImage, 1024, 1024, imaging.Lanczos)
|
||||||
|
buf := bytes.NewBuffer(nil)
|
||||||
|
|
||||||
|
if err := imaging.Encode(buf, thumbnail, imaging.JPEG, imaging.JPEGQuality(90)); err != nil {
|
||||||
|
m.log.Error("failed to create resized image", zap.Error(err))
|
||||||
|
|
||||||
|
return data, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
*mime = "image/jpeg"
|
||||||
|
m.log.Info("Resized")
|
||||||
|
|
||||||
|
return buf.Bytes(), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return data, nil
|
||||||
|
}
|
||||||
@@ -40,9 +40,9 @@ func (p *PDF) Process(_ context.Context, url string, cache *entity.Cache) ([]ent
|
|||||||
opts := wkhtmltopdf.NewPageOptions()
|
opts := wkhtmltopdf.NewPageOptions()
|
||||||
opts.PrintMediaType.Set(p.cfg.MediaPrint)
|
opts.PrintMediaType.Set(p.cfg.MediaPrint)
|
||||||
opts.JavascriptDelay.Set(200)
|
opts.JavascriptDelay.Set(200)
|
||||||
opts.DisableJavascript.Set(true)
|
opts.DisableJavascript.Set(false)
|
||||||
opts.LoadErrorHandling.Set("ignore")
|
opts.LoadErrorHandling.Set("ignore")
|
||||||
opts.LoadMediaErrorHandling.Set("ignore")
|
opts.LoadMediaErrorHandling.Set("skip")
|
||||||
opts.FooterRight.Set("[opts]")
|
opts.FooterRight.Set("[opts]")
|
||||||
opts.HeaderLeft.Set(url)
|
opts.HeaderLeft.Set(url)
|
||||||
opts.HeaderRight.Set(time.Now().Format(time.DateOnly))
|
opts.HeaderRight.Set(time.Now().Format(time.DateOnly))
|
||||||
@@ -50,9 +50,9 @@ func (p *PDF) Process(_ context.Context, url string, cache *entity.Cache) ([]ent
|
|||||||
opts.Zoom.Set(p.cfg.Zoom)
|
opts.Zoom.Set(p.cfg.Zoom)
|
||||||
opts.ViewportSize.Set(p.cfg.Viewport)
|
opts.ViewportSize.Set(p.cfg.Viewport)
|
||||||
opts.NoBackground.Set(true)
|
opts.NoBackground.Set(true)
|
||||||
opts.DisableLocalFileAccess.Set(true)
|
opts.DisableLocalFileAccess.Set(false)
|
||||||
opts.DisableExternalLinks.Set(true)
|
opts.DisableExternalLinks.Set(false)
|
||||||
opts.DisableInternalLinks.Set(true)
|
opts.DisableInternalLinks.Set(false)
|
||||||
|
|
||||||
var page wkhtmltopdf.PageProvider
|
var page wkhtmltopdf.PageProvider
|
||||||
if len(cache.Get()) > 0 {
|
if len(cache.Get()) > 0 {
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"go.uber.org/zap"
|
||||||
"golang.org/x/net/html"
|
"golang.org/x/net/html"
|
||||||
|
|
||||||
"github.com/derfenix/webarchive/config"
|
"github.com/derfenix/webarchive/config"
|
||||||
@@ -22,7 +23,7 @@ type processor interface {
|
|||||||
Process(ctx context.Context, url string, cache *entity.Cache) ([]entity.File, error)
|
Process(ctx context.Context, url string, cache *entity.Cache) ([]entity.File, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewProcessors(cfg config.Config) (*Processors, error) {
|
func NewProcessors(cfg config.Config, log *zap.Logger) (*Processors, error) {
|
||||||
jar, err := cookiejar.New(&cookiejar.Options{
|
jar, err := cookiejar.New(&cookiejar.Options{
|
||||||
PublicSuffixList: nil,
|
PublicSuffixList: nil,
|
||||||
})
|
})
|
||||||
@@ -62,7 +63,7 @@ func NewProcessors(cfg config.Config) (*Processors, error) {
|
|||||||
processors: map[entity.Format]processor{
|
processors: map[entity.Format]processor{
|
||||||
entity.FormatHeaders: NewHeaders(httpClient),
|
entity.FormatHeaders: NewHeaders(httpClient),
|
||||||
entity.FormatPDF: NewPDF(cfg.PDF),
|
entity.FormatPDF: NewPDF(cfg.PDF),
|
||||||
entity.FormatSingleFile: NewSingleFile(httpClient),
|
entity.FormatSingleFile: NewSingleFile(httpClient, log),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
"go.uber.org/zap/zaptest"
|
||||||
|
|
||||||
"github.com/derfenix/webarchive/config"
|
"github.com/derfenix/webarchive/config"
|
||||||
"github.com/derfenix/webarchive/entity"
|
"github.com/derfenix/webarchive/entity"
|
||||||
@@ -18,7 +19,7 @@ func TestProcessors_GetMeta(t *testing.T) {
|
|||||||
cfg, err := config.NewConfig(ctx)
|
cfg, err := config.NewConfig(ctx)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
procs, err := NewProcessors(cfg)
|
procs, err := NewProcessors(cfg, zaptest.NewLogger(t))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
cache := entity.NewCache()
|
cache := entity.NewCache()
|
||||||
|
|||||||
@@ -5,50 +5,46 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
|
||||||
|
|
||||||
|
"go.uber.org/zap"
|
||||||
"golang.org/x/net/html"
|
"golang.org/x/net/html"
|
||||||
|
|
||||||
|
"github.com/derfenix/webarchive/adapters/processors/internal"
|
||||||
"github.com/derfenix/webarchive/entity"
|
"github.com/derfenix/webarchive/entity"
|
||||||
)
|
)
|
||||||
|
|
||||||
func NewSingleFile(client *http.Client) *SingleFile {
|
func NewSingleFile(client *http.Client, log *zap.Logger) *SingleFile {
|
||||||
return &SingleFile{client: client}
|
return &SingleFile{client: client, log: log}
|
||||||
}
|
}
|
||||||
|
|
||||||
type SingleFile struct {
|
type SingleFile struct {
|
||||||
client *http.Client
|
client *http.Client
|
||||||
|
log *zap.Logger
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *SingleFile) Process(ctx context.Context, url string, cache *entity.Cache) ([]entity.File, error) {
|
func (s *SingleFile) Process(ctx context.Context, pageURL string, cache *entity.Cache) ([]entity.File, error) {
|
||||||
reader := cache.Reader()
|
reader := cache.Reader()
|
||||||
|
|
||||||
if reader == nil {
|
if reader == nil {
|
||||||
response, err := s.get(ctx, url)
|
response, err := s.get(ctx, pageURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if response.Body != nil {
|
defer func() {
|
||||||
defer func() {
|
_ = response.Body.Close()
|
||||||
_ = response.Body.Close()
|
}()
|
||||||
}()
|
|
||||||
}
|
|
||||||
|
|
||||||
reader = response.Body
|
reader = response.Body
|
||||||
}
|
}
|
||||||
|
|
||||||
htmlNode, err := html.Parse(reader)
|
inlinedHTML, err := internal.NewMediaInline(s.log, s.get).Inline(ctx, reader, pageURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("parse response body: %w", err)
|
return nil, fmt.Errorf("inline media: %w", err)
|
||||||
}
|
|
||||||
|
|
||||||
if err := s.process(ctx, htmlNode, url); err != nil {
|
|
||||||
return nil, fmt.Errorf("process: %w", err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
buf := bytes.NewBuffer(nil)
|
buf := bytes.NewBuffer(nil)
|
||||||
if err := html.Render(buf, htmlNode); err != nil {
|
if err := html.Render(buf, inlinedHTML); err != nil {
|
||||||
return nil, fmt.Errorf("render result html: %w", err)
|
return nil, fmt.Errorf("render result html: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -78,59 +74,3 @@ func (s *SingleFile) get(ctx context.Context, url string) (*http.Response, error
|
|||||||
|
|
||||||
return response, nil
|
return response, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *SingleFile) process(ctx context.Context, node *html.Node, pageURL string) error {
|
|
||||||
parsedURL, err := url.Parse(pageURL)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("parse page url: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
baseURL := fmt.Sprintf("%s://%s", parsedURL.Scheme, parsedURL.Host)
|
|
||||||
|
|
||||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
||||||
var err error
|
|
||||||
switch child.Data {
|
|
||||||
case "head":
|
|
||||||
err = s.processHead(ctx, child, baseURL)
|
|
||||||
|
|
||||||
case "body":
|
|
||||||
err = s.processBody(ctx, child, baseURL)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SingleFile) processHead(ctx context.Context, node *html.Node, baseURL string) error {
|
|
||||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
||||||
switch child.Data {
|
|
||||||
case "link":
|
|
||||||
if err := s.processHref(ctx, child.Attr, baseURL); err != nil {
|
|
||||||
return fmt.Errorf("process link %s: %w", child.Attr, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
case "script":
|
|
||||||
if err := s.processSrc(ctx, child.Attr, baseURL); err != nil {
|
|
||||||
return fmt.Errorf("process script %s: %w", child.Attr, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SingleFile) processBody(ctx context.Context, child *html.Node, url string) error {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SingleFile) processHref(ctx context.Context, attrs []html.Attribute, baseURL string) error {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SingleFile) processSrc(ctx context.Context, attrs []html.Attribute, baseURL string) error {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -192,7 +192,7 @@ func (p *Page) ListUnprocessed(ctx context.Context) ([]entity.Page, error) {
|
|||||||
return fmt.Errorf("get item: %w", err)
|
return fmt.Errorf("get item: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if page.Status == entity.StatusNew {
|
if page.Status == entity.StatusNew || page.Status == entity.StatusProcessing {
|
||||||
//goland:noinspection GoVetCopyLock
|
//goland:noinspection GoVetCopyLock
|
||||||
pages = append(pages, page) //nolint:govet // didn't touch the lock here
|
pages = append(pages, page) //nolint:govet // didn't touch the lock here
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,11 +6,12 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/derfenix/webarchive/adapters/repository"
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
"go.uber.org/zap/zaptest"
|
"go.uber.org/zap/zaptest"
|
||||||
|
|
||||||
|
"github.com/derfenix/webarchive/adapters/repository"
|
||||||
|
|
||||||
"github.com/derfenix/webarchive/entity"
|
"github.com/derfenix/webarchive/entity"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -50,12 +51,16 @@ func TestSite(t *testing.T) {
|
|||||||
storedSite, err := siteRepo.Get(ctx, site.ID)
|
storedSite, err := siteRepo.Get(ctx, site.ID)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
assert.Equal(t, site, storedSite)
|
assert.Equal(t, site.ID, storedSite.ID)
|
||||||
|
assert.Equal(t, site.URL, storedSite.URL)
|
||||||
|
assert.Equal(t, site.Status, storedSite.Status)
|
||||||
|
|
||||||
all, err := siteRepo.ListAll(ctx)
|
all, err := siteRepo.ListAll(ctx)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.Len(t, all, 1)
|
require.Len(t, all, 1)
|
||||||
|
|
||||||
assert.Equal(t, site, all[0])
|
assert.Equal(t, site.ID, all[0].ID)
|
||||||
|
assert.Equal(t, site.URL, all[0].URL)
|
||||||
|
assert.Equal(t, site.Status, all[0].Status)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,13 +10,14 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/derfenix/webarchive/adapters/repository"
|
|
||||||
"github.com/dgraph-io/badger/v4"
|
"github.com/dgraph-io/badger/v4"
|
||||||
"github.com/ogen-go/ogen/middleware"
|
"github.com/ogen-go/ogen/middleware"
|
||||||
"go.uber.org/multierr"
|
"go.uber.org/multierr"
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
"go.uber.org/zap/zapcore"
|
"go.uber.org/zap/zapcore"
|
||||||
|
|
||||||
|
"github.com/derfenix/webarchive/adapters/repository"
|
||||||
|
|
||||||
"github.com/derfenix/webarchive/adapters/processors"
|
"github.com/derfenix/webarchive/adapters/processors"
|
||||||
badgerRepo "github.com/derfenix/webarchive/adapters/repository/badger"
|
badgerRepo "github.com/derfenix/webarchive/adapters/repository/badger"
|
||||||
"github.com/derfenix/webarchive/api/openapi"
|
"github.com/derfenix/webarchive/api/openapi"
|
||||||
@@ -41,7 +42,7 @@ func NewApplication(cfg config.Config) (Application, error) {
|
|||||||
return Application{}, fmt.Errorf("new page repo: %w", err)
|
return Application{}, fmt.Errorf("new page repo: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
processor, err := processors.NewProcessors(cfg)
|
processor, err := processors.NewProcessors(cfg, log.Named("processor"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Application{}, fmt.Errorf("new processors: %w", err)
|
return Application{}, fmt.Errorf("new processors: %w", err)
|
||||||
}
|
}
|
||||||
@@ -50,7 +51,7 @@ func NewApplication(cfg config.Config) (Application, error) {
|
|||||||
worker := entity.NewWorker(workerCh, pageRepo, processor, log.Named("worker"))
|
worker := entity.NewWorker(workerCh, pageRepo, processor, log.Named("worker"))
|
||||||
|
|
||||||
server, err := openapi.NewServer(
|
server, err := openapi.NewServer(
|
||||||
rest.NewService(pageRepo, workerCh),
|
rest.NewService(pageRepo, workerCh, processor),
|
||||||
openapi.WithPathPrefix("/api/v1"),
|
openapi.WithPathPrefix("/api/v1"),
|
||||||
openapi.WithMiddleware(
|
openapi.WithMiddleware(
|
||||||
func(r middleware.Request, next middleware.Next) (middleware.Response, error) {
|
func(r middleware.Request, next middleware.Next) (middleware.Response, error) {
|
||||||
@@ -190,6 +191,7 @@ func newLogger(cfg config.Logging) (*zap.Logger, error) {
|
|||||||
logCfg.EncoderConfig.EncodeTime = zapcore.RFC3339TimeEncoder
|
logCfg.EncoderConfig.EncodeTime = zapcore.RFC3339TimeEncoder
|
||||||
logCfg.EncoderConfig.EncodeDuration = zapcore.NanosDurationEncoder
|
logCfg.EncoderConfig.EncodeDuration = zapcore.NanosDurationEncoder
|
||||||
logCfg.DisableCaller = true
|
logCfg.DisableCaller = true
|
||||||
|
logCfg.DisableStacktrace = true
|
||||||
|
|
||||||
logCfg.Level = zap.NewAtomicLevelAt(zapcore.InfoLevel)
|
logCfg.Level = zap.NewAtomicLevelAt(zapcore.InfoLevel)
|
||||||
if cfg.Debug {
|
if cfg.Debug {
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package entity
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"runtime/debug"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -80,16 +81,18 @@ func (p *Page) SetProcessing() {
|
|||||||
p.Status = StatusProcessing
|
p.Status = StatusProcessing
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Page) Process(ctx context.Context, processor Processor) {
|
func (p *Page) Prepare(ctx context.Context, processor Processor) {
|
||||||
innerWG := sync.WaitGroup{}
|
|
||||||
innerWG.Add(len(p.Formats))
|
|
||||||
|
|
||||||
meta, err := processor.GetMeta(ctx, p.URL, p.cache)
|
meta, err := processor.GetMeta(ctx, p.URL, p.cache)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
p.Meta.Error = err.Error()
|
p.Meta.Error = err.Error()
|
||||||
} else {
|
} else {
|
||||||
p.Meta = meta
|
p.Meta = meta
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Page) Process(ctx context.Context, processor Processor) {
|
||||||
|
innerWG := sync.WaitGroup{}
|
||||||
|
innerWG.Add(len(p.Formats))
|
||||||
|
|
||||||
results := Results{}
|
results := Results{}
|
||||||
|
|
||||||
@@ -99,7 +102,7 @@ func (p *Page) Process(ctx context.Context, processor Processor) {
|
|||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
if err := recover(); err != nil {
|
if err := recover(); err != nil {
|
||||||
results.Add(Result{Format: format, Err: fmt.Errorf("recovered from panic: %v", err)})
|
results.Add(Result{Format: format, Err: fmt.Errorf("recovered from panic: %v (%s)", err, string(debug.Stack()))})
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
|||||||
@@ -66,6 +66,16 @@ func (w *Worker) Start(ctx context.Context, wg *sync.WaitGroup) {
|
|||||||
func (w *Worker) do(ctx context.Context, wg *sync.WaitGroup, page *Page, log *zap.Logger) {
|
func (w *Worker) do(ctx context.Context, wg *sync.WaitGroup, page *Page, log *zap.Logger) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|
||||||
|
page.SetProcessing()
|
||||||
|
if err := w.pages.Save(ctx, page); err != nil {
|
||||||
|
w.log.Error(
|
||||||
|
"failed to save processing page",
|
||||||
|
zap.String("page_id", page.ID.String()),
|
||||||
|
zap.String("page_url", page.URL),
|
||||||
|
zap.Error(err),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
page.Process(ctx, w.processor)
|
page.Process(ctx, w.processor)
|
||||||
|
|
||||||
log.Debug("page processed")
|
log.Debug("page processed")
|
||||||
|
|||||||
4
go.mod
4
go.mod
@@ -5,6 +5,7 @@ go 1.19
|
|||||||
require (
|
require (
|
||||||
github.com/SebastiaanKlippert/go-wkhtmltopdf v1.9.0
|
github.com/SebastiaanKlippert/go-wkhtmltopdf v1.9.0
|
||||||
github.com/dgraph-io/badger/v4 v4.0.1
|
github.com/dgraph-io/badger/v4 v4.0.1
|
||||||
|
github.com/disintegration/imaging v1.6.2
|
||||||
github.com/gabriel-vasile/mimetype v1.4.2
|
github.com/gabriel-vasile/mimetype v1.4.2
|
||||||
github.com/go-faster/errors v0.6.1
|
github.com/go-faster/errors v0.6.1
|
||||||
github.com/go-faster/jx v1.1.0
|
github.com/go-faster/jx v1.1.0
|
||||||
@@ -58,10 +59,11 @@ require (
|
|||||||
go.opencensus.io v0.24.0 // indirect
|
go.opencensus.io v0.24.0 // indirect
|
||||||
golang.org/x/crypto v0.14.0 // indirect
|
golang.org/x/crypto v0.14.0 // indirect
|
||||||
golang.org/x/exp v0.0.0-20230725093048-515e97ebf090 // indirect
|
golang.org/x/exp v0.0.0-20230725093048-515e97ebf090 // indirect
|
||||||
|
golang.org/x/image v0.10.0 // indirect
|
||||||
golang.org/x/sync v0.4.0 // indirect
|
golang.org/x/sync v0.4.0 // indirect
|
||||||
golang.org/x/sys v0.13.0 // indirect
|
golang.org/x/sys v0.13.0 // indirect
|
||||||
golang.org/x/text v0.13.0 // indirect
|
golang.org/x/text v0.13.0 // indirect
|
||||||
google.golang.org/protobuf v1.30.0 // indirect
|
google.golang.org/protobuf v1.33.0 // indirect
|
||||||
gopkg.in/ini.v1 v1.67.0 // indirect
|
gopkg.in/ini.v1 v1.67.0 // indirect
|
||||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||||
|
|||||||
31
go.sum
31
go.sum
@@ -21,6 +21,8 @@ github.com/dgraph-io/ristretto v0.1.1 h1:6CWw5tJNgpegArSHpNHJKldNeq03FQCwYvfMVWa
|
|||||||
github.com/dgraph-io/ristretto v0.1.1/go.mod h1:S1GPSBCYCIhmVNfcth17y2zZtQT6wzkzgwUve0VDWWA=
|
github.com/dgraph-io/ristretto v0.1.1/go.mod h1:S1GPSBCYCIhmVNfcth17y2zZtQT6wzkzgwUve0VDWWA=
|
||||||
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA=
|
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA=
|
||||||
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
|
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
|
||||||
|
github.com/disintegration/imaging v1.6.2 h1:w1LecBlG2Lnp8B3jk5zSuNqd7b4DXhcjwek1ei82L+c=
|
||||||
|
github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4=
|
||||||
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
|
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
|
||||||
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
||||||
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
|
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
|
||||||
@@ -148,6 +150,7 @@ github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAh
|
|||||||
github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds=
|
github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds=
|
||||||
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||||
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||||
|
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||||
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
|
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
|
||||||
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
|
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
|
||||||
go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs=
|
go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs=
|
||||||
@@ -164,16 +167,22 @@ go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so=
|
|||||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||||
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||||
|
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||||
golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
|
golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
|
||||||
golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
|
golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
|
||||||
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
||||||
golang.org/x/exp v0.0.0-20230725093048-515e97ebf090 h1:Di6/M8l0O2lCLc6VVRWhgCiApHV8MnQurBnFSHsQtNY=
|
golang.org/x/exp v0.0.0-20230725093048-515e97ebf090 h1:Di6/M8l0O2lCLc6VVRWhgCiApHV8MnQurBnFSHsQtNY=
|
||||||
golang.org/x/exp v0.0.0-20230725093048-515e97ebf090/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc=
|
golang.org/x/exp v0.0.0-20230725093048-515e97ebf090/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc=
|
||||||
|
golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
|
||||||
|
golang.org/x/image v0.10.0 h1:gXjUUtwtx5yOE0VKWq1CH4IJAClq4UGgUA3i+rpON9M=
|
||||||
|
golang.org/x/image v0.10.0/go.mod h1:jtrku+n79PfroUbvDdeUWMAI+heR786BofxrbiSF+J0=
|
||||||
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
|
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
|
||||||
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
|
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
|
||||||
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
|
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
|
||||||
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||||
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||||
|
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||||
|
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||||
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||||
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||||
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||||
@@ -183,6 +192,9 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL
|
|||||||
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||||
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
||||||
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
||||||
|
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||||
|
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||||
|
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
||||||
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
|
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
|
||||||
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
|
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
|
||||||
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
||||||
@@ -191,21 +203,34 @@ golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJ
|
|||||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
|
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
|
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ=
|
golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ=
|
||||||
golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
|
golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
|
||||||
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20221010170243-090e33056c14/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20221010170243-090e33056c14/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
|
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
|
||||||
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
|
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||||
|
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
|
||||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
|
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||||
|
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||||
|
golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
||||||
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
|
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
|
||||||
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
||||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
@@ -216,6 +241,8 @@ golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBn
|
|||||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||||
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
||||||
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
||||||
|
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||||
|
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
|
||||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
@@ -241,8 +268,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD
|
|||||||
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
|
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
|
||||||
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
|
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
|
||||||
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
|
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
|
||||||
google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
|
google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
|
||||||
google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
|
google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||||
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
|
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
|
||||||
|
|||||||
@@ -20,17 +20,19 @@ type Pages interface {
|
|||||||
GetFile(ctx context.Context, pageID, fileID uuid.UUID) (*entity.File, error)
|
GetFile(ctx context.Context, pageID, fileID uuid.UUID) (*entity.File, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewService(pages Pages, ch chan *entity.Page) *Service {
|
func NewService(pages Pages, ch chan *entity.Page, processor entity.Processor) *Service {
|
||||||
return &Service{
|
return &Service{
|
||||||
pages: pages,
|
pages: pages,
|
||||||
ch: ch,
|
ch: ch,
|
||||||
|
processor: processor,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type Service struct {
|
type Service struct {
|
||||||
openapi.UnimplementedHandler
|
openapi.UnimplementedHandler
|
||||||
pages Pages
|
processor entity.Processor
|
||||||
ch chan *entity.Page
|
pages Pages
|
||||||
|
ch chan *entity.Page
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Service) GetPage(ctx context.Context, params openapi.GetPageParams) (openapi.GetPageRes, error) {
|
func (s *Service) GetPage(ctx context.Context, params openapi.GetPageParams) (openapi.GetPageRes, error) {
|
||||||
@@ -79,7 +81,8 @@ func (s *Service) AddPage(ctx context.Context, req openapi.OptAddPageReq, params
|
|||||||
}
|
}
|
||||||
|
|
||||||
page := entity.NewPage(url, description, domainFormats...)
|
page := entity.NewPage(url, description, domainFormats...)
|
||||||
page.Status = entity.StatusProcessing
|
page.Status = entity.StatusNew
|
||||||
|
page.Prepare(ctx, s.processor)
|
||||||
|
|
||||||
if err := s.pages.Save(ctx, page); err != nil {
|
if err := s.pages.Save(ctx, page); err != nil {
|
||||||
return nil, fmt.Errorf("save page: %w", err)
|
return nil, fmt.Errorf("save page: %w", err)
|
||||||
|
|||||||
Reference in New Issue
Block a user