Refactoring

This commit is contained in:
2023-04-13 18:22:17 +03:00
parent f47dbefb67
commit 2b7a33e72d
9 changed files with 73 additions and 43 deletions

6
.idea/jsonSchemas.xml generated
View File

@@ -3,11 +3,11 @@
<component name="JsonSchemaMappingsProjectConfiguration">
<state>
<map>
<entry key="openapi">
<entry key="OpenAPI 3.0">
<value>
<SchemaInfo>
<option name="name" value="openapi" />
<option name="relativePathToSchema" value="https://raw.githubusercontent.com/OAI/OpenAPI-Specification/main/schemas/v3.1/schema.json" />
<option name="name" value="OpenAPI 3.0" />
<option name="relativePathToSchema" value="https://raw.githubusercontent.com/OAI/OpenAPI-Specification/master/schemas/v3.0/schema.json" />
<option name="applicationDefined" value="true" />
<option name="patterns">
<list>

View File

@@ -116,3 +116,4 @@ curl -X GET --location "http://localhost:5001/api/v1/pages" | jq .
- [ ] Multi-user access
- [ ] Support SQL database with or without separate file storage
- [ ] Tags/Categories
- [ ] Save page to markdown

View File

@@ -40,6 +40,7 @@ func (p *PDF) Process(_ context.Context, url string) ([]entity.File, error) {
page := wkhtmltopdf.NewPage(url)
page.PrintMediaType.Set(p.cfg.MediaPrint)
page.JavascriptDelay.Set(200)
page.LoadErrorHandling.Set("ignore")
page.LoadMediaErrorHandling.Set("ignore")
page.FooterRight.Set("[page]")
page.HeaderLeft.Set(url)

View File

@@ -112,17 +112,8 @@ func (s *SingleFile) findAndReplaceResources(ctx context.Context, node *html.Nod
func (s *SingleFile) replaceResource(ctx context.Context, node *html.Node, baseURL string) error {
for i, attribute := range node.Attr {
if attribute.Key == "src" || attribute.Key == "href" {
encoded, contentType, err := s.loadResource(ctx, attribute.Val, baseURL)
if err != nil {
return fmt.Errorf("load resource for %s: %w", node.Data, err)
}
if len(encoded) == 0 {
attribute.Val = ""
} else {
attribute.Val = fmt.Sprintf("data:%s;base64, %s", contentType, encoded)
}
raw, contentType := s.loadResource(ctx, attribute.Val, baseURL)
setResource(raw, attribute, contentType, node)
node.Attr[i] = attribute
}
@@ -131,27 +122,68 @@ func (s *SingleFile) replaceResource(ctx context.Context, node *html.Node, baseU
return nil
}
func (s *SingleFile) loadResource(ctx context.Context, val, baseURL string) ([]byte, string, error) {
// setResource embeds a downloaded resource into the document next to the
// element that referenced it.
//
// Parameters:
//   - raw: the resource body; when empty nothing is embedded.
//   - attribute: the src/href attribute that pointed at the resource.
//   - contentType: the Content-Type reported for the resource.
//   - node: the element that carries the attribute.
//
// Behaviour:
//   - Resources whose content type starts with "image" are turned into a
//     base64 data URI stored in attribute.Val.
//   - For any other content type, node's "type" attribute selects the element
//     to create ("script" -> <script>, "stylesheet" -> <style>); that element
//     receives raw as its only child and is inserted immediately after node.
//     When no "type" attribute matches, nothing is inserted: an element with
//     an empty tag name would be rendered as "<>...</>".
//
// NOTE(review): attribute is received by value, so every assignment to
// attribute.Val below changes only this function's copy; the caller's
// attribute stays untouched. Making the new value visible requires passing
// *html.Attribute or returning the value, which changes the signature and
// the call site.
func setResource(raw []byte, attribute html.Attribute, contentType string, node *html.Node) {
	if len(raw) == 0 {
		attribute.Val = ""
		return
	}

	if strings.HasPrefix(contentType, "image") {
		attribute.Val = fmt.Sprintf("data:%s;base64, %s", contentType, base64.StdEncoding.EncodeToString(raw))
		return
	}

	attribute.Val = ""

	// Resolve which element should hold the inlined content.
	var (
		tag     atom.Atom
		tagName string
	)
	for _, attr := range node.Attr {
		if attr.Key != "type" {
			continue
		}
		switch attr.Val {
		case "script":
			tag, tagName = atom.Script, "script"
		case "stylesheet":
			tag, tagName = atom.Style, "style"
		}
	}
	if tagName == "" {
		// Unknown kind of resource: do not emit a nameless element.
		return
	}

	inline := &html.Node{
		Type:     html.ElementNode,
		DataAtom: tag,
		Data:     tagName,
	}
	inline.AppendChild(&html.Node{
		Type:     html.RawNode,
		DataAtom: atom.Data,
		Data:     string(raw),
	})

	if node.Parent != nil {
		// InsertBefore keeps Parent, PrevSibling, NextSibling and the parent's
		// LastChild consistent; a nil NextSibling appends at the end.
		node.Parent.InsertBefore(inline, node.NextSibling)
		return
	}

	// Detached node: there is no parent to insert through, so link the
	// sibling chain by hand in both directions.
	inline.PrevSibling = node
	inline.NextSibling = node.NextSibling
	if node.NextSibling != nil {
		node.NextSibling.PrevSibling = inline
	}
	node.NextSibling = inline
}
func (s *SingleFile) loadResource(ctx context.Context, val, baseURL string) ([]byte, string) {
if !strings.HasPrefix(val, "http://") && !strings.HasPrefix(val, "https://") {
var err error
val, err = url.JoinPath(baseURL, val)
if err != nil {
return nil, "", fmt.Errorf("join base path %s and url %s: %w", baseURL, val, err)
return nil, ""
}
val, err = url.PathUnescape(val)
if err != nil {
return nil, "", fmt.Errorf("unescape path %s: %w", val, err)
return nil, ""
}
}
req, err := http.NewRequestWithContext(ctx, http.MethodGet, val, nil)
if err != nil {
return nil, "", fmt.Errorf("new request: %w", err)
return nil, ""
}
response, err := s.client.Do(req)
if err != nil {
return nil, "", fmt.Errorf("do request: %w", err)
return nil, ""
}
defer func() {
@@ -161,18 +193,15 @@ func (s *SingleFile) loadResource(ctx context.Context, val, baseURL string) ([]b
}()
if response.StatusCode != http.StatusOK {
return []byte{}, "", nil
return []byte{}, ""
}
raw, err := io.ReadAll(response.Body)
if err != nil {
return nil, "", fmt.Errorf("read body: %w", err)
return nil, ""
}
encoded := make([]byte, base64.StdEncoding.EncodedLen(len(raw)))
base64.StdEncoding.Encode(encoded, raw)
return encoded, response.Header.Get("Content-Type"), nil
return raw, response.Header.Get("Content-Type")
}
func (s *SingleFile) setCharset(node *html.Node, encoding string) {

View File

@@ -1,4 +1,5 @@
openapi: 3.1.0
---
openapi: 3.0.3
info:
title: Sample API
description: API description in Markdown.

View File

@@ -49,7 +49,7 @@ func NewApplication(cfg config.Config) (Application, error) {
worker := entity.NewWorker(workerCh, pageRepo, processor, log.Named("worker"))
server, err := openapi.NewServer(
rest.NewService(pageRepo, workerCh, processor),
rest.NewService(pageRepo, workerCh),
openapi.WithPathPrefix("/api/v1"),
openapi.WithMiddleware(
func(r middleware.Request, next middleware.Next) (middleware.Response, error) {

View File

@@ -75,6 +75,13 @@ func (p *Page) Process(ctx context.Context, processor Processor) {
innerWG := sync.WaitGroup{}
innerWG.Add(len(p.Formats))
meta, err := processor.GetMeta(ctx, p.URL)
if err != nil {
p.Meta.Error = err.Error()
} else {
p.Meta = meta
}
for _, format := range p.Formats {
go func(format Format) {
defer innerWG.Done()

2
go.mod
View File

@@ -18,6 +18,7 @@ require (
go.opentelemetry.io/otel/trace v1.14.0
go.uber.org/multierr v1.10.0
go.uber.org/zap v1.24.0
golang.org/x/net v0.8.0
)
require (
@@ -49,7 +50,6 @@ require (
go.opencensus.io v0.24.0 // indirect
go.uber.org/atomic v1.9.0 // indirect
golang.org/x/exp v0.0.0-20230206171751-46f607a40771 // indirect
golang.org/x/net v0.8.0 // indirect
golang.org/x/sync v0.1.0 // indirect
golang.org/x/sys v0.6.0 // indirect
golang.org/x/text v0.8.0 // indirect

View File

@@ -20,11 +20,10 @@ type Pages interface {
GetFile(ctx context.Context, pageID, fileID uuid.UUID) (*entity.File, error)
}
func NewService(pages Pages, ch chan *entity.Page, processor entity.Processor) *Service {
func NewService(pages Pages, ch chan *entity.Page) *Service {
return &Service{
pages: pages,
ch: ch,
processor: processor,
}
}
@@ -32,7 +31,6 @@ type Service struct {
openapi.UnimplementedHandler
pages Pages
ch chan *entity.Page
processor entity.Processor
}
func (s *Service) GetPage(ctx context.Context, params openapi.GetPageParams) (openapi.GetPageRes, error) {
@@ -83,13 +81,6 @@ func (s *Service) AddPage(ctx context.Context, req openapi.OptAddPageReq, params
page := entity.NewPage(url, description, domainFormats...)
page.Status = entity.StatusProcessing
meta, err := s.processor.GetMeta(ctx, page.URL)
if err != nil {
page.Meta.Error = err.Error()
} else {
page.Meta = meta
}
if err := s.pages.Save(ctx, page); err != nil {
return nil, fmt.Errorf("save page: %w", err)
}