mirror of
https://github.com/derfenix/webarchive.git
synced 2026-03-11 21:35:34 +03:00
Initial commit
This commit is contained in:
93
adapters/processors/processors.go
Normal file
93
adapters/processors/processors.go
Normal file
@@ -0,0 +1,93 @@
|
||||
package processors
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/cookiejar"
|
||||
"time"
|
||||
|
||||
"github.com/derfenix/webarchive/entity"
|
||||
)
|
||||
|
||||
type processor interface {
|
||||
Process(ctx context.Context, url string) ([]entity.File, error)
|
||||
}
|
||||
|
||||
func NewProcessors() (*Processors, error) {
|
||||
jar, err := cookiejar.New(&cookiejar.Options{
|
||||
PublicSuffixList: nil,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create cookie jar: %w", err)
|
||||
}
|
||||
|
||||
httpClient := &http.Client{
|
||||
Transport: &http.Transport{
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: time.Second * 10,
|
||||
KeepAlive: time.Second * 10,
|
||||
}).DialContext,
|
||||
MaxIdleConns: 20,
|
||||
MaxIdleConnsPerHost: 5,
|
||||
MaxConnsPerHost: 10,
|
||||
IdleConnTimeout: time.Second * 60,
|
||||
ResponseHeaderTimeout: time.Second * 20,
|
||||
MaxResponseHeaderBytes: 1024 * 1024 * 50,
|
||||
WriteBufferSize: 256,
|
||||
ReadBufferSize: 1024 * 64,
|
||||
ForceAttemptHTTP2: true,
|
||||
},
|
||||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||
if len(via) > 3 {
|
||||
return fmt.Errorf("too many redirects")
|
||||
}
|
||||
|
||||
return nil
|
||||
},
|
||||
Jar: jar,
|
||||
Timeout: time.Second * 30,
|
||||
}
|
||||
|
||||
procs := Processors{
|
||||
processors: map[entity.Format]processor{
|
||||
entity.FormatHeaders: NewHeaders(httpClient),
|
||||
entity.FormatPDF: NewPDF(),
|
||||
},
|
||||
}
|
||||
|
||||
return &procs, nil
|
||||
}
|
||||
|
||||
type Processors struct {
|
||||
processors map[entity.Format]processor
|
||||
}
|
||||
|
||||
func (p *Processors) Process(ctx context.Context, format entity.Format, url string) entity.Result {
|
||||
result := entity.Result{Format: format}
|
||||
|
||||
proc, ok := p.processors[format]
|
||||
if !ok {
|
||||
result.Err = fmt.Errorf("no processor registered for format %v", format)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
files, err := proc.Process(ctx, url)
|
||||
if err != nil {
|
||||
result.Err = fmt.Errorf("process: %w", err)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
result.Files = files
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func (p *Processors) Override(format entity.Format, proc processor) error {
|
||||
p.processors[format] = proc
|
||||
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user