web ui: index and basic details page, api refactoring

This commit is contained in:
2023-04-04 21:51:45 +03:00
parent 2a8b94136f
commit f47dbefb67
21 changed files with 821 additions and 58 deletions

View File

@@ -24,13 +24,17 @@ variables:
* **LOGGING_DEBUG** — enable debug logs (default `false`) * **LOGGING_DEBUG** — enable debug logs (default `false`)
* **API** * **API**
* **API_ADDRESS** — address the API server will listen (default `0.0.0.0:5001`) * **API_ADDRESS** — address the API server will listen (default `0.0.0.0:5001`)
* **UI**
* **UI_ENABLED** — enable the built-in web UI (default `true`)
* **UI_PREFIX** — URL path prefix the web UI is served under (default `/`)
* **UI_THEME** — UI theme name (default `basic`, currently the only available theme)
* **PDF** * **PDF**
* **PDF_LANDSCAPE** — use landscape page orientation instead of portrait (default `false`) * **PDF_LANDSCAPE** — use landscape page orientation instead of portrait (default `false`)
* **PDF_GRAYSCALE** — use grayscale filter for the output pdf (default `false`) * **PDF_GRAYSCALE** — use grayscale filter for the output pdf (default `false`)
* **PDF_MEDIA_PRINT** — use media type `print` for the request (default `true`) * **PDF_MEDIA_PRINT** — use media type `print` for the request (default `true`)
* **PDF_ZOOM** — zoom page (default `1.0` i.e. no actual zoom) * **PDF_ZOOM** — zoom page (default `1.0` i.e. no actual zoom)
* **PDF_VIEWPORT** — use specified viewport value (default `1920x1080`) * **PDF_VIEWPORT** — use specified viewport value (default `1280x720`)
* **PDF_DPI** — use specified DPI value for the output pdf (default `300`) * **PDF_DPI** — use specified DPI value for the output pdf (default `150`)
* **PDF_FILENAME** — use specified name for output pdf file (default `page.pdf`) * **PDF_FILENAME** — use specified name for output pdf file (default `page.pdf`)
@@ -60,7 +64,7 @@ docker compose up -d webarchive
### 2. Add a page ### 2. Add a page
```shell ```shell
curl -X POST --location "http://localhost:5001/pages" \ curl -X POST --location "http://localhost:5001/api/v1/pages" \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d "{ -d "{
\"url\": \"https://github.com/wkhtmltopdf/wkhtmltopdf/issues/1937\", \"url\": \"https://github.com/wkhtmltopdf/wkhtmltopdf/issues/1937\",
@@ -75,13 +79,13 @@ or
```shell ```shell
curl -X POST --location \ curl -X POST --location \
"http://localhost:5001/pages?url=https%3A%2F%2Fgithub.com%2Fwkhtmltopdf%2Fwkhtmltopdf%2Fissues%2F1937&formats=pdf%2Cheaders&description=Foo+Bar" "http://localhost:5001/api/v1/pages?url=https%3A%2F%2Fgithub.com%2Fwkhtmltopdf%2Fwkhtmltopdf%2Fissues%2F1937&formats=pdf%2Cheaders&description=Foo+Bar"
``` ```
### 3. Get the page's info ### 3. Get the page's info
```shell ```shell
curl -X GET --location "http://localhost:5001/pages/$page_id" | jq . curl -X GET --location "http://localhost:5001/api/v1/pages/$page_id" | jq .
``` ```
where `$page_id` — value of the `id` field from previous command response. where `$page_id` — value of the `id` field from previous command response.
If `status` field in response is `success` (or `with_errors`) - the `results` field If `status` field in response is `success` (or `with_errors`) - the `results` field
@@ -90,7 +94,7 @@ will contain all processed formats with ids of the stored files.
### 4. Open file in browser ### 4. Open file in browser
```shell ```shell
xdg-open "http://localhost:5001/pages/$page_id/file/$file_id" xdg-open "http://localhost:5001/api/v1/pages/$page_id/file/$file_id"
``` ```
Where `$page_id` — value of the `id` field from previous command response, and Where `$page_id` — value of the `id` field from previous command response, and
`$file_id` — the id of interesting file. `$file_id` — the id of interesting file.
@@ -98,7 +102,7 @@ Where `$page_id` — value of the `id` field from previous command response, an
### 5. List all stored pages ### 5. List all stored pages
```shell ```shell
curl -X GET --location "http://localhost:5001/pages" | jq . curl -X GET --location "http://localhost:5001/api/v1/pages" | jq .
``` ```
## Roadmap ## Roadmap

View File

@@ -47,6 +47,7 @@ func (p *PDF) Process(_ context.Context, url string) ([]entity.File, error) {
page.FooterFontSize.Set(10) page.FooterFontSize.Set(10)
page.Zoom.Set(p.cfg.Zoom) page.Zoom.Set(p.cfg.Zoom)
page.ViewportSize.Set(p.cfg.Viewport) page.ViewportSize.Set(p.cfg.Viewport)
page.NoBackground.Set(true)
gen.AddPage(page) gen.AddPage(page)

View File

@@ -8,6 +8,8 @@ import (
"net/http/cookiejar" "net/http/cookiejar"
"time" "time"
"golang.org/x/net/html"
"github.com/derfenix/webarchive/config" "github.com/derfenix/webarchive/config"
"github.com/derfenix/webarchive/entity" "github.com/derfenix/webarchive/entity"
) )
@@ -52,6 +54,7 @@ func NewProcessors(cfg config.Config) (*Processors, error) {
} }
procs := Processors{ procs := Processors{
client: httpClient,
processors: map[entity.Format]processor{ processors: map[entity.Format]processor{
entity.FormatHeaders: NewHeaders(httpClient), entity.FormatHeaders: NewHeaders(httpClient),
entity.FormatPDF: NewPDF(cfg.PDF), entity.FormatPDF: NewPDF(cfg.PDF),
@@ -64,6 +67,7 @@ func NewProcessors(cfg config.Config) (*Processors, error) {
type Processors struct { type Processors struct {
processors map[entity.Format]processor processors map[entity.Format]processor
client *http.Client
} }
func (p *Processors) Process(ctx context.Context, format entity.Format, url string) entity.Result { func (p *Processors) Process(ctx context.Context, format entity.Format, url string) entity.Result {
@@ -93,3 +97,62 @@ func (p *Processors) OverrideProcessor(format entity.Format, proc processor) err
return nil return nil
} }
// GetMeta downloads url with the shared HTTP client and extracts the page
// metadata (title and meta description) from the returned HTML document.
//
// An error is returned when the request cannot be built or executed, when the
// server answers with any status other than 200 OK, when the response carries
// no body, or when the body cannot be parsed as HTML. On error the returned
// entity.Meta is always the zero value.
func (p *Processors) GetMeta(ctx context.Context, url string) (entity.Meta, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return entity.Meta{}, fmt.Errorf("new request: %w", err)
	}

	response, err := p.client.Do(req)
	if err != nil {
		return entity.Meta{}, fmt.Errorf("do request: %w", err)
	}

	// Register the close before any early return so that a non-200 answer
	// does not leak the body (and with it the underlying connection).
	// The nil guard keeps custom transports that return no body safe.
	defer func() {
		if response.Body != nil {
			_ = response.Body.Close()
		}
	}()

	if response.StatusCode != http.StatusOK {
		return entity.Meta{}, fmt.Errorf("want status 200, got %d", response.StatusCode)
	}

	if response.Body == nil {
		return entity.Meta{}, fmt.Errorf("empty response body")
	}

	htmlNode, err := html.Parse(response.Body)
	if err != nil {
		return entity.Meta{}, fmt.Errorf("parse response body: %w", err)
	}

	meta := entity.Meta{}
	getMetaData(htmlNode, &meta)

	return meta, nil
}
// getMetaData walks the HTML tree below n depth-first and fills meta from the
// elements it meets:
//
//   - the first child's data of an HTML <title> element becomes meta.Title;
//   - the "content" attribute of <meta name="description"> becomes
//     meta.Description.
//
// When several matching elements exist, the last one visited wins.
// A nil n is a no-op.
func getMetaData(n *html.Node, meta *entity.Meta) {
	if n == nil {
		return
	}

	for c := n.FirstChild; c != nil; c = c.NextSibling {
		if c.Type == html.ElementNode {
			switch c.Data {
			case "title":
				// Only the HTML <title> names the document; <title> elements in
				// foreign content (e.g. inline SVG) carry a non-empty namespace
				// and must not overwrite it. An empty <title></title> has no
				// child node, so guard against a nil dereference.
				if c.Namespace == "" && c.FirstChild != nil {
					meta.Title = c.FirstChild.Data
				}

			case "meta":
				var name, content string
				for _, attr := range c.Attr {
					switch attr.Key {
					case "name":
						name = attr.Val
					case "content":
						content = attr.Val
					}
				}

				if name == "description" {
					meta.Description = content
				}
			}
		}

		getMetaData(c, meta)
	}
}

View File

@@ -64,18 +64,18 @@ func (p *Page) GetFile(_ context.Context, pageID, fileID uuid.UUID) (*entity.Fil
return file, nil return file, nil
} }
func (p *Page) Save(_ context.Context, site *entity.Page) error { func (p *Page) Save(_ context.Context, page *entity.Page) error {
if p.db.IsClosed() { if p.db.IsClosed() {
return ErrDBClosed return ErrDBClosed
} }
marshaled, err := marshal(site) marshaled, err := marshal(page)
if err != nil { if err != nil {
return fmt.Errorf("marshal data: %w", err) return fmt.Errorf("marshal data: %w", err)
} }
if err := p.db.Update(func(txn *badger.Txn) error { if err := p.db.Update(func(txn *badger.Txn) error {
if err := txn.Set(p.key(site), marshaled); err != nil { if err := txn.Set(p.key(page), marshaled); err != nil {
return fmt.Errorf("put data: %w", err) return fmt.Errorf("put data: %w", err)
} }
@@ -151,6 +151,64 @@ func (p *Page) ListAll(ctx context.Context) ([]*entity.Page, error) {
Formats: page.Formats, Formats: page.Formats,
Version: page.Version, Version: page.Version,
Status: page.Status, Status: page.Status,
Meta: page.Meta,
})
}
return nil
})
if err != nil {
return nil, fmt.Errorf("view: %w", err)
}
sort.Slice(pages, func(i, j int) bool {
return pages[i].Created.After(pages[j].Created)
})
return pages, nil
}
func (p *Page) ListUnprocessed(ctx context.Context) ([]*entity.Page, error) {
pages := make([]*entity.Page, 0, 100)
err := p.db.View(func(txn *badger.Txn) error {
iterator := txn.NewIterator(badger.DefaultIteratorOptions)
defer iterator.Close()
for iterator.Seek(p.prefix); iterator.ValidForPrefix(p.prefix); iterator.Next() {
if err := ctx.Err(); err != nil {
return fmt.Errorf("context canceled: %w", err)
}
var page entity.Page
err := iterator.Item().Value(func(val []byte) error {
if err := unmarshal(val, &page); err != nil {
return fmt.Errorf("unmarshal: %w", err)
}
return nil
})
if err != nil {
return fmt.Errorf("get item: %w", err)
}
if page.Status != entity.StatusProcessing {
continue
}
pages = append(pages, &entity.Page{
ID: page.ID,
URL: page.URL,
Description: page.Description,
Created: page.Created,
Formats: page.Formats,
Version: page.Version,
Status: page.Status,
Meta: page.Meta,
}) })
} }

View File

@@ -4,7 +4,7 @@ info:
description: API description in Markdown. description: API description in Markdown.
version: 1.0.0 version: 1.0.0
servers: servers:
- url: 'https://api.example.com' - url: 'https://api.example.com/api/v1'
paths: paths:
/pages: /pages:
get: get:
@@ -183,12 +183,25 @@ components:
$ref: '#/components/schemas/format' $ref: '#/components/schemas/format'
status: status:
$ref: '#/components/schemas/status' $ref: '#/components/schemas/status'
meta:
type: object
properties:
title:
type: string
description:
type: string
error:
type: string
required:
- title
- description
required: required:
- id - id
- url - url
- formats - formats
- status - status
- created - created
- meta
result: result:
type: object type: object
properties: properties:

View File

@@ -534,14 +534,20 @@ func (s *Page) encodeFields(e *jx.Encoder) {
e.FieldStart("status") e.FieldStart("status")
s.Status.Encode(e) s.Status.Encode(e)
} }
{
e.FieldStart("meta")
s.Meta.Encode(e)
}
} }
var jsonFieldsNameOfPage = [5]string{ var jsonFieldsNameOfPage = [6]string{
0: "id", 0: "id",
1: "url", 1: "url",
2: "created", 2: "created",
3: "formats", 3: "formats",
4: "status", 4: "status",
5: "meta",
} }
// Decode decodes Page from json. // Decode decodes Page from json.
@@ -617,6 +623,16 @@ func (s *Page) Decode(d *jx.Decoder) error {
}(); err != nil { }(); err != nil {
return errors.Wrap(err, "decode field \"status\"") return errors.Wrap(err, "decode field \"status\"")
} }
case "meta":
requiredBitSet[0] |= 1 << 5
if err := func() error {
if err := s.Meta.Decode(d); err != nil {
return err
}
return nil
}(); err != nil {
return errors.Wrap(err, "decode field \"meta\"")
}
default: default:
return d.Skip() return d.Skip()
} }
@@ -627,7 +643,7 @@ func (s *Page) Decode(d *jx.Decoder) error {
// Validate required fields. // Validate required fields.
var failures []validate.FieldError var failures []validate.FieldError
for i, mask := range [1]uint8{ for i, mask := range [1]uint8{
0b00011111, 0b00111111,
} { } {
if result := (requiredBitSet[i] & mask) ^ mask; result != 0 { if result := (requiredBitSet[i] & mask) ^ mask; result != 0 {
// Mask only required fields and check equality to mask using XOR. // Mask only required fields and check equality to mask using XOR.
@@ -673,6 +689,138 @@ func (s *Page) UnmarshalJSON(data []byte) error {
return s.Decode(d) return s.Decode(d)
} }
// Encode writes s to e as a JSON object: it opens the object, emits the
// fields through encodeFields and closes the object again.
func (s *PageMeta) Encode(e *jx.Encoder) {
e.ObjStart()
s.encodeFields(e)
e.ObjEnd()
}
// encodeFields writes the members of s into an already opened JSON object.
// "title" and "description" are always written; "error" is written only when
// the optional value is set.
func (s *PageMeta) encodeFields(e *jx.Encoder) {
	e.FieldStart("title")
	e.Str(s.Title)

	e.FieldStart("description")
	e.Str(s.Description)

	if !s.Error.Set {
		return
	}

	e.FieldStart("error")
	s.Error.Encode(e)
}
// jsonFieldsNameOfPageMeta lists the JSON keys of PageMeta by field index;
// PageMeta.Decode uses it to name missing required fields.
var jsonFieldsNameOfPageMeta = [...]string{"title", "description", "error"}
// Decode reads a JSON object from d into s.
//
// The keys "title" and "description" are required, "error" is optional and
// every other key is skipped. A nil receiver is rejected. When required keys
// are absent, the returned *validate.Error names each missing field.
func (s *PageMeta) Decode(d *jx.Decoder) error {
	if s == nil {
		return errors.New("invalid: unable to decode PageMeta to nil")
	}

	// One bit per required field, positioned as in jsonFieldsNameOfPageMeta.
	var requiredBitSet [1]uint8

	readField := func(d *jx.Decoder, k []byte) error {
		switch string(k) {
		case "title":
			requiredBitSet[0] |= 1 << 0
			v, err := d.Str()
			s.Title = string(v)
			if err != nil {
				return errors.Wrap(err, "decode field \"title\"")
			}
		case "description":
			requiredBitSet[0] |= 1 << 1
			v, err := d.Str()
			s.Description = string(v)
			if err != nil {
				return errors.Wrap(err, "decode field \"description\"")
			}
		case "error":
			s.Error.Reset()
			if err := s.Error.Decode(d); err != nil {
				return errors.Wrap(err, "decode field \"error\"")
			}
		default:
			return d.Skip()
		}
		return nil
	}
	if err := d.ObjBytes(readField); err != nil {
		return errors.Wrap(err, "decode PageMeta")
	}

	// Validate required fields: every bit of the mask that was not seen while
	// decoding marks a missing field.
	const requiredMask uint8 = 0b00000011

	var failures []validate.FieldError
	for missing := requiredMask &^ requiredBitSet[0]; missing != 0; {
		bitIdx := bits.TrailingZeros8(missing)

		name := strconv.Itoa(bitIdx)
		if bitIdx < len(jsonFieldsNameOfPageMeta) {
			name = jsonFieldsNameOfPageMeta[bitIdx]
		}

		failures = append(failures, validate.FieldError{
			Name:  name,
			Error: validate.ErrFieldRequired,
		})

		// Clear the handled bit so the next iteration finds the next one.
		missing &^= 1 << bitIdx
	}
	if len(failures) > 0 {
		return &validate.Error{Fields: failures}
	}

	return nil
}
// MarshalJSON implements stdjson.Marshaler by encoding s with a fresh
// jx.Encoder and returning its bytes. The returned error is always nil.
func (s *PageMeta) MarshalJSON() ([]byte, error) {
	var enc jx.Encoder
	s.Encode(&enc)

	return enc.Bytes(), nil
}
// UnmarshalJSON implements stdjson.Unmarshaler by running Decode over a jx
// decoder created from data.
func (s *PageMeta) UnmarshalJSON(data []byte) error {
	return s.Decode(jx.DecodeBytes(data))
}
// Encode implements json.Marshaler. // Encode implements json.Marshaler.
func (s *PageWithResults) Encode(e *jx.Encoder) { func (s *PageWithResults) Encode(e *jx.Encoder) {
e.ObjStart() e.ObjStart()
@@ -711,6 +859,11 @@ func (s *PageWithResults) encodeFields(e *jx.Encoder) {
e.FieldStart("status") e.FieldStart("status")
s.Status.Encode(e) s.Status.Encode(e)
} }
{
e.FieldStart("meta")
s.Meta.Encode(e)
}
{ {
e.FieldStart("results") e.FieldStart("results")
@@ -722,13 +875,14 @@ func (s *PageWithResults) encodeFields(e *jx.Encoder) {
} }
} }
var jsonFieldsNameOfPageWithResults = [6]string{ var jsonFieldsNameOfPageWithResults = [7]string{
0: "id", 0: "id",
1: "url", 1: "url",
2: "created", 2: "created",
3: "formats", 3: "formats",
4: "status", 4: "status",
5: "results", 5: "meta",
6: "results",
} }
// Decode decodes PageWithResults from json. // Decode decodes PageWithResults from json.
@@ -804,8 +958,18 @@ func (s *PageWithResults) Decode(d *jx.Decoder) error {
}(); err != nil { }(); err != nil {
return errors.Wrap(err, "decode field \"status\"") return errors.Wrap(err, "decode field \"status\"")
} }
case "results": case "meta":
requiredBitSet[0] |= 1 << 5 requiredBitSet[0] |= 1 << 5
if err := func() error {
if err := s.Meta.Decode(d); err != nil {
return err
}
return nil
}(); err != nil {
return errors.Wrap(err, "decode field \"meta\"")
}
case "results":
requiredBitSet[0] |= 1 << 6
if err := func() error { if err := func() error {
s.Results = make([]Result, 0) s.Results = make([]Result, 0)
if err := d.Arr(func(d *jx.Decoder) error { if err := d.Arr(func(d *jx.Decoder) error {
@@ -832,7 +996,7 @@ func (s *PageWithResults) Decode(d *jx.Decoder) error {
// Validate required fields. // Validate required fields.
var failures []validate.FieldError var failures []validate.FieldError
for i, mask := range [1]uint8{ for i, mask := range [1]uint8{
0b00111111, 0b01111111,
} { } {
if result := (requiredBitSet[i] & mask) ^ mask; result != 0 { if result := (requiredBitSet[i] & mask) ^ mask; result != 0 {
// Mask only required fields and check equality to mask using XOR. // Mask only required fields and check equality to mask using XOR.
@@ -878,6 +1042,138 @@ func (s *PageWithResults) UnmarshalJSON(data []byte) error {
return s.Decode(d) return s.Decode(d)
} }
// Encode writes s to e as a JSON object: it opens the object, emits the
// fields through encodeFields and closes the object again.
func (s *PageWithResultsMeta) Encode(e *jx.Encoder) {
e.ObjStart()
s.encodeFields(e)
e.ObjEnd()
}
// encodeFields writes the members of s into an already opened JSON object.
// "title" and "description" are always written; "error" is written only when
// the optional value is set.
func (s *PageWithResultsMeta) encodeFields(e *jx.Encoder) {
	e.FieldStart("title")
	e.Str(s.Title)

	e.FieldStart("description")
	e.Str(s.Description)

	if !s.Error.Set {
		return
	}

	e.FieldStart("error")
	s.Error.Encode(e)
}
// jsonFieldsNameOfPageWithResultsMeta lists the JSON keys of
// PageWithResultsMeta by field index; PageWithResultsMeta.Decode uses it to
// name missing required fields.
var jsonFieldsNameOfPageWithResultsMeta = [...]string{"title", "description", "error"}
// Decode reads a JSON object from d into s.
//
// The keys "title" and "description" are required, "error" is optional and
// every other key is skipped. A nil receiver is rejected. When required keys
// are absent, the returned *validate.Error names each missing field.
func (s *PageWithResultsMeta) Decode(d *jx.Decoder) error {
	if s == nil {
		return errors.New("invalid: unable to decode PageWithResultsMeta to nil")
	}

	// One bit per required field, positioned as in
	// jsonFieldsNameOfPageWithResultsMeta.
	var requiredBitSet [1]uint8

	readField := func(d *jx.Decoder, k []byte) error {
		switch string(k) {
		case "title":
			requiredBitSet[0] |= 1 << 0
			v, err := d.Str()
			s.Title = string(v)
			if err != nil {
				return errors.Wrap(err, "decode field \"title\"")
			}
		case "description":
			requiredBitSet[0] |= 1 << 1
			v, err := d.Str()
			s.Description = string(v)
			if err != nil {
				return errors.Wrap(err, "decode field \"description\"")
			}
		case "error":
			s.Error.Reset()
			if err := s.Error.Decode(d); err != nil {
				return errors.Wrap(err, "decode field \"error\"")
			}
		default:
			return d.Skip()
		}
		return nil
	}
	if err := d.ObjBytes(readField); err != nil {
		return errors.Wrap(err, "decode PageWithResultsMeta")
	}

	// Validate required fields: every bit of the mask that was not seen while
	// decoding marks a missing field.
	const requiredMask uint8 = 0b00000011

	var failures []validate.FieldError
	for missing := requiredMask &^ requiredBitSet[0]; missing != 0; {
		bitIdx := bits.TrailingZeros8(missing)

		name := strconv.Itoa(bitIdx)
		if bitIdx < len(jsonFieldsNameOfPageWithResultsMeta) {
			name = jsonFieldsNameOfPageWithResultsMeta[bitIdx]
		}

		failures = append(failures, validate.FieldError{
			Name:  name,
			Error: validate.ErrFieldRequired,
		})

		// Clear the handled bit so the next iteration finds the next one.
		missing &^= 1 << bitIdx
	}
	if len(failures) > 0 {
		return &validate.Error{Fields: failures}
	}

	return nil
}
// MarshalJSON implements stdjson.Marshaler by encoding s with a fresh
// jx.Encoder and returning its bytes. The returned error is always nil.
func (s *PageWithResultsMeta) MarshalJSON() ([]byte, error) {
	var enc jx.Encoder
	s.Encode(&enc)

	return enc.Bytes(), nil
}
// UnmarshalJSON implements stdjson.Unmarshaler by running Decode over a jx
// decoder created from data.
func (s *PageWithResultsMeta) UnmarshalJSON(data []byte) error {
	return s.Decode(jx.DecodeBytes(data))
}
// Encode encodes Pages as json. // Encode encodes Pages as json.
func (s Pages) Encode(e *jx.Encoder) { func (s Pages) Encode(e *jx.Encoder) {
unwrapped := []Page(s) unwrapped := []Page(s)

View File

@@ -324,6 +324,7 @@ type Page struct {
Created time.Time `json:"created"` Created time.Time `json:"created"`
Formats []Format `json:"formats"` Formats []Format `json:"formats"`
Status Status `json:"status"` Status Status `json:"status"`
Meta PageMeta `json:"meta"`
} }
// GetID returns the value of ID. // GetID returns the value of ID.
@@ -351,6 +352,11 @@ func (s *Page) GetStatus() Status {
return s.Status return s.Status
} }
// GetMeta returns the value of Meta.
// The struct is returned by value, so the caller receives a copy.
func (s *Page) GetMeta() PageMeta {
return s.Meta
}
// SetID sets the value of ID. // SetID sets the value of ID.
func (s *Page) SetID(val uuid.UUID) { func (s *Page) SetID(val uuid.UUID) {
s.ID = val s.ID = val
@@ -376,8 +382,49 @@ func (s *Page) SetStatus(val Status) {
s.Status = val s.Status = val
} }
// SetMeta sets the value of Meta, replacing the whole struct with val.
func (s *Page) SetMeta(val PageMeta) {
s.Meta = val
}
func (*Page) addPageRes() {} func (*Page) addPageRes() {}
// PageMeta is the "meta" object of a Page: the page title and description
// plus an optional error string.
type PageMeta struct {
// Title is encoded as the JSON field "title".
Title string `json:"title"`
// Description is encoded as the JSON field "description".
Description string `json:"description"`
// Error is optional; it is encoded as "error" only when it is set.
Error OptString `json:"error"`
}
// GetTitle returns the value of Title.
func (s *PageMeta) GetTitle() string {
return s.Title
}
// GetDescription returns the value of Description.
func (s *PageMeta) GetDescription() string {
return s.Description
}
// GetError returns the value of Error.
func (s *PageMeta) GetError() OptString {
return s.Error
}
// SetTitle sets the value of Title.
func (s *PageMeta) SetTitle(val string) {
s.Title = val
}
// SetDescription sets the value of Description.
func (s *PageMeta) SetDescription(val string) {
s.Description = val
}
// SetError sets the value of Error.
func (s *PageMeta) SetError(val OptString) {
s.Error = val
}
// Merged schema. // Merged schema.
// Ref: #/components/schemas/pageWithResults // Ref: #/components/schemas/pageWithResults
type PageWithResults struct { type PageWithResults struct {
@@ -386,6 +433,7 @@ type PageWithResults struct {
Created time.Time `json:"created"` Created time.Time `json:"created"`
Formats []Format `json:"formats"` Formats []Format `json:"formats"`
Status Status `json:"status"` Status Status `json:"status"`
Meta PageWithResultsMeta `json:"meta"`
Results []Result `json:"results"` Results []Result `json:"results"`
} }
@@ -414,6 +462,11 @@ func (s *PageWithResults) GetStatus() Status {
return s.Status return s.Status
} }
// GetMeta returns the value of Meta.
// The struct is returned by value, so the caller receives a copy.
func (s *PageWithResults) GetMeta() PageWithResultsMeta {
return s.Meta
}
// GetResults returns the value of Results. // GetResults returns the value of Results.
func (s *PageWithResults) GetResults() []Result { func (s *PageWithResults) GetResults() []Result {
return s.Results return s.Results
@@ -444,6 +497,11 @@ func (s *PageWithResults) SetStatus(val Status) {
s.Status = val s.Status = val
} }
// SetMeta sets the value of Meta, replacing the whole struct with val.
func (s *PageWithResults) SetMeta(val PageWithResultsMeta) {
s.Meta = val
}
// SetResults sets the value of Results. // SetResults sets the value of Results.
func (s *PageWithResults) SetResults(val []Result) { func (s *PageWithResults) SetResults(val []Result) {
s.Results = val s.Results = val
@@ -451,6 +509,42 @@ func (s *PageWithResults) SetResults(val []Result) {
func (*PageWithResults) getPageRes() {} func (*PageWithResults) getPageRes() {}
// PageWithResultsMeta is the "meta" object of a PageWithResults: the page
// title and description plus an optional error string.
type PageWithResultsMeta struct {
// Title is encoded as the JSON field "title".
Title string `json:"title"`
// Description is encoded as the JSON field "description".
Description string `json:"description"`
// Error is optional; it is encoded as "error" only when it is set.
Error OptString `json:"error"`
}
// GetTitle returns the value of Title.
func (s *PageWithResultsMeta) GetTitle() string {
return s.Title
}
// GetDescription returns the value of Description.
func (s *PageWithResultsMeta) GetDescription() string {
return s.Description
}
// GetError returns the value of Error.
func (s *PageWithResultsMeta) GetError() OptString {
return s.Error
}
// SetTitle sets the value of Title.
func (s *PageWithResultsMeta) SetTitle(val string) {
s.Title = val
}
// SetDescription sets the value of Description.
func (s *PageWithResultsMeta) SetDescription(val string) {
s.Description = val
}
// SetError sets the value of Error.
func (s *PageWithResultsMeta) SetError(val OptString) {
s.Error = val
}
type Pages []Page type Pages []Page
// Ref: #/components/schemas/result // Ref: #/components/schemas/result

View File

@@ -6,6 +6,7 @@ import (
"fmt" "fmt"
"net" "net"
"net/http" "net/http"
"strings"
"sync" "sync"
"time" "time"
@@ -48,7 +49,8 @@ func NewApplication(cfg config.Config) (Application, error) {
worker := entity.NewWorker(workerCh, pageRepo, processor, log.Named("worker")) worker := entity.NewWorker(workerCh, pageRepo, processor, log.Named("worker"))
server, err := openapi.NewServer( server, err := openapi.NewServer(
rest.NewService(pageRepo, workerCh), rest.NewService(pageRepo, workerCh, processor),
openapi.WithPathPrefix("/api/v1"),
openapi.WithMiddleware( openapi.WithMiddleware(
func(r middleware.Request, next middleware.Next) (middleware.Response, error) { func(r middleware.Request, next middleware.Next) (middleware.Response, error) {
start := time.Now() start := time.Now()
@@ -79,13 +81,13 @@ func NewApplication(cfg config.Config) (Application, error) {
ui := rest.NewUI(cfg.UI) ui := rest.NewUI(cfg.UI)
httpHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { httpHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if ui.IsUIRequest(r) { if strings.HasPrefix(r.URL.Path, "/api/") {
ui.ServeHTTP(w, r) server.ServeHTTP(w, r)
return return
} }
server.ServeHTTP(w, r) ui.ServeHTTP(w, r)
}) })
} }

View File

@@ -37,19 +37,19 @@ type PDF struct {
Grayscale bool `env:"GRAYSCALE,default=false"` Grayscale bool `env:"GRAYSCALE,default=false"`
MediaPrint bool `env:"MEDIA_PRINT,default=true"` MediaPrint bool `env:"MEDIA_PRINT,default=true"`
Zoom float64 `env:"ZOOM,default=1"` Zoom float64 `env:"ZOOM,default=1"`
Viewport string `env:"VIEWPORT,default=1920x1080"` Viewport string `env:"VIEWPORT,default=1280x720"`
DPI uint `env:"DPI,default=300"` DPI uint `env:"DPI,default=150"`
Filename string `env:"FILENAME,default=page.pdf"` Filename string `env:"FILENAME,default=page.pdf"`
} }
type API struct { type API struct {
Prefix string `env:"PREFIX,default=/"`
Address string `env:"ADDRESS,default=0.0.0.0:5001"` Address string `env:"ADDRESS,default=0.0.0.0:5001"`
} }
type UI struct { type UI struct {
Enabled bool `env:"ENABLED,default=true"` Enabled bool `env:"ENABLED,default=true"`
Prefix string `env:"PREFIX,default=/"` Prefix string `env:"PREFIX,default=/"`
Theme string `env:"THEME,default=basic"`
} }
type DB struct { type DB struct {

View File

@@ -11,6 +11,7 @@ import (
type Processor interface { type Processor interface {
Process(ctx context.Context, format Format, url string) Result Process(ctx context.Context, format Format, url string) Result
GetMeta(ctx context.Context, url string) (Meta, error)
} }
type Format uint8 type Format uint8
@@ -37,6 +38,12 @@ const (
StatusWithErrors StatusWithErrors
) )
// Meta holds descriptive metadata about a page, as returned by
// Processor.GetMeta and stored in Page.Meta.
type Meta struct {
// Title is the page title.
Title string
// Description is the page description.
Description string
// Error is an error message related to the metadata.
// NOTE(review): presumably the reason the metadata could not be fetched — confirm against callers.
Error string
}
func NewPage(url string, description string, formats ...Format) *Page { func NewPage(url string, description string, formats ...Format) *Page {
return &Page{ return &Page{
ID: uuid.New(), ID: uuid.New(),
@@ -57,6 +64,7 @@ type Page struct {
Results Results Results Results
Version uint16 Version uint16
Status Status Status Status
Meta Meta
} }
func (p *Page) SetProcessing() { func (p *Page) SetProcessing() {

View File

@@ -9,6 +9,7 @@ import (
type Pages interface { type Pages interface {
Save(ctx context.Context, page *Page) error Save(ctx context.Context, page *Page) error
ListUnprocessed(ctx context.Context) ([]*Page, error)
} }
func NewWorker(ch chan *Page, pages Pages, processor Processor, log *zap.Logger) *Worker { func NewWorker(ch chan *Page, pages Pages, processor Processor, log *zap.Logger) *Worker {
@@ -27,6 +28,20 @@ func (w *Worker) Start(ctx context.Context, wg *sync.WaitGroup) {
w.log.Info("starting") w.log.Info("starting")
wg.Add(1)
go func() {
defer wg.Done()
unprocessed, err := w.pages.ListUnprocessed(ctx)
if err != nil {
w.log.Error("failed to get unprocessed pages", zap.Error(err))
} else {
for i := range unprocessed {
w.ch <- unprocessed[i]
}
}
}()
for { for {
select { select {
case <-ctx.Done(): case <-ctx.Done():

View File

@@ -2,6 +2,7 @@ package rest
import ( import (
"fmt" "fmt"
"html"
"github.com/derfenix/webarchive/api/openapi" "github.com/derfenix/webarchive/api/openapi"
"github.com/derfenix/webarchive/entity" "github.com/derfenix/webarchive/entity"
@@ -22,6 +23,11 @@ func PageToRestWithResults(page *entity.Page) openapi.PageWithResults {
return res return res
}(), }(),
Status: StatusToRest(page.Status), Status: StatusToRest(page.Status),
Meta: openapi.PageWithResultsMeta{
Title: html.EscapeString(page.Meta.Title),
Description: html.EscapeString(page.Meta.Description),
Error: openapi.NewOptString(page.Meta.Error),
},
Results: func() []openapi.Result { Results: func() []openapi.Result {
results := make([]openapi.Result, len(page.Results.Results())) results := make([]openapi.Result, len(page.Results.Results()))
@@ -65,6 +71,11 @@ func PageToRest(page *entity.Page) openapi.Page {
ID: page.ID, ID: page.ID,
URL: page.URL, URL: page.URL,
Created: page.Created, Created: page.Created,
Meta: openapi.PageMeta{
Title: html.EscapeString(page.Meta.Title),
Description: html.EscapeString(page.Meta.Description),
Error: openapi.NewOptString(page.Meta.Error),
},
Formats: func() []openapi.Format { Formats: func() []openapi.Format {
res := make([]openapi.Format, len(page.Formats)) res := make([]openapi.Format, len(page.Formats))

View File

@@ -20,14 +20,19 @@ type Pages interface {
GetFile(ctx context.Context, pageID, fileID uuid.UUID) (*entity.File, error) GetFile(ctx context.Context, pageID, fileID uuid.UUID) (*entity.File, error)
} }
func NewService(sites Pages, ch chan *entity.Page) *Service { func NewService(pages Pages, ch chan *entity.Page, processor entity.Processor) *Service {
return &Service{pages: sites, ch: ch} return &Service{
pages: pages,
ch: ch,
processor: processor,
}
} }
type Service struct { type Service struct {
openapi.UnimplementedHandler openapi.UnimplementedHandler
pages Pages pages Pages
ch chan *entity.Page ch chan *entity.Page
processor entity.Processor
} }
func (s *Service) GetPage(ctx context.Context, params openapi.GetPageParams) (openapi.GetPageRes, error) { func (s *Service) GetPage(ctx context.Context, params openapi.GetPageParams) (openapi.GetPageRes, error) {
@@ -78,6 +83,13 @@ func (s *Service) AddPage(ctx context.Context, req openapi.OptAddPageReq, params
page := entity.NewPage(url, description, domainFormats...) page := entity.NewPage(url, description, domainFormats...)
page.Status = entity.StatusProcessing page.Status = entity.StatusProcessing
meta, err := s.processor.GetMeta(ctx, page.URL)
if err != nil {
page.Meta.Error = err.Error()
} else {
page.Meta = meta
}
if err := s.pages.Save(ctx, page); err != nil { if err := s.pages.Save(ctx, page); err != nil {
return nil, fmt.Errorf("save page: %w", err) return nil, fmt.Errorf("save page: %w", err)
} }

View File

@@ -10,15 +10,19 @@ import (
) )
func NewUI(cfg config.UI) *UI { func NewUI(cfg config.UI) *UI {
return &UI{prefix: cfg.Prefix} return &UI{
prefix: cfg.Prefix,
theme: cfg.Theme,
}
} }
type UI struct { type UI struct {
prefix string prefix string
theme string
} }
func (u *UI) ServeHTTP(w http.ResponseWriter, r *http.Request) { func (u *UI) ServeHTTP(w http.ResponseWriter, r *http.Request) {
serveRoot, err := fs.Sub(ui.StaticFiles, "static") serveRoot, err := fs.Sub(ui.StaticFiles, u.theme)
if err != nil { if err != nil {
w.WriteHeader(http.StatusInternalServerError) w.WriteHeader(http.StatusInternalServerError)
return return
@@ -27,12 +31,11 @@ func (u *UI) ServeHTTP(w http.ResponseWriter, r *http.Request) {
if strings.HasPrefix(r.URL.Path, u.prefix) { if strings.HasPrefix(r.URL.Path, u.prefix) {
r.URL.Path = "/" + strings.TrimPrefix(r.URL.Path, u.prefix) r.URL.Path = "/" + strings.TrimPrefix(r.URL.Path, u.prefix)
} }
if !strings.HasPrefix(r.URL.Path, "/static") {
r.URL.Path = "/"
}
r.URL.Path = strings.TrimPrefix(r.URL.Path, "/static") r.URL.Path = strings.TrimPrefix(r.URL.Path, "/static")
http.FileServer(http.FS(serveRoot)).ServeHTTP(w, r) http.FileServer(http.FS(serveRoot)).ServeHTTP(w, r)
} }
func (u *UI) IsUIRequest(r *http.Request) bool {
return r.URL.Path == u.prefix || strings.HasPrefix(r.URL.Path, "/static/")
}

47
ui/basic/index.html Normal file
View File

@@ -0,0 +1,47 @@
<!doctype html>
<!--
  Single-page shell of the "basic" UI theme.
  The <template> elements below are cloned by main.js and inserted into #data.
-->
<html lang="en">
<head>
    <meta charset="UTF-8">
    <!-- Zoom is deliberately left enabled: disabling user scaling hurts accessibility. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <title>WebArchive</title>
    <link rel="stylesheet" href="/static/style.css">
    <script src="/static/lib.js"></script>
    <script src="/static/main.js"></script>
</head>
<body>
<!-- One entry of the page list. -->
<template id="pages_tmpl">
    <div class="page_item">
        <a class="url link"><span class="title"></span><span class="status"></span></a>
        <div class="description"></div>
        <div class="created"></div>
        <hr>
    </div>
</template>
<!-- Details view of a single page. -->
<template id="page_tmpl">
    <a onclick="history.back()" class="link">Back</a>
    <div class="page">
        <h2 id="page_title"></h2>
        <h3 id="page_description"></h3>
        <!--
          textContent (not innerHTML) is used so that characters such as "&" are
          passed to window.open unescaped; "noopener" keeps the opened site from
          reaching back into this window.
        -->
        <h5 id="page_url" class="link" onclick="window.open(this.textContent, '_blank', 'noopener')"></h5>
        <h4>Results</h4>
        <div id="results"></div>
    </div>
</template>
<!-- One processing result (format + link or error marker) inside #results. -->
<template id="result_tmpl">
    <div class="result_item">
        <span class="format"></span>
        <span class="result_link link"></span>
    </div>
</template>
<h1 id="site_title"></h1>
<!-- Content area; replaced by main.js once data is loaded. -->
<div id="data">
    None
</div>
</body>
</html>

2
ui/basic/lib.js Normal file

File diff suppressed because one or more lines are too long

90
ui/basic/main.js Normal file
View File

@@ -0,0 +1,90 @@
/**
 * Loads the list of pages from the API and renders it into #data.
 *
 * Every entry is cloned from the #pages_tmpl template. All values coming from
 * the API are inserted as plain text, never as HTML: titles and descriptions
 * originate from archived third-party pages and must not be able to inject
 * markup or scripts into the UI.
 */
function index() {
    $.ajax({
        url: "/api/v1/pages",
        success: function (data, status, xhr) {
            if (status !== "success") {
                gotError(status);
                return;
            }
            let elem = document.getElementById("data");
            elem.innerHTML = "";
            let tmpl = document.getElementById("pages_tmpl");
            // An empty list may be serialized as null; treat it as "no pages".
            (data || []).forEach(function (v) {
                let meta = v.meta || {};
                let page_elem = tmpl.content.cloneNode(true);
                // Bind a real handler instead of building JavaScript source
                // from the id, so the id can never be interpreted as code.
                $(page_elem).find(".url").on("click", function () {
                    goToPage(v.id);
                });
                $(page_elem).find(".status").addClass(v.status);
                $(page_elem).find(".status").attr("title", v.status);
                $(page_elem).find(".created").text(v.created || "");
                // Fall back to the URL so that an entry whose metadata could
                // not be fetched still has visible, clickable text.
                $(page_elem).find(".title").text(meta.title || v.url || "");
                $(page_elem).find(".description").text(meta.description || "");
                elem.append(page_elem);
            });
        },
        // Report transport/HTTP failures instead of ignoring them silently.
        error: function (xhr, status) {
            gotError(status);
        }
    });
}
/**
 * Navigates to the details view of a page: records the page id in the browser
 * history (using the id as the relative URL) and then renders the page.
 *
 * @param {string} id identifier of the page to show
 */
function goToPage(id) {
    const state = {page: id};
    window.history.pushState(state, null, id);
    page(id);
}
/**
 * Loads a single page from the API and renders its details into #data.
 *
 * The view is cloned from #page_tmpl and every result from #result_tmpl.
 * A result with an error shows a warning sign carrying the error text as its
 * tooltip; otherwise each of its files gets its own link that opens the file
 * in a new tab. All API values are inserted as plain text, never as HTML.
 *
 * @param {string} id identifier of the page to show
 */
function page(id) {
    $.ajax({
        url: "/api/v1/pages/" + id,
        success: function (data, status, xhr) {
            if (status !== "success") {
                gotError(status);
                return;
            }
            let meta = data.meta || {};
            let elem = document.getElementById("data");
            elem.innerHTML = "";
            let page_elem = document.getElementById("page_tmpl").content.cloneNode(true);
            $(page_elem).find("#page_title").text(meta.title || "");
            $(page_elem).find("#page_description").text(meta.description || "");
            $(page_elem).find("#page_url").text(data.url || "");
            let result_tmpl_elem = document.getElementById("result_tmpl");
            // A page may not have any results (yet); treat a missing list as empty.
            (data.results || []).forEach(function (result) {
                let result_elem = result_tmpl_elem.content.cloneNode(true);
                let first_link = result_elem.querySelector(".result_link");
                $(result_elem).find(".format").text(result.format || "");
                if (result.error) {
                    $(result_elem).find(".format").addClass("error");
                    first_link.textContent = "⚠";
                    first_link.setAttribute("title", result.error);
                } else {
                    (result.files || []).forEach(function (file, i) {
                        // The first file reuses the link from the template;
                        // every further file gets its own copy so that no
                        // file overwrites the link of the previous one.
                        let file_link = first_link;
                        if (i > 0) {
                            file_link = first_link.cloneNode(false);
                            first_link.parentNode.append(" ", file_link);
                        }
                        let file_url = "/api/v1/pages/" + encodeURIComponent(data.id) +
                            "/file/" + encodeURIComponent(file.id);
                        file_link.textContent = file.name;
                        // Real handler instead of JavaScript source assembled
                        // from API values.
                        file_link.addEventListener("click", function () {
                            window.open(file_url, "_blank");
                        });
                    });
                }
                $(page_elem).find("#results").append(result_elem);
            });
            elem.append(page_elem);
        },
        // Report transport/HTTP failures instead of ignoring them silently.
        error: function (xhr, status) {
            gotError(status);
        }
    });
}
/**
 * Reports a failed API request.
 *
 * Uses console.error so that failures are reported at error level in the
 * browser console.
 *
 * @param {*} err error description (e.g. the request status text)
 */
function gotError(err) {
    console.error(err);
}
// Initial routing: set the titles and render either the page list (path ends
// with "/") or the details view of the page named by the path.
document.addEventListener("DOMContentLoaded", function () {
    const title = "WebArchive " + window.location.hostname;
    // Plain text: the title is never meant to be parsed as HTML.
    $("#site_title").text(title);
    document.title = title;
    const path = window.location.pathname;
    if (path.endsWith("/")) {
        index();
    } else {
        // goToPage() pushes the id as a relative URL, so the id is always the
        // last path segment, also when the UI is served below a sub-directory.
        page(path.slice(path.lastIndexOf("/") + 1));
    }
});
// Back/forward navigation: a history entry without state is the page list,
// an entry with state carries the id of the page to show.
window.addEventListener('popstate', function (event) {
    const state = event.state;
    if (state !== null) {
        page(state.page);
        return;
    }
    index();
});

61
ui/basic/style.css Normal file

File diff suppressed because one or more lines are too long

View File

@@ -4,5 +4,5 @@ import (
"embed" "embed"
) )
//go:embed static/* //go:embed */*.html */*.css */*.js
var StaticFiles embed.FS var StaticFiles embed.FS

View File

@@ -1,14 +0,0 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Document</title>
<link rel="stylesheet" href="/static/style.css">
</head>
<body>
<h1>Hello World!</h1>
</body>
</html>

View File

@@ -1,3 +0,0 @@
h1 {
background-color: azure;
}