web ui: index and basic details page, api refactoring

This commit is contained in:
2023-04-04 21:51:45 +03:00
parent 2a8b94136f
commit f47dbefb67
21 changed files with 821 additions and 58 deletions

View File

@@ -24,13 +24,17 @@ variables:
* **LOGGING_DEBUG** — enable debug logs (default `false`)
* **API**
* **API_ADDRESS** — address the API server will listen on (default `0.0.0.0:5001`)
* **UI**
* **UI_ENABLED** — enable the built-in web UI (default `true`)
* **UI_PREFIX** — Prefix for the web UI (default `/`)
* **UI_THEME** — UI theme name (default `basic`). No other values available yet
* **PDF**
* **PDF_LANDSCAPE** — use landscape page orientation instead of portrait (default `false`)
* **PDF_GRAYSCALE** — use grayscale filter for the output pdf (default `false`)
* **PDF_MEDIA_PRINT** — use media type `print` for the request (default `true`)
* **PDF_ZOOM** — zoom page (default `1.0` i.e. no actual zoom)
* **PDF_VIEWPORT** — use specified viewport value (default `1920x1080`)
* **PDF_DPI** — use specified DPI value for the output pdf (default `300`)
* **PDF_VIEWPORT** — use specified viewport value (default `1280x720`)
* **PDF_DPI** — use specified DPI value for the output pdf (default `150`)
* **PDF_FILENAME** — use specified name for output pdf file (default `page.pdf`)
@@ -60,7 +64,7 @@ docker compose up -d webarchive
### 2. Add a page
```shell
curl -X POST --location "http://localhost:5001/pages" \
curl -X POST --location "http://localhost:5001/api/v1/pages" \
-H "Content-Type: application/json" \
-d "{
\"url\": \"https://github.com/wkhtmltopdf/wkhtmltopdf/issues/1937\",
@@ -75,13 +79,13 @@ or
```shell
curl -X POST --location \
"http://localhost:5001/pages?url=https%3A%2F%2Fgithub.com%2Fwkhtmltopdf%2Fwkhtmltopdf%2Fissues%2F1937&formats=pdf%2Cheaders&description=Foo+Bar"
"http://localhost:5001/api/v1/pages?url=https%3A%2F%2Fgithub.com%2Fwkhtmltopdf%2Fwkhtmltopdf%2Fissues%2F1937&formats=pdf%2Cheaders&description=Foo+Bar"
```
### 3. Get the page's info
```shell
curl -X GET --location "http://localhost:5001/pages/$page_id" | jq .
curl -X GET --location "http://localhost:5001/api/v1/pages/$page_id" | jq .
```
where `$page_id` — value of the `id` field from previous command response.
If `status` field in response is `success` (or `with_errors`) - the `results` field
@@ -90,7 +94,7 @@ will contain all processed formats with ids of the stored files.
### 4. Open file in browser
```shell
xdg-open "http://localhost:5001/pages/$page_id/file/$file_id"
xdg-open "http://localhost:5001/api/v1/pages/$page_id/file/$file_id"
```
Where `$page_id` — value of the `id` field from the previous command's response, and
`$file_id` — the id of the file you are interested in.
@@ -98,7 +102,7 @@ Where `$page_id` — value of the `id` field from previous command response, an
### 5. List all stored pages
```shell
curl -X GET --location "http://localhost:5001/pages" | jq .
curl -X GET --location "http://localhost:5001/api/v1/pages" | jq .
```
## Roadmap

View File

@@ -47,6 +47,7 @@ func (p *PDF) Process(_ context.Context, url string) ([]entity.File, error) {
page.FooterFontSize.Set(10)
page.Zoom.Set(p.cfg.Zoom)
page.ViewportSize.Set(p.cfg.Viewport)
page.NoBackground.Set(true)
gen.AddPage(page)

View File

@@ -8,6 +8,8 @@ import (
"net/http/cookiejar"
"time"
"golang.org/x/net/html"
"github.com/derfenix/webarchive/config"
"github.com/derfenix/webarchive/entity"
)
@@ -52,6 +54,7 @@ func NewProcessors(cfg config.Config) (*Processors, error) {
}
procs := Processors{
client: httpClient,
processors: map[entity.Format]processor{
entity.FormatHeaders: NewHeaders(httpClient),
entity.FormatPDF: NewPDF(cfg.PDF),
@@ -64,6 +67,7 @@ func NewProcessors(cfg config.Config) (*Processors, error) {
type Processors struct {
processors map[entity.Format]processor
client *http.Client
}
func (p *Processors) Process(ctx context.Context, format entity.Format, url string) entity.Result {
@@ -93,3 +97,62 @@ func (p *Processors) OverrideProcessor(format entity.Format, proc processor) err
return nil
}
// GetMeta fetches url with the processors' shared HTTP client and extracts
// the page metadata (title and description) from the returned HTML document.
//
// It returns an error when the request cannot be built or sent, when the
// server answers with a status other than 200, when the response carries no
// body, or when the body cannot be parsed as HTML. On error the returned
// entity.Meta is the zero value.
func (p *Processors) GetMeta(ctx context.Context, url string) (entity.Meta, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return entity.Meta{}, fmt.Errorf("new request: %w", err)
	}

	response, err := p.client.Do(req)
	if err != nil {
		return entity.Meta{}, fmt.Errorf("do request: %w", err)
	}

	// Register the close before any early return so the body (and with it
	// the underlying connection) is released on the non-200 path as well.
	if response.Body != nil {
		defer func() {
			// Best effort: a close failure cannot be handled meaningfully here.
			_ = response.Body.Close()
		}()
	}

	if response.StatusCode != http.StatusOK {
		return entity.Meta{}, fmt.Errorf("want status 200, got %d", response.StatusCode)
	}

	if response.Body == nil {
		return entity.Meta{}, fmt.Errorf("empty response body")
	}

	htmlNode, err := html.Parse(response.Body)
	if err != nil {
		return entity.Meta{}, fmt.Errorf("parse response body: %w", err)
	}

	meta := entity.Meta{}
	getMetaData(htmlNode, &meta)

	return meta, nil
}
// getMetaData walks the HTML tree rooted at n depth-first and fills meta
// from what it finds:
//
//   - Title is taken from the first child node of a <title> element;
//   - Description is taken from the content attribute of a
//     <meta name="description"> element.
//
// When several matching elements exist, the one visited last wins.
// A nil n is a no-op.
func getMetaData(n *html.Node, meta *entity.Meta) {
	if n == nil {
		return
	}

	for c := n.FirstChild; c != nil; c = c.NextSibling {
		if c.Type == html.ElementNode {
			switch c.Data {
			case "title":
				// An empty <title></title> has no child node; skip it
				// instead of dereferencing a nil FirstChild.
				if c.FirstChild != nil {
					meta.Title = c.FirstChild.Data
				}

			case "meta":
				// Collect the two attributes of interest; for repeated
				// attributes the last occurrence wins.
				var name, content string

				for _, attr := range c.Attr {
					switch attr.Key {
					case "name":
						name = attr.Val
					case "content":
						content = attr.Val
					}
				}

				if name == "description" {
					meta.Description = content
				}
			}
		}

		getMetaData(c, meta)
	}
}

View File

@@ -64,18 +64,18 @@ func (p *Page) GetFile(_ context.Context, pageID, fileID uuid.UUID) (*entity.Fil
return file, nil
}
func (p *Page) Save(_ context.Context, site *entity.Page) error {
func (p *Page) Save(_ context.Context, page *entity.Page) error {
if p.db.IsClosed() {
return ErrDBClosed
}
marshaled, err := marshal(site)
marshaled, err := marshal(page)
if err != nil {
return fmt.Errorf("marshal data: %w", err)
}
if err := p.db.Update(func(txn *badger.Txn) error {
if err := txn.Set(p.key(site), marshaled); err != nil {
if err := txn.Set(p.key(page), marshaled); err != nil {
return fmt.Errorf("put data: %w", err)
}
@@ -151,6 +151,64 @@ func (p *Page) ListAll(ctx context.Context) ([]*entity.Page, error) {
Formats: page.Formats,
Version: page.Version,
Status: page.Status,
Meta: page.Meta,
})
}
return nil
})
if err != nil {
return nil, fmt.Errorf("view: %w", err)
}
sort.Slice(pages, func(i, j int) bool {
return pages[i].Created.After(pages[j].Created)
})
return pages, nil
}
func (p *Page) ListUnprocessed(ctx context.Context) ([]*entity.Page, error) {
pages := make([]*entity.Page, 0, 100)
err := p.db.View(func(txn *badger.Txn) error {
iterator := txn.NewIterator(badger.DefaultIteratorOptions)
defer iterator.Close()
for iterator.Seek(p.prefix); iterator.ValidForPrefix(p.prefix); iterator.Next() {
if err := ctx.Err(); err != nil {
return fmt.Errorf("context canceled: %w", err)
}
var page entity.Page
err := iterator.Item().Value(func(val []byte) error {
if err := unmarshal(val, &page); err != nil {
return fmt.Errorf("unmarshal: %w", err)
}
return nil
})
if err != nil {
return fmt.Errorf("get item: %w", err)
}
if page.Status != entity.StatusProcessing {
continue
}
pages = append(pages, &entity.Page{
ID: page.ID,
URL: page.URL,
Description: page.Description,
Created: page.Created,
Formats: page.Formats,
Version: page.Version,
Status: page.Status,
Meta: page.Meta,
})
}

View File

@@ -4,7 +4,7 @@ info:
description: API description in Markdown.
version: 1.0.0
servers:
- url: 'https://api.example.com'
- url: 'https://api.example.com/api/v1'
paths:
/pages:
get:
@@ -183,12 +183,25 @@ components:
$ref: '#/components/schemas/format'
status:
$ref: '#/components/schemas/status'
meta:
type: object
properties:
title:
type: string
description:
type: string
error:
type: string
required:
- title
- description
required:
- id
- url
- formats
- status
- created
- meta
result:
type: object
properties:

View File

@@ -534,14 +534,20 @@ func (s *Page) encodeFields(e *jx.Encoder) {
e.FieldStart("status")
s.Status.Encode(e)
}
{
e.FieldStart("meta")
s.Meta.Encode(e)
}
}
var jsonFieldsNameOfPage = [5]string{
var jsonFieldsNameOfPage = [6]string{
0: "id",
1: "url",
2: "created",
3: "formats",
4: "status",
5: "meta",
}
// Decode decodes Page from json.
@@ -617,6 +623,16 @@ func (s *Page) Decode(d *jx.Decoder) error {
}(); err != nil {
return errors.Wrap(err, "decode field \"status\"")
}
case "meta":
requiredBitSet[0] |= 1 << 5
if err := func() error {
if err := s.Meta.Decode(d); err != nil {
return err
}
return nil
}(); err != nil {
return errors.Wrap(err, "decode field \"meta\"")
}
default:
return d.Skip()
}
@@ -627,7 +643,7 @@ func (s *Page) Decode(d *jx.Decoder) error {
// Validate required fields.
var failures []validate.FieldError
for i, mask := range [1]uint8{
0b00011111,
0b00111111,
} {
if result := (requiredBitSet[i] & mask) ^ mask; result != 0 {
// Mask only required fields and check equality to mask using XOR.
@@ -673,6 +689,138 @@ func (s *Page) UnmarshalJSON(data []byte) error {
return s.Decode(d)
}
// Encode implements json.Marshaler.
func (s *PageMeta) Encode(e *jx.Encoder) {
e.ObjStart()
s.encodeFields(e)
e.ObjEnd()
}
// encodeFields encodes fields.
func (s *PageMeta) encodeFields(e *jx.Encoder) {
{
e.FieldStart("title")
e.Str(s.Title)
}
{
e.FieldStart("description")
e.Str(s.Description)
}
{
if s.Error.Set {
e.FieldStart("error")
s.Error.Encode(e)
}
}
}
var jsonFieldsNameOfPageMeta = [3]string{
0: "title",
1: "description",
2: "error",
}
// Decode decodes PageMeta from json.
func (s *PageMeta) Decode(d *jx.Decoder) error {
if s == nil {
return errors.New("invalid: unable to decode PageMeta to nil")
}
var requiredBitSet [1]uint8
if err := d.ObjBytes(func(d *jx.Decoder, k []byte) error {
switch string(k) {
case "title":
requiredBitSet[0] |= 1 << 0
if err := func() error {
v, err := d.Str()
s.Title = string(v)
if err != nil {
return err
}
return nil
}(); err != nil {
return errors.Wrap(err, "decode field \"title\"")
}
case "description":
requiredBitSet[0] |= 1 << 1
if err := func() error {
v, err := d.Str()
s.Description = string(v)
if err != nil {
return err
}
return nil
}(); err != nil {
return errors.Wrap(err, "decode field \"description\"")
}
case "error":
if err := func() error {
s.Error.Reset()
if err := s.Error.Decode(d); err != nil {
return err
}
return nil
}(); err != nil {
return errors.Wrap(err, "decode field \"error\"")
}
default:
return d.Skip()
}
return nil
}); err != nil {
return errors.Wrap(err, "decode PageMeta")
}
// Validate required fields.
var failures []validate.FieldError
for i, mask := range [1]uint8{
0b00000011,
} {
if result := (requiredBitSet[i] & mask) ^ mask; result != 0 {
// Mask only required fields and check equality to mask using XOR.
//
// If XOR result is not zero, result is not equal to expected, so some fields are missed.
// Bits of fields which would be set are actually bits of missed fields.
missed := bits.OnesCount8(result)
for bitN := 0; bitN < missed; bitN++ {
bitIdx := bits.TrailingZeros8(result)
fieldIdx := i*8 + bitIdx
var name string
if fieldIdx < len(jsonFieldsNameOfPageMeta) {
name = jsonFieldsNameOfPageMeta[fieldIdx]
} else {
name = strconv.Itoa(fieldIdx)
}
failures = append(failures, validate.FieldError{
Name: name,
Error: validate.ErrFieldRequired,
})
// Reset bit.
result &^= 1 << bitIdx
}
}
}
if len(failures) > 0 {
return &validate.Error{Fields: failures}
}
return nil
}
// MarshalJSON implements stdjson.Marshaler.
func (s *PageMeta) MarshalJSON() ([]byte, error) {
e := jx.Encoder{}
s.Encode(&e)
return e.Bytes(), nil
}
// UnmarshalJSON implements stdjson.Unmarshaler.
func (s *PageMeta) UnmarshalJSON(data []byte) error {
d := jx.DecodeBytes(data)
return s.Decode(d)
}
// Encode implements json.Marshaler.
func (s *PageWithResults) Encode(e *jx.Encoder) {
e.ObjStart()
@@ -711,6 +859,11 @@ func (s *PageWithResults) encodeFields(e *jx.Encoder) {
e.FieldStart("status")
s.Status.Encode(e)
}
{
e.FieldStart("meta")
s.Meta.Encode(e)
}
{
e.FieldStart("results")
@@ -722,13 +875,14 @@ func (s *PageWithResults) encodeFields(e *jx.Encoder) {
}
}
var jsonFieldsNameOfPageWithResults = [6]string{
var jsonFieldsNameOfPageWithResults = [7]string{
0: "id",
1: "url",
2: "created",
3: "formats",
4: "status",
5: "results",
5: "meta",
6: "results",
}
// Decode decodes PageWithResults from json.
@@ -804,8 +958,18 @@ func (s *PageWithResults) Decode(d *jx.Decoder) error {
}(); err != nil {
return errors.Wrap(err, "decode field \"status\"")
}
case "results":
case "meta":
requiredBitSet[0] |= 1 << 5
if err := func() error {
if err := s.Meta.Decode(d); err != nil {
return err
}
return nil
}(); err != nil {
return errors.Wrap(err, "decode field \"meta\"")
}
case "results":
requiredBitSet[0] |= 1 << 6
if err := func() error {
s.Results = make([]Result, 0)
if err := d.Arr(func(d *jx.Decoder) error {
@@ -832,7 +996,7 @@ func (s *PageWithResults) Decode(d *jx.Decoder) error {
// Validate required fields.
var failures []validate.FieldError
for i, mask := range [1]uint8{
0b00111111,
0b01111111,
} {
if result := (requiredBitSet[i] & mask) ^ mask; result != 0 {
// Mask only required fields and check equality to mask using XOR.
@@ -878,6 +1042,138 @@ func (s *PageWithResults) UnmarshalJSON(data []byte) error {
return s.Decode(d)
}
// Encode implements json.Marshaler.
func (s *PageWithResultsMeta) Encode(e *jx.Encoder) {
e.ObjStart()
s.encodeFields(e)
e.ObjEnd()
}
// encodeFields encodes fields.
func (s *PageWithResultsMeta) encodeFields(e *jx.Encoder) {
{
e.FieldStart("title")
e.Str(s.Title)
}
{
e.FieldStart("description")
e.Str(s.Description)
}
{
if s.Error.Set {
e.FieldStart("error")
s.Error.Encode(e)
}
}
}
var jsonFieldsNameOfPageWithResultsMeta = [3]string{
0: "title",
1: "description",
2: "error",
}
// Decode decodes PageWithResultsMeta from json.
func (s *PageWithResultsMeta) Decode(d *jx.Decoder) error {
if s == nil {
return errors.New("invalid: unable to decode PageWithResultsMeta to nil")
}
var requiredBitSet [1]uint8
if err := d.ObjBytes(func(d *jx.Decoder, k []byte) error {
switch string(k) {
case "title":
requiredBitSet[0] |= 1 << 0
if err := func() error {
v, err := d.Str()
s.Title = string(v)
if err != nil {
return err
}
return nil
}(); err != nil {
return errors.Wrap(err, "decode field \"title\"")
}
case "description":
requiredBitSet[0] |= 1 << 1
if err := func() error {
v, err := d.Str()
s.Description = string(v)
if err != nil {
return err
}
return nil
}(); err != nil {
return errors.Wrap(err, "decode field \"description\"")
}
case "error":
if err := func() error {
s.Error.Reset()
if err := s.Error.Decode(d); err != nil {
return err
}
return nil
}(); err != nil {
return errors.Wrap(err, "decode field \"error\"")
}
default:
return d.Skip()
}
return nil
}); err != nil {
return errors.Wrap(err, "decode PageWithResultsMeta")
}
// Validate required fields.
var failures []validate.FieldError
for i, mask := range [1]uint8{
0b00000011,
} {
if result := (requiredBitSet[i] & mask) ^ mask; result != 0 {
// Mask only required fields and check equality to mask using XOR.
//
// If XOR result is not zero, result is not equal to expected, so some fields are missed.
// Bits of fields which would be set are actually bits of missed fields.
missed := bits.OnesCount8(result)
for bitN := 0; bitN < missed; bitN++ {
bitIdx := bits.TrailingZeros8(result)
fieldIdx := i*8 + bitIdx
var name string
if fieldIdx < len(jsonFieldsNameOfPageWithResultsMeta) {
name = jsonFieldsNameOfPageWithResultsMeta[fieldIdx]
} else {
name = strconv.Itoa(fieldIdx)
}
failures = append(failures, validate.FieldError{
Name: name,
Error: validate.ErrFieldRequired,
})
// Reset bit.
result &^= 1 << bitIdx
}
}
}
if len(failures) > 0 {
return &validate.Error{Fields: failures}
}
return nil
}
// MarshalJSON implements stdjson.Marshaler.
func (s *PageWithResultsMeta) MarshalJSON() ([]byte, error) {
e := jx.Encoder{}
s.Encode(&e)
return e.Bytes(), nil
}
// UnmarshalJSON implements stdjson.Unmarshaler.
func (s *PageWithResultsMeta) UnmarshalJSON(data []byte) error {
d := jx.DecodeBytes(data)
return s.Decode(d)
}
// Encode encodes Pages as json.
func (s Pages) Encode(e *jx.Encoder) {
unwrapped := []Page(s)

View File

@@ -324,6 +324,7 @@ type Page struct {
Created time.Time `json:"created"`
Formats []Format `json:"formats"`
Status Status `json:"status"`
Meta PageMeta `json:"meta"`
}
// GetID returns the value of ID.
@@ -351,6 +352,11 @@ func (s *Page) GetStatus() Status {
return s.Status
}
// GetMeta returns the value of Meta.
func (s *Page) GetMeta() PageMeta {
return s.Meta
}
// SetID sets the value of ID.
func (s *Page) SetID(val uuid.UUID) {
s.ID = val
@@ -376,17 +382,59 @@ func (s *Page) SetStatus(val Status) {
s.Status = val
}
// SetMeta sets the value of Meta.
func (s *Page) SetMeta(val PageMeta) {
s.Meta = val
}
func (*Page) addPageRes() {}
type PageMeta struct {
Title string `json:"title"`
Description string `json:"description"`
Error OptString `json:"error"`
}
// GetTitle returns the value of Title.
func (s *PageMeta) GetTitle() string {
return s.Title
}
// GetDescription returns the value of Description.
func (s *PageMeta) GetDescription() string {
return s.Description
}
// GetError returns the value of Error.
func (s *PageMeta) GetError() OptString {
return s.Error
}
// SetTitle sets the value of Title.
func (s *PageMeta) SetTitle(val string) {
s.Title = val
}
// SetDescription sets the value of Description.
func (s *PageMeta) SetDescription(val string) {
s.Description = val
}
// SetError sets the value of Error.
func (s *PageMeta) SetError(val OptString) {
s.Error = val
}
// Merged schema.
// Ref: #/components/schemas/pageWithResults
type PageWithResults struct {
ID uuid.UUID `json:"id"`
URL string `json:"url"`
Created time.Time `json:"created"`
Formats []Format `json:"formats"`
Status Status `json:"status"`
Results []Result `json:"results"`
ID uuid.UUID `json:"id"`
URL string `json:"url"`
Created time.Time `json:"created"`
Formats []Format `json:"formats"`
Status Status `json:"status"`
Meta PageWithResultsMeta `json:"meta"`
Results []Result `json:"results"`
}
// GetID returns the value of ID.
@@ -414,6 +462,11 @@ func (s *PageWithResults) GetStatus() Status {
return s.Status
}
// GetMeta returns the value of Meta.
func (s *PageWithResults) GetMeta() PageWithResultsMeta {
return s.Meta
}
// GetResults returns the value of Results.
func (s *PageWithResults) GetResults() []Result {
return s.Results
@@ -444,6 +497,11 @@ func (s *PageWithResults) SetStatus(val Status) {
s.Status = val
}
// SetMeta sets the value of Meta.
func (s *PageWithResults) SetMeta(val PageWithResultsMeta) {
s.Meta = val
}
// SetResults sets the value of Results.
func (s *PageWithResults) SetResults(val []Result) {
s.Results = val
@@ -451,6 +509,42 @@ func (s *PageWithResults) SetResults(val []Result) {
func (*PageWithResults) getPageRes() {}
type PageWithResultsMeta struct {
Title string `json:"title"`
Description string `json:"description"`
Error OptString `json:"error"`
}
// GetTitle returns the value of Title.
func (s *PageWithResultsMeta) GetTitle() string {
return s.Title
}
// GetDescription returns the value of Description.
func (s *PageWithResultsMeta) GetDescription() string {
return s.Description
}
// GetError returns the value of Error.
func (s *PageWithResultsMeta) GetError() OptString {
return s.Error
}
// SetTitle sets the value of Title.
func (s *PageWithResultsMeta) SetTitle(val string) {
s.Title = val
}
// SetDescription sets the value of Description.
func (s *PageWithResultsMeta) SetDescription(val string) {
s.Description = val
}
// SetError sets the value of Error.
func (s *PageWithResultsMeta) SetError(val OptString) {
s.Error = val
}
type Pages []Page
// Ref: #/components/schemas/result

View File

@@ -6,6 +6,7 @@ import (
"fmt"
"net"
"net/http"
"strings"
"sync"
"time"
@@ -48,7 +49,8 @@ func NewApplication(cfg config.Config) (Application, error) {
worker := entity.NewWorker(workerCh, pageRepo, processor, log.Named("worker"))
server, err := openapi.NewServer(
rest.NewService(pageRepo, workerCh),
rest.NewService(pageRepo, workerCh, processor),
openapi.WithPathPrefix("/api/v1"),
openapi.WithMiddleware(
func(r middleware.Request, next middleware.Next) (middleware.Response, error) {
start := time.Now()
@@ -79,13 +81,13 @@ func NewApplication(cfg config.Config) (Application, error) {
ui := rest.NewUI(cfg.UI)
httpHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if ui.IsUIRequest(r) {
ui.ServeHTTP(w, r)
if strings.HasPrefix(r.URL.Path, "/api/") {
server.ServeHTTP(w, r)
return
}
server.ServeHTTP(w, r)
ui.ServeHTTP(w, r)
})
}

View File

@@ -37,19 +37,19 @@ type PDF struct {
Grayscale bool `env:"GRAYSCALE,default=false"`
MediaPrint bool `env:"MEDIA_PRINT,default=true"`
Zoom float64 `env:"ZOOM,default=1"`
Viewport string `env:"VIEWPORT,default=1920x1080"`
DPI uint `env:"DPI,default=300"`
Viewport string `env:"VIEWPORT,default=1280x720"`
DPI uint `env:"DPI,default=150"`
Filename string `env:"FILENAME,default=page.pdf"`
}
type API struct {
Prefix string `env:"PREFIX,default=/"`
Address string `env:"ADDRESS,default=0.0.0.0:5001"`
}
type UI struct {
Enabled bool `env:"ENABLED,default=true"`
Prefix string `env:"PREFIX,default=/"`
Theme string `env:"THEME,default=basic"`
}
type DB struct {

View File

@@ -11,6 +11,7 @@ import (
type Processor interface {
Process(ctx context.Context, format Format, url string) Result
GetMeta(ctx context.Context, url string) (Meta, error)
}
type Format uint8
@@ -37,6 +38,12 @@ const (
StatusWithErrors
)
// Meta holds metadata extracted from the archived page's HTML document.
type Meta struct {
// Title is the text of the document's <title> element.
Title string
// Description is the content of the document's <meta name="description"> element.
Description string
// Error is the message of the error returned while fetching the metadata;
// it is left empty when the metadata was fetched successfully.
Error string
}
func NewPage(url string, description string, formats ...Format) *Page {
return &Page{
ID: uuid.New(),
@@ -57,6 +64,7 @@ type Page struct {
Results Results
Version uint16
Status Status
Meta Meta
}
func (p *Page) SetProcessing() {

View File

@@ -9,6 +9,7 @@ import (
type Pages interface {
Save(ctx context.Context, page *Page) error
ListUnprocessed(ctx context.Context) ([]*Page, error)
}
func NewWorker(ch chan *Page, pages Pages, processor Processor, log *zap.Logger) *Worker {
@@ -27,6 +28,20 @@ func (w *Worker) Start(ctx context.Context, wg *sync.WaitGroup) {
w.log.Info("starting")
wg.Add(1)
go func() {
defer wg.Done()
unprocessed, err := w.pages.ListUnprocessed(ctx)
if err != nil {
w.log.Error("failed to get unprocessed pages", zap.Error(err))
} else {
for i := range unprocessed {
w.ch <- unprocessed[i]
}
}
}()
for {
select {
case <-ctx.Done():

View File

@@ -2,6 +2,7 @@ package rest
import (
"fmt"
"html"
"github.com/derfenix/webarchive/api/openapi"
"github.com/derfenix/webarchive/entity"
@@ -22,6 +23,11 @@ func PageToRestWithResults(page *entity.Page) openapi.PageWithResults {
return res
}(),
Status: StatusToRest(page.Status),
Meta: openapi.PageWithResultsMeta{
Title: html.EscapeString(page.Meta.Title),
Description: html.EscapeString(page.Meta.Description),
Error: openapi.NewOptString(page.Meta.Error),
},
Results: func() []openapi.Result {
results := make([]openapi.Result, len(page.Results.Results()))
@@ -65,6 +71,11 @@ func PageToRest(page *entity.Page) openapi.Page {
ID: page.ID,
URL: page.URL,
Created: page.Created,
Meta: openapi.PageMeta{
Title: html.EscapeString(page.Meta.Title),
Description: html.EscapeString(page.Meta.Description),
Error: openapi.NewOptString(page.Meta.Error),
},
Formats: func() []openapi.Format {
res := make([]openapi.Format, len(page.Formats))

View File

@@ -20,14 +20,19 @@ type Pages interface {
GetFile(ctx context.Context, pageID, fileID uuid.UUID) (*entity.File, error)
}
func NewService(sites Pages, ch chan *entity.Page) *Service {
return &Service{pages: sites, ch: ch}
func NewService(pages Pages, ch chan *entity.Page, processor entity.Processor) *Service {
return &Service{
pages: pages,
ch: ch,
processor: processor,
}
}
type Service struct {
openapi.UnimplementedHandler
pages Pages
ch chan *entity.Page
pages Pages
ch chan *entity.Page
processor entity.Processor
}
func (s *Service) GetPage(ctx context.Context, params openapi.GetPageParams) (openapi.GetPageRes, error) {
@@ -78,6 +83,13 @@ func (s *Service) AddPage(ctx context.Context, req openapi.OptAddPageReq, params
page := entity.NewPage(url, description, domainFormats...)
page.Status = entity.StatusProcessing
meta, err := s.processor.GetMeta(ctx, page.URL)
if err != nil {
page.Meta.Error = err.Error()
} else {
page.Meta = meta
}
if err := s.pages.Save(ctx, page); err != nil {
return nil, fmt.Errorf("save page: %w", err)
}

View File

@@ -10,15 +10,19 @@ import (
)
func NewUI(cfg config.UI) *UI {
return &UI{prefix: cfg.Prefix}
return &UI{
prefix: cfg.Prefix,
theme: cfg.Theme,
}
}
type UI struct {
prefix string
theme string
}
func (u *UI) ServeHTTP(w http.ResponseWriter, r *http.Request) {
serveRoot, err := fs.Sub(ui.StaticFiles, "static")
serveRoot, err := fs.Sub(ui.StaticFiles, u.theme)
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
return
@@ -27,12 +31,11 @@ func (u *UI) ServeHTTP(w http.ResponseWriter, r *http.Request) {
if strings.HasPrefix(r.URL.Path, u.prefix) {
r.URL.Path = "/" + strings.TrimPrefix(r.URL.Path, u.prefix)
}
if !strings.HasPrefix(r.URL.Path, "/static") {
r.URL.Path = "/"
}
r.URL.Path = strings.TrimPrefix(r.URL.Path, "/static")
http.FileServer(http.FS(serveRoot)).ServeHTTP(w, r)
}
// IsUIRequest reports whether r should be handled by the web UI: either its
// path is exactly the configured UI prefix, or it starts with "/static/".
func (u *UI) IsUIRequest(r *http.Request) bool {
	path := r.URL.Path

	if path == u.prefix {
		return true
	}

	return strings.HasPrefix(path, "/static/")
}

47
ui/basic/index.html Normal file
View File

@@ -0,0 +1,47 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>WebArchive</title>
<link rel="stylesheet" href="/static/style.css">
<script src="/static/lib.js"></script>
<script src="/static/main.js"></script>
</head>
<body>
<template id="pages_tmpl">
<div class="page_item">
<a class="url link"><span class="title"></span><span class="status"></span></a>
<div class="description"></div>
<div class="created"></div>
<hr>
</div>
</template>
<!--
  Details view for a single archived page; cloned and filled in by page() in main.js.
  The URL heading opens its own text in a new tab. textContent is used instead of
  innerHTML because innerHTML returns the HTML-serialised text (e.g. "&" becomes
  "&amp;"), which would open a wrong address for URLs with query parameters.
-->
<template id="page_tmpl">
<a onclick="history.back()" class="link">Back</a>
<div class="page">
<h2 id="page_title"></h2>
<h3 id="page_description"></h3>
<h5 id="page_url" class="link" onclick="window.open(this.textContent, '_blank', 'noopener')"></h5>
<h4>Results</h4>
<div id="results"></div>
</div>
</template>
<template id="result_tmpl">
<div class="result_item">
<span class="format"></span>
<span class="result_link link"></span>
</div>
</template>
<h1 id="site_title"></h1>
<div id="data">
None
</div>
</body>
</html>

2
ui/basic/lib.js Normal file

File diff suppressed because one or more lines are too long

90
ui/basic/main.js Normal file
View File

@@ -0,0 +1,90 @@
// Render the index view: request the list of archived pages from the API and
// replace the contents of #data with one cloned "pages_tmpl" entry per page.
function index() {
    const renderPages = function (data, status, xhr) {
        if (status !== "success") {
            gotError(status);
            return;
        }

        const container = document.getElementById("data");
        container.innerHTML = "";

        data.forEach(function (item) {
            const fragment = pages_tmpl.content.cloneNode(true);
            const $fragment = $(fragment);
            const $status = $fragment.find(".status");

            // Clicking the entry navigates to the details view of this page.
            $fragment.find(".url").attr("onclick", "goToPage('" + item.id + "');");

            // The status is exposed both as a CSS class and as a tooltip.
            $status.addClass(item.status);
            $status.attr("title", item.status);

            $fragment.find(".created").html(item.created);
            $fragment.find(".title").html(item.meta.title);
            $fragment.find(".description").html(item.meta.description);

            container.append(fragment);
        });
    };

    $.ajax({url: "/api/v1/pages", success: renderPages});
}
// Navigate to the details view of the page with the given id.
// A history entry carrying {"page": id} is pushed (with id as the new relative
// URL) so that the popstate handler can restore this view, then the page is rendered.
function goToPage(id) {
history.pushState({"page": id}, null, id);
page(id);
}
// Render the details view of a single archived page: request the page with
// the given id from the API and replace the contents of #data with a filled
// clone of "page_tmpl", including one "result_tmpl" entry per result.
function page(id) {
    $.ajax({
        url: "/api/v1/pages/" + id, success: function (data, status, xhr) {
            if (status !== "success") {
                gotError(status);
                return;
            }

            let elem = document.getElementById("data");
            elem.innerHTML = "";

            let page_elem = page_tmpl.content.cloneNode(true);

            // Title and description arrive HTML-escaped from the API, so they
            // are inserted as markup to decode the entities.
            $(page_elem).find("#page_title").html(data.meta.title);
            $(page_elem).find("#page_description").html(data.meta.description);

            // The URL is user-supplied and is NOT escaped by the API: insert it
            // as plain text so it can never be interpreted as markup (XSS).
            page_elem.querySelector("#page_url").textContent = data.url;

            data.results.forEach(function (result) {
                let result_elem = result_tmpl.content.cloneNode(true);

                $(result_elem).find(".format").html(result.format);

                if (result.error !== "" && result.error !== undefined) {
                    // Failed format: show a warning sign with the error as a tooltip.
                    $(result_elem).find(".format").addClass("error");
                    $(result_elem).find(".result_link").html("⚠");
                    $(result_elem).find(".result_link").attr("title", result.error);
                } else {
                    // NOTE(review): every file overwrites the same link, so only
                    // the last file of a result ends up being shown.
                    result.files.forEach(function (file) {
                        $(result_elem).find(".result_link").attr("onclick", "window.open('/api/v1/pages/" + data.id + "/file/" + file.id + "', '_blank');");
                        $(result_elem).find(".result_link").html(file.name);
                    })
                }

                $(page_elem).find("#results").append(result_elem);
            })

            elem.append(page_elem);
        }
    })
}
// Report a failed API request; currently the error is only written to the console.
function gotError(err) {
console.log(err);
}
// Initial render once the DOM is ready: set the site/document title from the
// current hostname, then pick the view from the path.
document.addEventListener("DOMContentLoaded", function () {
$("#site_title").html("WebArchive " + window.location.hostname);
document.title = "WebArchive " + window.location.hostname;
// A path ending with "/" shows the index; otherwise the path without its
// leading slash is used as the page id.
if (window.location.pathname.endsWith("/")) {
index();
} else {
page(window.location.pathname.slice(1));
}
});
// Back/forward navigation: entries pushed by goToPage carry {"page": id};
// an entry without state is rendered as the index view.
window.addEventListener('popstate', function (event) {
if (event.state === null) {
index();
} else {
page(event.state.page);
}
});

61
ui/basic/style.css Normal file

File diff suppressed because one or more lines are too long

View File

@@ -4,5 +4,5 @@ import (
"embed"
)
//go:embed static/*
//go:embed */*.html */*.css */*.js
var StaticFiles embed.FS

View File

@@ -1,14 +0,0 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Document</title>
<link rel="stylesheet" href="/static/style.css">
</head>
<body>
<h1>Hello World!</h1>
</body>
</html>

View File

@@ -1,3 +0,0 @@
h1 {
background-color: azure;
}