diff --git a/README.md b/README.md index 18fa00b..ce0798b 100644 --- a/README.md +++ b/README.md @@ -24,13 +24,17 @@ variables: * **LOGGING_DEBUG** — enable debug logs (default `false`) * **API** * **API_ADDRESS** — address the API server will listen (default `0.0.0.0:5001`) +* **UI** + * **UI_ENABLED** — Enable builtin web UI (default `true`) + * **UI_PREFIX** — Prefix for the web UI (default `/`) + * **UI_THEME** — UI theme name (default `basic`). No other values available yet * **PDF** * **PDF_LANDSCAPE** — use landscape page orientation instead of portrait (default `false`) * **PDF_GRAYSCALE** — use grayscale filter for the output pdf (default `false`) * **PDF_MEDIA_PRINT** — use media type `print` for the request (default `true`) * **PDF_ZOOM** — zoom page (default `1.0` i.e. no actual zoom) - * **PDF_VIEWPORT** — use specified viewport value (default `1920x1080`) - * **PDF_DPI** — use specified DPI value for the output pdf (default `300`) + * **PDF_VIEWPORT** — use specified viewport value (default `1280x720`) + * **PDF_DPI** — use specified DPI value for the output pdf (default `150`) * **PDF_FILENAME** — use specified name for output pdf file (default `page.pdf`) @@ -60,7 +64,7 @@ docker compose up -d webarchive ### 2. Add a page ```shell -curl -X POST --location "http://localhost:5001/pages" \ +curl -X POST --location "http://localhost:5001/api/v1/pages" \ -H "Content-Type: application/json" \ -d "{ \"url\": \"https://github.com/wkhtmltopdf/wkhtmltopdf/issues/1937\", @@ -75,13 +79,13 @@ or ```shell curl -X POST --location \ - "http://localhost:5001/pages?url=https%3A%2F%2Fgithub.com%2Fwkhtmltopdf%2Fwkhtmltopdf%2Fissues%2F1937&formats=pdf%2Cheaders&description=Foo+Bar" + "http://localhost:5001/api/v1/pages?url=https%3A%2F%2Fgithub.com%2Fwkhtmltopdf%2Fwkhtmltopdf%2Fissues%2F1937&formats=pdf%2Cheaders&description=Foo+Bar" ``` ### 3. Get the page's info ```shell -curl -X GET --location "http://localhost:5001/pages/$page_id" | jq . 
+curl -X GET --location "http://localhost:5001/api/v1/pages/$page_id" | jq . ``` where `$page_id` — value of the `id` field from previous command response. If `status` field in response is `success` (or `with_errors`) - the `results` field @@ -90,7 +94,7 @@ will contain all processed formats with ids of the stored files. ### 4. Open file in browser ```shell -xdg-open "http://localhost:5001/pages/$page_id/file/$file_id" +xdg-open "http://localhost:5001/api/v1/pages/$page_id/file/$file_id" ``` Where `$page_id` — value of the `id` field from previous command response, and `$file_id` — the id of interesting file. @@ -98,7 +102,7 @@ Where `$page_id` — value of the `id` field from previous command response, an ### 5. List all stored pages ```shell -curl -X GET --location "http://localhost:5001/pages" | jq . +curl -X GET --location "http://localhost:5001/api/v1/pages" | jq . ``` ## Roadmap diff --git a/adapters/processors/pdf.go b/adapters/processors/pdf.go index 235d250..abd171a 100644 --- a/adapters/processors/pdf.go +++ b/adapters/processors/pdf.go @@ -47,6 +47,7 @@ func (p *PDF) Process(_ context.Context, url string) ([]entity.File, error) { page.FooterFontSize.Set(10) page.Zoom.Set(p.cfg.Zoom) page.ViewportSize.Set(p.cfg.Viewport) + page.NoBackground.Set(true) gen.AddPage(page) diff --git a/adapters/processors/processors.go b/adapters/processors/processors.go index 5e2c975..55c225d 100644 --- a/adapters/processors/processors.go +++ b/adapters/processors/processors.go @@ -8,6 +8,8 @@ import ( "net/http/cookiejar" "time" + "golang.org/x/net/html" + "github.com/derfenix/webarchive/config" "github.com/derfenix/webarchive/entity" ) @@ -52,6 +54,7 @@ func NewProcessors(cfg config.Config) (*Processors, error) { } procs := Processors{ + client: httpClient, processors: map[entity.Format]processor{ entity.FormatHeaders: NewHeaders(httpClient), entity.FormatPDF: NewPDF(cfg.PDF), @@ -64,6 +67,7 @@ func NewProcessors(cfg config.Config) (*Processors, error) { type Processors 
struct {
 	processors map[entity.Format]processor
+	client     *http.Client
 }
 
 func (p *Processors) Process(ctx context.Context, format entity.Format, url string) entity.Result {
@@ -93,3 +97,70 @@ func (p *Processors) OverrideProcessor(format entity.Format, proc processor) err
 
 	return nil
 }
+
+// GetMeta downloads url with the processors' HTTP client and extracts the
+// page title and the content of its description meta tag from the HTML.
+//
+// It returns an error when the request cannot be built or sent, when the
+// response status is not 200, or when parsing the response body fails.
+func (p *Processors) GetMeta(ctx context.Context, url string) (entity.Meta, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+	if err != nil {
+		return entity.Meta{}, fmt.Errorf("new request: %w", err)
+	}
+
+	response, err := p.client.Do(req)
+	if err != nil {
+		return entity.Meta{}, fmt.Errorf("do request: %w", err)
+	}
+
+	// Close the body on every path, including the non-200 early return below;
+	// net/http guarantees a non-nil Body whenever Do returns a nil error.
+	defer func() {
+		_ = response.Body.Close()
+	}()
+
+	if response.StatusCode != http.StatusOK {
+		return entity.Meta{}, fmt.Errorf("want status 200, got %d", response.StatusCode)
+	}
+
+	htmlNode, err := html.Parse(response.Body)
+	if err != nil {
+		return entity.Meta{}, fmt.Errorf("parse response body: %w", err)
+	}
+
+	meta := entity.Meta{}
+	getMetaData(htmlNode, &meta)
+
+	return meta, nil
+}
+
+// getMetaData walks the subtree below n depth-first and copies into meta the
+// data of the first child node of each <title> element and the content
+// attribute of each <meta name="description"> element. When several matching
+// elements exist, the last one visited wins.
+func getMetaData(n *html.Node, meta *entity.Meta) {
+	if n == nil {
+		return
+	}
+
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		// An empty <title></title> has no child node, so guard the dereference.
+		if c.Type == html.ElementNode && c.Data == "title" && c.FirstChild != nil {
+			meta.Title = c.FirstChild.Data
+		}
+		if c.Type == html.ElementNode && c.Data == "meta" {
+			attrs := make(map[string]string)
+			for _, attr := range c.Attr {
+				attrs[attr.Key] = attr.Val
+			}
+
+			name, ok := attrs["name"]
+			if ok && name == "description" {
+				meta.Description = attrs["content"]
+			}
+		}
+
+		getMetaData(c, meta)
+	}
+}
diff --git a/adapters/repository/badger/page.go b/adapters/repository/badger/page.go
index 2c2f360..807c877 100644
--- a/adapters/repository/badger/page.go
+++ b/adapters/repository/badger/page.go
@@ -64,18 +64,18 @@ func (p *Page) GetFile(_ context.Context, pageID, fileID uuid.UUID) (*entity.Fil
 	return file, nil
 }
 
-func (p
*Page) Save(_ context.Context, site *entity.Page) error { +func (p *Page) Save(_ context.Context, page *entity.Page) error { if p.db.IsClosed() { return ErrDBClosed } - marshaled, err := marshal(site) + marshaled, err := marshal(page) if err != nil { return fmt.Errorf("marshal data: %w", err) } if err := p.db.Update(func(txn *badger.Txn) error { - if err := txn.Set(p.key(site), marshaled); err != nil { + if err := txn.Set(p.key(page), marshaled); err != nil { return fmt.Errorf("put data: %w", err) } @@ -151,6 +151,64 @@ func (p *Page) ListAll(ctx context.Context) ([]*entity.Page, error) { Formats: page.Formats, Version: page.Version, Status: page.Status, + Meta: page.Meta, + }) + } + + return nil + }) + + if err != nil { + return nil, fmt.Errorf("view: %w", err) + } + + sort.Slice(pages, func(i, j int) bool { + return pages[i].Created.After(pages[j].Created) + }) + + return pages, nil +} + +func (p *Page) ListUnprocessed(ctx context.Context) ([]*entity.Page, error) { + pages := make([]*entity.Page, 0, 100) + + err := p.db.View(func(txn *badger.Txn) error { + iterator := txn.NewIterator(badger.DefaultIteratorOptions) + + defer iterator.Close() + + for iterator.Seek(p.prefix); iterator.ValidForPrefix(p.prefix); iterator.Next() { + if err := ctx.Err(); err != nil { + return fmt.Errorf("context canceled: %w", err) + } + + var page entity.Page + + err := iterator.Item().Value(func(val []byte) error { + if err := unmarshal(val, &page); err != nil { + return fmt.Errorf("unmarshal: %w", err) + } + + return nil + }) + + if err != nil { + return fmt.Errorf("get item: %w", err) + } + + if page.Status != entity.StatusProcessing { + continue + } + + pages = append(pages, &entity.Page{ + ID: page.ID, + URL: page.URL, + Description: page.Description, + Created: page.Created, + Formats: page.Formats, + Version: page.Version, + Status: page.Status, + Meta: page.Meta, }) } diff --git a/api/openapi.yaml b/api/openapi.yaml index e5719c2..690a821 100644 --- a/api/openapi.yaml +++ 
b/api/openapi.yaml @@ -4,7 +4,7 @@ info: description: API description in Markdown. version: 1.0.0 servers: - - url: 'https://api.example.com' + - url: 'https://api.example.com/api/v1' paths: /pages: get: @@ -183,12 +183,25 @@ components: $ref: '#/components/schemas/format' status: $ref: '#/components/schemas/status' + meta: + type: object + properties: + title: + type: string + description: + type: string + error: + type: string + required: + - title + - description required: - id - url - formats - status - created + - meta result: type: object properties: diff --git a/api/openapi/oas_json_gen.go b/api/openapi/oas_json_gen.go index 5f5e1e0..0568e88 100644 --- a/api/openapi/oas_json_gen.go +++ b/api/openapi/oas_json_gen.go @@ -534,14 +534,20 @@ func (s *Page) encodeFields(e *jx.Encoder) { e.FieldStart("status") s.Status.Encode(e) } + { + + e.FieldStart("meta") + s.Meta.Encode(e) + } } -var jsonFieldsNameOfPage = [5]string{ +var jsonFieldsNameOfPage = [6]string{ 0: "id", 1: "url", 2: "created", 3: "formats", 4: "status", + 5: "meta", } // Decode decodes Page from json. @@ -617,6 +623,16 @@ func (s *Page) Decode(d *jx.Decoder) error { }(); err != nil { return errors.Wrap(err, "decode field \"status\"") } + case "meta": + requiredBitSet[0] |= 1 << 5 + if err := func() error { + if err := s.Meta.Decode(d); err != nil { + return err + } + return nil + }(); err != nil { + return errors.Wrap(err, "decode field \"meta\"") + } default: return d.Skip() } @@ -627,7 +643,7 @@ func (s *Page) Decode(d *jx.Decoder) error { // Validate required fields. var failures []validate.FieldError for i, mask := range [1]uint8{ - 0b00011111, + 0b00111111, } { if result := (requiredBitSet[i] & mask) ^ mask; result != 0 { // Mask only required fields and check equality to mask using XOR. @@ -673,6 +689,138 @@ func (s *Page) UnmarshalJSON(data []byte) error { return s.Decode(d) } +// Encode implements json.Marshaler. 
+func (s *PageMeta) Encode(e *jx.Encoder) { + e.ObjStart() + s.encodeFields(e) + e.ObjEnd() +} + +// encodeFields encodes fields. +func (s *PageMeta) encodeFields(e *jx.Encoder) { + { + + e.FieldStart("title") + e.Str(s.Title) + } + { + + e.FieldStart("description") + e.Str(s.Description) + } + { + if s.Error.Set { + e.FieldStart("error") + s.Error.Encode(e) + } + } +} + +var jsonFieldsNameOfPageMeta = [3]string{ + 0: "title", + 1: "description", + 2: "error", +} + +// Decode decodes PageMeta from json. +func (s *PageMeta) Decode(d *jx.Decoder) error { + if s == nil { + return errors.New("invalid: unable to decode PageMeta to nil") + } + var requiredBitSet [1]uint8 + + if err := d.ObjBytes(func(d *jx.Decoder, k []byte) error { + switch string(k) { + case "title": + requiredBitSet[0] |= 1 << 0 + if err := func() error { + v, err := d.Str() + s.Title = string(v) + if err != nil { + return err + } + return nil + }(); err != nil { + return errors.Wrap(err, "decode field \"title\"") + } + case "description": + requiredBitSet[0] |= 1 << 1 + if err := func() error { + v, err := d.Str() + s.Description = string(v) + if err != nil { + return err + } + return nil + }(); err != nil { + return errors.Wrap(err, "decode field \"description\"") + } + case "error": + if err := func() error { + s.Error.Reset() + if err := s.Error.Decode(d); err != nil { + return err + } + return nil + }(); err != nil { + return errors.Wrap(err, "decode field \"error\"") + } + default: + return d.Skip() + } + return nil + }); err != nil { + return errors.Wrap(err, "decode PageMeta") + } + // Validate required fields. + var failures []validate.FieldError + for i, mask := range [1]uint8{ + 0b00000011, + } { + if result := (requiredBitSet[i] & mask) ^ mask; result != 0 { + // Mask only required fields and check equality to mask using XOR. + // + // If XOR result is not zero, result is not equal to expected, so some fields are missed. 
+ // Bits of fields which would be set are actually bits of missed fields. + missed := bits.OnesCount8(result) + for bitN := 0; bitN < missed; bitN++ { + bitIdx := bits.TrailingZeros8(result) + fieldIdx := i*8 + bitIdx + var name string + if fieldIdx < len(jsonFieldsNameOfPageMeta) { + name = jsonFieldsNameOfPageMeta[fieldIdx] + } else { + name = strconv.Itoa(fieldIdx) + } + failures = append(failures, validate.FieldError{ + Name: name, + Error: validate.ErrFieldRequired, + }) + // Reset bit. + result &^= 1 << bitIdx + } + } + } + if len(failures) > 0 { + return &validate.Error{Fields: failures} + } + + return nil +} + +// MarshalJSON implements stdjson.Marshaler. +func (s *PageMeta) MarshalJSON() ([]byte, error) { + e := jx.Encoder{} + s.Encode(&e) + return e.Bytes(), nil +} + +// UnmarshalJSON implements stdjson.Unmarshaler. +func (s *PageMeta) UnmarshalJSON(data []byte) error { + d := jx.DecodeBytes(data) + return s.Decode(d) +} + // Encode implements json.Marshaler. func (s *PageWithResults) Encode(e *jx.Encoder) { e.ObjStart() @@ -711,6 +859,11 @@ func (s *PageWithResults) encodeFields(e *jx.Encoder) { e.FieldStart("status") s.Status.Encode(e) } + { + + e.FieldStart("meta") + s.Meta.Encode(e) + } { e.FieldStart("results") @@ -722,13 +875,14 @@ func (s *PageWithResults) encodeFields(e *jx.Encoder) { } } -var jsonFieldsNameOfPageWithResults = [6]string{ +var jsonFieldsNameOfPageWithResults = [7]string{ 0: "id", 1: "url", 2: "created", 3: "formats", 4: "status", - 5: "results", + 5: "meta", + 6: "results", } // Decode decodes PageWithResults from json. 
@@ -804,8 +958,18 @@ func (s *PageWithResults) Decode(d *jx.Decoder) error { }(); err != nil { return errors.Wrap(err, "decode field \"status\"") } - case "results": + case "meta": requiredBitSet[0] |= 1 << 5 + if err := func() error { + if err := s.Meta.Decode(d); err != nil { + return err + } + return nil + }(); err != nil { + return errors.Wrap(err, "decode field \"meta\"") + } + case "results": + requiredBitSet[0] |= 1 << 6 if err := func() error { s.Results = make([]Result, 0) if err := d.Arr(func(d *jx.Decoder) error { @@ -832,7 +996,7 @@ func (s *PageWithResults) Decode(d *jx.Decoder) error { // Validate required fields. var failures []validate.FieldError for i, mask := range [1]uint8{ - 0b00111111, + 0b01111111, } { if result := (requiredBitSet[i] & mask) ^ mask; result != 0 { // Mask only required fields and check equality to mask using XOR. @@ -878,6 +1042,138 @@ func (s *PageWithResults) UnmarshalJSON(data []byte) error { return s.Decode(d) } +// Encode implements json.Marshaler. +func (s *PageWithResultsMeta) Encode(e *jx.Encoder) { + e.ObjStart() + s.encodeFields(e) + e.ObjEnd() +} + +// encodeFields encodes fields. +func (s *PageWithResultsMeta) encodeFields(e *jx.Encoder) { + { + + e.FieldStart("title") + e.Str(s.Title) + } + { + + e.FieldStart("description") + e.Str(s.Description) + } + { + if s.Error.Set { + e.FieldStart("error") + s.Error.Encode(e) + } + } +} + +var jsonFieldsNameOfPageWithResultsMeta = [3]string{ + 0: "title", + 1: "description", + 2: "error", +} + +// Decode decodes PageWithResultsMeta from json. 
+func (s *PageWithResultsMeta) Decode(d *jx.Decoder) error { + if s == nil { + return errors.New("invalid: unable to decode PageWithResultsMeta to nil") + } + var requiredBitSet [1]uint8 + + if err := d.ObjBytes(func(d *jx.Decoder, k []byte) error { + switch string(k) { + case "title": + requiredBitSet[0] |= 1 << 0 + if err := func() error { + v, err := d.Str() + s.Title = string(v) + if err != nil { + return err + } + return nil + }(); err != nil { + return errors.Wrap(err, "decode field \"title\"") + } + case "description": + requiredBitSet[0] |= 1 << 1 + if err := func() error { + v, err := d.Str() + s.Description = string(v) + if err != nil { + return err + } + return nil + }(); err != nil { + return errors.Wrap(err, "decode field \"description\"") + } + case "error": + if err := func() error { + s.Error.Reset() + if err := s.Error.Decode(d); err != nil { + return err + } + return nil + }(); err != nil { + return errors.Wrap(err, "decode field \"error\"") + } + default: + return d.Skip() + } + return nil + }); err != nil { + return errors.Wrap(err, "decode PageWithResultsMeta") + } + // Validate required fields. + var failures []validate.FieldError + for i, mask := range [1]uint8{ + 0b00000011, + } { + if result := (requiredBitSet[i] & mask) ^ mask; result != 0 { + // Mask only required fields and check equality to mask using XOR. + // + // If XOR result is not zero, result is not equal to expected, so some fields are missed. + // Bits of fields which would be set are actually bits of missed fields. + missed := bits.OnesCount8(result) + for bitN := 0; bitN < missed; bitN++ { + bitIdx := bits.TrailingZeros8(result) + fieldIdx := i*8 + bitIdx + var name string + if fieldIdx < len(jsonFieldsNameOfPageWithResultsMeta) { + name = jsonFieldsNameOfPageWithResultsMeta[fieldIdx] + } else { + name = strconv.Itoa(fieldIdx) + } + failures = append(failures, validate.FieldError{ + Name: name, + Error: validate.ErrFieldRequired, + }) + // Reset bit. 
+ result &^= 1 << bitIdx + } + } + } + if len(failures) > 0 { + return &validate.Error{Fields: failures} + } + + return nil +} + +// MarshalJSON implements stdjson.Marshaler. +func (s *PageWithResultsMeta) MarshalJSON() ([]byte, error) { + e := jx.Encoder{} + s.Encode(&e) + return e.Bytes(), nil +} + +// UnmarshalJSON implements stdjson.Unmarshaler. +func (s *PageWithResultsMeta) UnmarshalJSON(data []byte) error { + d := jx.DecodeBytes(data) + return s.Decode(d) +} + // Encode encodes Pages as json. func (s Pages) Encode(e *jx.Encoder) { unwrapped := []Page(s) diff --git a/api/openapi/oas_schemas_gen.go b/api/openapi/oas_schemas_gen.go index dda0d7e..8cc81a8 100644 --- a/api/openapi/oas_schemas_gen.go +++ b/api/openapi/oas_schemas_gen.go @@ -324,6 +324,7 @@ type Page struct { Created time.Time `json:"created"` Formats []Format `json:"formats"` Status Status `json:"status"` + Meta PageMeta `json:"meta"` } // GetID returns the value of ID. @@ -351,6 +352,11 @@ func (s *Page) GetStatus() Status { return s.Status } +// GetMeta returns the value of Meta. +func (s *Page) GetMeta() PageMeta { + return s.Meta +} + // SetID sets the value of ID. func (s *Page) SetID(val uuid.UUID) { s.ID = val @@ -376,17 +382,59 @@ func (s *Page) SetStatus(val Status) { s.Status = val } +// SetMeta sets the value of Meta. +func (s *Page) SetMeta(val PageMeta) { + s.Meta = val +} + func (*Page) addPageRes() {} +type PageMeta struct { + Title string `json:"title"` + Description string `json:"description"` + Error OptString `json:"error"` +} + +// GetTitle returns the value of Title. +func (s *PageMeta) GetTitle() string { + return s.Title +} + +// GetDescription returns the value of Description. +func (s *PageMeta) GetDescription() string { + return s.Description +} + +// GetError returns the value of Error. +func (s *PageMeta) GetError() OptString { + return s.Error +} + +// SetTitle sets the value of Title. 
+func (s *PageMeta) SetTitle(val string) { + s.Title = val +} + +// SetDescription sets the value of Description. +func (s *PageMeta) SetDescription(val string) { + s.Description = val +} + +// SetError sets the value of Error. +func (s *PageMeta) SetError(val OptString) { + s.Error = val +} + // Merged schema. // Ref: #/components/schemas/pageWithResults type PageWithResults struct { - ID uuid.UUID `json:"id"` - URL string `json:"url"` - Created time.Time `json:"created"` - Formats []Format `json:"formats"` - Status Status `json:"status"` - Results []Result `json:"results"` + ID uuid.UUID `json:"id"` + URL string `json:"url"` + Created time.Time `json:"created"` + Formats []Format `json:"formats"` + Status Status `json:"status"` + Meta PageWithResultsMeta `json:"meta"` + Results []Result `json:"results"` } // GetID returns the value of ID. @@ -414,6 +462,11 @@ func (s *PageWithResults) GetStatus() Status { return s.Status } +// GetMeta returns the value of Meta. +func (s *PageWithResults) GetMeta() PageWithResultsMeta { + return s.Meta +} + // GetResults returns the value of Results. func (s *PageWithResults) GetResults() []Result { return s.Results @@ -444,6 +497,11 @@ func (s *PageWithResults) SetStatus(val Status) { s.Status = val } +// SetMeta sets the value of Meta. +func (s *PageWithResults) SetMeta(val PageWithResultsMeta) { + s.Meta = val +} + // SetResults sets the value of Results. func (s *PageWithResults) SetResults(val []Result) { s.Results = val @@ -451,6 +509,42 @@ func (s *PageWithResults) SetResults(val []Result) { func (*PageWithResults) getPageRes() {} +type PageWithResultsMeta struct { + Title string `json:"title"` + Description string `json:"description"` + Error OptString `json:"error"` +} + +// GetTitle returns the value of Title. +func (s *PageWithResultsMeta) GetTitle() string { + return s.Title +} + +// GetDescription returns the value of Description. 
+func (s *PageWithResultsMeta) GetDescription() string { + return s.Description +} + +// GetError returns the value of Error. +func (s *PageWithResultsMeta) GetError() OptString { + return s.Error +} + +// SetTitle sets the value of Title. +func (s *PageWithResultsMeta) SetTitle(val string) { + s.Title = val +} + +// SetDescription sets the value of Description. +func (s *PageWithResultsMeta) SetDescription(val string) { + s.Description = val +} + +// SetError sets the value of Error. +func (s *PageWithResultsMeta) SetError(val OptString) { + s.Error = val +} + type Pages []Page // Ref: #/components/schemas/result diff --git a/application/application.go b/application/application.go index 60d791d..1f76550 100644 --- a/application/application.go +++ b/application/application.go @@ -6,6 +6,7 @@ import ( "fmt" "net" "net/http" + "strings" "sync" "time" @@ -48,7 +49,8 @@ func NewApplication(cfg config.Config) (Application, error) { worker := entity.NewWorker(workerCh, pageRepo, processor, log.Named("worker")) server, err := openapi.NewServer( - rest.NewService(pageRepo, workerCh), + rest.NewService(pageRepo, workerCh, processor), + openapi.WithPathPrefix("/api/v1"), openapi.WithMiddleware( func(r middleware.Request, next middleware.Next) (middleware.Response, error) { start := time.Now() @@ -79,13 +81,13 @@ func NewApplication(cfg config.Config) (Application, error) { ui := rest.NewUI(cfg.UI) httpHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if ui.IsUIRequest(r) { - ui.ServeHTTP(w, r) + if strings.HasPrefix(r.URL.Path, "/api/") { + server.ServeHTTP(w, r) return } - server.ServeHTTP(w, r) + ui.ServeHTTP(w, r) }) } diff --git a/config/config.go b/config/config.go index f7f3599..a4be3e0 100644 --- a/config/config.go +++ b/config/config.go @@ -37,19 +37,19 @@ type PDF struct { Grayscale bool `env:"GRAYSCALE,default=false"` MediaPrint bool `env:"MEDIA_PRINT,default=true"` Zoom float64 `env:"ZOOM,default=1"` - Viewport string 
`env:"VIEWPORT,default=1920x1080"` - DPI uint `env:"DPI,default=300"` + Viewport string `env:"VIEWPORT,default=1280x720"` + DPI uint `env:"DPI,default=150"` Filename string `env:"FILENAME,default=page.pdf"` } type API struct { - Prefix string `env:"PREFIX,default=/"` Address string `env:"ADDRESS,default=0.0.0.0:5001"` } type UI struct { Enabled bool `env:"ENABLED,default=true"` Prefix string `env:"PREFIX,default=/"` + Theme string `env:"THEME,default=basic"` } type DB struct { diff --git a/entity/page.go b/entity/page.go index 53edd20..c7287c3 100644 --- a/entity/page.go +++ b/entity/page.go @@ -11,6 +11,7 @@ import ( type Processor interface { Process(ctx context.Context, format Format, url string) Result + GetMeta(ctx context.Context, url string) (Meta, error) } type Format uint8 @@ -37,6 +38,12 @@ const ( StatusWithErrors ) +type Meta struct { + Title string + Description string + Error string +} + func NewPage(url string, description string, formats ...Format) *Page { return &Page{ ID: uuid.New(), @@ -57,6 +64,7 @@ type Page struct { Results Results Version uint16 Status Status + Meta Meta } func (p *Page) SetProcessing() { diff --git a/entity/worker.go b/entity/worker.go index 8630aae..8822f90 100644 --- a/entity/worker.go +++ b/entity/worker.go @@ -9,6 +9,7 @@ import ( type Pages interface { Save(ctx context.Context, page *Page) error + ListUnprocessed(ctx context.Context) ([]*Page, error) } func NewWorker(ch chan *Page, pages Pages, processor Processor, log *zap.Logger) *Worker { @@ -27,6 +28,20 @@ func (w *Worker) Start(ctx context.Context, wg *sync.WaitGroup) { w.log.Info("starting") + wg.Add(1) + go func() { + defer wg.Done() + + unprocessed, err := w.pages.ListUnprocessed(ctx) + if err != nil { + w.log.Error("failed to get unprocessed pages", zap.Error(err)) + } else { + for i := range unprocessed { + w.ch <- unprocessed[i] + } + } + }() + for { select { case <-ctx.Done(): diff --git a/ports/rest/converter.go b/ports/rest/converter.go index 
efa3ff6..1d36851 100644 --- a/ports/rest/converter.go +++ b/ports/rest/converter.go @@ -2,6 +2,7 @@ package rest import ( "fmt" + "html" "github.com/derfenix/webarchive/api/openapi" "github.com/derfenix/webarchive/entity" @@ -22,6 +23,11 @@ func PageToRestWithResults(page *entity.Page) openapi.PageWithResults { return res }(), Status: StatusToRest(page.Status), + Meta: openapi.PageWithResultsMeta{ + Title: html.EscapeString(page.Meta.Title), + Description: html.EscapeString(page.Meta.Description), + Error: openapi.NewOptString(page.Meta.Error), + }, Results: func() []openapi.Result { results := make([]openapi.Result, len(page.Results.Results())) @@ -65,6 +71,11 @@ func PageToRest(page *entity.Page) openapi.Page { ID: page.ID, URL: page.URL, Created: page.Created, + Meta: openapi.PageMeta{ + Title: html.EscapeString(page.Meta.Title), + Description: html.EscapeString(page.Meta.Description), + Error: openapi.NewOptString(page.Meta.Error), + }, Formats: func() []openapi.Format { res := make([]openapi.Format, len(page.Formats)) diff --git a/ports/rest/service.go b/ports/rest/service.go index bb9ad16..794e5dd 100644 --- a/ports/rest/service.go +++ b/ports/rest/service.go @@ -20,14 +20,19 @@ type Pages interface { GetFile(ctx context.Context, pageID, fileID uuid.UUID) (*entity.File, error) } -func NewService(sites Pages, ch chan *entity.Page) *Service { - return &Service{pages: sites, ch: ch} +func NewService(pages Pages, ch chan *entity.Page, processor entity.Processor) *Service { + return &Service{ + pages: pages, + ch: ch, + processor: processor, + } } type Service struct { openapi.UnimplementedHandler - pages Pages - ch chan *entity.Page + pages Pages + ch chan *entity.Page + processor entity.Processor } func (s *Service) GetPage(ctx context.Context, params openapi.GetPageParams) (openapi.GetPageRes, error) { @@ -78,6 +83,13 @@ func (s *Service) AddPage(ctx context.Context, req openapi.OptAddPageReq, params page := entity.NewPage(url, description, domainFormats...) 
page.Status = entity.StatusProcessing + meta, err := s.processor.GetMeta(ctx, page.URL) + if err != nil { + page.Meta.Error = err.Error() + } else { + page.Meta = meta + } + if err := s.pages.Save(ctx, page); err != nil { return nil, fmt.Errorf("save page: %w", err) } diff --git a/ports/rest/ui.go b/ports/rest/ui.go index e6b33f5..ac2514e 100644 --- a/ports/rest/ui.go +++ b/ports/rest/ui.go @@ -10,15 +10,19 @@ import ( ) func NewUI(cfg config.UI) *UI { - return &UI{prefix: cfg.Prefix} + return &UI{ + prefix: cfg.Prefix, + theme: cfg.Theme, + } } type UI struct { prefix string + theme string } func (u *UI) ServeHTTP(w http.ResponseWriter, r *http.Request) { - serveRoot, err := fs.Sub(ui.StaticFiles, "static") + serveRoot, err := fs.Sub(ui.StaticFiles, u.theme) if err != nil { w.WriteHeader(http.StatusInternalServerError) return @@ -27,12 +31,11 @@ func (u *UI) ServeHTTP(w http.ResponseWriter, r *http.Request) { if strings.HasPrefix(r.URL.Path, u.prefix) { r.URL.Path = "/" + strings.TrimPrefix(r.URL.Path, u.prefix) } + if !strings.HasPrefix(r.URL.Path, "/static") { + r.URL.Path = "/" + } r.URL.Path = strings.TrimPrefix(r.URL.Path, "/static") http.FileServer(http.FS(serveRoot)).ServeHTTP(w, r) } - -func (u *UI) IsUIRequest(r *http.Request) bool { - return r.URL.Path == u.prefix || strings.HasPrefix(r.URL.Path, "/static/") -} diff --git a/ui/basic/index.html b/ui/basic/index.html new file mode 100644 index 0000000..27173c4 --- /dev/null +++ b/ui/basic/index.html @@ -0,0 +1,47 @@ + + +
+ + + +