// defaultEncoding is the charset reported when the response does not declare
// one in its Content-Type header.
const defaultEncoding = "utf-8"

// encodingFromHeader returns the charset declared in the Content-Type header
// of headers, or defaultEncoding when the header is missing, carries no
// charset parameter, or the charset value is empty.
//
// The value is returned as written by the server (case preserved), with
// surrounding whitespace and double quotes removed.
func encodingFromHeader(headers http.Header) string {
	// Drop the media type itself; only the parameter list is of interest.
	// Cutting on ";" alone (not "; ") accepts headers without a space after
	// the separator, e.g. "text/html;charset=utf-8".
	_, params, _ := strings.Cut(headers.Get("Content-Type"), ";")

	for params != "" {
		var param string
		param, params, _ = strings.Cut(params, ";")

		name, value, found := strings.Cut(param, "=")
		// Parameter names are case-insensitive; skip everything that is not
		// charset (boundary, profile, ...), so unrelated parameters are never
		// mistaken for the encoding.
		if !found || !strings.EqualFold(strings.TrimSpace(name), "charset") {
			continue
		}

		// The value may be a quoted string: charset="utf-8".
		if value = strings.Trim(strings.TrimSpace(value), `"`); value != "" {
			return value
		}
	}

	return defaultEncoding
}