mirror of
https://github.com/derfenix/webarchive.git
synced 2026-03-11 12:41:54 +03:00
Fix page meta retrieve
This commit is contained in:
@@ -129,8 +129,29 @@ func (p *Processors) GetMeta(ctx context.Context, url string) (entity.Meta, erro
|
|||||||
return entity.Meta{}, fmt.Errorf("parse response body: %w", err)
|
return entity.Meta{}, fmt.Errorf("parse response body: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var fc *html.Node
|
||||||
|
for fc = htmlNode.FirstChild; fc != nil && fc.Data != "html"; fc = fc.NextSibling {
|
||||||
|
}
|
||||||
|
|
||||||
|
if fc == nil {
|
||||||
|
return entity.Meta{}, fmt.Errorf("failed to find html tag")
|
||||||
|
}
|
||||||
|
|
||||||
|
fc = fc.NextSibling
|
||||||
|
if fc == nil {
|
||||||
|
return entity.Meta{}, fmt.Errorf("failed to find html tag")
|
||||||
|
}
|
||||||
|
|
||||||
|
for fc = fc.FirstChild; fc != nil && fc.Data != "head"; fc = fc.NextSibling {
|
||||||
|
fmt.Println(fc.Data)
|
||||||
|
}
|
||||||
|
|
||||||
|
if fc == nil {
|
||||||
|
return entity.Meta{}, fmt.Errorf("failed to find html tag")
|
||||||
|
}
|
||||||
|
|
||||||
meta := entity.Meta{}
|
meta := entity.Meta{}
|
||||||
getMetaData(htmlNode, &meta)
|
getMetaData(fc, &meta)
|
||||||
meta.Encoding = encodingFromHeader(response.Header)
|
meta.Encoding = encodingFromHeader(response.Header)
|
||||||
|
|
||||||
return meta, nil
|
return meta, nil
|
||||||
|
|||||||
26
adapters/processors/processors_test.go
Normal file
26
adapters/processors/processors_test.go
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
package processors
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/derfenix/webarchive/config"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestProcessors_GetMeta(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
cfg, err := config.NewConfig(ctx)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
procs, err := NewProcessors(cfg)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
meta, err := procs.GetMeta(ctx, "https://habr.com/ru/companies/wirenboard/articles/722718/")
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "Сколько стоит умный дом? Рассказываю, как строил свой и что получилось за 1000 руб./м² / Хабр", meta.Title)
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user