mirror of
https://github.com/derfenix/webarchive.git
synced 2026-03-11 12:41:54 +03:00
Fix page meta retrieval
This commit is contained in:
@@ -129,8 +129,29 @@ func (p *Processors) GetMeta(ctx context.Context, url string) (entity.Meta, erro
|
||||
return entity.Meta{}, fmt.Errorf("parse response body: %w", err)
|
||||
}
|
||||
|
||||
var fc *html.Node
|
||||
for fc = htmlNode.FirstChild; fc != nil && fc.Data != "html"; fc = fc.NextSibling {
|
||||
}
|
||||
|
||||
if fc == nil {
|
||||
return entity.Meta{}, fmt.Errorf("failed to find html tag")
|
||||
}
|
||||
|
||||
fc = fc.NextSibling
|
||||
if fc == nil {
|
||||
return entity.Meta{}, fmt.Errorf("failed to find html tag")
|
||||
}
|
||||
|
||||
for fc = fc.FirstChild; fc != nil && fc.Data != "head"; fc = fc.NextSibling {
|
||||
fmt.Println(fc.Data)
|
||||
}
|
||||
|
||||
if fc == nil {
|
||||
return entity.Meta{}, fmt.Errorf("failed to find html tag")
|
||||
}
|
||||
|
||||
meta := entity.Meta{}
|
||||
getMetaData(htmlNode, &meta)
|
||||
getMetaData(fc, &meta)
|
||||
meta.Encoding = encodingFromHeader(response.Header)
|
||||
|
||||
return meta, nil
|
||||
|
||||
26
adapters/processors/processors_test.go
Normal file
26
adapters/processors/processors_test.go
Normal file
@@ -0,0 +1,26 @@
|
||||
package processors
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/derfenix/webarchive/config"
|
||||
)
|
||||
|
||||
func TestProcessors_GetMeta(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := context.Background()
|
||||
cfg, err := config.NewConfig(ctx)
|
||||
require.NoError(t, err)
|
||||
|
||||
procs, err := NewProcessors(cfg)
|
||||
require.NoError(t, err)
|
||||
|
||||
meta, err := procs.GetMeta(ctx, "https://habr.com/ru/companies/wirenboard/articles/722718/")
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "Сколько стоит умный дом? Рассказываю, как строил свой и что получилось за 1000 руб./м² / Хабр", meta.Title)
|
||||
}
|
||||
Reference in New Issue
Block a user