58 lines
1.1 KiB
Go
58 lines
1.1 KiB
Go
package htmlx
|
|
|
|
import (
|
|
"io"
|
|
"strings"
|
|
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// ExtractAnchors returns all hrefs from <a> tags.
|
|
func ExtractAnchors(r io.Reader) []string {
|
|
tokens := html.NewTokenizer(r)
|
|
var hrefs []string
|
|
for {
|
|
t := tokens.Next()
|
|
switch t {
|
|
case html.StartTagToken, html.SelfClosingTagToken:
|
|
tn, hasAttr := tokens.TagName()
|
|
if string(tn) != "a" || !hasAttr {
|
|
continue
|
|
}
|
|
for {
|
|
key, val, more := tokens.TagAttr()
|
|
if string(key) == "href" {
|
|
v := strings.TrimSpace(string(val))
|
|
if v != "" {
|
|
hrefs = append(hrefs, v)
|
|
}
|
|
}
|
|
if !more {
|
|
break
|
|
}
|
|
}
|
|
case html.ErrorToken:
|
|
return hrefs
|
|
}
|
|
}
|
|
}
|
|
|
|
// ExtractTitle returns the text content of the first <title> element.
|
|
func ExtractTitle(r io.Reader) string {
|
|
tokens := html.NewTokenizer(r)
|
|
for {
|
|
switch tokens.Next() {
|
|
case html.StartTagToken:
|
|
name, _ := tokens.TagName()
|
|
if string(name) == "title" {
|
|
if tokens.Next() == html.TextToken {
|
|
t := strings.TrimSpace(string(tokens.Text()))
|
|
return t
|
|
}
|
|
}
|
|
case html.ErrorToken:
|
|
return ""
|
|
}
|
|
}
|
|
}
|