package htmlx
import (
"io"
"strings"
"golang.org/x/net/html"
)
// ExtractAnchors tokenizes the HTML read from r and returns the href
// attribute values found on its <a> tags, in the order they are encountered.
//
// Both start tags and self-closing tags are inspected. Each value has its
// surrounding whitespace trimmed, and values that are empty after trimming
// are skipped. Scanning ends at the first ErrorToken reported by the
// tokenizer; whatever was collected up to that point is returned and the
// tokenizer's error is not surfaced. The result is nil when no href was
// collected.
func ExtractAnchors(r io.Reader) []string {
	var links []string
	z := html.NewTokenizer(r)
	for tt := z.Next(); tt != html.ErrorToken; tt = z.Next() {
		if tt != html.StartTagToken && tt != html.SelfClosingTagToken {
			continue
		}
		name, more := z.TagName()
		if !more || string(name) != "a" {
			continue
		}
		// more starts out true here, so TagAttr is called at least once and
		// then again for as long as the tokenizer reports further attributes.
		for more {
			var attr, raw []byte
			attr, raw, more = z.TagAttr()
			if string(attr) != "href" {
				continue
			}
			if link := strings.TrimSpace(string(raw)); link != "" {
				links = append(links, link)
			}
		}
	}
	return links
}
// ExtractTitle returns the text content of the first <title> element in the
// HTML read from r, with surrounding whitespace trimmed.
//
// Only the first <title> start tag is considered. If the token that
// immediately follows it is not a text token (for example an empty
// <title></title>), the result is "" and any later <title> elements, such as
// those inside inline SVG, are not consulted. "" is also returned when the
// tokenizer reports an ErrorToken before a <title> start tag is seen; the
// tokenizer's error is not surfaced.
func ExtractTitle(r io.Reader) string {
	tokens := html.NewTokenizer(r)
	for {
		switch tokens.Next() {
		case html.StartTagToken:
			name, _ := tokens.TagName()
			if string(name) != "title" {
				continue
			}
			// The first <title> decides the result. Stop here when it has no
			// text rather than scanning on and picking up the text of some
			// unrelated, later <title>.
			if tokens.Next() != html.TextToken {
				return ""
			}
			return strings.TrimSpace(string(tokens.Text()))
		case html.ErrorToken:
			return ""
		}
	}
}