39 lines
670 B
Go
39 lines
670 B
Go
package htmlx
|
|
|
|
import (
|
|
"io"
|
|
"strings"
|
|
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// ExtractAnchors returns all hrefs from <a> tags.
|
|
func ExtractAnchors(r io.Reader) []string {
|
|
tokens := html.NewTokenizer(r)
|
|
var hrefs []string
|
|
for {
|
|
t := tokens.Next()
|
|
switch t {
|
|
case html.StartTagToken, html.SelfClosingTagToken:
|
|
tn, hasAttr := tokens.TagName()
|
|
if string(tn) != "a" || !hasAttr {
|
|
continue
|
|
}
|
|
for {
|
|
key, val, more := tokens.TagAttr()
|
|
if string(key) == "href" {
|
|
v := strings.TrimSpace(string(val))
|
|
if v != "" {
|
|
hrefs = append(hrefs, v)
|
|
}
|
|
}
|
|
if !more {
|
|
break
|
|
}
|
|
}
|
|
case html.ErrorToken:
|
|
return hrefs
|
|
}
|
|
}
|
|
}
|