// Package htmlx provides small helpers for extracting data from HTML streams.
package htmlx

import (
	"io"
	"strings"

	"golang.org/x/net/html"
)

// ExtractAnchors returns the href values of all <a> tags read from r.
//
// Both start tags and self-closing tags are inspected. Each href value is
// trimmed of surrounding whitespace, and values that are empty after trimming
// are skipped. Scanning stops at the first ErrorToken reported by the
// tokenizer (which is also how end of input is signalled); whatever was
// collected up to that point is returned. The result is nil when no href was
// found.
func ExtractAnchors(r io.Reader) []string {
	z := html.NewTokenizer(r)
	var hrefs []string
	for {
		switch z.Next() {
		case html.ErrorToken:
			return hrefs
		case html.StartTagToken, html.SelfClosingTagToken:
			name, hasAttr := z.TagName()
			if string(name) != "a" {
				continue
			}
			// TagAttr reports whether more attributes follow, so the flag
			// doubles as the loop condition.
			for hasAttr {
				var key, val []byte
				key, val, hasAttr = z.TagAttr()
				if string(key) != "href" {
					continue
				}
				if v := strings.TrimSpace(string(val)); v != "" {
					hrefs = append(hrefs, v)
				}
			}
		}
	}
}

// ExtractTitle returns the text content of the first <title> element read
// from r, trimmed of surrounding whitespace.
//
// It returns "" when the input contains no <title> start tag, when the
// tokenizer reports an ErrorToken before one is found, or when the first
// <title> is not immediately followed by text (for example <title></title>).
func ExtractTitle(r io.Reader) string {
	z := html.NewTokenizer(r)
	for {
		switch z.Next() {
		case html.ErrorToken:
			return ""
		case html.StartTagToken:
			if name, _ := z.TagName(); string(name) != "title" {
				continue
			}
			// Only the first <title> counts. If it has no text, stop here
			// rather than scanning on and picking up a later, unrelated
			// <title> (such as one nested inside an inline <svg>).
			if z.Next() != html.TextToken {
				return ""
			}
			return strings.TrimSpace(string(z.Text()))
		}
	}
}