NOTE: This package is deprecated. Use the htmlquery and xmlquery packages instead; their latest versions fix a number of issues.
A Go package that lets you extract data from HTML/XML documents using XPath expressions.
You can find the list of supported XPath functions in the XPath package.
go get github.com/antchfx/xquery
Extract data from an HTML document.
package main
import (
"github.com/antchfx/xpath"
"github.com/antchfx/xquery/html"
)
// main loads ./examples/test.html, parses it, and prints the number of div
// elements whose class attribute is "article", followed by the href of every
// link: first via compiled xpath expressions, then via htmlquery.Find.
func main() {
	// Load HTML file.
	f, err := os.Open(`./examples/test.html`)
	if err != nil {
		panic(err)
	}
	// Release the file handle when the example finishes.
	defer f.Close()

	// Parse HTML document.
	doc, err := htmlquery.Parse(f)
	if err != nil {
		panic(err)
	}

	// Option 1: use compiled xpath expressions to match nodes.
	expr := xpath.MustCompile("count(//div[@class='article'])")
	fmt.Printf("%f \n", expr.Evaluate(htmlquery.CreateXPathNavigator(doc)).(float64))

	expr = xpath.MustCompile("//a/@href")
	iter := expr.Evaluate(htmlquery.CreateXPathNavigator(doc)).(*xpath.NodeIterator)
	for iter.MoveNext() {
		fmt.Printf("%s \n", iter.Current().Value()) // output href
	}

	// Option 2: use the built-in Find() function to match nodes.
	for _, n := range htmlquery.Find(doc, "//a/@href") {
		fmt.Printf("%s \n", htmlquery.SelectAttr(n, "href")) // output href
	}
}
Extract data from an XML document.
package main
import (
"github.com/antchfx/xpath"
"github.com/antchfx/xquery/xml"
)
// main loads ./examples/test.xml, parses it, and prints the sum of all book
// prices, the id and title of each book, and the XML of the book whose id is
// "bk104".
func main() {
	// Load XML document from file.
	f, err := os.Open(`./examples/test.xml`)
	if err != nil {
		panic(err)
	}
	// Release the file handle when the example finishes.
	defer f.Close()

	// Parse XML document.
	doc, err := xmlquery.Parse(f)
	if err != nil {
		panic(err)
	}

	// Option 1: use a compiled xpath expression to match nodes.
	// Sum all book prices via Evaluate().
	expr, err := xpath.Compile("sum(//book/price)")
	if err != nil {
		panic(err)
	}
	fmt.Printf("total price: %f\n", expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64))

	for _, book := range xmlquery.Find(doc, "//book") {
		title := xmlquery.FindOne(book, "title")
		if title == nil {
			// NOTE(review): FindOne presumably returns nil when nothing
			// matches; skip books without a title instead of dereferencing nil.
			continue
		}
		fmt.Printf("%s : %s \n", book.SelectAttr("id"), title.InnerText())
	}

	// Option 2: use the built-in FindOne() function to match a single node.
	n := xmlquery.FindOne(doc, "//book[@id='bk104']")
	if n == nil {
		// Fail with a clear message rather than a nil-pointer dereference.
		panic("book with id 'bk104' not found")
	}
	fmt.Printf("%s \n", n.OutputXML(true))
}