diff --git a/go.mod b/go.mod
index 58c65c96..d1bd6731 100644
--- a/go.mod
+++ b/go.mod
@@ -33,8 +33,8 @@ require (
github.com/PuerkitoBio/goquery v1.5.1 // indirect
github.com/andybalholm/cascadia v1.2.0 // indirect
github.com/antchfx/htmlquery v1.2.3 // indirect
- github.com/antchfx/xmlquery v1.2.4 // indirect
- github.com/antchfx/xpath v1.1.8 // indirect
+ github.com/antchfx/xmlquery v1.3.1 // indirect
+ github.com/antchfx/xpath v1.1.10 // indirect
github.com/denisenkom/go-mssqldb v0.0.0-20190915052044-aa4949efa320 // indirect
github.com/erikstmartin/go-testdb v0.0.0-20160219214506-8d10e4a1bae5 // indirect
github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a // indirect
diff --git a/go.sum b/go.sum
index 9d50739e..4a83870d 100644
--- a/go.sum
+++ b/go.sum
@@ -11,11 +11,13 @@ github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5
github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz6M=
github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0=
-github.com/antchfx/xmlquery v1.2.4 h1:T/SH1bYdzdjTMoz2RgsfVKbM5uWh3gjDYYepFqQmFv4=
github.com/antchfx/xmlquery v1.2.4/go.mod h1:KQQuESaxSlqugE2ZBcM/qn+ebIpt+d+4Xx7YcSGAIrM=
+github.com/antchfx/xmlquery v1.3.1 h1:nIKWdtnhrXtj0/IRUAAw2I7TfpHUa3zMnHvNmPXFg+w=
+github.com/antchfx/xmlquery v1.3.1/go.mod h1:64w0Xesg2sTaawIdNqMB+7qaW/bSqkQm+ssPaCMWNnc=
github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
-github.com/antchfx/xpath v1.1.8 h1:PcL6bIX42Px5usSx6xRYw/wjB3wYGkj0MJ9MBzEKVgk=
github.com/antchfx/xpath v1.1.8/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
+github.com/antchfx/xpath v1.1.10 h1:cJ0pOvEdN/WvYXxvRrzQH9x5QWKpzHacYO8qzCcDYAg=
+github.com/antchfx/xpath v1.1.10/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/apokalyptik/cfg v0.0.0-20160401174707-703f89116901 h1:0yiOsd1b8gHxSfR/1ROHSAjKNrhoXNY3CVcPPLl/rp0=
github.com/apokalyptik/cfg v0.0.0-20160401174707-703f89116901/go.mod h1:5a6I8lR9NZj4USqYDHMR/0eZgjYivY+a1syWE0NO1po=
github.com/bearcherian/rollzap v1.0.2 h1:Q74bycIl4F4VruPdcc7Py5zpByKaobUGk4PwVymVmUg=
@@ -200,6 +202,7 @@ golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
+golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20221002022538-bcab6841153b h1:6e93nYa3hNqAvLr0pD4PN1fFS+gKzp2zAXqrnTCstqU=
diff --git a/vendor/github.com/antchfx/xmlquery/.travis.yml b/vendor/github.com/antchfx/xmlquery/.travis.yml
index b99bee6a..c4d55b39 100644
--- a/vendor/github.com/antchfx/xmlquery/.travis.yml
+++ b/vendor/github.com/antchfx/xmlquery/.travis.yml
@@ -12,4 +12,4 @@ install:
- go get github.com/golang/groupcache
script:
- - $HOME/gopath/bin/goveralls -service=travis-ci
\ No newline at end of file
+ - $HOME/gopath/bin/goveralls -service=travis-ci
diff --git a/vendor/github.com/antchfx/xmlquery/README.md b/vendor/github.com/antchfx/xmlquery/README.md
index 8b3c35ee..410ae444 100644
--- a/vendor/github.com/antchfx/xmlquery/README.md
+++ b/vendor/github.com/antchfx/xmlquery/README.md
@@ -15,6 +15,9 @@ Overview
Change Logs
===
+2020-08-??
+- Add XML stream loading and parsing support.
+
2019-11-11
- Add XPath query caching.
@@ -48,26 +51,58 @@ if err != nil {
}
```
-#### Parse a XML from URL.
+#### Parse an XML from URL.
```go
doc, err := xmlquery.LoadURL("http://www.example.com/sitemap.xml")
```
-#### Parse a XML from string.
+#### Parse an XML from string.
```go
s := ``
doc, err := xmlquery.Parse(strings.NewReader(s))
```
-#### Parse a XML from io.Reader.
+#### Parse an XML from io.Reader.
```go
f, err := os.Open("../books.xml")
doc, err := xmlquery.Parse(f)
```
+#### Parse an XML in a stream fashion (simple case without element filtering).
+
+```go
+f, err := os.Open("../books.xml")
+p, err := xmlquery.CreateStreamParser(f, "/bookstore/book")
+for {
+ n, err := p.Read()
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ ...
+ }
+}
+```
+
+#### Parse an XML in a stream fashion (advanced case, with element filtering).
+
+```go
+f, err := os.Open("../books.xml")
+p, err := xmlquery.CreateStreamParser(f, "/bookstore/book", "/bookstore/book[price>=10]")
+for {
+ n, err := p.Read()
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ ...
+ }
+}
+```
+
#### Find authors of all books in the bookstore.
```go
@@ -210,11 +245,11 @@ func main(){
List of supported XPath query packages
===
-|Name |Description |
-|--------------------------|----------------|
-|[htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for the HTML document|
-|[xmlquery](https://github.com/antchfx/xmlquery) | XPath query package for the XML document|
-|[jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for the JSON document|
+| Name | Description |
+| ------------------------------------------------- | ----------------------------------------- |
+| [htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for the HTML document |
+| [xmlquery](https://github.com/antchfx/xmlquery) | XPath query package for the XML document |
+| [jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for the JSON document |
Questions
===
diff --git a/vendor/github.com/antchfx/xmlquery/node.go b/vendor/github.com/antchfx/xmlquery/node.go
index c57b0ed1..e0537482 100644
--- a/vendor/github.com/antchfx/xmlquery/node.go
+++ b/vendor/github.com/antchfx/xmlquery/node.go
@@ -3,13 +3,8 @@ package xmlquery
import (
"bytes"
"encoding/xml"
- "errors"
"fmt"
- "io"
- "net/http"
"strings"
-
- "golang.org/x/net/html/charset"
)
// A NodeType is the type of a Node.
@@ -146,7 +141,8 @@ func (n *Node) OutputXML(self bool) string {
return buf.String()
}
-func addAttr(n *Node, key, val string) {
+// AddAttr adds a new attribute specified by 'key' and 'val' to a node 'n'.
+func AddAttr(n *Node, key, val string) {
var attr xml.Attr
if i := strings.Index(key, ":"); i > 0 {
attr = xml.Attr{
@@ -163,10 +159,13 @@ func addAttr(n *Node, key, val string) {
n.Attr = append(n.Attr, attr)
}
-func addChild(parent, n *Node) {
+// AddChild adds a new node 'n' to a node 'parent' as its last child.
+func AddChild(parent, n *Node) {
n.Parent = parent
+ n.NextSibling = nil
if parent.FirstChild == nil {
parent.FirstChild = n
+ n.PrevSibling = nil
} else {
parent.LastChild.NextSibling = n
n.PrevSibling = parent.LastChild
@@ -175,153 +174,48 @@ func addChild(parent, n *Node) {
parent.LastChild = n
}
-func addSibling(sibling, n *Node) {
+// AddSibling adds a new node 'n' as a sibling of a given node 'sibling'.
+// Note it is not necessarily true that the new node 'n' would be added
+// immediately after 'sibling'. If 'sibling' isn't the last child of its
+// parent, then the new node 'n' will be added at the end of the sibling
+// chain of their parent.
+func AddSibling(sibling, n *Node) {
for t := sibling.NextSibling; t != nil; t = t.NextSibling {
sibling = t
}
n.Parent = sibling.Parent
sibling.NextSibling = n
n.PrevSibling = sibling
+ n.NextSibling = nil
if sibling.Parent != nil {
sibling.Parent.LastChild = n
}
}
-// LoadURL loads the XML document from the specified URL.
-func LoadURL(url string) (*Node, error) {
- resp, err := http.Get(url)
- if err != nil {
- return nil, err
+// RemoveFromTree removes a node and its subtree from the document
+// tree it is in. If the node is the root of the tree, then it's a no-op.
+func RemoveFromTree(n *Node) {
+ if n.Parent == nil {
+ return
}
- defer resp.Body.Close()
- return parse(resp.Body)
-}
-
-func parse(r io.Reader) (*Node, error) {
- var (
- decoder = xml.NewDecoder(r)
- doc = &Node{Type: DocumentNode}
- space2prefix = make(map[string]string)
- level = 0
- )
- // http://www.w3.org/XML/1998/namespace is bound by definition to the prefix xml.
- space2prefix["http://www.w3.org/XML/1998/namespace"] = "xml"
- decoder.CharsetReader = charset.NewReaderLabel
- prev := doc
- for {
- tok, err := decoder.Token()
- switch {
- case err == io.EOF:
- goto quit
- case err != nil:
- return nil, err
+ if n.Parent.FirstChild == n {
+ if n.Parent.LastChild == n {
+ n.Parent.FirstChild = nil
+ n.Parent.LastChild = nil
+ } else {
+ n.Parent.FirstChild = n.NextSibling
+ n.NextSibling.PrevSibling = nil
}
-
- switch tok := tok.(type) {
- case xml.StartElement:
- if level == 0 {
- // mising XML declaration
- node := &Node{Type: DeclarationNode, Data: "xml", level: 1}
- addChild(prev, node)
- level = 1
- prev = node
- }
- // https://www.w3.org/TR/xml-names/#scoping-defaulting
- for _, att := range tok.Attr {
- if att.Name.Local == "xmlns" {
- space2prefix[att.Value] = ""
- } else if att.Name.Space == "xmlns" {
- space2prefix[att.Value] = att.Name.Local
- }
- }
-
- if tok.Name.Space != "" {
- if _, found := space2prefix[tok.Name.Space]; !found {
- return nil, errors.New("xmlquery: invalid XML document, namespace is missing")
- }
- }
-
- for i := 0; i < len(tok.Attr); i++ {
- att := &tok.Attr[i]
- if prefix, ok := space2prefix[att.Name.Space]; ok {
- att.Name.Space = prefix
- }
- }
-
- node := &Node{
- Type: ElementNode,
- Data: tok.Name.Local,
- Prefix: space2prefix[tok.Name.Space],
- NamespaceURI: tok.Name.Space,
- Attr: tok.Attr,
- level: level,
- }
- //fmt.Println(fmt.Sprintf("start > %s : %d", node.Data, level))
- if level == prev.level {
- addSibling(prev, node)
- } else if level > prev.level {
- addChild(prev, node)
- } else if level < prev.level {
- for i := prev.level - level; i > 1; i-- {
- prev = prev.Parent
- }
- addSibling(prev.Parent, node)
- }
- prev = node
- level++
- case xml.EndElement:
- level--
- case xml.CharData:
- node := &Node{Type: CharDataNode, Data: string(tok), level: level}
- if level == prev.level {
- addSibling(prev, node)
- } else if level > prev.level {
- addChild(prev, node)
- } else if level < prev.level {
- for i := prev.level - level; i > 1; i-- {
- prev = prev.Parent
- }
- addSibling(prev.Parent, node)
- }
- case xml.Comment:
- node := &Node{Type: CommentNode, Data: string(tok), level: level}
- if level == prev.level {
- addSibling(prev, node)
- } else if level > prev.level {
- addChild(prev, node)
- } else if level < prev.level {
- for i := prev.level - level; i > 1; i-- {
- prev = prev.Parent
- }
- addSibling(prev.Parent, node)
- }
- case xml.ProcInst: // Processing Instruction
- if prev.Type != DeclarationNode {
- level++
- }
- node := &Node{Type: DeclarationNode, Data: tok.Target, level: level}
- pairs := strings.Split(string(tok.Inst), " ")
- for _, pair := range pairs {
- pair = strings.TrimSpace(pair)
- if i := strings.Index(pair, "="); i > 0 {
- addAttr(node, pair[:i], strings.Trim(pair[i+1:], `"`))
- }
- }
- if level == prev.level {
- addSibling(prev, node)
- } else if level > prev.level {
- addChild(prev, node)
- }
- prev = node
- case xml.Directive:
+ } else {
+ if n.Parent.LastChild == n {
+ n.Parent.LastChild = n.PrevSibling
+ n.PrevSibling.NextSibling = nil
+ } else {
+ n.PrevSibling.NextSibling = n.NextSibling
+ n.NextSibling.PrevSibling = n.PrevSibling
}
-
}
-quit:
- return doc, nil
-}
-
-// Parse returns the parse tree for the XML from the given Reader.
-func Parse(r io.Reader) (*Node, error) {
- return parse(r)
+ n.Parent = nil
+ n.PrevSibling = nil
+ n.NextSibling = nil
}
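
A minimal sketch (not part of this diff) of how the newly exported node.go helpers fit together, assuming only the vendored xmlquery API shown above; the element names and the output comments are illustrative:

```go
package main

import (
	"fmt"

	"github.com/antchfx/xmlquery"
)

func main() {
	doc := &xmlquery.Node{Type: xmlquery.DocumentNode}
	store := &xmlquery.Node{Type: xmlquery.ElementNode, Data: "bookstore"}
	xmlquery.AddChild(doc, store)

	book1 := &xmlquery.Node{Type: xmlquery.ElementNode, Data: "book"}
	book2 := &xmlquery.Node{Type: xmlquery.ElementNode, Data: "book"}
	xmlquery.AddChild(store, book1)
	// AddSibling appends at the end of the sibling chain, so book2 becomes the
	// last child of bookstore even though book1 is passed as the anchor.
	xmlquery.AddSibling(book1, book2)
	xmlquery.AddAttr(book2, "category", "web")

	// Roughly: <bookstore><book></book><book category="web"></book></bookstore>
	fmt.Println(doc.OutputXML(false))

	// RemoveFromTree detaches book1 and its subtree from the document.
	xmlquery.RemoveFromTree(book1)
	// Roughly: <bookstore><book category="web"></book></bookstore>
	fmt.Println(doc.OutputXML(false))
}
```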
diff --git a/vendor/github.com/antchfx/xmlquery/parse.go b/vendor/github.com/antchfx/xmlquery/parse.go
new file mode 100644
index 00000000..853ea018
--- /dev/null
+++ b/vendor/github.com/antchfx/xmlquery/parse.go
@@ -0,0 +1,311 @@
+package xmlquery
+
+import (
+ "encoding/xml"
+ "errors"
+ "fmt"
+ "io"
+ "net/http"
+ "strings"
+
+ "github.com/antchfx/xpath"
+ "golang.org/x/net/html/charset"
+)
+
+// LoadURL loads the XML document from the specified URL.
+func LoadURL(url string) (*Node, error) {
+ resp, err := http.Get(url)
+ if err != nil {
+ return nil, err
+ }
+ defer resp.Body.Close()
+ // Check the HTTP Content-Type value from the response headers. (#39)
+ v := strings.ToLower(resp.Header.Get("Content-Type"))
+ if v == "text/xml" || v == "application/xml" {
+ return Parse(resp.Body)
+ }
+ return nil, fmt.Errorf("invalid XML document(%s)", v)
+}
+
+// Parse returns the parse tree for the XML from the given Reader.
+func Parse(r io.Reader) (*Node, error) {
+ p := createParser(r)
+ for {
+ _, err := p.parse()
+ if err == io.EOF {
+ return p.doc, nil
+ }
+ if err != nil {
+ return nil, err
+ }
+ }
+}
+
+type parser struct {
+ decoder *xml.Decoder
+ doc *Node
+ space2prefix map[string]string
+ level int
+ prev *Node
+ streamElementXPath *xpath.Expr // Under streaming mode, this specifies the xpath to the target element node(s).
+ streamElementFilter *xpath.Expr // If specified, provides further filtering on the target element.
+ streamNode *Node // Need to remember the last target node so we can clean it up upon the next Read() call.
+ streamNodePrev *Node // Need to remember the target node's prev so that, upon target node removal, we can restore the correct prev.
+}
+
+func createParser(r io.Reader) *parser {
+ p := &parser{
+ decoder: xml.NewDecoder(r),
+ doc: &Node{Type: DocumentNode},
+ space2prefix: make(map[string]string),
+ level: 0,
+ }
+ // http://www.w3.org/XML/1998/namespace is bound by definition to the prefix xml.
+ p.space2prefix["http://www.w3.org/XML/1998/namespace"] = "xml"
+ p.decoder.CharsetReader = charset.NewReaderLabel
+ p.prev = p.doc
+ return p
+}
+
+func (p *parser) parse() (*Node, error) {
+ var streamElementNodeCounter int
+
+ for {
+ tok, err := p.decoder.Token()
+ if err != nil {
+ return nil, err
+ }
+
+ switch tok := tok.(type) {
+ case xml.StartElement:
+ if p.level == 0 {
+ // missing XML declaration
+ node := &Node{Type: DeclarationNode, Data: "xml", level: 1}
+ AddChild(p.prev, node)
+ p.level = 1
+ p.prev = node
+ }
+ // https://www.w3.org/TR/xml-names/#scoping-defaulting
+ for _, att := range tok.Attr {
+ if att.Name.Local == "xmlns" {
+ p.space2prefix[att.Value] = ""
+ } else if att.Name.Space == "xmlns" {
+ p.space2prefix[att.Value] = att.Name.Local
+ }
+ }
+
+ if tok.Name.Space != "" {
+ if _, found := p.space2prefix[tok.Name.Space]; !found {
+ return nil, errors.New("xmlquery: invalid XML document, namespace is missing")
+ }
+ }
+
+ for i := 0; i < len(tok.Attr); i++ {
+ att := &tok.Attr[i]
+ if prefix, ok := p.space2prefix[att.Name.Space]; ok {
+ att.Name.Space = prefix
+ }
+ }
+
+ node := &Node{
+ Type: ElementNode,
+ Data: tok.Name.Local,
+ Prefix: p.space2prefix[tok.Name.Space],
+ NamespaceURI: tok.Name.Space,
+ Attr: tok.Attr,
+ level: p.level,
+ }
+ //fmt.Println(fmt.Sprintf("start > %s : %d", node.Data, node.level))
+ if p.level == p.prev.level {
+ AddSibling(p.prev, node)
+ } else if p.level > p.prev.level {
+ AddChild(p.prev, node)
+ } else if p.level < p.prev.level {
+ for i := p.prev.level - p.level; i > 1; i-- {
+ p.prev = p.prev.Parent
+ }
+ AddSibling(p.prev.Parent, node)
+ }
+ // If we're in streaming mode, we need to remember the node if it is the target node, so
+ // that when we finish processing its EndElement we know what to return to the caller.
+ // We also need to remove the target node from the tree upon the next Read() call so
+ // memory doesn't grow unbounded.
+ if p.streamElementXPath != nil {
+ if p.streamNode == nil {
+ if QuerySelector(p.doc, p.streamElementXPath) != nil {
+ p.streamNode = node
+ p.streamNodePrev = p.prev
+ streamElementNodeCounter = 1
+ }
+ } else {
+ streamElementNodeCounter++
+ }
+ }
+ p.prev = node
+ p.level++
+ case xml.EndElement:
+ p.level--
+ // If we're in streaming mode and already have a potential streaming target
+ // node identified (p.streamNode != nil), then we need to check whether this
+ // is the real one we want to return to the caller.
+ if p.streamNode != nil {
+ streamElementNodeCounter--
+ if streamElementNodeCounter == 0 {
+ // Now we know this element node has at least passed the initial
+ // p.streamElementXPath check and is a potential target node candidate.
+ // We need one more check with p.streamElementFilter (if given) to
+ // ensure it is really the element node we want.
+ // The reason we need a two-step check process is the following
+ // situation:
+ //   <AAA><BBB>b1</BBB><BBB>b2</BBB></AAA>
+ // Say p.streamElementXPath is "/AAA/BBB[. != 'b1']". At
+ // xml.StartElement time, the node is still empty, so it will pass
+ // the p.streamElementXPath check. However, eventually we know it
+ // shouldn't be returned to the caller. Having a second, more fine-grained
+ // filter check ensures that. So in this case, the caller should really
+ // set up the stream parser with:
+ //   streamElementXPath = "/AAA/BBB",
+ //   streamElementFilter = "/AAA/BBB[. != 'b1']"
+ if p.streamElementFilter == nil || QuerySelector(p.doc, p.streamElementFilter) != nil {
+ return p.streamNode, nil
+ }
+ // Otherwise, this isn't our target node; clean things up.
+ // Note we also remove the underlying *Node from the node tree to prevent
+ // errors in future stream node candidate selection.
+ RemoveFromTree(p.streamNode)
+ p.prev = p.streamNodePrev
+ p.streamNode = nil
+ p.streamNodePrev = nil
+ }
+ }
+ case xml.CharData:
+ node := &Node{Type: CharDataNode, Data: string(tok), level: p.level}
+ if p.level == p.prev.level {
+ AddSibling(p.prev, node)
+ } else if p.level > p.prev.level {
+ AddChild(p.prev, node)
+ } else if p.level < p.prev.level {
+ for i := p.prev.level - p.level; i > 1; i-- {
+ p.prev = p.prev.Parent
+ }
+ AddSibling(p.prev.Parent, node)
+ }
+ case xml.Comment:
+ node := &Node{Type: CommentNode, Data: string(tok), level: p.level}
+ if p.level == p.prev.level {
+ AddSibling(p.prev, node)
+ } else if p.level > p.prev.level {
+ AddChild(p.prev, node)
+ } else if p.level < p.prev.level {
+ for i := p.prev.level - p.level; i > 1; i-- {
+ p.prev = p.prev.Parent
+ }
+ AddSibling(p.prev.Parent, node)
+ }
+ case xml.ProcInst: // Processing Instruction
+ if p.prev.Type != DeclarationNode {
+ p.level++
+ }
+ node := &Node{Type: DeclarationNode, Data: tok.Target, level: p.level}
+ pairs := strings.Split(string(tok.Inst), " ")
+ for _, pair := range pairs {
+ pair = strings.TrimSpace(pair)
+ if i := strings.Index(pair, "="); i > 0 {
+ AddAttr(node, pair[:i], strings.Trim(pair[i+1:], `"`))
+ }
+ }
+ if p.level == p.prev.level {
+ AddSibling(p.prev, node)
+ } else if p.level > p.prev.level {
+ AddChild(p.prev, node)
+ }
+ p.prev = node
+ case xml.Directive:
+ }
+ }
+}
+
+// StreamParser enables loading and parsing an XML document in a streaming fashion.
+type StreamParser struct {
+ p *parser
+}
+
+// CreateStreamParser creates a StreamParser. Argument streamElementXPath is required.
+// Argument streamElementFilter is optional and should only be used in advanced scenarios.
+//
+// Scenario 1: simple case:
+// xml := `<AAA><BBB>b1</BBB><BBB>b2</BBB></AAA>`
+// sp, err := CreateStreamParser(strings.NewReader(xml), "/AAA/BBB")
+// if err != nil {
+// panic(err)
+// }
+// for {
+// n, err := sp.Read()
+// if err != nil {
+// break
+// }
+// fmt.Println(n.OutputXML(true))
+// }
+// Output will be:
+// <BBB>b1</BBB>
+// <BBB>b2</BBB>
+//
+// Scenario 2: advanced case:
+// xml := `<AAA><BBB>b1</BBB><BBB>b2</BBB></AAA>`
+// sp, err := CreateStreamParser(strings.NewReader(xml), "/AAA/BBB", "/AAA/BBB[. != 'b1']")
+// if err != nil {
+// panic(err)
+// }
+// for {
+// n, err := sp.Read()
+// if err != nil {
+// break
+// }
+// fmt.Println(n.OutputXML(true))
+// }
+// Output will be:
+// <BBB>b2</BBB>
+//
+// As the argument names indicate, streamElementXPath should provide the XPath query pointing
+// to the target element node only, with no extra filtering on the element itself or its children;
+// streamElementFilter, if needed, can provide additional filtering on the target element and its children.
+//
+// CreateStreamParser returns an error if streamElementXPath, or streamElementFilter if provided, cannot
+// be successfully parsed and compiled into a valid XPath query.
+func CreateStreamParser(r io.Reader, streamElementXPath string, streamElementFilter ...string) (*StreamParser, error) {
+ elemXPath, err := getQuery(streamElementXPath)
+ if err != nil {
+ return nil, fmt.Errorf("invalid streamElementXPath '%s', err: %s", streamElementXPath, err.Error())
+ }
+ elemFilter := (*xpath.Expr)(nil)
+ if len(streamElementFilter) > 0 {
+ elemFilter, err = getQuery(streamElementFilter[0])
+ if err != nil {
+ return nil, fmt.Errorf("invalid streamElementFilter '%s', err: %s", streamElementFilter[0], err.Error())
+ }
+ }
+ sp := &StreamParser{
+ p: createParser(r),
+ }
+ sp.p.streamElementXPath = elemXPath
+ sp.p.streamElementFilter = elemFilter
+ return sp, nil
+}
+
+// Read returns a target node that satisfies the XPath specified by the caller at StreamParser creation
+// time. If there is no more satisfying target node after reading the rest of the XML document, io.EOF
+// is returned. If any XML parsing error is encountered at any time, the error is returned and
+// the stream parsing is stopped. Calling Read() after an error is returned (including io.EOF) is not
+// allowed; the behavior is undefined. Also note, due to the streaming nature, calling Read() will
+// automatically remove any previous target node(s) from the document tree.
+func (sp *StreamParser) Read() (*Node, error) {
+ // Because this is a streaming read, we need to release/remove last
+ // target node from the node tree to free up memory.
+ if sp.p.streamNode != nil {
+ RemoveFromTree(sp.p.streamNode)
+ sp.p.prev = sp.p.streamNodePrev
+ sp.p.streamNode = nil
+ sp.p.streamNodePrev = nil
+ }
+ return sp.p.parse()
+}
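
A hedged usage sketch (not part of this diff) of the new streaming API introduced in parse.go; the sample XML and XPath strings are made up for illustration and mirror the README examples above:

```go
package main

import (
	"fmt"
	"io"
	"strings"

	"github.com/antchfx/xmlquery"
)

func main() {
	xml := `<bookstore><book><price>8</price></book><book><price>12</price></book></bookstore>`
	// The first XPath selects the element to stream; the optional second one
	// applies a finer filter once the element has been fully read.
	sp, err := xmlquery.CreateStreamParser(strings.NewReader(xml),
		"/bookstore/book", "/bookstore/book[price>=10]")
	if err != nil {
		panic(err)
	}
	for {
		// Each Read drops the previously returned target node from the tree,
		// keeping memory bounded while streaming.
		n, err := sp.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			panic(err)
		}
		fmt.Println(n.OutputXML(true)) // expected: <book><price>12</price></book>
	}
}
```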
diff --git a/vendor/github.com/antchfx/xpath/func.go b/vendor/github.com/antchfx/xpath/func.go
index 3873e33f..bcfee55b 100644
--- a/vendor/github.com/antchfx/xpath/func.go
+++ b/vendor/github.com/antchfx/xpath/func.go
@@ -4,11 +4,26 @@ import (
"errors"
"fmt"
"math"
- "regexp"
"strconv"
"strings"
+ "sync"
+ "unicode"
)
+// stringBuilder is an interface compatible with both
+// strings.Builder (Go 1.10+) and bytes.Buffer (pre Go 1.10).
+type stringBuilder interface {
+ WriteRune(r rune) (n int, err error)
+ WriteString(s string) (int, error)
+ Reset()
+ Grow(n int)
+ String() string
+}
+
+var builderPool = sync.Pool{New: func() interface{} {
+ return newStringBuilder()
+}}
+
// The XPath function list.
func predicate(q query) func(NodeNavigator) bool {
@@ -58,6 +73,7 @@ func lastFunc(q query, t iterator) interface{} {
// countFunc is a XPath Node Set functions count(node-set).
func countFunc(q query, t iterator) interface{} {
var count = 0
+ q = functionArgs(q)
test := predicate(q)
switch typ := q.Evaluate(t).(type) {
case query:
@@ -73,7 +89,7 @@ func countFunc(q query, t iterator) interface{} {
// sumFunc is a XPath Node Set functions sum(node-set).
func sumFunc(q query, t iterator) interface{} {
var sum float64
- switch typ := q.Evaluate(t).(type) {
+ switch typ := functionArgs(q).Evaluate(t).(type) {
case query:
for node := typ.Select(t); node != nil; node = typ.Select(t) {
if v, err := strconv.ParseFloat(node.Value(), 64); err == nil {
@@ -116,19 +132,19 @@ func asNumber(t iterator, o interface{}) float64 {
// ceilingFunc is a XPath Node Set functions ceiling(node-set).
func ceilingFunc(q query, t iterator) interface{} {
- val := asNumber(t, q.Evaluate(t))
+ val := asNumber(t, functionArgs(q).Evaluate(t))
return math.Ceil(val)
}
// floorFunc is a XPath Node Set functions floor(node-set).
func floorFunc(q query, t iterator) interface{} {
- val := asNumber(t, q.Evaluate(t))
+ val := asNumber(t, functionArgs(q).Evaluate(t))
return math.Floor(val)
}
// roundFunc is a XPath Node Set functions round(node-set).
func roundFunc(q query, t iterator) interface{} {
- val := asNumber(t, q.Evaluate(t))
+ val := asNumber(t, functionArgs(q).Evaluate(t))
//return math.Round(val)
return round(val)
}
@@ -201,7 +217,7 @@ func asBool(t iterator, v interface{}) bool {
case *NodeIterator:
return v.MoveNext()
case bool:
- return bool(v)
+ return v
case float64:
return v != 0
case string:
@@ -239,19 +255,19 @@ func asString(t iterator, v interface{}) string {
// booleanFunc is a XPath functions boolean([node-set]).
func booleanFunc(q query, t iterator) interface{} {
- v := q.Evaluate(t)
+ v := functionArgs(q).Evaluate(t)
return asBool(t, v)
}
// numberFunc is a XPath functions number([node-set]).
func numberFunc(q query, t iterator) interface{} {
- v := q.Evaluate(t)
+ v := functionArgs(q).Evaluate(t)
return asNumber(t, v)
}
// stringFunc is a XPath functions string([node-set]).
func stringFunc(q query, t iterator) interface{} {
- v := q.Evaluate(t)
+ v := functionArgs(q).Evaluate(t)
return asString(t, v)
}
@@ -338,15 +354,10 @@ func containsFunc(arg1, arg2 query) func(query, iterator) interface{} {
}
}
-var (
- regnewline = regexp.MustCompile(`[\r\n\t]`)
- regseqspace = regexp.MustCompile(`\s{2,}`)
-)
-
// normalizespaceFunc is XPath functions normalize-space(string?)
func normalizespaceFunc(q query, t iterator) interface{} {
var m string
- switch typ := q.Evaluate(t).(type) {
+ switch typ := functionArgs(q).Evaluate(t).(type) {
case string:
m = typ
case query:
@@ -356,10 +367,26 @@ func normalizespaceFunc(q query, t iterator) interface{} {
}
m = node.Value()
}
- m = strings.TrimSpace(m)
- m = regnewline.ReplaceAllString(m, " ")
- m = regseqspace.ReplaceAllString(m, " ")
- return m
+ var b = builderPool.Get().(stringBuilder)
+ b.Grow(len(m))
+
+ runeStr := []rune(strings.TrimSpace(m))
+ l := len(runeStr)
+ for i := range runeStr {
+ r := runeStr[i]
+ isSpace := unicode.IsSpace(r)
+ if !(isSpace && (i+1 < l && unicode.IsSpace(runeStr[i+1]))) {
+ if isSpace {
+ r = ' '
+ }
+ b.WriteRune(r)
+ }
+ }
+ result := b.String()
+ b.Reset()
+ builderPool.Put(b)
+
+ return result
}
// substringFunc is XPath functions substring function returns a part of a given string.
@@ -466,7 +493,7 @@ func translateFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} {
src := asString(t, functionArgs(arg2).Evaluate(t))
dst := asString(t, functionArgs(arg3).Evaluate(t))
- var replace []string
+ replace := make([]string, 0, len(src))
for i, s := range src {
d := ""
if i < len(dst) {
@@ -491,7 +518,7 @@ func replaceFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} {
// notFunc is XPATH functions not(expression) function operation.
func notFunc(q query, t iterator) interface{} {
- switch v := q.Evaluate(t).(type) {
+ switch v := functionArgs(q).Evaluate(t).(type) {
case bool:
return !v
case query:
@@ -507,20 +534,25 @@ func notFunc(q query, t iterator) interface{} {
// concat( string1 , string2 [, stringn]* )
func concatFunc(args ...query) func(query, iterator) interface{} {
return func(q query, t iterator) interface{} {
- var a []string
+ b := builderPool.Get().(stringBuilder)
for _, v := range args {
v = functionArgs(v)
+
switch v := v.Evaluate(t).(type) {
case string:
- a = append(a, v)
+ b.WriteString(v)
case query:
node := v.Select(t)
if node != nil {
- a = append(a, node.Value())
+ b.WriteString(node.Value())
}
}
}
- return strings.Join(a, "")
+ result := b.String()
+ b.Reset()
+ builderPool.Put(b)
+
+ return result
}
}
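
A standalone sketch (not part of this diff) of the whitespace-collapsing technique that replaces the regexes in normalizespaceFunc, using the same pooled-builder idea; the helper name normalizeSpace and the pool variable are illustrative only:

```go
package main

import (
	"fmt"
	"strings"
	"sync"
	"unicode"
)

// pool mirrors the builderPool idea: reuse builders across calls instead of
// allocating (and regex-matching) on every normalize-space() evaluation.
var pool = sync.Pool{New: func() interface{} { return &strings.Builder{} }}

func normalizeSpace(s string) string {
	b := pool.Get().(*strings.Builder)
	defer func() { b.Reset(); pool.Put(b) }()
	b.Grow(len(s))

	runes := []rune(strings.TrimSpace(s))
	for i, r := range runes {
		if unicode.IsSpace(r) {
			// Skip this rune if the next one is also whitespace, so any run of
			// whitespace collapses to a single space.
			if i+1 < len(runes) && unicode.IsSpace(runes[i+1]) {
				continue
			}
			r = ' '
		}
		b.WriteRune(r)
	}
	return b.String()
}

func main() {
	fmt.Println(normalizeSpace("  a\t\tb \n c  ")) // "a b c"
}
```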
diff --git a/vendor/github.com/antchfx/xpath/func_go110.go b/vendor/github.com/antchfx/xpath/func_go110.go
index 500880fa..6df30d3d 100644
--- a/vendor/github.com/antchfx/xpath/func_go110.go
+++ b/vendor/github.com/antchfx/xpath/func_go110.go
@@ -2,8 +2,15 @@
package xpath
-import "math"
+import (
+ "math"
+ "strings"
+)
func round(f float64) int {
return int(math.Round(f))
}
+
+func newStringBuilder() stringBuilder {
+ return &strings.Builder{}
+}
diff --git a/vendor/github.com/antchfx/xpath/func_pre_go110.go b/vendor/github.com/antchfx/xpath/func_pre_go110.go
index 043616b3..335141f7 100644
--- a/vendor/github.com/antchfx/xpath/func_pre_go110.go
+++ b/vendor/github.com/antchfx/xpath/func_pre_go110.go
@@ -2,7 +2,10 @@
package xpath
-import "math"
+import (
+ "bytes"
+ "math"
+)
// math.Round() is supported by Go 1.10+,
// This method just compatible for version <1.10.
@@ -13,3 +16,7 @@ func round(f float64) int {
}
return int(f + math.Copysign(0.5, f))
}
+
+func newStringBuilder() stringBuilder {
+ return &bytes.Buffer{}
+}
diff --git a/vendor/github.com/antchfx/xpath/operator.go b/vendor/github.com/antchfx/xpath/operator.go
index f9c10bcd..8c2f31f8 100644
--- a/vendor/github.com/antchfx/xpath/operator.go
+++ b/vendor/github.com/antchfx/xpath/operator.go
@@ -173,7 +173,7 @@ func cmpNodeSetNodeSet(t iterator, op string, m, n interface{}) bool {
if y == nil {
return false
}
- return cmpStringStringF(op,x.Value(),y.Value())
+ return cmpStringStringF(op, x.Value(), y.Value())
}
func cmpStringNumeric(t iterator, op string, m, n interface{}) bool {
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 90a2b316..062eea6f 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -10,10 +10,10 @@ github.com/andybalholm/cascadia
# github.com/antchfx/htmlquery v1.2.3
## explicit; go 1.14
github.com/antchfx/htmlquery
-# github.com/antchfx/xmlquery v1.2.4
+# github.com/antchfx/xmlquery v1.3.1
## explicit; go 1.14
github.com/antchfx/xmlquery
-# github.com/antchfx/xpath v1.1.8
+# github.com/antchfx/xpath v1.1.10
## explicit
github.com/antchfx/xpath
# github.com/apokalyptik/cfg v0.0.0-20160401174707-703f89116901