-
Notifications
You must be signed in to change notification settings - Fork 94
/
decoder.go
155 lines (130 loc) · 3.38 KB
/
decoder.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
package xml2json
import (
"encoding/xml"
"io"
"unicode"
"golang.org/x/net/html/charset"
)
const (
attrPrefix = "-"
contentPrefix = "#"
)
// A Decoder reads and decodes XML objects from an input stream.
type Decoder struct {
r io.Reader
err error
attributePrefix string
contentPrefix string
excludeAttrs map[string]bool
formatters []nodeFormatter
}
type element struct {
parent *element
n *Node
label string
}
func (dec *Decoder) SetAttributePrefix(prefix string) {
dec.attributePrefix = prefix
}
func (dec *Decoder) SetContentPrefix(prefix string) {
dec.contentPrefix = prefix
}
func (dec *Decoder) AddFormatters(formatters []nodeFormatter) {
dec.formatters = formatters
}
func (dec *Decoder) ExcludeAttributes(attrs []string) {
for _, attr := range attrs {
dec.excludeAttrs[attr] = true
}
}
func (dec *Decoder) DecodeWithCustomPrefixes(root *Node, contentPrefix string, attributePrefix string) error {
dec.contentPrefix = contentPrefix
dec.attributePrefix = attributePrefix
return dec.Decode(root)
}
// NewDecoder returns a new decoder that reads from r.
func NewDecoder(r io.Reader, plugins ...plugin) *Decoder {
d := &Decoder{r: r, contentPrefix: contentPrefix, attributePrefix: attrPrefix, excludeAttrs: map[string]bool{}}
for _, p := range plugins {
d = p.AddToDecoder(d)
}
return d
}
// Decode reads the next JSON-encoded value from its
// input and stores it in the value pointed to by v.
func (dec *Decoder) Decode(root *Node) error {
xmlDec := xml.NewDecoder(dec.r)
// That will convert the charset if the provided XML is non-UTF-8
xmlDec.CharsetReader = charset.NewReaderLabel
// Create first element from the root node
elem := &element{
parent: nil,
n: root,
}
for {
t, _ := xmlDec.Token()
if t == nil {
break
}
switch se := t.(type) {
case xml.StartElement:
// Build new a new current element and link it to its parent
elem = &element{
parent: elem,
n: &Node{},
label: se.Name.Local,
}
// Extract attributes as children
for _, a := range se.Attr {
if _, ok := dec.excludeAttrs[a.Name.Local]; ok {
continue
}
elem.n.AddChild(dec.attributePrefix+a.Name.Local, &Node{Data: a.Value})
}
case xml.CharData:
// Extract XML data (if any)
elem.n.Data = trimNonGraphic(string(xml.CharData(se)))
case xml.EndElement:
// And add it to its parent list
if elem.parent != nil {
elem.parent.n.AddChild(elem.label, elem.n)
}
// Then change the current element to its parent
elem = elem.parent
}
}
for _, formatter := range dec.formatters {
formatter.Format(root)
}
return nil
}
// trimNonGraphic returns a slice of the string s, with all leading and trailing
// non graphic characters and spaces removed.
//
// Graphic characters include letters, marks, numbers, punctuation, symbols,
// and spaces, from categories L, M, N, P, S, Zs.
// Spacing characters are set by category Z and property Pattern_White_Space.
func trimNonGraphic(s string) string {
if s == "" {
return s
}
var first *int
var last int
for i, r := range []rune(s) {
if !unicode.IsGraphic(r) || unicode.IsSpace(r) {
continue
}
if first == nil {
f := i // copy i
first = &f
last = i
} else {
last = i
}
}
// If first is nil, it means there are no graphic characters
if first == nil {
return ""
}
return string([]rune(s)[*first : last+1])
}