exclusivecanonicalization.go 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. package signedxml
  2. import (
  3. "sort"
  4. "strings"
  5. "github.com/beevik/etree"
  6. )
  // attribute is one non-namespace XML attribute queued for rendering: its
  // prefix, the namespace URI recorded for that prefix, its local key and its
  // value.
  type attribute struct {
  	prefix, uri, key, value string
  }

  // attributes is a list of attribute values that implements sort.Interface so
  // the list can be ordered with sort.Sort.
  type attributes []attribute

  // Len is part of sort.Interface and reports the number of attributes.
  func (a attributes) Len() int {
  	return len(a)
  }

  // Less is part of sort.Interface. Attributes are ordered with the namespace
  // URI as the primary key and the attribute key as the secondary key;
  // attributes without a namespace URI sort before all qualified ones.
  func (a attributes) Less(i, j int) bool {
  	left, right := a[i], a[j]
  	if left.uri != right.uri {
  		// Compare the URIs on their own rather than as a uri+key
  		// concatenation: concatenating lets the key bleed into the URI
  		// comparison whenever one URI is a prefix of the other. The empty
  		// URI is smaller than every non-empty one, which keeps unqualified
  		// attributes first.
  		return left.uri < right.uri
  	}
  	return left.key < right.key
  }

  // Swap is part of sort.Interface and exchanges the attributes at i and j.
  func (a attributes) Swap(i, j int) {
  	a[i], a[j] = a[j], a[i]
  }
  // ExclusiveCanonicalization implements the CanonicalizationAlgorithm
  // interface. It is used to process the
  // http://www.w3.org/2001/10/xml-exc-c14n# and
  // http://www.w3.org/2001/10/xml-exc-c14n#WithComments transform algorithms.
  type ExclusiveCanonicalization struct {
  	// WithComments keeps comment nodes in the output when true; when false
  	// comments are removed during processing.
  	WithComments bool

  	// inclusiveNamespacePrefixList holds the prefixes read from the
  	// PrefixList attribute of the transform's InclusiveNamespaces element.
  	inclusiveNamespacePrefixList []string

  	// namespaces maps a namespace prefix to the URI declared for it; it is
  	// filled in from xmlns:* attributes while elements are processed.
  	namespaces map[string]string
  }
  41. // Process is called to transfrom the XML using the ExclusiveCanonicalization
  42. // algorithm
  43. func (e ExclusiveCanonicalization) Process(inputXML string,
  44. transformXML string) (outputXML string, err error) {
  45. e.namespaces = make(map[string]string)
  46. doc := etree.NewDocument()
  47. doc.WriteSettings.CanonicalEndTags = true
  48. doc.WriteSettings.CanonicalText = true
  49. doc.WriteSettings.CanonicalAttrVal = true
  50. err = doc.ReadFromString(inputXML)
  51. if err != nil {
  52. return "", err
  53. }
  54. e.loadPrefixList(transformXML)
  55. e.processDocLevelNodes(doc)
  56. e.processRecursive(doc.Root(), nil, "")
  57. outputXML, err = doc.WriteToString()
  58. return outputXML, err
  59. }
  60. func (e *ExclusiveCanonicalization) loadPrefixList(transformXML string) {
  61. if transformXML != "" {
  62. tDoc := etree.NewDocument()
  63. tDoc.ReadFromString(transformXML)
  64. inclNSNode := tDoc.Root().SelectElement("InclusiveNamespaces")
  65. if inclNSNode != nil {
  66. prefixList := inclNSNode.SelectAttrValue("PrefixList", "")
  67. if prefixList != "" {
  68. e.inclusiveNamespacePrefixList = strings.Split(prefixList, " ")
  69. }
  70. }
  71. }
  72. }
  73. // process nodes outside of the root element
  74. func (e ExclusiveCanonicalization) processDocLevelNodes(doc *etree.Document) {
  75. // keep track of the previous node action to manage line returns in CharData
  76. previousNodeRemoved := false
  77. for i := 0; i < len(doc.Child); i++ {
  78. c := doc.Child[i]
  79. switch c := c.(type) {
  80. case *etree.Comment:
  81. if e.WithComments {
  82. previousNodeRemoved = false
  83. } else {
  84. removeTokenFromDocument(c, doc)
  85. i--
  86. previousNodeRemoved = true
  87. }
  88. case *etree.CharData:
  89. if isWhitespace(c.Data) {
  90. if previousNodeRemoved {
  91. removeTokenFromDocument(c, doc)
  92. i--
  93. previousNodeRemoved = true
  94. } else {
  95. c.Data = "\n"
  96. }
  97. }
  98. case *etree.Directive:
  99. removeTokenFromDocument(c, doc)
  100. i--
  101. previousNodeRemoved = true
  102. case *etree.ProcInst:
  103. // remove declaration, but leave other PI's
  104. if c.Target == "xml" {
  105. removeTokenFromDocument(c, doc)
  106. i--
  107. previousNodeRemoved = true
  108. } else {
  109. previousNodeRemoved = false
  110. }
  111. default:
  112. previousNodeRemoved = false
  113. }
  114. }
  115. // if the last line is CharData whitespace, then remove it
  116. if c, ok := doc.Child[len(doc.Child)-1].(*etree.CharData); ok {
  117. if isWhitespace(c.Data) {
  118. removeTokenFromDocument(c, doc)
  119. }
  120. }
  121. }
  122. func (e ExclusiveCanonicalization) processRecursive(node *etree.Element,
  123. prefixesInScope []string, defaultNS string) {
  124. newDefaultNS, newPrefixesInScope :=
  125. e.renderAttributes(node, prefixesInScope, defaultNS)
  126. for _, child := range node.Child {
  127. oldNamespaces := e.namespaces
  128. e.namespaces = copyNamespace(oldNamespaces)
  129. switch child := child.(type) {
  130. case *etree.Comment:
  131. if !e.WithComments {
  132. removeTokenFromElement(etree.Token(child), node)
  133. }
  134. case *etree.Element:
  135. e.processRecursive(child, newPrefixesInScope, newDefaultNS)
  136. }
  137. e.namespaces = oldNamespaces
  138. }
  139. }
  140. func (e ExclusiveCanonicalization) renderAttributes(node *etree.Element,
  141. prefixesInScope []string, defaultNS string) (newDefaultNS string,
  142. newPrefixesInScope []string) {
  143. currentNS := node.SelectAttrValue("xmlns", defaultNS)
  144. elementAttributes := []etree.Attr{}
  145. nsListToRender := make(map[string]string)
  146. attrListToRender := attributes{}
  147. // load map with for prefix -> uri lookup
  148. for _, attr := range node.Attr {
  149. if attr.Space == "xmlns" {
  150. e.namespaces[attr.Key] = attr.Value
  151. }
  152. }
  153. // handle the namespace of the node itself
  154. if node.Space != "" {
  155. if !contains(prefixesInScope, node.Space) {
  156. nsListToRender["xmlns:"+node.Space] = e.namespaces[node.Space]
  157. prefixesInScope = append(prefixesInScope, node.Space)
  158. }
  159. } else if defaultNS != currentNS {
  160. newDefaultNS = currentNS
  161. elementAttributes = append(elementAttributes,
  162. etree.Attr{Key: "xmlns", Value: currentNS})
  163. }
  164. for _, attr := range node.Attr {
  165. // include the namespaces if they are in the inclusiveNamespacePrefixList
  166. if attr.Space == "xmlns" {
  167. if !contains(prefixesInScope, attr.Key) &&
  168. contains(e.inclusiveNamespacePrefixList, attr.Key) {
  169. nsListToRender["xmlns:"+attr.Key] = attr.Value
  170. prefixesInScope = append(prefixesInScope, attr.Key)
  171. }
  172. }
  173. // include namespaces for qualfied attributes
  174. if attr.Space != "" &&
  175. attr.Space != "xmlns" &&
  176. !contains(prefixesInScope, attr.Space) {
  177. if attr.Space != "xml"{
  178. nsListToRender["xmlns:"+attr.Space] = e.namespaces[attr.Space]
  179. }
  180. prefixesInScope = append(prefixesInScope, attr.Space)
  181. }
  182. // inclued all non-namespace attributes
  183. if attr.Space != "xmlns" && attr.Key != "xmlns" {
  184. attrListToRender = append(attrListToRender,
  185. attribute{
  186. prefix: attr.Space,
  187. uri: e.namespaces[attr.Space],
  188. key: attr.Key,
  189. value: attr.Value,
  190. })
  191. }
  192. }
  193. // sort and add the namespace attributes first
  194. sortedNSList := getSortedNamespaces(nsListToRender)
  195. elementAttributes = append(elementAttributes, sortedNSList...)
  196. // then sort and add the non-namespace attributes
  197. sortedAttributes := getSortedAttributes(attrListToRender)
  198. elementAttributes = append(elementAttributes, sortedAttributes...)
  199. // replace the nodes attributes with the sorted copy
  200. node.Attr = elementAttributes
  201. return currentNS, prefixesInScope
  202. }
  // contains reports whether value is equal to any element of slice. A nil or
  // empty slice contains nothing.
  func contains(slice []string, value string) bool {
  	found := false
  	for i := 0; i < len(slice) && !found; i++ {
  		found = slice[i] == value
  	}
  	return found
  }
  211. // getSortedNamespaces sorts the namespace attributes by their prefix
  212. func getSortedNamespaces(list map[string]string) []etree.Attr {
  213. var keys []string
  214. for k := range list {
  215. keys = append(keys, k)
  216. }
  217. sort.Strings(keys)
  218. elem := etree.Element{}
  219. for _, k := range keys {
  220. elem.CreateAttr(k, list[k])
  221. }
  222. return elem.Attr
  223. }
  224. // getSortedAttributes sorts attributes by their namespace URIs
  225. func getSortedAttributes(list attributes) []etree.Attr {
  226. sort.Sort(list)
  227. attrs := make([]etree.Attr, len(list))
  228. for i, a := range list {
  229. attrs[i] = etree.Attr{
  230. Space: a.prefix,
  231. Key: a.key,
  232. Value: a.value,
  233. }
  234. }
  235. return attrs
  236. }
  237. func removeTokenFromElement(token etree.Token, e *etree.Element) *etree.Token {
  238. for i, t := range e.Child {
  239. if t == token {
  240. e.Child = append(e.Child[0:i], e.Child[i+1:]...)
  241. return &t
  242. }
  243. }
  244. return nil
  245. }
  246. func removeTokenFromDocument(token etree.Token, d *etree.Document) *etree.Token {
  247. for i, t := range d.Child {
  248. if t == token {
  249. d.Child = append(d.Child[0:i], d.Child[i+1:]...)
  250. return &t
  251. }
  252. }
  253. return nil
  254. }
  // isWhitespace reports whether s consists solely of space, tab, line feed
  // and carriage return bytes. The empty string counts as whitespace.
  func isWhitespace(s string) bool {
  	for _, b := range []byte(s) {
  		switch b {
  		case ' ', '\t', '\n', '\r':
  			// still whitespace, keep scanning
  		default:
  			return false
  		}
  	}
  	return true
  }
  // copyNamespace returns a new map holding the same prefix -> URI entries as
  // namespaces. The result is never nil and shares no storage with the input,
  // so changes to one map are not visible through the other.
  func copyNamespace(namespaces map[string]string) map[string]string {
  	clone := make(map[string]string, len(namespaces))
  	for prefix, uri := range namespaces {
  		clone[prefix] = uri
  	}
  	return clone
  }
  271. }