racker / node-elementtree Goto Github PK

View Code? Open in Web Editor NEW

131.0 158.0 27.0 75 KB

Port of Python's Element Tree module to Node.js

License: Apache License 2.0

JavaScript 98.67% Makefile 1.33%

node-elementtree's Introduction

node-elementtree

node-elementtree is a Node.js XML parser and serializer based upon the Python ElementTree v1.3 module.

Installation

$ npm install elementtree

Using the library

For usage, refer to the Python ElementTree library documentation - http://effbot.org/zone/element-index.htm#usage.

Supported XPath expressions in find, findall and findtext methods are listed on http://effbot.org/zone/element-xpath.htm.

Example 1 – Creating An XML Document

This example shows how to build a valid XML document that can be published to Atom Hopper. Atom Hopper is used internally as a bridge from products all the way to collecting revenue, called “Usage.” MaaS and other products send similar events to it every time a user performs an action on a resource (e.g. creates, updates or deletes). Below is an example of leveraging the API to create a new XML document.

var et = require('elementtree');
var XML = et.XML;
var ElementTree = et.ElementTree;
var element = et.Element;
var subElement = et.SubElement;

var date, root, tenantId, serviceName, eventType, usageId, dataCenter, region,
checks, resourceId, category, startTime, resourceName, etree, xml;

date = new Date();

root = element('entry');
root.set('xmlns', 'http://www.w3.org/2005/Atom');

tenantId = subElement(root, 'TenantId');
tenantId.text = '12345';

serviceName = subElement(root, 'ServiceName');
serviceName.text = 'MaaS';

resourceId = subElement(root, 'ResourceID');
resourceId.text = 'enAAAA';

usageId = subElement(root, 'UsageID');
usageId.text = '550e8400-e29b-41d4-a716-446655440000';

eventType = subElement(root, 'EventType');
eventType.text = 'create';

category = subElement(root, 'category');
category.set('term', 'monitoring.entity.create');

dataCenter = subElement(root, 'DataCenter');
dataCenter.text = 'global';

region = subElement(root, 'Region');
region.text = 'global';

startTime = subElement(root, 'StartTime');
startTime.text = date;

resourceName = subElement(root, 'ResourceName');
resourceName.text = 'entity';

etree = new ElementTree(root);
xml = etree.write({'xml_declaration': false});
console.log(xml);

As you can see, both et.Element and et.SubElement are factory methods which return a new instance of the Element and SubElement classes, respectively. When you create a new element (tag), you can use the set method to set an attribute. To set the tag value, assign a value to the .text attribute.

This example would output a document that looks like this:

<entry xmlns="http://www.w3.org/2005/Atom">
  <TenantId>12345</TenantId>
  <ServiceName>MaaS</ServiceName>
  <ResourceID>enAAAA</ResourceID>
  <UsageID>550e8400-e29b-41d4-a716-446655440000</UsageID>
  <EventType>create</EventType>
  <category term="monitoring.entity.create"/>
  <DataCenter>global</DataCenter>
  <Region>global</Region>
  <StartTime>Sun Apr 29 2012 16:37:32 GMT-0700 (PDT)</StartTime>
  <ResourceName>entity</ResourceName>
</entry>

Example 2 – Parsing An XML Document

This example shows how to parse an XML document and use simple XPath selectors. For demonstration purposes, we will use the XML document located at https://gist.github.com/2554343.

Behind the scenes, node-elementtree uses Isaac’s sax library for parsing XML, but the library has a concept of “parsers,” which means it’s pretty simple to add support for a different parser.

var fs = require('fs');

var et = require('elementtree');

var XML = et.XML;
var ElementTree = et.ElementTree;
var element = et.Element;
var subElement = et.SubElement;

var data, etree;

data = fs.readFileSync('document.xml').toString();
etree = et.parse(data);

console.log(etree.findall('./entry/TenantId').length); // 2
console.log(etree.findtext('./entry/ServiceName')); // MaaS
console.log(etree.findall('./entry/category')[0].get('term')); // monitoring.entity.create
console.log(etree.findall('*/category/[@term="monitoring.entity.update"]').length); // 1

Build status

License

node-elementtree is distributed under the Apache license.

node-elementtree's People

Contributors

Stargazers

Watchers

node-elementtree's Issues

findall needs support for namespace prefixes

Given the following xml:
<ns:root xmlns:ns="http://mynamespace">
<ns:items>
<ns:item id="1"/>
<ns:item id="2"/>
<ns:item id="3"/>
</ns:items>
</ns:root>

Expect:
This should return three item elements:
var items = opfXml.findall('items/item');
or
There should be a way to specify the namespace or it should assume the namespace of the current element.

Actual:
This returns an empty array of elements.

ElementPath.findtext() result in "TypeError: Cannot read property 'text' of undefined"

The reason is that in JavaScript (but not Python) a truthiness test of an empty array evaluates to true. To reproduce:

var ET = require('elementtree');
var node = ET.parse('<root><node>data</node></root>').getroot();
node.findtext('non-existent-node/node');

here is a patch to address the issue:

--- elementtree-orig/lib/elementpath.js 2012-11-10 13:10:03.932756539 -0500
+++ elementtree/lib/elementpath.js  2012-11-10 13:10:45.232756505 -0500
@@ -320,7 +320,7 @@
 function find(element, path) {
   var resultElement = findall(element, path);

-  if (resultElement) {
+  if (resultElement && resultElement.length > 0) {
     return resultElement[0];
   }

@@ -330,7 +330,7 @@
 function findtext(element, path, defvalue) {
   var resultElements = findall(element, path);

-  if (resultElements) {
+  if (resultElements && resultElements.length > 0) {
     return resultElements[0].text;
   }

node.find() results in a "ReferenceError: resultElements is not defined"

to reproduce:

var ET = require('elementtree');
var node = ET.parse('<root><node>data</node></root>').getroot();
node.find('root/node');

here is a patch that resolves the issue:

--- elementtree-orig/lib/elementpath.js 2012-11-10 12:37:28.840758163 -0500
+++ elementtree/lib/elementpath.js  2012-11-10 12:38:01.776758137 -0500
@@ -318,10 +318,10 @@
 }

 function find(element, path) {
-  var resultElement = findall(element, path);
+  var resultElements = findall(element, path);

-  if (resultElement && resultElements.length > 0) {
-    return resultElement[0];
+  if (resultElements && resultElements.length > 0) {
+    return resultElements[0];
   }

   return null;

Can you push version 0.1.6 to NPM?

I need a fix to element.remove function
6af64d7

This is fixed in 0.1.6

Thanks

difficulty getting remove to work

I'm not sure if this is a bug or me not quite grokking the system. I have been utilizing node-elementtree for some time to populate XML documents and have had no problems, but I have recently tried to utilize the remove function and am getting errors.

I'm doing a pretty basic setup:

fs.readFile(coursePath, function(err, data){
    var XML = et.XML;
    var ElementTree = et.ElementTree;
    var element = et.Element;
    var subElement = et.SubElement;

    var _data, etree;

    _data = data.toString();
    etree = et.parse(_data);
    var stringID = content.id.toString();
    console.log(etree.find('./item/[@id="'+stringID+'"]'));
    var myitem = etree.find('./item/[@id="'+stringID+'"]');

I'm identifying the element that I want to remove in the myitem variable. But when I run:

etree.remove(myitem);

I'm getting the following error:

[TypeError: undefined is not a function]
TypeError: undefined is not a function
at /Users/xxxxxxx/Sites/xxxxxx/bin/server/xxxxxxx-socket-handler.js:915:19
at fs.js:291:14
at Object.oncomplete (fs.js:97:15)

Is this an issue with the system or with the user?

Best,
Phil

Please expose TreeBuilder for ease of writing custom parsers

You don't expose the TreeBuilder to the supplied parser:

ElementTree.prototype.parse = function(source, parser) {
  if (!parser) {
    parser = get_parser(constants.DEFAULT_PARSER);
    parser = new parser.XMLParser(new TreeBuilder());
  }
  parser.feed(source);

Strikes me that makes it harder to get any work done.

ElementTree.write does not check if the option 'indent' is an integer

Placing a string in the 'indent' option for ElementTree.write causes the string, with a 1 appended to it, to be printed in front of every indented tag, instead of spaces. This is a result of the write function not checking to see if the 'indent' option is an integer. Line 310 will create a single-element array with the value of options.indent + 1 as the element, instead of an array of options.indent + 1 blank elements.

API breakage and module version: Element.remove

This commit for 0.1.6: 10b3b8c

Not sure if you care about semver, but ideally since this is an incompatible API change from 0.1.5, the major version should have been bumped (or at least doc'ed)

Comments are not handled while XML parsing.

I know this is not an issue but a new request, but I feel this should be handled.
I have the XML below:

<Benchmark xmlns="http://checklists.nist.gov/xccdf/1.1" xml:lang="en-US">
 <Profile id="profile1">
    <title xmlns:xhtml="http://www.w3.org/1999/xhtml" xml:lang="en-US">text1</title>
    <select idref="This is rule 1" selected="true"/>
    <!--Rule:world_writable_sticky_dirs uses following values:-->
    <!--Rule:world_writable_sticky_dirs uses following values:-->
    <set-value idref="ssfs_master_key_timeout">20</set-value>
 </Profile> 

 <Profile id="profile2">
    <title xmlns:xhtml="http://www.w3.org/1999/xhtml" xml:lang="en-US">text2</title>
    <select idref="this is rule1" selected="true"/>
    <!--Rule:world_writable_sticky_dirs uses following values:-->
    <!--Rule:world_writable_sticky_dirs uses following values:-->
    <select idref="this is rule1" selected="true"/>
 </Profile>
</Benchmark>

Below is my code:
var fs = require('fs'); 
var et = require('elementtree');
var pd = require('pretty-data').pd;
var tailorData, etree;

tailorData = fs.readFileSync('my.xml').toString();

**etree = et.parse(tailorData); // This function misses all the comments section. So while writing etree to file, comments will be gone.**

var profile = etree.find('./Profile');
profile.set('id', 'MyElementTree');
var resultXml = etree.write();
fs.writeFileSync(tailoredXML, resultXml);

There is no handler for comments.
NPM\elementtree\lib\parsers\sax.js:
XMLParser.prototype._handleComment = function(comment) {};

Is there any way I can also include comments in et.parse(), as these comments are important in my case? I have looked at the files below from the elementtree npm package, but couldn't figure out how to handle the comments section.
NPM\elementtree\lib\parsers\sax.js
NPM\elementtree\lib\treebuilder.js

Sub-elements of sub-elements with a text value indent strangely

XML String without being indented: <root><one>TEXT<two>TEXT</two></one></root>

What I would expect when indented:

<root>
  <one>
    TEXT
    <two>TEXT</two>
  </one>
</root>

What I get from ElementTree.write:

<root>
    <one>TEXT        <two>TEXT</two>
</one>
</root>

If I remove the text value from the 'one' tag, everything indents properly

ET cannot output CData containing "]]>"

To reproduce with 0.1.6:

var et = require('elementtree'),
    root = et.Element('root'),
    cruelty = '<![CDATA[x]]>';
root.append(et.CData(cruelty));
console.log(et.tostring(root));

Buggy output:

<?xml version='1.0' encoding='utf-8'?>
<root><![CDATA[<![CDATA[x]]>]]></root>

xmllint complaint:

foo:2: parser error : Sequence ']]>' not allowed in content
<root><![CDATA[<![CDATA[x]]>]]></root>

Workaround:

root.append(et.CData(cruelty.replace(']]>', ']]]]><![CDATA[>'));

Output of workaround:

<?xml version='1.0' encoding='utf-8'?>
<root><![CDATA[<![CDATA[x]]]]><![CDATA[>]]></root>

Elements not inserted correctly

When the Element.insert method is called, the element is not actually inserted into the array. Instead, it overwrites the existing element at that index.

See https://github.com/racker/node-elementtree/blame/master/lib/elementtree.js#L101

Element.prototype.insert = function(index, element)
{
  this._children[index] = element;
};

Instead, line 103 should be changed to this._children.splice(index, 0, element);

xml content lost

XML:

Original XML (actually an HTML snippet):

<html>This Form has
    <b>"searchTemplate"</b>
    under root element.<br/>All Panels wil get data without clicking
    <b>"Search"</b>
    button,
    <b>"autoRun"</b>
    is on
    <br/>Click
    <b>"Search"</b>
    button is reqiured only for
    <b>"time2"</b>
    <p/>
</html>

After process by elementtree.

var result = et.parse(xml);
console.log(result.write());

output:

<html>This Form has
    <b>"searchTemplate"</b>
    <br/>
    <b>"Search"</b>
    <b>"autoRun"</b>
    <br/>
    <b>"Search"</b>
    <b>"time2"</b>
    <p/>
</html>

Some of the content was lost.

XPath match multiple arguments not working

Suppose you have this xml document

<?xml version="1.0" encoding="UTF-8"?>
<bookstore>
    <book title="A" lang="en" />
    <book title="A" lang="fr" />
</bookstore>

And you want to match the book with title A and language en, you can do that with the following XPath.

/bookstore/book[@title="A" and @lang="en"]

But if I try this, it returns the error Error: Invalid attribute predicate.

FileNotFoundError: [Errno 2] No such file or directory

Hi, I want to use Python to delete a specific tag in an XML file.
Here is for example what I want to delete: <edge id="-28029380" from="444363583" to="307773297" priority="21" type="railway.highspeed" spreadType="center" shape="30808.38,22280.38 30815.34,22079.71 30818.81,21993.54 30825.20,21825.23 30834.10,21591.29 30834.28,21586.33" bidi="28029380" distance="58689.58"> <lane id="-28029380_0" index="0" allow="rail_fast" speed="55.56" length="691.54" shape="30808.43,22278.88 30815.34,22079.71 30818.81,21993.54 30825.20,21825.23 30834.10,21591.29 30834.23,21587.82"/> </edge>
and here is the python code:
`#！usr/bin/python

-- coding: utf-8 --

import xml.etree.ElementTree as ET
tree = ET.parse(r'C:\Users\zzj\Desktop\test\test.xml')
root = tree.getroot()
animNode = root.find('edge')
if animNode.attrib['type'] == 'railway.highspeed':
root.remove(animNode)
tree.write(r'C:\Users\zzj\Desktop\test\finish.xml')`

Then I get the error FileNotFoundError: [Errno 2] No such file or directory. I tried many times but the path doesn't work. Could somebody help?
many thanks!

streaming writer with extension API

I have to stream XML through something that:

finds certain nodes
notes their text
finds certain following nodes
fetch a URL based on the text noted above
insert the results of fetching the URL as the text of a new child node

The original text is small enough to handle with ET.

If write could take a stream, and _serialize_xml could delegate the job of figuring out what to write to custom tag functions handed to Element, and the handler got the stream as an argument, I figure this could be pretty easy:

fetch the URL
match the encoding
pipe the results into the outputstream without closing it at finish
call back

As is, I figure I'll have to rip out ET and drop to a more conventional streaming parser and streaming writer.

Module lacks proper documentation

This module looks like it could be pretty cool to use, but there's zero documentation on it other than a link to the Python documentation. Can whoever maintains this write up some documentation for the Node usage of the module?

Maintenance volunteer

I see that this repository is not being maintained. I would volunteer to help out.

0.1.7 et.XML('<plugins>') throws error

in 0.1.7, the line et.XML('<plugins>') throws an unclosed root tag error. Looks like this was due to updating the SAX module for 0.1.7 release. In 0.1.6 it worked fine.

Stack from me investigating why cordova tests are failing if we update to 0.1.7.

Error: Unclosed root tag
Line: 0
Column: 10
Char:
    at error (/private/var/folders/f4/m5mpnbg567x6k2lbvj2p3psm0000gn/T/e2e-test/plugin_dependency_test/hello3/platforms/android/cordova/node_modules/sax/lib/sax.js:666:10)
    at strictFail (/private/var/folders/f4/m5mpnbg567x6k2lbvj2p3psm0000gn/T/e2e-test/plugin_dependency_test/hello3/platforms/android/cordova/node_modules/sax/lib/sax.js:692:7)
    at end (/private/var/folders/f4/m5mpnbg567x6k2lbvj2p3psm0000gn/T/e2e-test/plugin_dependency_test/hello3/platforms/android/cordova/node_modules/sax/lib/sax.js:673:47)
    at Object.write (/private/var/folders/f4/m5mpnbg567x6k2lbvj2p3psm0000gn/T/e2e-test/plugin_dependency_test/hello3/platforms/android/cordova/node_modules/sax/lib/sax.js:981:14)
    at Object.close (/private/var/folders/f4/m5mpnbg567x6k2lbvj2p3psm0000gn/T/e2e-test/plugin_dependency_test/hello3/platforms/android/cordova/node_modules/sax/lib/sax.js:156:38)
    at XMLParser.close (/private/var/folders/f4/m5mpnbg567x6k2lbvj2p3psm0000gn/T/e2e-test/plugin_dependency_test/hello3/platforms/android/cordova/node_modules/elementtree/lib/parsers/sax.js:52:15)
    at ElementTree.parse (/private/var/folders/f4/m5mpnbg567x6k2lbvj2p3psm0000gn/T/e2e-test/plugin_dependency_test/hello3/platforms/android/cordova/node_modules/elementtree/lib/elementtree.js:272:23)
    at Object.exports.XML (/private/var/folders/f4/m5mpnbg567x6k2lbvj2p3psm0000gn/T/e2e-test/plugin_dependency_test/hello3/platforms/android/cordova/node_modules/elementtree/lib/elementtree.js:606:13)
    at Object.graftXML (/private/var/folders/f4/m5mpnbg567x6k2lbvj2p3psm0000gn/T/e2e-test/plugin_dependency_test/hello3/platforms/android/cordova/node_modules/cordova-common/src/util/xml-helpers.js:67:32)
    at ConfigFile_graft_child [as graft_child] (/private/var/folders/f4/m5mpnbg567x6k2lbvj2p3psm0000gn/T/e2e-test/plugin_dependency_test/hello3/platforms/android/cordova/node_modules/cordova-common/src/ConfigChanges/ConfigFile.js:123:46)

Setting 'method' option to 'text' in ElementTree.write causes errors

If the 'method' option in ElementTree.write is set to 'text', which is taken as valid input, this conditional, at line 299, is run:

 if (options.method === "text") {
    _serialize_text(sb, self._root, encoding);
  }

However, 'self' is undefined in this context, as is 'encoding', and the function '_serialize_text' does not exist. Each of these will throw errors.