node-xml
Version:
An xml parser for node.js written in Javascript.
1,262 lines (1,006 loc) • 36.4 kB
JavaScript
// node-xml
// An xml parser for node.js
// (C) Rob Righter (@robrighter) 2009 - 2010, Licensed under the MIT-LICENSE
// Contributions from David Joham
(function () {
// CONSTANTS
var whitespace = "\n\r\t ";
//XMLP is a pull-based parser. The calling application passes in a XML string
//to the constructor, then repeatedly calls .next() to parse the next segment.
//.next() returns a flag indicating what type of segment was found, and stores
//data temporarily in couple member variables (name, content, array of
//attributes), which can be accessed by several .get____() methods.
//
//Basically, XMLP is the lowest common denominator parser - an very simple
//API which other wrappers can be built against.
var XMLP = function(strXML) {
// Normalize line breaks
strXML = SAXStrings.replace(strXML, null, null, "\r\n", "\n");
strXML = SAXStrings.replace(strXML, null, null, "\r", "\n");
this.m_xml = strXML;
this.m_iP = 0;
this.m_iState = XMLP._STATE_PROLOG;
this.m_stack = new Stack();
this._clearAttributes();
this.m_pause = false;
this.m_preInterruptIState = XMLP._STATE_PROLOG;
this.m_namespaceList = new Array();
this.m_chunkTransitionContinuation = null;
}
// CONSTANTS (these must be below the constructor)
XMLP._NONE = 0;
XMLP._ELM_B = 1;
XMLP._ELM_E = 2;
XMLP._ELM_EMP = 3;
XMLP._ATT = 4;
XMLP._TEXT = 5;
XMLP._ENTITY = 6;
XMLP._PI = 7;
XMLP._CDATA = 8;
XMLP._COMMENT = 9;
XMLP._DTD = 10;
XMLP._ERROR = 11;
XMLP._INTERRUPT = 12;
XMLP._CONT_XML = 0;
XMLP._CONT_ALT = 1;
XMLP._ATT_NAME = 0;
XMLP._ATT_VAL = 1;
XMLP._STATE_PROLOG = 1;
XMLP._STATE_DOCUMENT = 2;
XMLP._STATE_MISC = 3;
XMLP._errs = new Array();
XMLP._errs[XMLP.ERR_CLOSE_PI = 0 ] = "PI: missing closing sequence";
XMLP._errs[XMLP.ERR_CLOSE_DTD = 1 ] = "DTD: missing closing sequence";
XMLP._errs[XMLP.ERR_CLOSE_COMMENT = 2 ] = "Comment: missing closing sequence";
XMLP._errs[XMLP.ERR_CLOSE_CDATA = 3 ] = "CDATA: missing closing sequence";
XMLP._errs[XMLP.ERR_CLOSE_ELM = 4 ] = "Element: missing closing sequence";
XMLP._errs[XMLP.ERR_CLOSE_ENTITY = 5 ] = "Entity: missing closing sequence";
XMLP._errs[XMLP.ERR_PI_TARGET = 6 ] = "PI: target is required";
XMLP._errs[XMLP.ERR_ELM_EMPTY = 7 ] = "Element: cannot be both empty and closing";
XMLP._errs[XMLP.ERR_ELM_NAME = 8 ] = "Element: name must immediatly follow \"<\"";
XMLP._errs[XMLP.ERR_ELM_LT_NAME = 9 ] = "Element: \"<\" not allowed in element names";
XMLP._errs[XMLP.ERR_ATT_VALUES = 10] = "Attribute: values are required and must be in quotes";
XMLP._errs[XMLP.ERR_ATT_LT_NAME = 11] = "Element: \"<\" not allowed in attribute names";
XMLP._errs[XMLP.ERR_ATT_LT_VALUE = 12] = "Attribute: \"<\" not allowed in attribute values";
XMLP._errs[XMLP.ERR_ATT_DUP = 13] = "Attribute: duplicate attributes not allowed";
XMLP._errs[XMLP.ERR_ENTITY_UNKNOWN = 14] = "Entity: unknown entity";
XMLP._errs[XMLP.ERR_INFINITELOOP = 15] = "Infininte loop";
XMLP._errs[XMLP.ERR_DOC_STRUCTURE = 16] = "Document: only comments, processing instructions, or whitespace allowed outside of document element";
XMLP._errs[XMLP.ERR_ELM_NESTING = 17] = "Element: must be nested correctly";
XMLP.prototype.continueParsing = function(strXML) {
if(this.m_chunkTransitionContinuation){
strXML = this.m_chunkTransitionContinuation + strXML;
}
// Normalize line breaks
strXML = SAXStrings.replace(strXML, null, null, "\r\n", "\n");
strXML = SAXStrings.replace(strXML, null, null, "\r", "\n");
this.m_xml = strXML;
this.m_iP = 0;
this.m_iState = XMLP._STATE_DOCUMENT;
//this.m_stack = new Stack();
//this._clearAttributes();
this.m_pause = false;
this.m_preInterruptIState = XMLP._STATE_PROLOG;
this.m_chunkTransitionContinuation = null;
}
XMLP.prototype._addAttribute = function(name, value) {
this.m_atts[this.m_atts.length] = new Array(name, value);
}
XMLP.prototype._checkStructure = function(iEvent) {
if(XMLP._STATE_PROLOG == this.m_iState) {
if((XMLP._TEXT == iEvent) || (XMLP._ENTITY == iEvent)) {
if(SAXStrings.indexOfNonWhitespace(this.getContent(), this.getContentBegin(), this.getContentEnd()) != -1) {
return this._setErr(XMLP.ERR_DOC_STRUCTURE);
}
}
if((XMLP._ELM_B == iEvent) || (XMLP._ELM_EMP == iEvent)) {
this.m_iState = XMLP._STATE_DOCUMENT;
// Don't return - fall through to next state
}
}
if(XMLP._STATE_DOCUMENT == this.m_iState) {
if((XMLP._ELM_B == iEvent) || (XMLP._ELM_EMP == iEvent)) {
this.m_stack.push(this.getName());
}
if((XMLP._ELM_E == iEvent) || (XMLP._ELM_EMP == iEvent)) {
var strTop = this.m_stack.pop();
if((strTop == null) || (strTop != this.getName())) {
return this._setErr(XMLP.ERR_ELM_NESTING);
}
}
if(this.m_stack.count() == 0) {
this.m_iState = XMLP._STATE_MISC;
return iEvent;
}
}
if(XMLP._STATE_MISC == this.m_iState) {
if((XMLP._ELM_B == iEvent) || (XMLP._ELM_E == iEvent) || (XMLP._ELM_EMP == iEvent) || (XMLP.EVT_DTD == iEvent)) {
return this._setErr(XMLP.ERR_DOC_STRUCTURE);
}
if((XMLP._TEXT == iEvent) || (XMLP._ENTITY == iEvent)) {
if(SAXStrings.indexOfNonWhitespace(this.getContent(), this.getContentBegin(), this.getContentEnd()) != -1) {
return this._setErr(XMLP.ERR_DOC_STRUCTURE);
}
}
}
return iEvent;
}
XMLP.prototype._clearAttributes = function() {
this.m_atts = new Array();
}
XMLP.prototype._findAttributeIndex = function(name) {
for(var i = 0; i < this.m_atts.length; i++) {
if(this.m_atts[i][XMLP._ATT_NAME] == name) {
return i;
}
}
return -1;
}
XMLP.prototype.getAttributeCount = function() {
return this.m_atts ? this.m_atts.length : 0;
}
XMLP.prototype.getAttributeName = function(index) {
return ((index < 0) || (index >= this.m_atts.length)) ? null : this.m_atts[index][XMLP._ATT_NAME];
}
XMLP.prototype.getAttributeValue = function(index) {
return ((index < 0) || (index >= this.m_atts.length)) ? null : __unescapeString(this.m_atts[index][XMLP._ATT_VAL]);
}
XMLP.prototype.getAttributeValueByName = function(name) {
return this.getAttributeValue(this._findAttributeIndex(name));
}
XMLP.prototype.getColumnNumber = function() {
return SAXStrings.getColumnNumber(this.m_xml, this.m_iP);
}
XMLP.prototype.getContent = function() {
return (this.m_cSrc == XMLP._CONT_XML) ? this.m_xml : this.m_cAlt;
}
XMLP.prototype.getContentBegin = function() {
return this.m_cB;
}
XMLP.prototype.getContentEnd = function() {
return this.m_cE;
}
XMLP.prototype.getLineNumber = function() {
return SAXStrings.getLineNumber(this.m_xml, this.m_iP);
}
XMLP.prototype.getName = function() {
return this.m_name;
}
XMLP.prototype.pause = function(){
this.m_pause = true;
}
XMLP.prototype.resume = function(){
this.m_pause = false;
this.m_iState = this.m_preInterruptIState;
}
XMLP.prototype.next = function() {
if(!this.m_pause){
return this._checkStructure(this._parse());
}
else{
//save off the current event loop state and set the state to interrupt
this.m_preInterruptIState = this.m_iState;
return XMLP._INTERRUPT;
}
}
XMLP.prototype._parse = function() {
if(this.m_iP == this.m_xml.length) {
return XMLP._NONE;
}
function _indexOf(needle, haystack, start) {
// This is an improvement over the native indexOf because it stops at the
// end of the needle and doesn't continue to the end of the haystack looking.
for(var i = 0; i < needle.length; i++) {
if(needle.charAt(i) != haystack.charAt(start + i))
return -1;
}
return start;
}
var fc = this.m_xml.charAt(this.m_iP);
if (fc !== '<' && fc !== '&') {
return this._parseText (this.m_iP);
}
else if(this.m_iP == _indexOf("<?", this.m_xml, this.m_iP)) {
return this._parsePI (this.m_iP + 2);
}
else if(this.m_iP == _indexOf("<!DOCTYPE", this.m_xml, this.m_iP)) {
return this._parseDTD (this.m_iP + 9);
}
else if(this.m_iP == _indexOf("<!--", this.m_xml, this.m_iP)) {
return this._parseComment(this.m_iP + 4);
}
else if(this.m_iP == _indexOf("<![CDATA[", this.m_xml, this.m_iP)) {
return this._parseCDATA (this.m_iP + 9);
}
else if(this.m_iP == _indexOf("<", this.m_xml, this.m_iP)) {
return this._parseElement(this.m_iP + 1);
}
else if(this.m_iP == _indexOf("&", this.m_xml, this.m_iP)) {
return this._parseEntity (this.m_iP + 1);
}
else{
return this._parseText (this.m_iP);
}
}
////////// NAMESPACE SUPPORT //////////////////////////////////////////
XMLP.prototype._parsePrefixAndElementName = function (elementlabel){
splits = elementlabel.split(':',2);
return { prefix : ((splits.length === 1) ? '' : splits[0]), name : ((splits.length === 1) ? elementlabel : splits[1]), };
}
XMLP.prototype._parseNamespacesAndAtts = function (atts){
//translate namespaces into objects with "prefix","uri", "scopetag" Add them to: this.m_namespaceList
//The function should return a new list of tag attributes with the namespaces filtered
that = this;
var newnamespaces = [];
var filteredatts = [];
atts.map(function (item){
if(item[0].slice(0,5) === "xmlns"){
newnamespaces.push({
prefix : item[0].slice(6),
uri : item[1],
scopetag : that.m_name,
});
}
else{
filteredatts.push(item);
}
return "not used";
});
this.m_namespaceList = this.m_namespaceList.concat(newnamespaces);
return [ filteredatts, newnamespaces.map(function(item){return [item.prefix,item.uri];}) ];
}
XMLP.prototype._getContextualNamespace = function (prefix){
if(prefix !== ''){
for(item in this.m_namespaceList){
item = this.m_namespaceList[item];
if(item.prefix === prefix){
return item.uri;
}
}
}
//no match was found for the prefix so pop off the first non-prefix namespace
for(var i = (this.m_namespaceList.length-1); i>= 0; i--){
var item = this.m_namespaceList[i];
if(item.prefix === ''){
return item.uri;
}
}
//still nothing, lets just return an empty string
return '';
}
XMLP.prototype._removeExpiredNamesapces = function (closingtagname) {
//remove the expiring namespaces from the list (you can id them by scopetag)
var keeps = [];
this.m_namespaceList.map(function (item){
if(item.scopetag !== closingtagname){
keeps.push(item);
}
});
this.m_namespaceList = keeps;
}
////////////////////////////////////////////////////////////////////////
XMLP.prototype._parseAttribute = function(iB, iE) {
var iNB, iNE, iEq, iVB, iVE;
var cQuote, strN, strV;
this.m_cAlt = ""; //resets the value so we don't use an old one by accident (see testAttribute7 in the test suite)
iNB = SAXStrings.indexOfNonWhitespace(this.m_xml, iB, iE);
if((iNB == -1) ||(iNB >= iE)) {
return iNB;
}
iEq = this.m_xml.indexOf("=", iNB);
if((iEq == -1) || (iEq > iE)) {
return this._setErr(XMLP.ERR_ATT_VALUES);
}
iNE = SAXStrings.lastIndexOfNonWhitespace(this.m_xml, iNB, iEq);
iVB = SAXStrings.indexOfNonWhitespace(this.m_xml, iEq + 1, iE);
if((iVB == -1) ||(iVB > iE)) {
return this._setErr(XMLP.ERR_ATT_VALUES);
}
cQuote = this.m_xml.charAt(iVB);
if(SAXStrings.QUOTES.indexOf(cQuote) == -1) {
return this._setErr(XMLP.ERR_ATT_VALUES);
}
iVE = this.m_xml.indexOf(cQuote, iVB + 1);
if((iVE == -1) ||(iVE > iE)) {
return this._setErr(XMLP.ERR_ATT_VALUES);
}
strN = this.m_xml.substring(iNB, iNE + 1);
strV = this.m_xml.substring(iVB + 1, iVE);
if(strN.indexOf("<") != -1) {
return this._setErr(XMLP.ERR_ATT_LT_NAME);
}
if(strV.indexOf("<") != -1) {
return this._setErr(XMLP.ERR_ATT_LT_VALUE);
}
strV = SAXStrings.replace(strV, null, null, "\n", " ");
strV = SAXStrings.replace(strV, null, null, "\t", " ");
iRet = this._replaceEntities(strV);
if(iRet == XMLP._ERROR) {
return iRet;
}
strV = this.m_cAlt;
if(this._findAttributeIndex(strN) == -1) {
this._addAttribute(strN, strV);
}
else {
return this._setErr(XMLP.ERR_ATT_DUP);
}
this.m_iP = iVE + 2;
return XMLP._ATT;
}
XMLP.prototype._parseCDATA = function(iB) {
var iE = this.m_xml.indexOf("]]>", iB);
if (iE == -1) {
//This item never closes, although it could be a malformed document, we will assume that we are mid-chunck, save the string and reurn as interrupted
this.m_chunkTransitionContinuation = this.m_xml.slice(iB-9);//the '-<![CDATA[ adds the '<!DOCTYPE' back into the string
return XMLP._INTERRUPT;
//return this._setErr(XMLP.ERR_CLOSE_CDATA);
}
this._setContent(XMLP._CONT_XML, iB, iE);
this.m_iP = iE + 3;
return XMLP._CDATA;
}
XMLP.prototype._parseComment = function(iB) {
var iE = this.m_xml.indexOf("-" + "->", iB);
if (iE == -1) {
//This item never closes, although it could be a malformed document, we will assume that we are mid-chunck, save the string and reurn as interrupted
this.m_chunkTransitionContinuation = this.m_xml.slice(iB-4);//the '-4' adds the '<!--' back into the string
return XMLP._INTERRUPT;
//return this._setErr(XMLP.ERR_CLOSE_COMMENT);
}
this._setContent(XMLP._CONT_XML, iB, iE);
this.m_iP = iE + 3;
return XMLP._COMMENT;
}
XMLP.prototype._parseDTD = function(iB) {
// Eat DTD
var iE, strClose, iInt, iLast;
iE = this.m_xml.indexOf(">", iB);
if(iE == -1) {
//This item never closes, although it could be a malformed document, we will assume that we are mid-chunck, save the string and reurn as interrupted
this.m_chunkTransitionContinuation = this.m_xml.slice(iB-9);//the '-9' adds the '<!DOCTYPE' back into the string
return XMLP._INTERRUPT;
//return this._setErr(XMLP.ERR_CLOSE_DTD);
}
iInt = this.m_xml.indexOf("[", iB);
strClose = ((iInt != -1) && (iInt < iE)) ? "]>" : ">";
while(true) {
// DEBUG: Remove
if(iE == iLast) {
return this._setErr(XMLP.ERR_INFINITELOOP);
}
iLast = iE;
// DEBUG: Remove End
iE = this.m_xml.indexOf(strClose, iB);
if(iE == -1) {
return this._setErr(XMLP.ERR_CLOSE_DTD);
}
// Make sure it is not the end of a CDATA section
if (this.m_xml.substring(iE - 1, iE + 2) != "]]>") {
break;
}
}
this.m_iP = iE + strClose.length;
return XMLP._DTD;
}
XMLP.prototype._parseElement = function(iB) {
util = require('util');
var iE, iDE, iNE, iRet;
var iType, strN, iLast;
iDE = iE = this.m_xml.indexOf(">", iB);
if(iE == -1) {
//This element never closes, although it could be a malformed document, we will assume that we are mid-chunck, save the string and reurn as interrupted
this.m_chunkTransitionContinuation = this.m_xml.slice(iB-1);//the '-1' adds the '<' back into the string
return XMLP._INTERRUPT;
//return this._setErr(XMLP.ERR_CLOSE_ELM);
}
if(this.m_xml.charAt(iB) == "/") {
iType = XMLP._ELM_E;
iB++;
} else {
iType = XMLP._ELM_B;
}
if(this.m_xml.charAt(iE - 1) == "/") {
if(iType == XMLP._ELM_E) {
return this._setErr(XMLP.ERR_ELM_EMPTY);
}
iType = XMLP._ELM_EMP;
iDE--;
}
iDE = SAXStrings.lastIndexOfNonWhitespace(this.m_xml, iB, iDE);
//djohack
//hack to allow for elements with single character names to be recognized
if (iE - iB != 1 ) {
if(SAXStrings.indexOfNonWhitespace(this.m_xml, iB, iDE) != iB) {
return this._setErr(XMLP.ERR_ELM_NAME);
}
}
// end hack -- original code below
/*
if(SAXStrings.indexOfNonWhitespace(this.m_xml, iB, iDE) != iB)
return this._setErr(XMLP.ERR_ELM_NAME);
*/
this._clearAttributes();
iNE = SAXStrings.indexOfWhitespace(this.m_xml, iB, iDE);
if(iNE == -1) {
iNE = iDE + 1;
}
else {
this.m_iP = iNE;
while(this.m_iP < iDE) {
// DEBUG: Remove
if(this.m_iP == iLast) return this._setErr(XMLP.ERR_INFINITELOOP);
iLast = this.m_iP;
// DEBUG: Remove End
iRet = this._parseAttribute(this.m_iP, iDE);
if(iRet == XMLP._ERROR) return iRet;
}
}
strN = this.m_xml.substring(iB, iNE);
if(strN.indexOf("<") != -1) {
return this._setErr(XMLP.ERR_ELM_LT_NAME);
}
this.m_name = strN;
this.m_iP = iE + 1;
return iType;
}
XMLP.prototype._parseEntity = function(iB) {
var iE = this.m_xml.indexOf(";", iB);
if(iE == -1) {
//This item never closes, although it could be a malformed document, we will assume that we are mid-chunck, save the string and reurn as interrupted
this.m_chunkTransitionContinuation = this.m_xml.slice(iB-1);//the '-1' adds the '&' back into the string
return XMLP._INTERRUPT;
//return this._setErr(XMLP.ERR_CLOSE_ENTITY);
}
this.m_iP = iE + 1;
return this._replaceEntity(this.m_xml, iB, iE);
}
XMLP.prototype._parsePI = function(iB) {
var iE, iTB, iTE, iCB, iCE;
iE = this.m_xml.indexOf("?>", iB);
if(iE == -1) {
//This item never closes, although it could be a malformed document, we will assume that we are mid-chunck, save the string and reurn as interrupted
this.m_chunkTransitionContinuation = this.m_xml.slice(iB-2);//the '-2' adds the '?>' back into the string
return XMLP._INTERRUPT;
return this._setErr(XMLP.ERR_CLOSE_PI);
}
iTB = SAXStrings.indexOfNonWhitespace(this.m_xml, iB, iE);
if(iTB == -1) {
return this._setErr(XMLP.ERR_PI_TARGET);
}
iTE = SAXStrings.indexOfWhitespace(this.m_xml, iTB, iE);
if(iTE == -1) {
iTE = iE;
}
iCB = SAXStrings.indexOfNonWhitespace(this.m_xml, iTE, iE);
if(iCB == -1) {
iCB = iE;
}
iCE = SAXStrings.lastIndexOfNonWhitespace(this.m_xml, iCB, iE);
if(iCE == -1) {
iCE = iE - 1;
}
this.m_name = this.m_xml.substring(iTB, iTE);
this._setContent(XMLP._CONT_XML, iCB, iCE + 1);
this.m_iP = iE + 2;
return XMLP._PI;
}
XMLP.prototype._parseText = function(iB) {
var iE, ch;
for (iE=iB; iE<this.m_xml.length; ++iE) {
ch = this.m_xml.charAt(iE);
if (ch === '<' || ch === '&') {
break;
}
}
this._setContent(XMLP._CONT_XML, iB, iE);
this.m_iP = iE;
return XMLP._TEXT;
}
XMLP.prototype._replaceEntities = function(strD, iB, iE) {
if(SAXStrings.isEmpty(strD)) return "";
iB = iB || 0;
iE = iE || strD.length;
var iEB, iEE, strRet = "";
iEB = strD.indexOf("&", iB);
iEE = iB;
while((iEB > 0) && (iEB < iE)) {
strRet += strD.substring(iEE, iEB);
iEE = strD.indexOf(";", iEB) + 1;
if((iEE == 0) || (iEE > iE)) {
return this._setErr(XMLP.ERR_CLOSE_ENTITY);
}
iRet = this._replaceEntity(strD, iEB + 1, iEE - 1);
if(iRet == XMLP._ERROR) {
return iRet;
}
strRet += this.m_cAlt;
iEB = strD.indexOf("&", iEE);
}
if(iEE != iE) {
strRet += strD.substring(iEE, iE);
}
this._setContent(XMLP._CONT_ALT, strRet);
return XMLP._ENTITY;
}
XMLP.prototype._replaceEntity = function(strD, iB, iE) {
if(SAXStrings.isEmpty(strD)) return -1;
iB = iB || 0;
iE = iE || strD.length;
switch(strD.substring(iB, iE)) {
case "amp": strEnt = "&"; break;
case "lt": strEnt = "<"; break;
case "gt": strEnt = ">"; break;
case "apos": strEnt = "'"; break;
case "quot": strEnt = "\""; break;
case "nbsp":strEnt = ''; break;
case "lt":strEnt = '<'; break;
case "gt":strEnt = '>'; break;
case "amp":strEnt = '&'; break;
case "cent":strEnt = "¢"; break;
case "pound":strEnt = '£'; break;
case "yen":strEnt = '¥'; break;
case "euro":strEnt = '€'; break;
case "sect":strEnt = '§'; break;
case "copy":strEnt = '©'; break;
case "reg":strEnt = '®'; break;
default:
if(strD.charAt(iB) == "#") {
strEnt = String.fromCharCode(parseInt(strD.substring(iB + 1, iE)));
} else {
strEnt = ' ';
//return this._setErr(XMLP.ERR_ENTITY_UNKNOWN);
}
break;
}
this._setContent(XMLP._CONT_ALT, strEnt);
return XMLP._ENTITY;
}
XMLP.prototype._setContent = function(iSrc) {
var args = arguments;
if(XMLP._CONT_XML == iSrc) {
this.m_cAlt = null;
this.m_cB = args[1];
this.m_cE = args[2];
} else {
this.m_cAlt = args[1];
this.m_cB = 0;
this.m_cE = args[1].length;
}
this.m_cSrc = iSrc;
}
XMLP.prototype._setErr = function(iErr) {
var strErr = XMLP._errs[iErr];
this.m_cAlt = strErr;
this.m_cB = 0;
this.m_cE = strErr.length;
this.m_cSrc = XMLP._CONT_ALT;
return XMLP._ERROR;
} // end function _setErr
//SaxParser is an object that basically wraps an XMLP instance, and provides an
//event-based interface for parsing. This is the object users interact with when coding
//with XML for <SCRIPT>
var SaxParser = function(eventhandlerfactory) {
var eventhandler = new function(){
}
var thehandler = function() {};
thehandler.prototype.onStartDocument = function (funct){
eventhandler.onStartDocument = funct;
}
thehandler.prototype.onEndDocument = function (funct){
eventhandler.onEndDocument = funct;
}
thehandler.prototype.onStartElementNS = function (funct){
eventhandler.onStartElementNS = funct;
}
thehandler.prototype.onEndElementNS = function (funct){
eventhandler.onEndElementNS = funct;
}
thehandler.prototype.onCharacters = function(funct) {
eventhandler.onCharacters = funct;
}
thehandler.prototype.onCdata = function(funct) {
eventhandler.onCdata = funct;
}
thehandler.prototype.onComment = function(funct) {
eventhandler.onComment = funct;
}
thehandler.prototype.onWarning = function(funct) {
eventhandler.onWarning = funct;
}
thehandler.prototype.onError = function(funct) {
eventhandler.onError = funct;
}
eventhandlerfactory(new thehandler());
//eventhandler = eventhandler(eventhandler);
this.m_hndDoc = eventhandler;
this.m_hndErr = eventhandler;
this.m_hndLex = eventhandler;
this.m_interrupted = false;
}
// CONSTANTS (these must be below the constructor)
SaxParser.DOC_B = 1;
SaxParser.DOC_E = 2;
SaxParser.ELM_B = 3;
SaxParser.ELM_E = 4;
SaxParser.CHARS = 5;
SaxParser.PI = 6;
SaxParser.CD_B = 7;
SaxParser.CD_E = 8;
SaxParser.CMNT = 9;
SaxParser.DTD_B = 10;
SaxParser.DTD_E = 11;
SaxParser.prototype.parseFile = function(filename) { //This function will only work in the node.js environment.
var fs = require('fs');
var that = this;
fs.readFile(filename, function (err, data) {
that.parseString(data);
});
}
SaxParser.prototype.parseString = function(strD) {
util = require('util');
var that = this;
var startnew = true;
if(!that.m_parser){
that.m_parser = new XMLP(strD);
startnew = false;
}
else{
that.m_parser.continueParsing(strD);
startnew = true;
}
//if(that.m_hndDoc && that.m_hndDoc.setDocumentLocator) {
// that.m_hndDoc.setDocumentLocator(that);
//}
that.m_bErr = false;
if(!that.m_bErr && !startnew) {
that._fireEvent(SaxParser.DOC_B);
}
that._parseLoop();
if(!that.m_bErr && !that.m_interrupted) {
that._fireEvent(SaxParser.DOC_E);
}
that.m_xml = null;
that.m_iP = 0;
that.m_interrupted = false;
}
SaxParser.prototype.pause = function() {
this.m_parser.pause();
}
SaxParser.prototype.resume = function() {
//reset the state
this.m_parser.resume();
this.m_interrupted = false;
//now start up the parse loop
var that = this;
setTimeout(function(){
that._parseLoop();
if(!that.m_bErr && !that.m_interrupted) {
that._fireEvent(SaxParser.DOC_E);
}
}, 0);
}
SaxParser.prototype.setDocumentHandler = function(hnd) {
this.m_hndDoc = hnd;
}
SaxParser.prototype.setErrorHandler = function(hnd) {
this.m_hndErr = hnd;
}
SaxParser.prototype.setLexicalHandler = function(hnd) {
this.m_hndLex = hnd;
}
SaxParser.prototype.getColumnNumber = function() {
return this.m_parser.getColumnNumber();
}
SaxParser.prototype.getLineNumber = function() {
return this.m_parser.getLineNumber();
}
SaxParser.prototype.getMessage = function() {
return this.m_strErrMsg;
}
SaxParser.prototype.getPublicId = function() {
return null;
}
SaxParser.prototype.getSystemId = function() {
return null;
}
SaxParser.prototype.getLength = function() {
return this.m_parser.getAttributeCount();
}
SaxParser.prototype.getName = function(index) {
return this.m_parser.getAttributeName(index);
}
SaxParser.prototype.getValue = function(index) {
return this.m_parser.getAttributeValue(index);
}
SaxParser.prototype.getValueByName = function(name) {
return this.m_parser.getAttributeValueByName(name);
}
SaxParser.prototype._fireError = function(strMsg) {
this.m_strErrMsg = strMsg;
this.m_bErr = true;
if(this.m_hndErr && this.m_hndErr.onError) {
this.m_hndErr.onError(this.m_strErrMsg);
}
}
SaxParser.prototype._fireEvent = function(iEvt) {
var hnd, func, args = arguments, iLen = args.length - 1;
if(this.m_bErr) return;
if(SaxParser.DOC_B == iEvt) {
func = "onStartDocument"; hnd = this.m_hndDoc;
}
else if (SaxParser.DOC_E == iEvt) {
func = "onEndDocument"; hnd = this.m_hndDoc;
}
else if (SaxParser.ELM_B == iEvt) {
func = "onStartElementNS"; hnd = this.m_hndDoc;
}
else if (SaxParser.ELM_E == iEvt) {
func = "onEndElementNS"; hnd = this.m_hndDoc;
}
else if (SaxParser.CHARS == iEvt) {
func = "onCharacters"; hnd = this.m_hndDoc;
}
else if (SaxParser.PI == iEvt) {
func = "processingInstruction"; hnd = this.m_hndDoc;
}
else if (SaxParser.CD_B == iEvt) {
func = "onCdata"; hnd = this.m_hndLex;
}
else if (SaxParser.CD_E == iEvt) {
func = "onEndCDATA"; hnd = this.m_hndLex;
}
else if (SaxParser.CMNT == iEvt) {
func = "onComment"; hnd = this.m_hndLex;
}
if(hnd && hnd[func]) {
if(0 == iLen) {
hnd[func]();
}
else if (1 == iLen) {
hnd[func](args[1]);
}
else if (2 == iLen) {
hnd[func](args[1], args[2]);
}
else if (3 == iLen) {
hnd[func](args[1], args[2], args[3]);
}
else if (4 == iLen) {
hnd[func](args[1], args[2], args[3], args[4]);
}
else if (5 == iLen) {
hnd[func](args[1], args[2], args[3], args[4], args[5]);
}
else if (6 == iLen) {
hnd[func](args[1], args[2], args[3], args[4], args[5], args[6]);
}
}
}
SaxParser.prototype._parseLoop = function(parser) {
var iEvent, parser;
parser = this.m_parser;
while(!this.m_bErr) {
iEvent = parser.next();
if(iEvent == XMLP._ELM_B) {
theatts = this.m_parser.m_atts;
nameobject = parser._parsePrefixAndElementName(parser.getName());
theattsandnamespace = parser._parseNamespacesAndAtts(theatts);
var theuri = parser._getContextualNamespace(nameobject.prefix);
this._fireEvent(SaxParser.ELM_B, nameobject.name, theattsandnamespace[0], (nameobject.prefix === '')? null : nameobject.prefix, (theuri === '')? null : theuri ,theattsandnamespace[1] );
}
else if(iEvent == XMLP._ELM_E) {
nameobject = parser._parsePrefixAndElementName(parser.getName());
var theuri = parser._getContextualNamespace(nameobject.prefix);
parser._removeExpiredNamesapces(parser.getName());
this._fireEvent(SaxParser.ELM_E, nameobject.name, (nameobject.prefix === '')? null : nameobject.prefix, (theuri === '')? null : theuri);
}
else if(iEvent == XMLP._ELM_EMP) {
//this is both a begin and end element
theatts = this.m_parser.m_atts;
nameobject = parser._parsePrefixAndElementName(parser.getName());
theattsandnamespace = parser._parseNamespacesAndAtts(theatts);
var theuri = parser._getContextualNamespace(nameobject.prefix);
this._fireEvent(SaxParser.ELM_B, nameobject.name, theattsandnamespace[0], (nameobject.prefix === '')? null : nameobject.prefix, (theuri === '')? null : theuri ,theattsandnamespace[1], true );
parser._removeExpiredNamesapces(parser.getName());
this._fireEvent(SaxParser.ELM_E, nameobject.name, (nameobject.prefix === '')? null : nameobject.prefix, (theuri === '')? null : theuri, true);
//this._fireEvent(SaxParser.ELM_B, parser.getName(), this.m_parser.m_atts.map(function(item){return { name : item[0], value : item[1], };}) );
//this._fireEvent(SaxParser.ELM_E, parser.getName());
}
else if(iEvent == XMLP._TEXT) {
this._fireEvent(SaxParser.CHARS, parser.getContent().slice(parser.getContentBegin(),parser.getContentEnd()));
}
else if(iEvent == XMLP._ENTITY) {
this._fireEvent(SaxParser.CHARS, parser.getContent(), parser.getContentBegin(), parser.getContentEnd() - parser.getContentBegin());
}
else if(iEvent == XMLP._PI) {
this._fireEvent(SaxParser.PI, parser.getName(), parser.getContent().substring(parser.getContentBegin(), parser.getContentEnd()));
}
else if(iEvent == XMLP._CDATA) {
this._fireEvent(SaxParser.CD_B, parser.getContent().slice(parser.getContentBegin(),parser.getContentEnd()));
//this._fireEvent(SaxParser.CHARS, parser.getContent(), parser.getContentBegin(), parser.getContentEnd() - parser.getContentBegin());
//this._fireEvent(SaxParser.CD_E);
}
else if(iEvent == XMLP._COMMENT) {
this._fireEvent(SaxParser.CMNT, parser.getContent().slice(parser.getContentBegin(),parser.getContentEnd()));
}
else if(iEvent == XMLP._DTD) {
}
else if(iEvent == XMLP._ERROR) {
this._fireError(parser.getContent());
}
else if(iEvent == XMLP._INTERRUPT){
this.m_interrupted = true;
return;//just return and wait to be restarted
}
else if(iEvent == XMLP._NONE) {
return;
}
}
}
//SAXStrings: a useful object containing string manipulation functions
var SAXStrings = function() {
//This is the constructor of the SAXStrings object
}
// CONSTANTS (these must be below the constructor)
SAXStrings.WHITESPACE = " \t\n\r";
SAXStrings.QUOTES = "\"'";
SAXStrings.getColumnNumber = function(strD, iP) {
if(SAXStrings.isEmpty(strD)) {
return -1;
}
iP = iP || strD.length;
var arrD = strD.substring(0, iP).split("\n");
var strLine = arrD[arrD.length - 1];
arrD.length--;
var iLinePos = arrD.join("\n").length;
return iP - iLinePos;
}
SAXStrings.getLineNumber = function(strD, iP) {
if(SAXStrings.isEmpty(strD)) {
return -1;
}
iP = iP || strD.length;
return strD.substring(0, iP).split("\n").length
}
SAXStrings.indexOfNonWhitespace = function(strD, iB, iE) {
if(SAXStrings.isEmpty(strD)) {
return -1;
}
iB = iB || 0;
iE = iE || strD.length;
for(var i = iB; i < iE; i++){
if(SAXStrings.WHITESPACE.indexOf(strD.charAt(i)) == -1) {
return i;
}
}
return -1;
}
SAXStrings.indexOfWhitespace = function(strD, iB, iE) {
if(SAXStrings.isEmpty(strD)) {
return -1;
}
iB = iB || 0;
iE = iE || strD.length;
for(var i = iB; i < iE; i++) {
if(SAXStrings.WHITESPACE.indexOf(strD.charAt(i)) != -1) {
return i;
}
}
return -1;
}
SAXStrings.isEmpty = function(strD) {
return (strD == null) || (strD.length == 0);
}
SAXStrings.lastIndexOfNonWhitespace = function(strD, iB, iE) {
if(SAXStrings.isEmpty(strD)) {
return -1;
}
iB = iB || 0;
iE = iE || strD.length;
for(var i = iE - 1; i >= iB; i--){
if(SAXStrings.WHITESPACE.indexOf(strD.charAt(i)) == -1){
return i;
}
}
return -1;
}
SAXStrings.replace = function(strD, iB, iE, strF, strR) {
if(SAXStrings.isEmpty(strD)) {
return "";
}
iB = iB || 0;
iE = iE || strD.length;
return strD.toString().substring(iB, iE).split(strF).join(strR);
}
var Stack = function() {
this.m_arr = new Array();
}
Stack.prototype.clear = function() {
this.m_arr = new Array();
}
Stack.prototype.count = function() {
return this.m_arr.length;
}
Stack.prototype.destroy = function() {
this.m_arr = null;
}
Stack.prototype.peek = function() {
if(this.m_arr.length == 0) {
return null;
}
return this.m_arr[this.m_arr.length - 1];
}
Stack.prototype.pop = function() {
if(this.m_arr.length == 0) {
return null;
}
var o = this.m_arr[this.m_arr.length - 1];
this.m_arr.length--;
return o;
}
Stack.prototype.push = function(o) {
this.m_arr[this.m_arr.length] = o;
}
// CONVENIENCE FUNCTIONS
function isEmpty(str) {
return (str==null) || (str.length==0);
}
function trim(trimString, leftTrim, rightTrim) {
if (isEmpty(trimString)) {
return "";
}
// the general focus here is on minimal method calls - hence only one
// substring is done to complete the trim.
if (leftTrim == null) {
leftTrim = true;
}
if (rightTrim == null) {
rightTrim = true;
}
var left=0;
var right=0;
var i=0;
var k=0;
// modified to properly handle strings that are all whitespace
if (leftTrim == true) {
while ((i<trimString.length) && (whitespace.indexOf(trimString.charAt(i++))!=-1)) {
left++;
}
}
if (rightTrim == true) {
k=trimString.length-1;
while((k>=left) && (whitespace.indexOf(trimString.charAt(k--))!=-1)) {
right++;
}
}
return trimString.substring(left, trimString.length - right);
}
function __escapeString(str) {
var escAmpRegEx = /&/g;
var escLtRegEx = /</g;
var escGtRegEx = />/g;
var quotRegEx = /"/g;
var aposRegEx = /'/g;
str = str.replace(escAmpRegEx, "&");
str = str.replace(escLtRegEx, "<");
str = str.replace(escGtRegEx, ">");
str = str.replace(quotRegEx, """);
str = str.replace(aposRegEx, "'");
return str;
}
function __unescapeString(str) {
var escAmpRegEx = /&/g;
var escLtRegEx = /</g;
var escGtRegEx = />/g;
var quotRegEx = /"/g;
var aposRegEx = /'/g;
str = str.replace(escAmpRegEx, "&");
str = str.replace(escLtRegEx, "<");
str = str.replace(escGtRegEx, ">");
str = str.replace(quotRegEx, "\"");
str = str.replace(aposRegEx, "'");
return str;
}
exports.SaxParser = SaxParser;
})()