Plato on Github
Report Home
node_modules/sax/lib/sax.js
Maintainability
48.98
Lines of code
1409
Difficulty
83.65
Estimated Errors
18.53
Function weight
By Complexity
By SLOC
/**
 * sax: a streaming, event-driven parser for XML (strict mode) and
 * HTML-ish tag soup (loose mode).
 *
 * The module is wrapped in a function so that it can run both under
 * CommonJS (where everything is attached to `exports`) and in environments
 * without `exports` (where a global `sax` object is created).
 *
 * Public surface attached to the `sax` object:
 *   sax.parser(strict, opt)       -> SAXParser
 *   sax.createStream(strict, opt) -> SAXStream
 *   sax.SAXParser, sax.SAXStream, sax.EVENTS, sax.STATE, sax.ENTITIES,
 *   sax.MAX_BUFFER_LENGTH
 */
;(function (sax) {
  sax.parser = function (strict, opt) { return new SAXParser(strict, opt); };
  sax.SAXParser = SAXParser;
  sax.SAXStream = SAXStream;
  sax.createStream = createStream;

  // When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns.
  // When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)),
  // since that's the earliest that a buffer overrun could occur.  This way, checks are
  // as rare as required, but as often as necessary to ensure never crossing this bound.
  // Furthermore, buffers are only tested at most once per write(), so passing a very
  // large string into write() might have undesirable effects, but this is manageable by
  // the caller, so it is assumed to be safe.  Thus, a call to write() may, in the extreme
  // edge case, result in creating at most one complete copy of the string passed in.
  // Set to Infinity to have unlimited buffers.
  sax.MAX_BUFFER_LENGTH = 64 * 1024;

  // Names of the parser properties that accumulate characters.
  var buffers = [
    "comment", "sgmlDecl", "textNode", "tagName", "doctype",
    "procInstName", "procInstBody", "entity", "attribName",
    "attribValue", "cdata", "script"
  ];

  // Exposed for discoverability; a handler for event X is set as parser.onX.
  sax.EVENTS = [
    "text",
    "processinginstruction",
    "sgmldeclaration",
    "doctype",
    "comment",
    "attribute",
    "opentag",
    "closetag",
    "opencdata",
    "cdata",
    "closecdata",
    "error",
    "end",
    "ready",
    "script",
    "opennamespace",
    "closenamespace"
  ];

  /**
   * Parser constructor. May be called with or without `new`.
   * It is also re-applied to an existing parser by end() to reset its state.
   *
   * @param {boolean} strict - truthy for strict mode (strictFail reports errors).
   * @param {Object} [opt] - options read here and elsewhere in this module:
   *   lowercase / lowercasetags, noscript, xmlns, position, trim, normalize.
   */
  function SAXParser (strict, opt) {
    if (!(this instanceof SAXParser)) return new SAXParser(strict, opt);

    var parser = this;
    clearBuffers(parser);
    parser.q = parser.c = "";
    parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH;
    parser.opt = opt || {};
    parser.opt.lowercase = parser.opt.lowercase || parser.opt.lowercasetags;
    // name of the String method used to fold tag/attribute names in loose mode
    parser.looseCase = parser.opt.lowercase ? "toLowerCase" : "toUpperCase";
    parser.tags = [];
    parser.closed = parser.closedRoot = parser.sawRoot = false;
    parser.tag = parser.error = null;
    parser.strict = !!strict;
    parser.noscript = !!(strict || parser.opt.noscript);
    parser.state = S.BEGIN;
    parser.ENTITIES = Object.create(sax.ENTITIES);
    parser.attribList = [];

    // namespaces form a prototype chain.
    // it always points at the current tag,
    // which protos to its parent tag.
    if (parser.opt.xmlns) parser.ns = Object.create(rootNS);

    // mostly just for error reporting
    parser.trackPosition = parser.opt.position !== false;
    if (parser.trackPosition) {
      parser.position = parser.line = parser.column = 0;
    }
    emit(parser, "onready");
  }

  // Minimal shims for pre-ES5 engines.
  if (!Object.create) {
    Object.create = function (o) {
      function f () { this.__proto__ = o; }
      f.prototype = o;
      return new f;
    };
  }

  if (!Object.getPrototypeOf) {
    Object.getPrototypeOf = function (o) {
      return o.__proto__;
    };
  }

  if (!Object.keys) {
    Object.keys = function (o) {
      var a = [];
      for (var i in o) if (o.hasOwnProperty(i)) a.push(i);
      return a;
    };
  }

  /**
   * Inspect every buffer; flush the ones that may legitimately grow large
   * (text, cdata, script) and report an error for any other buffer that
   * exceeds the limit. Then schedule the position of the next check.
   */
  function checkBufferLength (parser) {
    var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10);
    var maxActual = 0;
    for (var i = 0, l = buffers.length; i < l; i++) {
      var buf = parser[buffers[i]];
      // parser.doctype is set to `true` once a doctype has been emitted;
      // count any non-string buffer as empty so maxActual never becomes NaN
      // (which would silently disable all further checks).
      var len = typeof buf === "string" ? buf.length : 0;
      if (len > maxAllowed) {
        // Text/cdata nodes can get big, and since they're buffered,
        // we can get here under normal conditions.
        // Avoid issues by emitting the text node now,
        // so at least it won't get any bigger.
        switch (buffers[i]) {
          case "textNode":
            closeText(parser);
            break;

          case "cdata":
            emitNode(parser, "oncdata", parser.cdata);
            parser.cdata = "";
            break;

          case "script":
            emitNode(parser, "onscript", parser.script);
            parser.script = "";
            break;

          default:
            error(parser, "Max buffer length exceeded: " + buffers[i]);
        }
      }
      maxActual = Math.max(maxActual, len);
    }
    // schedule the next check for the earliest possible buffer overrun.
    parser.bufferCheckPosition = (sax.MAX_BUFFER_LENGTH - maxActual) +
      parser.position;
  }

  /** Reset every accumulation buffer on the parser to the empty string. */
  function clearBuffers (parser) {
    for (var i = 0, l = buffers.length; i < l; i++) {
      parser[buffers[i]] = "";
    }
  }

  /** Emit any pending text, cdata and script content. */
  function flushBuffers (parser) {
    closeText(parser);
    if (parser.cdata !== "") {
      emitNode(parser, "oncdata", parser.cdata);
      parser.cdata = "";
    }
    if (parser.script !== "") {
      emitNode(parser, "onscript", parser.script);
      parser.script = "";
    }
  }

  SAXParser.prototype = {
    end: function () { end(this); },
    write: write,
    resume: function () { this.error = null; return this; },
    close: function () { return this.write(null); },
    flush: function () { flushBuffers(this); }
  };

  // Fall back to an empty base constructor where require("stream") fails.
  var Stream;
  try {
    Stream = require("stream").Stream;
  } catch (ex) {
    Stream = function () {};
  }

  // Events that are forwarded from the parser to the stream automatically.
  // "error" and "end" are wired by hand in the SAXStream constructor.
  var streamWraps = sax.EVENTS.filter(function (ev) {
    return ev !== "error" && ev !== "end";
  });

  function createStream (strict, opt) {
    return new SAXStream(strict, opt);
  }

  /**
   * Stream wrapper around a SAXParser. Takes the same arguments as
   * SAXParser and may be called with or without `new`.
   */
  function SAXStream (strict, opt) {
    if (!(this instanceof SAXStream)) return new SAXStream(strict, opt);

    Stream.apply(this);

    this._parser = new SAXParser(strict, opt);
    this.writable = true;
    this.readable = true;

    var me = this;

    this._parser.onend = function () {
      me.emit("end");
    };

    this._parser.onerror = function (er) {
      me.emit("error", er);

      // if didn't throw, then means error was handled.
      // go ahead and clear error, so we can write again.
      me._parser.error = null;
    };

    this._decoder = null;

    // Expose stream.onX accessors that map onto listeners for event X.
    streamWraps.forEach(function (ev) {
      Object.defineProperty(me, "on" + ev, {
        get: function () { return me._parser["on" + ev]; },
        set: function (h) {
          if (!h) {
            me.removeAllListeners(ev);
            return me._parser["on" + ev] = h;
          }
          me.on(ev, h);
        },
        enumerable: true,
        configurable: false
      });
    });
  }

  SAXStream.prototype = Object.create(Stream.prototype, {
    constructor: { value: SAXStream }
  });

  /**
   * Feed a chunk to the parser. Buffers are decoded as UTF-8 through a
   * lazily created StringDecoder. Emits "data" with the (decoded) chunk.
   */
  SAXStream.prototype.write = function (data) {
    if (typeof Buffer === 'function' &&
        typeof Buffer.isBuffer === 'function' &&
        Buffer.isBuffer(data)) {
      if (!this._decoder) {
        var SD = require('string_decoder').StringDecoder;
        this._decoder = new SD('utf8');
      }
      data = this._decoder.write(data);
    }

    this._parser.write(data.toString());
    this.emit("data", data);
    return true;
  };

  /** Optionally write a final chunk, then end the underlying parser. */
  SAXStream.prototype.end = function (chunk) {
    if (chunk && chunk.length) this.write(chunk);
    this._parser.end();
    return true;
  };

  /**
   * Register a listener. The first time a forwarded event is listened for,
   * install a parser handler that re-emits that event on the stream.
   */
  SAXStream.prototype.on = function (ev, handler) {
    var me = this;
    if (!me._parser["on" + ev] && streamWraps.indexOf(ev) !== -1) {
      me._parser["on" + ev] = function () {
        var args = arguments.length === 1 ? [arguments[0]]
                 : Array.apply(null, arguments);
        args.splice(0, 0, ev);
        me.emit.apply(me, args);
      };
    }

    return Stream.prototype.on.call(me, ev, handler);
  };

  // character classes and tokens
  var whitespace = "\r\n\t ";
  // this really needs to be replaced with character classes.
  // XML allows all manner of ridiculous numbers and digits.
  var number = "0123456789";
  var letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  // (Letter | "_" | ":")
  var quote = "'\"";
  var entity = number + letter + "#";
  var attribEnd = whitespace + ">";
  var CDATA = "[CDATA[";
  var DOCTYPE = "DOCTYPE";
  var XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace";
  var XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";
  var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE };

  // turn all the string character sets into character class objects.
  whitespace = charClass(whitespace);
  number = charClass(number);
  letter = charClass(letter);

  // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
  // This implementation works on strings, a single character at a time
  // as such, it cannot ever support astral-plane characters (10000-EFFFF)
  // without a significant breaking change to either this parser, or the
  // JavaScript language.  Implementation of an emoji-capable xml parser
  // is left as an exercise for the reader.
  var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/;

  var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/;

  quote = charClass(quote);
  entity = charClass(entity);
  attribEnd = charClass(attribEnd);

  /** Turn a string of characters into a { char: true } lookup object. */
  function charClass (str) {
    return str.split("").reduce(function (s, c) {
      s[c] = true;
      return s;
    }, {});
  }

  function isRegExp (c) {
    return Object.prototype.toString.call(c) === '[object RegExp]';
  }

  /** True when character `c` belongs to `charclass` (lookup object or RegExp). */
  function is (charclass, c) {
    return isRegExp(charclass) ? !!c.match(charclass) : charclass[c];
  }

  function not (charclass, c) {
    return !is(charclass, c);
  }

  // S is first used as a counter to number the states; once the table is
  // built it is re-pointed at sax.STATE as a shorthand (see below).
  var S = 0;
  sax.STATE = {
    BEGIN: S++,                 // before the first "<"
    TEXT: S++,                  // general stuff
    TEXT_ENTITY: S++,           // &amp and such.
    OPEN_WAKA: S++,             // <
    SGML_DECL: S++,             // <!BLARG
    SGML_DECL_QUOTED: S++,      // <!BLARG foo "bar
    DOCTYPE: S++,               // <!DOCTYPE
    DOCTYPE_QUOTED: S++,        // <!DOCTYPE "//blah
    DOCTYPE_DTD: S++,           // <!DOCTYPE "//blah" [ ...
    DOCTYPE_DTD_QUOTED: S++,    // <!DOCTYPE "//blah" [ "foo
    COMMENT_STARTING: S++,      // <!-
    COMMENT: S++,               // <!--
    COMMENT_ENDING: S++,        // <!-- blah -
    COMMENT_ENDED: S++,         // <!-- blah --
    CDATA: S++,                 // <![CDATA[ something
    CDATA_ENDING: S++,          // ]
    CDATA_ENDING_2: S++,        // ]]
    PROC_INST: S++,             // <?hi
    PROC_INST_BODY: S++,        // <?hi there
    PROC_INST_ENDING: S++,      // <?hi "there" ?
    OPEN_TAG: S++,              // <strong
    OPEN_TAG_SLASH: S++,        // <strong /
    ATTRIB: S++,                // <a
    ATTRIB_NAME: S++,           // <a foo
    ATTRIB_NAME_SAW_WHITE: S++, // <a foo _
    ATTRIB_VALUE: S++,          // <a foo=
    ATTRIB_VALUE_QUOTED: S++,   // <a foo="bar
    ATTRIB_VALUE_CLOSED: S++,   // <a foo="bar"
    ATTRIB_VALUE_UNQUOTED: S++, // <a foo=bar
    ATTRIB_VALUE_ENTITY_Q: S++, // entity inside a quoted attribute value
    ATTRIB_VALUE_ENTITY_U: S++, // entity inside an unquoted attribute value
    CLOSE_TAG: S++,             // </a
    CLOSE_TAG_SAW_WHITE: S++,   // </a   >
    SCRIPT: S++,                // <script> ...
    SCRIPT_ENDING: S++          // <script> ... <
  };

  // Named entities. Numeric values are code units that are converted to
  // one-character strings right after the table.
  sax.ENTITIES = {
    "amp": "&", "gt": ">", "lt": "<", "quot": "\"", "apos": "'",
    "AElig": 198, "Aacute": 193, "Acirc": 194, "Agrave": 192, "Aring": 197,
    "Atilde": 195, "Auml": 196, "Ccedil": 199, "ETH": 208, "Eacute": 201,
    "Ecirc": 202, "Egrave": 200, "Euml": 203, "Iacute": 205, "Icirc": 206,
    "Igrave": 204, "Iuml": 207, "Ntilde": 209, "Oacute": 211, "Ocirc": 212,
    "Ograve": 210, "Oslash": 216, "Otilde": 213, "Ouml": 214, "THORN": 222,
    "Uacute": 218, "Ucirc": 219, "Ugrave": 217, "Uuml": 220, "Yacute": 221,
    "aacute": 225, "acirc": 226, "aelig": 230, "agrave": 224, "aring": 229,
    "atilde": 227, "auml": 228, "ccedil": 231, "eacute": 233, "ecirc": 234,
    "egrave": 232, "eth": 240, "euml": 235, "iacute": 237, "icirc": 238,
    "igrave": 236, "iuml": 239, "ntilde": 241, "oacute": 243, "ocirc": 244,
    "ograve": 242, "oslash": 248, "otilde": 245, "ouml": 246, "szlig": 223,
    "thorn": 254, "uacute": 250, "ucirc": 251, "ugrave": 249, "uuml": 252,
    "yacute": 253, "yuml": 255, "copy": 169, "reg": 174, "nbsp": 160,
    "iexcl": 161, "cent": 162, "pound": 163, "curren": 164, "yen": 165,
    "brvbar": 166, "sect": 167, "uml": 168, "ordf": 170, "laquo": 171,
    "not": 172, "shy": 173, "macr": 175, "deg": 176, "plusmn": 177,
    "sup1": 185, "sup2": 178, "sup3": 179, "acute": 180, "micro": 181,
    "para": 182, "middot": 183, "cedil": 184, "ordm": 186, "raquo": 187,
    "frac14": 188, "frac12": 189, "frac34": 190, "iquest": 191, "times": 215,
    "divide": 247, "OElig": 338, "oelig": 339, "Scaron": 352, "scaron": 353,
    "Yuml": 376, "fnof": 402, "circ": 710, "tilde": 732, "Alpha": 913,
    "Beta": 914, "Gamma": 915, "Delta": 916, "Epsilon": 917, "Zeta": 918,
    "Eta": 919, "Theta": 920, "Iota": 921, "Kappa": 922, "Lambda": 923,
    "Mu": 924, "Nu": 925, "Xi": 926, "Omicron": 927, "Pi": 928,
    "Rho": 929, "Sigma": 931, "Tau": 932, "Upsilon": 933, "Phi": 934,
    "Chi": 935, "Psi": 936, "Omega": 937, "alpha": 945, "beta": 946,
    "gamma": 947, "delta": 948, "epsilon": 949, "zeta": 950, "eta": 951,
    "theta": 952, "iota": 953, "kappa": 954, "lambda": 955, "mu": 956,
    "nu": 957, "xi": 958, "omicron": 959, "pi": 960, "rho": 961,
    "sigmaf": 962, "sigma": 963, "tau": 964, "upsilon": 965, "phi": 966,
    "chi": 967, "psi": 968, "omega": 969, "thetasym": 977, "upsih": 978,
    "piv": 982, "ensp": 8194, "emsp": 8195, "thinsp": 8201, "zwnj": 8204,
    "zwj": 8205, "lrm": 8206, "rlm": 8207, "ndash": 8211, "mdash": 8212,
    "lsquo": 8216, "rsquo": 8217, "sbquo": 8218, "ldquo": 8220, "rdquo": 8221,
    "bdquo": 8222, "dagger": 8224, "Dagger": 8225, "bull": 8226,
    "hellip": 8230, "permil": 8240, "prime": 8242, "Prime": 8243,
    "lsaquo": 8249, "rsaquo": 8250, "oline": 8254, "frasl": 8260,
    "euro": 8364, "image": 8465, "weierp": 8472, "real": 8476, "trade": 8482,
    "alefsym": 8501, "larr": 8592, "uarr": 8593, "rarr": 8594, "darr": 8595,
    "harr": 8596, "crarr": 8629, "lArr": 8656, "uArr": 8657, "rArr": 8658,
    "dArr": 8659, "hArr": 8660, "forall": 8704, "part": 8706, "exist": 8707,
    "empty": 8709, "nabla": 8711, "isin": 8712, "notin": 8713, "ni": 8715,
    "prod": 8719, "sum": 8721, "minus": 8722, "lowast": 8727, "radic": 8730,
    "prop": 8733, "infin": 8734, "ang": 8736, "and": 8743, "or": 8744,
    "cap": 8745, "cup": 8746, "int": 8747, "there4": 8756, "sim": 8764,
    "cong": 8773, "asymp": 8776, "ne": 8800, "equiv": 8801, "le": 8804,
    "ge": 8805, "sub": 8834, "sup": 8835, "nsub": 8836, "sube": 8838,
    "supe": 8839, "oplus": 8853, "otimes": 8855, "perp": 8869, "sdot": 8901,
    "lceil": 8968, "rceil": 8969, "lfloor": 8970, "rfloor": 8971,
    "lang": 9001, "rang": 9002, "loz": 9674, "spades": 9824, "clubs": 9827,
    "hearts": 9829, "diams": 9830
  };

  Object.keys(sax.ENTITIES).forEach(function (key) {
    var e = sax.ENTITIES[key];
    var s = typeof e === 'number' ? String.fromCharCode(e) : e;
    sax.ENTITIES[key] = s;
  });

  // Add the reverse mapping (number -> name). Object.keys takes a snapshot,
  // so the keys added here are never themselves visited.
  Object.keys(sax.STATE).forEach(function (name) {
    sax.STATE[sax.STATE[name]] = name;
  });

  // shorthand
  S = sax.STATE;

  /** Call the handler stored under `event` (e.g. "ontext"), if there is one. */
  function emit (parser, event, data) {
    parser[event] && parser[event](data);
  }

  /** Flush any pending text node, then emit the given node event. */
  function emitNode (parser, nodeType, data) {
    if (parser.textNode) closeText(parser);
    emit(parser, nodeType, data);
  }

  /** Apply the text options, emit "ontext" if anything is left, clear the buffer. */
  function closeText (parser) {
    parser.textNode = textopts(parser.opt, parser.textNode);
    if (parser.textNode) emit(parser, "ontext", parser.textNode);
    parser.textNode = "";
  }

  /** Apply the `trim` and `normalize` options to a piece of text. */
  function textopts (opt, text) {
    if (opt.trim) text = text.trim();
    if (opt.normalize) text = text.replace(/\s+/g, " ");
    return text;
  }

  /**
   * Record an error on the parser and emit "onerror". The message gets the
   * current line/column/char appended when position tracking is on.
   * Does not throw; a later write() throws while parser.error is still set.
   */
  function error (parser, er) {
    closeText(parser);
    if (parser.trackPosition) {
      er += "\nLine: " + parser.line +
            "\nColumn: " + parser.column +
            "\nChar: " + parser.c;
    }
    er = new Error(er);
    parser.error = er;
    emit(parser, "onerror", er);
    return parser;
  }

  /** Finish parsing: report dangling state, emit "onend", reset the parser. */
  function end (parser) {
    if (!parser.closedRoot) strictFail(parser, "Unclosed root tag");
    if ((parser.state !== S.BEGIN) && (parser.state !== S.TEXT)) {
      error(parser, "Unexpected end");
    }
    closeText(parser);
    parser.c = "";
    parser.closed = true;
    emit(parser, "onend");
    // re-run the constructor on this instance to reset it for reuse
    SAXParser.call(parser, parser.strict, parser.opt);
    return parser;
  }

  /** Report `message` as an error, but only when the parser is strict. */
  function strictFail (parser, message) {
    if (typeof parser !== 'object' || !(parser instanceof SAXParser)) {
      throw new Error('bad call to strictFail');
    }
    if (parser.strict) error(parser, message);
  }

  /** Start a new tag object from parser.tagName. */
  function newTag (parser) {
    if (!parser.strict) parser.tagName = parser.tagName[parser.looseCase]();
    var parent = parser.tags[parser.tags.length - 1] || parser;
    var tag = parser.tag = { name: parser.tagName, attributes: {} };

    // will be overridden if tag contails an xmlns="foo" or xmlns:foo="bar"
    if (parser.opt.xmlns) tag.ns = parent.ns;
    parser.attribList.length = 0;
  }

  /** Split a qualified name into { prefix, local }. */
  function qname (name, attribute) {
    var i = name.indexOf(":");
    var qualName = i < 0 ? [ "", name ] : name.split(":");
    var prefix = qualName[0];
    var local = qualName[1];

    // <x "xmlns"="http://foo">
    if (attribute && name === "xmlns") {
      prefix = "xmlns";
      local = "";
    }

    return { prefix: prefix, local: local };
  }

  /**
   * Commit the attribute held in parser.attribName / parser.attribValue.
   * A name already seen on the current tag is dropped. In xmlns mode the
   * attribute is queued (and namespace bindings are recorded) so that the
   * events can be emitted once the whole tag is known; otherwise
   * "onattribute" is emitted right away.
   */
  function attrib (parser) {
    if (!parser.strict) {
      parser.attribName = parser.attribName[parser.looseCase]();
    }

    // attribList holds [name, value] pairs, so compare against the name slot.
    var seen = parser.tag.attributes.hasOwnProperty(parser.attribName);
    for (var k = 0; !seen && k < parser.attribList.length; k++) {
      seen = parser.attribList[k][0] === parser.attribName;
    }
    if (seen) {
      return parser.attribName = parser.attribValue = "";
    }

    if (parser.opt.xmlns) {
      var qn = qname(parser.attribName, true);
      var prefix = qn.prefix;
      var local = qn.local;

      if (prefix === "xmlns") {
        // namespace binding attribute; push the binding into scope
        if (local === "xml" && parser.attribValue !== XML_NAMESPACE) {
          strictFail(parser,
            "xml: prefix must be bound to " + XML_NAMESPACE + "\n" +
            "Actual: " + parser.attribValue);
        } else if (local === "xmlns" &&
                   parser.attribValue !== XMLNS_NAMESPACE) {
          strictFail(parser,
            "xmlns: prefix must be bound to " + XMLNS_NAMESPACE + "\n" +
            "Actual: " + parser.attribValue);
        } else {
          var tag = parser.tag;
          var parent = parser.tags[parser.tags.length - 1] || parser;
          // give this tag its own scope the first time it binds something
          if (tag.ns === parent.ns) {
            tag.ns = Object.create(parent.ns);
          }
          tag.ns[local] = parser.attribValue;
        }
      }

      // defer onattribute events until all attributes have been seen
      // so any new bindings can take effect; preserve attribute order
      // so deferred events can be emitted in document order
      parser.attribList.push([parser.attribName, parser.attribValue]);
    } else {
      // in non-xmlns mode, we can emit the event right away
      parser.tag.attributes[parser.attribName] = parser.attribValue;
      emitNode(parser, "onattribute", {
        name: parser.attribName,
        value: parser.attribValue
      });
    }

    parser.attribName = parser.attribValue = "";
  }

  /**
   * Finish the current opening tag: resolve namespaces and emit the deferred
   * attribute events (xmlns mode), push the tag and emit "onopentag".
   *
   * @param {boolean} [selfClosing] - truthy for <tag/>; the caller then
   *   invokes closeTag itself, so the tag name is left in place.
   */
  function openTag (parser, selfClosing) {
    if (parser.opt.xmlns) {
      // emit namespace binding events
      var tag = parser.tag;

      // add namespace info to tag
      var qn = qname(parser.tagName);
      tag.prefix = qn.prefix;
      tag.local = qn.local;
      tag.uri = tag.ns[qn.prefix] || "";

      if (tag.prefix && !tag.uri) {
        strictFail(parser, "Unbound namespace prefix: " +
          JSON.stringify(parser.tagName));
        tag.uri = qn.prefix;
      }

      var parent = parser.tags[parser.tags.length - 1] || parser;
      if (tag.ns && parent.ns !== tag.ns) {
        Object.keys(tag.ns).forEach(function (p) {
          emitNode(parser, "onopennamespace", { prefix: p, uri: tag.ns[p] });
        });
      }

      // handle deferred onattribute events
      // Note: do not apply default ns to attributes:
      //   http://www.w3.org/TR/REC-xml-names/#defaulting
      for (var i = 0, l = parser.attribList.length; i < l; i++) {
        var nv = parser.attribList[i];
        var name = nv[0];
        var value = nv[1];
        var qualName = qname(name, true);
        var prefix = qualName.prefix;
        var local = qualName.local;
        var uri = prefix == "" ? "" : (tag.ns[prefix] || "");
        var a = {
          name: name,
          value: value,
          prefix: prefix,
          local: local,
          uri: uri
        };

        // if there's any attributes with an undefined namespace,
        // then fail on them now.
        if (prefix && prefix != "xmlns" && !uri) {
          strictFail(parser, "Unbound namespace prefix: " +
            JSON.stringify(prefix));
          a.uri = prefix;
        }
        parser.tag.attributes[name] = a;
        emitNode(parser, "onattribute", a);
      }
      parser.attribList.length = 0;
    }

    parser.tag.isSelfClosing = !!selfClosing;

    // process the tag
    parser.sawRoot = true;
    parser.tags.push(parser.tag);
    emitNode(parser, "onopentag", parser.tag);
    if (!selfClosing) {
      // special case for <script> in non-strict mode.
      if (!parser.noscript && parser.tagName.toLowerCase() === "script") {
        parser.state = S.SCRIPT;
      } else {
        parser.state = S.TEXT;
      }
      parser.tag = null;
      parser.tagName = "";
    }
    parser.attribName = parser.attribValue = "";
    parser.attribList.length = 0;
  }

  /**
   * Handle a closing tag named parser.tagName: pop (and emit "onclosetag"
   * for) every open tag down to and including the matching one. An
   * unmatched closing tag is put back into the text buffer as literal text.
   */
  function closeTag (parser) {
    if (!parser.tagName) {
      strictFail(parser, "Weird empty close tag.");
      parser.textNode += "</>";
      parser.state = S.TEXT;
      return;
    }

    if (parser.script) {
      if (parser.tagName !== "script") {
        // not the end of the script element: keep it as script content
        parser.script += "</" + parser.tagName + ">";
        parser.tagName = "";
        parser.state = S.SCRIPT;
        return;
      }
      emitNode(parser, "onscript", parser.script);
      parser.script = "";
    }

    // first make sure that the closing tag actually exists.
    // <a><b></c></b></a> will close everything, otherwise.
    var t = parser.tags.length;
    var tagName = parser.tagName;
    if (!parser.strict) tagName = tagName[parser.looseCase]();
    var closeTo = tagName;
    while (t--) {
      var close = parser.tags[t];
      if (close.name !== closeTo) {
        // fail the first time in strict mode
        strictFail(parser, "Unexpected close tag");
      } else {
        break;
      }
    }

    // didn't find it.  we already failed for strict, so just abort.
    if (t < 0) {
      strictFail(parser, "Unmatched closing tag: " + parser.tagName);
      parser.textNode += "</" + parser.tagName + ">";
      parser.state = S.TEXT;
      return;
    }
    parser.tagName = tagName;
    var s = parser.tags.length;
    while (s-- > t) {
      var tag = parser.tag = parser.tags.pop();
      parser.tagName = parser.tag.name;
      emitNode(parser, "onclosetag", parser.tagName);

      var parent = parser.tags[parser.tags.length - 1] || parser;
      if (parser.opt.xmlns && tag.ns !== parent.ns) {
        // remove namespace bindings introduced by tag
        Object.keys(tag.ns).forEach(function (p) {
          var n = tag.ns[p];
          emitNode(parser, "onclosenamespace", { prefix: p, uri: n });
        });
      }
    }
    if (t === 0) parser.closedRoot = true;
    parser.tagName = parser.attribValue = parser.attribName = "";
    parser.attribList.length = 0;
    parser.state = S.TEXT;
  }

  /**
   * Resolve parser.entity (the text between "&" and ";") to its replacement.
   * Named entities are looked up as written, then lower-cased. Numeric
   * references (#NNN / #xHHH) must be well formed and within 0..0x10FFFF.
   * Anything else is reported through strictFail and returned verbatim as
   * "&" + entity + ";".
   */
  function parseEntity (parser) {
    var entity = parser.entity;
    var entityLC = entity.toLowerCase();
    var num;
    var numStr = "";

    if (parser.ENTITIES[entity]) return parser.ENTITIES[entity];
    if (parser.ENTITIES[entityLC]) return parser.ENTITIES[entityLC];

    entity = entityLC;
    if (entity.charAt(0) === "#") {
      if (entity.charAt(1) === "x") {
        entity = entity.slice(2);
        num = parseInt(entity, 16);
        numStr = num.toString(16);
      } else {
        entity = entity.slice(1);
        num = parseInt(entity, 10);
        numStr = num.toString(10);
      }
    }
    entity = entity.replace(/^0+/, "");
    // isNaN(undefined) is true, which also rejects the empty entity "&;".
    // The range check keeps String.fromCodePoint from throwing RangeError.
    if (isNaN(num) || numStr.toLowerCase() !== entity ||
        num < 0 || num > 0x10FFFF) {
      strictFail(parser, "Invalid character entity");
      return "&" + parser.entity + ";";
    }

    return String.fromCodePoint(num);
  }

  /**
   * Feed a chunk of input to the parser (installed as SAXParser#write).
   * The chunk is consumed one character at a time through the state machine.
   *
   * @param {string|Object|null} chunk - text to parse; non-string objects are
   *   converted with toString(); `null` ends the document.
   * @returns the parser, for chaining.
   * @throws the stored parser.error if a previous error has not been cleared,
   *   or an Error when parser.state holds an unknown value.
   */
  function write (chunk) {
    var parser = this;
    if (this.error) throw this.error;
    if (parser.closed) {
      return error(parser,
        "Cannot write after close. Assign an onready handler.");
    }
    if (chunk === null) return end(parser);
    if (typeof chunk === "object") chunk = chunk.toString();

    var i = 0;
    var c = "";
    var starti, pad, returnState, buffer;

    while (parser.c = c = chunk.charAt(i++)) {
      if (parser.trackPosition) {
        parser.position++;
        if (c === "\n") {
          parser.line++;
          parser.column = 0;
        } else {
          parser.column++;
        }
      }

      switch (parser.state) {

        case S.BEGIN:
          if (c === "<") {
            parser.state = S.OPEN_WAKA;
            parser.startTagPosition = parser.position;
          } else if (not(whitespace, c)) {
            // have to process this as a text node.
            // weird, but happens.
            strictFail(parser, "Non-whitespace before first tag.");
            parser.textNode = c;
            parser.state = S.TEXT;
          }
          continue;

        case S.TEXT:
          if (parser.sawRoot && !parser.closedRoot) {
            // fast path: consume a whole run of plain text in one go
            starti = i - 1;
            while (c && c !== "<" && c !== "&") {
              c = chunk.charAt(i++);
              if (c && parser.trackPosition) {
                parser.position++;
                if (c === "\n") {
                  parser.line++;
                  parser.column = 0;
                } else {
                  parser.column++;
                }
              }
            }
            parser.textNode += chunk.substring(starti, i - 1);
          }
          if (c === "<") {
            parser.state = S.OPEN_WAKA;
            parser.startTagPosition = parser.position;
          } else {
            if (not(whitespace, c) &&
                (!parser.sawRoot || parser.closedRoot)) {
              strictFail(parser, "Text data outside of root node.");
            }
            if (c === "&") parser.state = S.TEXT_ENTITY;
            else parser.textNode += c;
          }
          continue;

        case S.SCRIPT:
          // only non-strict
          if (c === "<") {
            parser.state = S.SCRIPT_ENDING;
          } else {
            parser.script += c;
          }
          continue;

        case S.SCRIPT_ENDING:
          if (c === "/") {
            parser.state = S.CLOSE_TAG;
          } else {
            parser.script += "<" + c;
            parser.state = S.SCRIPT;
          }
          continue;

        case S.OPEN_WAKA:
          // either a /, ?, !, or text is coming next.
          if (c === "!") {
            parser.state = S.SGML_DECL;
            parser.sgmlDecl = "";
          } else if (is(whitespace, c)) {
            // wait for it...
          } else if (is(nameStart, c)) {
            parser.state = S.OPEN_TAG;
            parser.tagName = c;
          } else if (c === "/") {
            parser.state = S.CLOSE_TAG;
            parser.tagName = "";
          } else if (c === "?") {
            parser.state = S.PROC_INST;
            parser.procInstName = parser.procInstBody = "";
          } else {
            strictFail(parser, "Unencoded <");
            // if there was some whitespace, then add that in.
            if (parser.startTagPosition + 1 < parser.position) {
              pad = parser.position - parser.startTagPosition;
              c = new Array(pad).join(" ") + c;
            }
            parser.textNode += "<" + c;
            parser.state = S.TEXT;
          }
          continue;

        case S.SGML_DECL:
          if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
            emitNode(parser, "onopencdata");
            parser.state = S.CDATA;
            parser.sgmlDecl = "";
            parser.cdata = "";
          } else if (parser.sgmlDecl + c === "--") {
            parser.state = S.COMMENT;
            parser.comment = "";
            parser.sgmlDecl = "";
          } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
            parser.state = S.DOCTYPE;
            if (parser.doctype || parser.sawRoot) {
              strictFail(parser,
                "Inappropriately located doctype declaration");
            }
            parser.doctype = "";
            parser.sgmlDecl = "";
          } else if (c === ">") {
            emitNode(parser, "onsgmldeclaration", parser.sgmlDecl);
            parser.sgmlDecl = "";
            parser.state = S.TEXT;
          } else if (is(quote, c)) {
            parser.state = S.SGML_DECL_QUOTED;
            // remember which quote opened the string so that
            // SGML_DECL_QUOTED can recognise the matching close quote
            parser.q = c;
            parser.sgmlDecl += c;
          } else {
            parser.sgmlDecl += c;
          }
          continue;

        case S.SGML_DECL_QUOTED:
          if (c === parser.q) {
            parser.state = S.SGML_DECL;
            parser.q = "";
          }
          parser.sgmlDecl += c;
          continue;

        case S.DOCTYPE:
          if (c === ">") {
            parser.state = S.TEXT;
            emitNode(parser, "ondoctype", parser.doctype);
            parser.doctype = true; // just remember that we saw it.
          } else {
            parser.doctype += c;
            if (c === "[") {
              parser.state = S.DOCTYPE_DTD;
            } else if (is(quote, c)) {
              parser.state = S.DOCTYPE_QUOTED;
              parser.q = c;
            }
          }
          continue;

        case S.DOCTYPE_QUOTED:
          parser.doctype += c;
          if (c === parser.q) {
            parser.q = "";
            parser.state = S.DOCTYPE;
          }
          continue;

        case S.DOCTYPE_DTD:
          parser.doctype += c;
          if (c === "]") {
            parser.state = S.DOCTYPE;
          } else if (is(quote, c)) {
            parser.state = S.DOCTYPE_DTD_QUOTED;
            parser.q = c;
          }
          continue;

        case S.DOCTYPE_DTD_QUOTED:
          parser.doctype += c;
          if (c === parser.q) {
            parser.state = S.DOCTYPE_DTD;
            parser.q = "";
          }
          continue;

        case S.COMMENT:
          if (c === "-") parser.state = S.COMMENT_ENDING;
          else parser.comment += c;
          continue;

        case S.COMMENT_ENDING:
          if (c === "-") {
            parser.state = S.COMMENT_ENDED;
            parser.comment = textopts(parser.opt, parser.comment);
            if (parser.comment) emitNode(parser, "oncomment", parser.comment);
            parser.comment = "";
          } else {
            parser.comment += "-" + c;
            parser.state = S.COMMENT;
          }
          continue;

        case S.COMMENT_ENDED:
          if (c !== ">") {
            strictFail(parser, "Malformed comment");
            // allow <!-- blah -- bloo --> in non-strict mode,
            // which is a comment of " blah -- bloo "
            parser.comment += "--" + c;
            parser.state = S.COMMENT;
          } else {
            parser.state = S.TEXT;
          }
          continue;

        case S.CDATA:
          if (c === "]") parser.state = S.CDATA_ENDING;
          else parser.cdata += c;
          continue;

        case S.CDATA_ENDING:
          if (c === "]") {
            parser.state = S.CDATA_ENDING_2;
          } else {
            parser.cdata += "]" + c;
            parser.state = S.CDATA;
          }
          continue;

        case S.CDATA_ENDING_2:
          if (c === ">") {
            if (parser.cdata) emitNode(parser, "oncdata", parser.cdata);
            emitNode(parser, "onclosecdata");
            parser.cdata = "";
            parser.state = S.TEXT;
          } else if (c === "]") {
            parser.cdata += "]";
          } else {
            parser.cdata += "]]" + c;
            parser.state = S.CDATA;
          }
          continue;

        case S.PROC_INST:
          if (c === "?") {
            parser.state = S.PROC_INST_ENDING;
          } else if (is(whitespace, c)) {
            parser.state = S.PROC_INST_BODY;
          } else {
            parser.procInstName += c;
          }
          continue;

        case S.PROC_INST_BODY:
          if (!parser.procInstBody && is(whitespace, c)) {
            continue;
          } else if (c === "?") {
            parser.state = S.PROC_INST_ENDING;
          } else {
            parser.procInstBody += c;
          }
          continue;

        case S.PROC_INST_ENDING:
          if (c === ">") {
            emitNode(parser, "onprocessinginstruction", {
              name: parser.procInstName,
              body: parser.procInstBody
            });
            parser.procInstName = parser.procInstBody = "";
            parser.state = S.TEXT;
          } else {
            parser.procInstBody += "?" + c;
            parser.state = S.PROC_INST_BODY;
          }
          continue;

        case S.OPEN_TAG:
          if (is(nameBody, c)) {
            parser.tagName += c;
          } else {
            newTag(parser);
            if (c === ">") {
              openTag(parser);
            } else if (c === "/") {
              parser.state = S.OPEN_TAG_SLASH;
            } else {
              if (not(whitespace, c)) {
                strictFail(parser, "Invalid character in tag name");
              }
              parser.state = S.ATTRIB;
            }
          }
          continue;

        case S.OPEN_TAG_SLASH:
          if (c === ">") {
            openTag(parser, true);
            closeTag(parser);
          } else {
            strictFail(parser,
              "Forward-slash in opening tag not followed by >");
            parser.state = S.ATTRIB;
          }
          continue;

        case S.ATTRIB:
          // haven't read the attribute name yet.
          if (is(whitespace, c)) {
            continue;
          } else if (c === ">") {
            openTag(parser);
          } else if (c === "/") {
            parser.state = S.OPEN_TAG_SLASH;
          } else if (is(nameStart, c)) {
            parser.attribName = c;
            parser.attribValue = "";
            parser.state = S.ATTRIB_NAME;
          } else {
            strictFail(parser, "Invalid attribute name");
          }
          continue;

        case S.ATTRIB_NAME:
          if (c === "=") {
            parser.state = S.ATTRIB_VALUE;
          } else if (c === ">") {
            strictFail(parser, "Attribute without value");
            parser.attribValue = parser.attribName;
            attrib(parser);
            openTag(parser);
          } else if (is(whitespace, c)) {
            parser.state = S.ATTRIB_NAME_SAW_WHITE;
          } else if (is(nameBody, c)) {
            parser.attribName += c;
          } else {
            strictFail(parser, "Invalid attribute name");
          }
          continue;

        case S.ATTRIB_NAME_SAW_WHITE:
          if (c === "=") {
            parser.state = S.ATTRIB_VALUE;
          } else if (is(whitespace, c)) {
            continue;
          } else {
            strictFail(parser, "Attribute without value");
            parser.tag.attributes[parser.attribName] = "";
            parser.attribValue = "";
            emitNode(parser, "onattribute", {
              name: parser.attribName,
              value: ""
            });
            parser.attribName = "";
            if (c === ">") {
              openTag(parser);
            } else if (is(nameStart, c)) {
              parser.attribName = c;
              parser.state = S.ATTRIB_NAME;
            } else {
              strictFail(parser, "Invalid attribute name");
              parser.state = S.ATTRIB;
            }
          }
          continue;

        case S.ATTRIB_VALUE:
          if (is(whitespace, c)) {
            continue;
          } else if (is(quote, c)) {
            parser.q = c;
            parser.state = S.ATTRIB_VALUE_QUOTED;
          } else {
            strictFail(parser, "Unquoted attribute value");
            parser.state = S.ATTRIB_VALUE_UNQUOTED;
            parser.attribValue = c;
          }
          continue;

        case S.ATTRIB_VALUE_QUOTED:
          if (c !== parser.q) {
            if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_Q;
            else parser.attribValue += c;
            continue;
          }
          attrib(parser);
          parser.q = "";
          parser.state = S.ATTRIB_VALUE_CLOSED;
          continue;

        case S.ATTRIB_VALUE_CLOSED:
          if (is(whitespace, c)) {
            parser.state = S.ATTRIB;
          } else if (c === ">") {
            openTag(parser);
          } else if (c === "/") {
            parser.state = S.OPEN_TAG_SLASH;
          } else if (is(nameStart, c)) {
            strictFail(parser, "No whitespace between attributes");
            parser.attribName = c;
            parser.attribValue = "";
            parser.state = S.ATTRIB_NAME;
          } else {
            strictFail(parser, "Invalid attribute name");
          }
          continue;

        case S.ATTRIB_VALUE_UNQUOTED:
          if (not(attribEnd, c)) {
            if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_U;
            else parser.attribValue += c;
            continue;
          }
          attrib(parser);
          if (c === ">") openTag(parser);
          else parser.state = S.ATTRIB;
          continue;

        case S.CLOSE_TAG:
          if (!parser.tagName) {
            if (is(whitespace, c)) {
              continue;
            } else if (not(nameStart, c)) {
              if (parser.script) {
                parser.script += "</" + c;
                parser.state = S.SCRIPT;
              } else {
                strictFail(parser, "Invalid tagname in closing tag.");
              }
            } else {
              parser.tagName = c;
            }
          } else if (c === ">") {
            closeTag(parser);
          } else if (is(nameBody, c)) {
            parser.tagName += c;
          } else if (parser.script) {
            parser.script += "</" + parser.tagName;
            parser.tagName = "";
            parser.state = S.SCRIPT;
          } else {
            if (not(whitespace, c)) {
              strictFail(parser, "Invalid tagname in closing tag");
            }
            parser.state = S.CLOSE_TAG_SAW_WHITE;
          }
          continue;

        case S.CLOSE_TAG_SAW_WHITE:
          if (is(whitespace, c)) continue;
          if (c === ">") closeTag(parser);
          else strictFail(parser, "Invalid characters in closing tag");
          continue;

        case S.TEXT_ENTITY:
        case S.ATTRIB_VALUE_ENTITY_Q:
        case S.ATTRIB_VALUE_ENTITY_U:
          // pick the state to return to and the buffer that receives the
          // decoded entity, depending on where the "&" was seen
          switch (parser.state) {
            case S.TEXT_ENTITY:
              returnState = S.TEXT;
              buffer = "textNode";
              break;

            case S.ATTRIB_VALUE_ENTITY_Q:
              returnState = S.ATTRIB_VALUE_QUOTED;
              buffer = "attribValue";
              break;

            case S.ATTRIB_VALUE_ENTITY_U:
              returnState = S.ATTRIB_VALUE_UNQUOTED;
              buffer = "attribValue";
              break;
          }
          if (c === ";") {
            parser[buffer] += parseEntity(parser);
            parser.entity = "";
            parser.state = returnState;
          } else if (is(entity, c)) {
            parser.entity += c;
          } else {
            strictFail(parser, "Invalid character entity");
            parser[buffer] += "&" + parser.entity + c;
            parser.entity = "";
            parser.state = returnState;
          }
          continue;

        default:
          throw new Error("Unknown state: " + parser.state);
      }
    } // while

    if (parser.position >= parser.bufferCheckPosition) {
      checkBufferLength(parser);
    }
    return parser;
  }

  /*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
  if (!String.fromCodePoint) {
    (function () {
      var stringFromCharCode = String.fromCharCode;
      var floor = Math.floor;
      var fromCodePoint = function () {
        var MAX_SIZE = 0x4000;
        var codeUnits = [];
        var highSurrogate;
        var lowSurrogate;
        var index = -1;
        var length = arguments.length;
        if (!length) {
          return '';
        }
        var result = '';
        while (++index < length) {
          var codePoint = Number(arguments[index]);
          if (
            !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
            codePoint < 0 || // not a valid Unicode code point
            codePoint > 0x10FFFF || // not a valid Unicode code point
            floor(codePoint) != codePoint // not an integer
          ) {
            throw RangeError('Invalid code point: ' + codePoint);
          }
          if (codePoint <= 0xFFFF) { // BMP code point
            codeUnits.push(codePoint);
          } else { // Astral code point; split in surrogate halves
            // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
            codePoint -= 0x10000;
            highSurrogate = (codePoint >> 10) + 0xD800;
            lowSurrogate = (codePoint % 0x400) + 0xDC00;
            codeUnits.push(highSurrogate, lowSurrogate);
          }
          if (index + 1 == length || codeUnits.length > MAX_SIZE) {
            result += stringFromCharCode.apply(null, codeUnits);
            codeUnits.length = 0;
          }
        }
        return result;
      };
      if (Object.defineProperty) {
        Object.defineProperty(String, 'fromCodePoint', {
          'value': fromCodePoint,
          'configurable': true,
          'writable': true
        });
      } else {
        String.fromCodePoint = fromCodePoint;
      }
    }());
  }

// Without CommonJS `exports`, publish the API as a global `sax` object.
// The global object is addressed explicitly (rather than through an
// undeclared assignment) so that this also works in strict-mode code.
})(typeof exports === "undefined"
  ? ((typeof globalThis !== "undefined" ? globalThis : this).sax = {})
  : exports);