2066 lines
55 KiB
JavaScript
2066 lines
55 KiB
JavaScript
|
"use strict";
|
||
|
|
||
|
const { isS, isChar, isNameStartChar, isNameChar, S_LIST, NAME_RE } =
|
||
|
require("xmlchars/xml/1.0/ed5");
|
||
|
const { isNCNameStartChar, isNCNameChar, NC_NAME_RE } = require("xmlchars/xmlns/1.0/ed3");
|
||
|
|
||
|
// Namespace URIs that the ``xml`` and ``xmlns`` prefixes must be bound to.
const XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace";
const XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";

// Prefix-to-URI bindings that are always in effect. The object is created
// without a prototype so that looking up a prefix can never hit an inherited
// property.
const rootNS = Object.assign(Object.create(null), {
  xml: XML_NAMESPACE,
  xmlns: XMLNS_NAMESPACE,
});
|
||
|
|
||
|
// The five entities predefined by XML, keyed by entity name. The object has no
// prototype so that entity lookups cannot resolve to inherited properties.
const XML_ENTITIES = Object.assign(Object.create(null), {
  amp: "&",
  gt: ">",
  lt: "<",
  quot: "\"",
  apos: "'",
});
|
||
|
|
||
|
// Parser states. Each value is the name of the parser method that handles the
// state: the main loop dispatches with ``this[this.state]()``, so the strings
// must match the method names exactly.

// Document prolog.
const S_INITIAL = "sInitial"; // initial state
const S_BEGIN_WHITESPACE = "sBeginWhitespace"; // leading whitespace

// Doctype declaration and internal DTD subset.
const S_DOCTYPE = "sDoctype"; // <!DOCTYPE
const S_DOCTYPE_QUOTE = "sDoctypeQuote"; // <!DOCTYPE "//blah
const S_DTD = "sDTD"; // <!DOCTYPE "//blah" [ ...
const S_DTD_QUOTED = "sDTDQuoted"; // <!DOCTYPE "//blah" [ "foo
const S_DTD_OPEN_WAKA = "sDTDOpenWaka";
const S_DTD_OPEN_WAKA_BANG = "sDTDOpenWakaBang";
const S_DTD_COMMENT = "sDTDComment"; // <!--
const S_DTD_COMMENT_ENDING = "sDTDCommentEnding"; // <!-- blah -
const S_DTD_COMMENT_ENDED = "sDTDCommentEnded"; // <!-- blah --
const S_DTD_PI = "sDTDPI"; // <?
const S_DTD_PI_ENDING = "sDTDPIEnding"; // <?hi "there" ?

// Character data and markup openers.
const S_TEXT = "sText"; // general stuff
const S_ENTITY = "sEntity"; // & and such
const S_OPEN_WAKA = "sOpenWaka"; // <
const S_OPEN_WAKA_BANG = "sOpenWakaBang"; // <!...

// Comments and CDATA sections.
const S_COMMENT = "sComment"; // <!--
const S_COMMENT_ENDING = "sCommentEnding"; // <!-- blah -
const S_COMMENT_ENDED = "sCommentEnded"; // <!-- blah --
const S_CDATA = "sCData"; // <![CDATA[ something
const S_CDATA_ENDING = "sCDataEnding"; // ]
const S_CDATA_ENDING_2 = "sCDataEnding2"; // ]]

// Processing instructions.
const S_PI_FIRST_CHAR = "sPIFirstChar"; // <?hi, first char
const S_PI_REST = "sPIRest"; // <?hi, rest of the name
const S_PI_BODY = "sPIBody"; // <?hi there
const S_PI_ENDING = "sPIEnding"; // <?hi "there" ?

// Tags and attributes.
const S_OPEN_TAG = "sOpenTag"; // <strong
const S_OPEN_TAG_SLASH = "sOpenTagSlash"; // <strong /
const S_ATTRIB = "sAttrib"; // <a
const S_ATTRIB_NAME = "sAttribName"; // <a foo
const S_ATTRIB_NAME_SAW_WHITE = "sAttribNameSawWhite"; // <a foo _
const S_ATTRIB_VALUE = "sAttribValue"; // <a foo=
const S_ATTRIB_VALUE_QUOTED = "sAttribValueQuoted"; // <a foo="bar
const S_ATTRIB_VALUE_CLOSED = "sAttribValueClosed"; // <a foo="bar"
const S_ATTRIB_VALUE_UNQUOTED = "sAttribValueUnquoted"; // <a foo=bar
const S_CLOSE_TAG = "sCloseTag"; // </a
const S_CLOSE_TAG_SAW_WHITE = "sCloseTagSawWhite"; // </a >

// These states are internal to sPIBody. They are plain numbers rather than
// method names.
const S_XML_DECL_NAME_START = 1; // <?xml
const S_XML_DECL_NAME = 2; // <?xml foo
const S_XML_DECL_EQ = 3; // <?xml foo=
const S_XML_DECL_VALUE_START = 4; // <?xml foo=
const S_XML_DECL_VALUE = 5; // <?xml foo="bar"
|
||
|
|
||
|
/**
|
||
|
* The list of supported events.
|
||
|
*/
|
||
|
exports.EVENTS = [
|
||
|
"text",
|
||
|
"processinginstruction",
|
||
|
"doctype",
|
||
|
"comment",
|
||
|
"opentagstart",
|
||
|
"opentag",
|
||
|
"closetag",
|
||
|
"cdata",
|
||
|
"error",
|
||
|
"end",
|
||
|
"ready",
|
||
|
];
|
||
|
|
||
|
// Code points of the characters the parser treats specially. The parser
// compares numeric character codes rather than one-character strings.
const NL = 0xA; // line feed
const CR = 0xD; // carriage return
const SPACE = 0x20; // " "
const BANG = 0x21; // !
const DQUOTE = 0x22; // "
const AMP = 0x26; // &
const SQUOTE = 0x27; // '
const MINUS = 0x2D; // -
const FORWARD_SLASH = 0x2F; // /
const SEMICOLON = 0x3B; // ;
const LESS = 0x3C; // <
const EQUAL = 0x3D; // =
const GREATER = 0x3E; // >
const QUESTION = 0x3F; // ?
const OPEN_BRACKET = 0x5B; // [
const CLOSE_BRACKET = 0x5D; // ]
|
||
|
|
||
|
/**
 * Test whether a character code is a quotation mark.
 *
 * @param {number} c The character code to test.
 *
 * @returns {boolean} ``true`` if ``c`` is a double or a single quote.
 */
function isQuote(c) {
  switch (c) {
  case DQUOTE:
  case SQUOTE:
    return true;
  default:
    return false;
  }
}
|
||
|
|
||
|
// Both kinds of quotation marks.
const QUOTES = [DQUOTE, SQUOTE];

// Sets of character codes that end a capture performed with ``captureTo``.

// Used while reading a doctype declaration outside of the internal subset.
const DOCTYPE_TERMINATOR = [...QUOTES, OPEN_BRACKET, GREATER];
// Used while reading the internal DTD subset.
const DTD_TERMINATOR = [...QUOTES, LESS, CLOSE_BRACKET];
// Ends a pseudo-attribute name in an XML declaration.
const XML_DECL_NAME_TERMINATOR = [EQUAL, QUESTION, ...S_LIST];
// Ends an attribute value that was not quoted.
const ATTRIB_VALUE_UNQUOTED_TERMINATOR = [...S_LIST, GREATER, AMP, LESS];
|
||
|
|
||
|
/**
 * Check that binding a prefix to a namespace URI does not violate the rules
 * governing the reserved ``xml`` and ``xmlns`` prefixes and their URIs. Each
 * violation is reported through ``parser.fail``.
 *
 * @param {object} parser The object on which to report failures. Only its
 * ``fail`` method is used.
 *
 * @param {string} prefix The prefix being bound. The empty string stands for
 * the default namespace.
 *
 * @param {string} uri The URI the prefix is being bound to.
 */
function nsPairCheck(parser, prefix, uri) {
  // First, the reserved prefixes may only be bound to their own URI.
  if (prefix === "xml") {
    if (uri !== XML_NAMESPACE) {
      parser.fail(`xml prefix must be bound to ${XML_NAMESPACE}.`);
    }
  }
  else if (prefix === "xmlns") {
    if (uri !== XMLNS_NAMESPACE) {
      parser.fail(`xmlns prefix must be bound to ${XMLNS_NAMESPACE}.`);
    }
  }

  // Second, the reserved URIs may not be bound to arbitrary prefixes.
  if (uri === XMLNS_NAMESPACE) {
    // Nothing at all may be bound to the xmlns URI.
    const message = prefix === "" ?
      `the default namespace may not be set to ${uri}.` :
      `may not assign a prefix (even "xmlns") to the URI ${XMLNS_NAMESPACE}.`;
    parser.fail(message);
  }
  else if (uri === XML_NAMESPACE) {
    // Assigning the XML namespace to "xml" is fine; anything else is not.
    if (prefix === "") {
      parser.fail(`the default namespace may not be set to ${uri}.`);
    }
    else if (prefix !== "xml") {
      parser.fail("may not assign the xml namespace to another prefix.");
    }
  }
}
|
||
|
|
||
|
|
||
|
/**
 * Run ``nsPairCheck`` on every prefix/URI pair of a mapping.
 *
 * @param {object} parser The object on which failures are reported.
 *
 * @param {Object.<string, string>} mapping A map of prefix to namespace URI.
 * Only its own enumerable properties are checked.
 */
function nsMappingCheck(parser, mapping) {
  for (const [prefix, uri] of Object.entries(mapping)) {
    nsPairCheck(parser, prefix, uri);
  }
}
|
||
|
|
||
|
/**
 * Test whether a string matches ``NC_NAME_RE``, the pattern for names that
 * contain no colon.
 *
 * @param {string} name The string to test.
 *
 * @returns {boolean} ``true`` if the string matches.
 */
function isNCName(name) {
  return NC_NAME_RE.exec(name) !== null;
}
|
||
|
|
||
|
/**
 * Test whether a string matches ``NAME_RE``, the pattern for XML names.
 *
 * @param {string} name The string to test.
 *
 * @returns {boolean} ``true`` if the string matches.
 */
function isName(name) {
  return NAME_RE.exec(name) !== null;
}
|
||
|
|
||
|
// Progress made towards recognizing the sequence "]]>", which is forbidden in
// character data.
const FORBIDDEN_START = 0; // nothing recognized yet
const FORBIDDEN_BRACKET = 1; // seen "]"
const FORBIDDEN_BRACKET_BRACKET = 2; // seen "]]"
|
||
|
|
||
|
/**
|
||
|
* Data structure for an XML tag.
|
||
|
*
|
||
|
* @typedef {object} SaxesTag
|
||
|
*
|
||
|
* @property {string} name The tag's name. This is the combination of prefix and
|
||
|
* local name. For instance ``<a:b>`` would have ``"a:b"`` for ``name``.
|
||
|
*
|
||
|
* @property {string} prefix The tag's prefix. For instance ``<a:b>`` would have
|
||
|
* ``"a"`` for ``prefix``. Undefined if we do not track namespaces.
|
||
|
*
|
||
|
* @property {string} local The tag's local name. For instance ``<a:b>`` would
|
||
|
* have ``"b"`` for ``local``. Undefined if we do not track namespaces.
|
||
|
*
|
||
|
* @property {string} uri The namespace URI of this tag. Undefined if we do not
|
||
|
* track namespaces.
|
||
|
*
|
||
|
* @property {Object.<string, SaxesAttribute> | Object.<string, string>}
|
||
|
* attributes A map of attribute name to attributes. If namespaces are tracked,
|
||
|
* the values in the map are {@link SaxesAttribute SaxesAttribute}
|
||
|
* objects. Otherwise, they are strings.
|
||
|
*
|
||
|
* @property {Object.<string, string>} ns The namespace bindings in effect.
|
||
|
*
|
||
|
* @property {boolean} isSelfClosing Whether the tag is
|
||
|
* self-closing (e.g. ``<foo/>``).
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
/**
|
||
|
* Data structure for an XML attribute
|
||
|
*
|
||
|
* @typedef {object} SaxesAttribute
|
||
|
*
|
||
|
* @property {string} name The attribute's name. This is the combination of
|
||
|
* prefix and local name. For instance ``a:b="c"`` would have ``a:b`` for name.
|
||
|
*
|
||
|
* @property {string} prefix The attribute's prefix. For instance ``a:b="c"``
|
||
|
* would have ``"a"`` for ``prefix``.
|
||
|
*
|
||
|
* @property {string} local The attribute's local name. For instance ``a:b="c"``
|
||
|
* would have ``"b"`` for ``local``.
|
||
|
*
|
||
|
* @property {string} uri The namespace URI of this attribute.
|
||
|
*
|
||
|
* @property {string} value The attribute's value.
|
||
|
*/
|
||
|
|
||
|
/**
|
||
|
* @typedef XMLDecl
|
||
|
*
|
||
|
* @property {string} [version] The version specified by the XML declaration.
|
||
|
*
|
||
|
* @property {string} [encoding] The encoding specified by the XML declaration.
|
||
|
*
|
||
|
* @property {string} [standalone] The value of the standalone parameter
|
||
|
* specified by the XML declaration.
|
||
|
*/
|
||
|
|
||
|
/**
|
||
|
* @callback ResolvePrefix
|
||
|
*
|
||
|
* @param {string} prefix The prefix to check.
|
||
|
*
|
||
|
* @returns {string|undefined} The URI corresponding to the prefix, if any.
|
||
|
*/
|
||
|
|
||
|
/**
|
||
|
* @typedef SaxesOptions
|
||
|
*
|
||
|
* @property {boolean} [xmlns] Whether to track namespaces. Unset means
|
||
|
* ``false``.
|
||
|
*
|
||
|
* @property {boolean} [fragment] Whether to accept XML fragments. Unset means
|
||
|
* ``false``.
|
||
|
*
|
||
|
* @property {Object.<string, string>} [additionalNamespaces] A plain object whose key, value
|
||
|
* pairs define namespaces known before parsing the XML file. It is not legal
|
||
|
* to pass bindings for the namespaces ``"xml"`` or ``"xmlns"``.
|
||
|
*
|
||
|
* @property {ResolvePrefix} [resolvePrefix] A function that will be used if the
|
||
|
* parser cannot resolve a namespace prefix on its own.
|
||
|
*
|
||
|
* @property {boolean} [position] Whether to track positions. Unset means
|
||
|
* ``true``.
|
||
|
*
|
||
|
* @property {string} [fileName] A file name to use for error reporting. Leaving
|
||
|
* this unset will report a file name of "undefined". "File name" is a loose
|
||
|
* concept. You could use a URL to some resource, or any descriptive name you
|
||
|
* like.
|
||
|
*/
|
||
|
|
||
|
class SaxesParser {
|
||
|
/**
|
||
|
* @param {SaxesOptions} opt The parser options.
|
||
|
*/
|
||
|
constructor(opt) {
|
||
|
this._init(opt);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Reset the parser state.
|
||
|
*
|
||
|
* @private
|
||
|
*/
|
||
|
_init(opt) {
|
||
|
this.comment = "";
|
||
|
this.openWakaBang = "";
|
||
|
this.text = "";
|
||
|
this.name = "";
|
||
|
this.doctype = "";
|
||
|
this.piTarget = "";
|
||
|
this.piBody = "";
|
||
|
this.entity = "";
|
||
|
this.cdata = "";
|
||
|
this.xmlDeclName = "";
|
||
|
this.xmlDeclValue = "";
|
||
|
|
||
|
/**
|
||
|
* The options passed to the constructor of this parser.
|
||
|
*
|
||
|
* @type {SaxesOptions}
|
||
|
*/
|
||
|
this.opt = opt || {};
|
||
|
|
||
|
/**
|
||
|
* Indicates whether or not the parser is closed. If ``true``, wait for
|
||
|
* the ``ready`` event to write again.
|
||
|
*
|
||
|
* @type {boolean}
|
||
|
*/
|
||
|
this.closed = false;
|
||
|
|
||
|
/**
|
||
|
* The XML declaration for this document.
|
||
|
*
|
||
|
* @type {XMLDecl}
|
||
|
*/
|
||
|
this.xmlDecl = {
|
||
|
version: undefined,
|
||
|
encoding: undefined,
|
||
|
standalone: undefined,
|
||
|
};
|
||
|
|
||
|
this.q = null;
|
||
|
this.tags = [];
|
||
|
this.tag = null;
|
||
|
this.chunk = "";
|
||
|
this.chunkPosition = 0;
|
||
|
this.i = 0;
|
||
|
this.trailingCR = false;
|
||
|
this.forbiddenState = FORBIDDEN_START;
|
||
|
/**
|
||
|
* A map of entity name to expansion.
|
||
|
*
|
||
|
* @type {Object.<string, string>}
|
||
|
*/
|
||
|
this.ENTITIES = Object.create(XML_ENTITIES);
|
||
|
this.attribList = [];
|
||
|
|
||
|
// The logic is organized so as to minimize the need to check
|
||
|
// this.opt.fragment while parsing.
|
||
|
|
||
|
const fragmentOpt = this.fragmentOpt = !!this.opt.fragment;
|
||
|
this.state = fragmentOpt ? S_TEXT : S_INITIAL;
|
||
|
// We want these to be all true if we are dealing with a fragment.
|
||
|
this.reportedTextBeforeRoot = this.reportedTextAfterRoot = this.closedRoot =
|
||
|
this.sawRoot = fragmentOpt;
|
||
|
// An XML declaration is intially possible only when parsing whole
|
||
|
// documents.
|
||
|
this.xmlDeclPossible = !fragmentOpt;
|
||
|
|
||
|
this.piIsXMLDecl = false;
|
||
|
this.xmlDeclState = S_XML_DECL_NAME_START;
|
||
|
this.xmlDeclExpects = ["version"];
|
||
|
this.requiredSeparator = false;
|
||
|
this.entityReturnState = undefined;
|
||
|
const xmlnsOpt = this.xmlnsOpt = !!this.opt.xmlns;
|
||
|
|
||
|
if (xmlnsOpt) {
|
||
|
// This is the function we use to perform name checks on PIs and entities.
|
||
|
// When namespaces are used, colons are not allowed in PI target names or
|
||
|
// entity names. So the check depends on whether namespaces are used. See:
|
||
|
//
|
||
|
// https://www.w3.org/XML/xml-names-19990114-errata.html
|
||
|
// NE08
|
||
|
//
|
||
|
this.nameStartCheck = isNCNameStartChar;
|
||
|
this.nameCheck = isNCNameChar;
|
||
|
this.isName = isNCName;
|
||
|
this.processAttribs = this.processAttribsNS;
|
||
|
this.pushAttrib = this.pushAttribNS;
|
||
|
|
||
|
this.ns = Object.assign({ __proto__: null }, rootNS);
|
||
|
const additional = this.opt.additionalNamespaces;
|
||
|
if (additional) {
|
||
|
nsMappingCheck(this, additional);
|
||
|
Object.assign(this.ns, additional);
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
this.nameStartCheck = isNameStartChar;
|
||
|
this.nameCheck = isNameChar;
|
||
|
this.isName = isName;
|
||
|
this.processAttribs = this.processAttribsPlain;
|
||
|
this.pushAttrib = this.pushAttribPlain;
|
||
|
}
|
||
|
|
||
|
this.trackPosition = this.opt.position !== false;
|
||
|
/** The line number the parser is currently looking at. */
|
||
|
this.line = 1;
|
||
|
|
||
|
/** The column the parser is currently looking at. */
|
||
|
this.column = 0;
|
||
|
|
||
|
this.fileName = this.opt.fileName;
|
||
|
this.onready();
|
||
|
}
|
||
|
|
||
|
/** The stream position the parser is currently looking at. */
|
||
|
get position() {
|
||
|
return this.chunkPosition + this.i;
|
||
|
}
|
||
|
|
||
|
/* eslint-disable class-methods-use-this */
|
||
|
/**
|
||
|
* Event handler for text data. The default implementation is a no-op.
|
||
|
*
|
||
|
* @param {string} text The text data encountered by the parser.
|
||
|
*
|
||
|
*/
|
||
|
ontext() {}
|
||
|
|
||
|
/**
|
||
|
* Event handler for processing instructions. The default implementation is a
|
||
|
* no-op.
|
||
|
*
|
||
|
* @param {{target: string, body: string}} data The target and body of
|
||
|
* the processing instruction.
|
||
|
*/
|
||
|
onprocessinginstruction() {}
|
||
|
|
||
|
/**
|
||
|
* Event handler for doctype. The default implementation is a no-op.
|
||
|
*
|
||
|
* @param {string} doctype The doctype contents.
|
||
|
*/
|
||
|
ondoctype() {}
|
||
|
|
||
|
/**
|
||
|
* Event handler for comments. The default implementation is a no-op.
|
||
|
*
|
||
|
* @param {string} comment The comment contents.
|
||
|
*/
|
||
|
oncomment() {}
|
||
|
|
||
|
/**
|
||
|
* Event handler for the start of an open tag. This is called as soon as we
|
||
|
* have a tag name. The default implementation is a no-op.
|
||
|
*
|
||
|
* @param {SaxesTag} tag The tag.
|
||
|
*/
|
||
|
onopentagstart() {}
|
||
|
|
||
|
/**
|
||
|
* Event handler for an open tag. This is called when the open tag is
|
||
|
* complete. (We've encountered the ">" that ends the open tag.) The default
|
||
|
* implementation is a no-op.
|
||
|
*
|
||
|
* @param {SaxesTag} tag The tag.
|
||
|
*/
|
||
|
onopentag() {}
|
||
|
|
||
|
/**
|
||
|
* Event handler for a close tag. Note that for self-closing tags, this is
|
||
|
* called right after ``onopentag``. The default implementation is a no-op.
|
||
|
*
|
||
|
* @param {SaxesTag} tag The tag.
|
||
|
*/
|
||
|
onclosetag() {}
|
||
|
|
||
|
/**
|
||
|
* Event handler for a CDATA section. This is called when ending the
|
||
|
* CDATA section. The default implementation is a no-op.
|
||
|
*
|
||
|
* @param {string} cdata The contents of the CDATA section.
|
||
|
*/
|
||
|
oncdata() {}
|
||
|
|
||
|
/**
|
||
|
* Event handler for the stream end. This is called when the stream has been
|
||
|
* closed with ``close`` or by passing ``null`` to ``write``. The default
|
||
|
* implementation is a no-op.
|
||
|
*/
|
||
|
onend() {}
|
||
|
|
||
|
/**
|
||
|
* Event handler indicating parser readiness . This is called when the parser
|
||
|
* is ready to parse a new document. The default implementation is a no-op.
|
||
|
*/
|
||
|
onready() {}
|
||
|
|
||
|
/**
|
||
|
* Event handler indicating an error. The default implementation throws the
|
||
|
* error. Override with a no-op handler if you don't want this.
|
||
|
*
|
||
|
* @param {Error} err The error that occurred.
|
||
|
*/
|
||
|
onerror(err) {
|
||
|
throw new Error(err);
|
||
|
}
|
||
|
/* eslint-enable class-methods-use-this */
|
||
|
|
||
|
/**
|
||
|
* Report a parsing error. This method is made public so that client code may
|
||
|
* check for issues that are outside the scope of this project and can report
|
||
|
* errors.
|
||
|
*
|
||
|
* @param {Error} er The error to report.
|
||
|
*
|
||
|
* @returns this
|
||
|
*/
|
||
|
fail(er) {
|
||
|
const message = (this.trackPosition) ?
|
||
|
`${this.fileName}:${this.line}:${this.column}: ${er}` : er;
|
||
|
|
||
|
this.onerror(new Error(message));
|
||
|
return this;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Write a XML data to the parser.
|
||
|
*
|
||
|
* @param {string} chunk The XML data to write.
|
||
|
*
|
||
|
* @returns this
|
||
|
*/
|
||
|
write(chunk) {
|
||
|
if (this.closed) {
|
||
|
return this.fail("cannot write after close; assign an onready handler.");
|
||
|
}
|
||
|
|
||
|
let end = false;
|
||
|
if (chunk === null) {
|
||
|
end = true;
|
||
|
chunk = "";
|
||
|
}
|
||
|
|
||
|
if (typeof chunk === "object") {
|
||
|
chunk = chunk.toString();
|
||
|
}
|
||
|
|
||
|
// We checked if performing a pre-decomposition of the string into an array
|
||
|
// of single complete characters (``Array.from(chunk)``) would be faster
|
||
|
// than the current repeated calls to ``codePointAt``. As of August 2018, it
|
||
|
// isn't. (There may be Node-specific code that would perform faster than
|
||
|
// ``Array.from`` but don't want to be dependent on Node.)
|
||
|
|
||
|
let limit = chunk.length;
|
||
|
|
||
|
if (this.trailingCR) {
|
||
|
// The previous chunk had a trailing cr. We need to handle it now.
|
||
|
chunk = `\r${chunk}`;
|
||
|
}
|
||
|
|
||
|
if (!end && chunk[limit - 1] === CR) {
|
||
|
// The chunk ends with a trailing CR. We cannot know how to handle it
|
||
|
// until we get the next chunk or the end of the stream. So save it for
|
||
|
// later.
|
||
|
limit--;
|
||
|
this.trailingCR = true;
|
||
|
}
|
||
|
this.limit = limit;
|
||
|
|
||
|
this.chunk = chunk;
|
||
|
this.i = 0;
|
||
|
while (this.i < limit) {
|
||
|
this[this.state]();
|
||
|
}
|
||
|
this.chunkPosition += limit;
|
||
|
|
||
|
return end ? this.end() : this;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Close the current stream. Perform final well-formedness checks and reset
|
||
|
* the parser tstate.
|
||
|
*
|
||
|
* @returns this
|
||
|
*/
|
||
|
close() {
|
||
|
return this.write(null);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Get a single code point out of the current chunk. This updates the current
|
||
|
* position if we do position tracking.
|
||
|
*
|
||
|
* @private
|
||
|
*
|
||
|
* @returns {number} The character read.
|
||
|
*/
|
||
|
getCode() {
|
||
|
const { chunk, i } = this;
|
||
|
// Using charCodeAt and handling the surrogates ourselves is faster
|
||
|
// than using codePointAt.
|
||
|
let code = chunk.charCodeAt(i);
|
||
|
|
||
|
let skip = 1;
|
||
|
switch (code) {
|
||
|
case CR:
|
||
|
// We may get NaN if we read past the end of the chunk, which is
|
||
|
// fine.
|
||
|
if (chunk.charCodeAt(i + 1) === NL) {
|
||
|
// A \r\n sequence is converted to \n so we have to skip over the next
|
||
|
// character. We already know it has a size of 1 so ++ is fine here.
|
||
|
skip++;
|
||
|
}
|
||
|
// Otherwise, a \r is just converted to \n, so we don't have to skip
|
||
|
// ahead.
|
||
|
|
||
|
// In either case, \r becomes \n.
|
||
|
code = NL;
|
||
|
/* yes, fall through */
|
||
|
case NL:
|
||
|
this.line++;
|
||
|
this.column = 0;
|
||
|
break;
|
||
|
default:
|
||
|
this.column++;
|
||
|
if (code >= 0xD800 && code <= 0xDBFF) {
|
||
|
code = 0x10000 + ((code - 0xD800) * 0x400) +
|
||
|
(chunk.charCodeAt(i + 1) - 0xDC00);
|
||
|
this.column++;
|
||
|
skip++;
|
||
|
}
|
||
|
|
||
|
if (!isChar(code)) {
|
||
|
this.fail("disallowed character.");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
this.i += skip;
|
||
|
|
||
|
return code;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* @callback CharacterTest
|
||
|
*
|
||
|
* @private
|
||
|
*
|
||
|
* @param {string} c The character to test.
|
||
|
*
|
||
|
* @returns {boolean} ``true`` if the method should continue capturing text,
|
||
|
* ``false`` otherwise.
|
||
|
*/
|
||
|
|
||
|
/**
|
||
|
* Capture characters into a buffer until encountering one of a set of
|
||
|
* characters.
|
||
|
*
|
||
|
* @private
|
||
|
*
|
||
|
* @param {number[]} chars An array of codepoints. Encountering a character in
|
||
|
* the array ends the capture.
|
||
|
*
|
||
|
* @param {string} buffer The name of the buffer to save into.
|
||
|
*
|
||
|
* @return {number|undefined} The character code that made the capture end, or
|
||
|
* ``undefined`` if we hit the end of the chunk.
|
||
|
*/
|
||
|
captureTo(chars, buffer) {
|
||
|
const { chunk, limit, i: start } = this;
|
||
|
while (this.i < limit) {
|
||
|
const c = this.getCode();
|
||
|
if (chars.includes(c)) {
|
||
|
// This is faster than adding codepoints one by one.
|
||
|
this[buffer] += chunk.substring(start,
|
||
|
this.i - (c <= 0xFFFF ? 1 : 2));
|
||
|
return c;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// This is faster than adding codepoints one by one.
|
||
|
this[buffer] += chunk.substring(start);
|
||
|
return undefined;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Capture characters into a buffer until encountering a character.
|
||
|
*
|
||
|
* @private
|
||
|
*
|
||
|
* @param {number} char The codepoint that ends the capture.
|
||
|
*
|
||
|
* @param {string} buffer The name of the buffer to save into.
|
||
|
*
|
||
|
* @return {boolean} ``true`` if we ran into the character. Otherwise, we ran
|
||
|
* into the end of the current chunk.
|
||
|
*/
|
||
|
captureToChar(char, buffer) {
|
||
|
const { chunk, limit, i: start } = this;
|
||
|
while (this.i < limit) {
|
||
|
const c = this.getCode();
|
||
|
if (c === char) {
|
||
|
// This is faster than adding codepoints one by one.
|
||
|
this[buffer] += chunk.substring(start,
|
||
|
this.i - (c <= 0xFFFF ? 1 : 2));
|
||
|
return true;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// This is faster than adding codepoints one by one.
|
||
|
this[buffer] += chunk.substring(start);
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Capture characters that satisfy ``isNameChar`` into the ``name`` field of
|
||
|
* this parser.
|
||
|
*
|
||
|
* @private
|
||
|
*
|
||
|
* @return {number|undefined} The character code that made the test fail, or
|
||
|
* ``undefined`` if we hit the end of the chunk.
|
||
|
*/
|
||
|
captureNameChars() {
|
||
|
const { chunk, limit, i: start } = this;
|
||
|
while (this.i < limit) {
|
||
|
const c = this.getCode();
|
||
|
if (!isNameChar(c)) {
|
||
|
// This is faster than adding codepoints one by one.
|
||
|
this.name += chunk.substring(start,
|
||
|
this.i - (c <= 0xFFFF ? 1 : 2));
|
||
|
return c;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// This is faster than adding codepoints one by one.
|
||
|
this.name += chunk.substring(start);
|
||
|
return undefined;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Capture characters into a buffer while ``this.nameCheck`` run on the
|
||
|
* character read returns true.
|
||
|
*
|
||
|
* @private
|
||
|
*
|
||
|
* @param {string} buffer The name of the buffer to save into.
|
||
|
*
|
||
|
* @return {number|undefined} The character code that made the test fail, or
|
||
|
* ``undefined`` if we hit the end of the chunk.
|
||
|
*/
|
||
|
captureWhileNameCheck(buffer) {
|
||
|
const { chunk, limit, i: start } = this;
|
||
|
while (this.i < limit) {
|
||
|
const c = this.getCode();
|
||
|
if (!this.nameCheck(c)) {
|
||
|
// This is faster than adding codepoints one by one.
|
||
|
this[buffer] += chunk.substring(start,
|
||
|
this.i - (c <= 0xFFFF ? 1 : 2));
|
||
|
return c;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// This is faster than adding codepoints one by one.
|
||
|
this[buffer] += chunk.substring(start);
|
||
|
return undefined;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Skip white spaces.
|
||
|
*
|
||
|
* @private
|
||
|
*
|
||
|
* @return {string|undefined} The character that ended the skip, or
|
||
|
* ``undefined`` if we hit the end of the chunk.
|
||
|
*/
|
||
|
skipSpaces() {
|
||
|
const { limit } = this;
|
||
|
while (this.i < limit) {
|
||
|
const c = this.getCode();
|
||
|
if (!isS(c)) {
|
||
|
return c;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return undefined;
|
||
|
}
|
||
|
|
||
|
// STATE HANDLERS
|
||
|
|
||
|
/** @private */
|
||
|
sInitial() {
|
||
|
// We are essentially peeking at the first character of the chunk. Since
|
||
|
// S_INITIAL can be in effect only when we start working on the first chunk,
|
||
|
// the index at which we must look is necessarily 0. Note also that the
|
||
|
// following tests do not depend on decoding surrogates.
|
||
|
const c = this.chunk.charCodeAt(0);
|
||
|
// If the initial character is 0xFEFF, ignore it.
|
||
|
if (c === 0xFEFF) {
|
||
|
this.i++;
|
||
|
this.column++;
|
||
|
}
|
||
|
else if (isS(c)) {
|
||
|
this.i++;
|
||
|
this.column++;
|
||
|
// An XML declaration cannot appear after initial spaces.
|
||
|
this.xmlDeclPossible = false;
|
||
|
}
|
||
|
|
||
|
this.state = S_BEGIN_WHITESPACE;
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sBeginWhitespace() {
|
||
|
const c = this.skipSpaces();
|
||
|
if (c === LESS) {
|
||
|
this.state = S_OPEN_WAKA;
|
||
|
}
|
||
|
else if (c) {
|
||
|
// have to process this as a text node.
|
||
|
// weird, but happens.
|
||
|
if (!this.reportedTextBeforeRoot) {
|
||
|
this.fail("text data outside of root node.");
|
||
|
this.reportedTextBeforeRoot = true;
|
||
|
}
|
||
|
this.text = String.fromCodePoint(c);
|
||
|
this.state = S_TEXT;
|
||
|
this.xmlDeclPossible = false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sText() {
|
||
|
//
|
||
|
// We did try a version of saxes where the S_TEXT state was split in two
|
||
|
// states: one for text inside the root element, and one for text
|
||
|
// outside. This was avoiding having to test this.tags.length to decide what
|
||
|
// implementation to actually use.
|
||
|
//
|
||
|
// Peformance testing on gigabyte-size files did not show any advantage to
|
||
|
// using the two states solution instead of the current one. Conversely, it
|
||
|
// made the code a bit more complicated elsewhere. For instance, a comment
|
||
|
// can appear before the root element so when a comment ended it was
|
||
|
// necessary to determine whether to return to the S_TEXT state or to the
|
||
|
// new text-outside-root state.
|
||
|
//
|
||
|
if (this.tags.length !== 0) {
|
||
|
this.handleTextInRoot();
|
||
|
}
|
||
|
else {
|
||
|
this.handleTextOutsideRoot();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
handleTextInRoot() {
|
||
|
// This is essentially a specialized version of captureTo which is optimized
|
||
|
// for performing the ]]> check. A previous version of this code, checked
|
||
|
// ``this.text`` for the presence of ]]>. It simplified the code but was
|
||
|
// very costly when character data contained a lot of entities to be parsed.
|
||
|
//
|
||
|
// Since we are using a specialized loop, we also keep track of the presence
|
||
|
// of ]]> in text data. The sequence ]]> is forbidden to appear as-is.
|
||
|
//
|
||
|
const { chunk, limit, i: start } = this;
|
||
|
let { forbiddenState } = this;
|
||
|
let c;
|
||
|
// eslint-disable-next-line no-labels, no-restricted-syntax
|
||
|
scanLoop:
|
||
|
while (this.i < limit) {
|
||
|
const code = this.getCode();
|
||
|
switch (code) {
|
||
|
case LESS:
|
||
|
this.state = S_OPEN_WAKA;
|
||
|
c = code;
|
||
|
forbiddenState = FORBIDDEN_START;
|
||
|
// eslint-disable-next-line no-labels
|
||
|
break scanLoop;
|
||
|
case AMP:
|
||
|
this.state = S_ENTITY;
|
||
|
this.entityReturnState = S_TEXT;
|
||
|
c = code;
|
||
|
forbiddenState = FORBIDDEN_START;
|
||
|
// eslint-disable-next-line no-labels
|
||
|
break scanLoop;
|
||
|
case CLOSE_BRACKET:
|
||
|
switch (forbiddenState) {
|
||
|
case FORBIDDEN_START:
|
||
|
forbiddenState = FORBIDDEN_BRACKET;
|
||
|
break;
|
||
|
case FORBIDDEN_BRACKET:
|
||
|
forbiddenState = FORBIDDEN_BRACKET_BRACKET;
|
||
|
break;
|
||
|
case FORBIDDEN_BRACKET_BRACKET:
|
||
|
break;
|
||
|
default:
|
||
|
throw new Error("impossible state");
|
||
|
}
|
||
|
break;
|
||
|
case GREATER:
|
||
|
if (forbiddenState === FORBIDDEN_BRACKET_BRACKET) {
|
||
|
this.fail("the string \"]]>\" is disallowed in char data.");
|
||
|
}
|
||
|
forbiddenState = FORBIDDEN_START;
|
||
|
break;
|
||
|
default:
|
||
|
forbiddenState = FORBIDDEN_START;
|
||
|
}
|
||
|
}
|
||
|
this.forbiddenState = forbiddenState;
|
||
|
|
||
|
// This is faster than adding codepoints one by one.
|
||
|
this.text += chunk.substring(start,
|
||
|
c === undefined ? undefined :
|
||
|
(this.i - (c <= 0xFFFF ? 1 : 2)));
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
handleTextOutsideRoot() {
|
||
|
// This is essentially a specialized version of captureTo which is optimized
|
||
|
// for performing the ]]> check. A previous version of this code, checked
|
||
|
// ``this.text`` for the presence of ]]>. It simplified the code but was
|
||
|
// very costly when character data contained a lot of entities to be parsed.
|
||
|
//
|
||
|
// Since we are using a specialized loop, we also keep track of the presence
|
||
|
// of non-space characters in the text since these are errors when appearing
|
||
|
// outside the document root element.
|
||
|
//
|
||
|
const { chunk, limit, i: start } = this;
|
||
|
let nonSpace = false;
|
||
|
let c;
|
||
|
// eslint-disable-next-line no-labels, no-restricted-syntax
|
||
|
outRootLoop:
|
||
|
while (this.i < limit) {
|
||
|
const code = this.getCode();
|
||
|
switch (code) {
|
||
|
case LESS:
|
||
|
this.state = S_OPEN_WAKA;
|
||
|
c = code;
|
||
|
// eslint-disable-next-line no-labels
|
||
|
break outRootLoop;
|
||
|
case AMP:
|
||
|
this.state = S_ENTITY;
|
||
|
this.entityReturnState = S_TEXT;
|
||
|
c = code;
|
||
|
nonSpace = true;
|
||
|
// eslint-disable-next-line no-labels
|
||
|
break outRootLoop;
|
||
|
default:
|
||
|
if (!isS(code)) {
|
||
|
nonSpace = true;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// This is faster than adding codepoints one by one.
|
||
|
this.text += chunk.substring(start,
|
||
|
c === undefined ? undefined :
|
||
|
(this.i - (c <= 0xFFFF ? 1 : 2)));
|
||
|
|
||
|
if (!nonSpace) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
// We use the reportedTextBeforeRoot and reportedTextAfterRoot flags
|
||
|
// to avoid reporting errors for every single character that is out of
|
||
|
// place.
|
||
|
if (!this.sawRoot && !this.reportedTextBeforeRoot) {
|
||
|
this.fail("text data outside of root node.");
|
||
|
this.reportedTextBeforeRoot = true;
|
||
|
}
|
||
|
|
||
|
if (this.closedRoot && !this.reportedTextAfterRoot) {
|
||
|
this.fail("text data outside of root node.");
|
||
|
this.reportedTextAfterRoot = true;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sOpenWaka() {
|
||
|
const c = this.getCode();
|
||
|
// either a /, ?, !, or text is coming next.
|
||
|
if (isNameStartChar(c)) {
|
||
|
this.state = S_OPEN_TAG;
|
||
|
this.name = String.fromCodePoint(c);
|
||
|
this.xmlDeclPossible = false;
|
||
|
}
|
||
|
else {
|
||
|
switch (c) {
|
||
|
case FORWARD_SLASH:
|
||
|
this.state = S_CLOSE_TAG;
|
||
|
this.xmlDeclPossible = false;
|
||
|
break;
|
||
|
case BANG:
|
||
|
this.state = S_OPEN_WAKA_BANG;
|
||
|
this.openWakaBang = "";
|
||
|
this.xmlDeclPossible = false;
|
||
|
break;
|
||
|
case QUESTION:
|
||
|
this.state = S_PI_FIRST_CHAR;
|
||
|
break;
|
||
|
default:
|
||
|
this.fail("disallowed character in tag name.");
|
||
|
this.state = S_TEXT;
|
||
|
this.xmlDeclPossible = false;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sOpenWakaBang() {
|
||
|
this.openWakaBang += String.fromCodePoint(this.getCode());
|
||
|
switch (this.openWakaBang) {
|
||
|
case "[CDATA[":
|
||
|
if (!this.sawRoot && !this.reportedTextBeforeRoot) {
|
||
|
this.fail("text data outside of root node.");
|
||
|
this.reportedTextBeforeRoot = true;
|
||
|
}
|
||
|
|
||
|
if (this.closedRoot && !this.reportedTextAfterRoot) {
|
||
|
this.fail("text data outside of root node.");
|
||
|
this.reportedTextAfterRoot = true;
|
||
|
}
|
||
|
this.state = S_CDATA;
|
||
|
this.openWakaBang = "";
|
||
|
break;
|
||
|
case "--":
|
||
|
this.state = S_COMMENT;
|
||
|
this.openWakaBang = "";
|
||
|
break;
|
||
|
case "DOCTYPE":
|
||
|
this.state = S_DOCTYPE;
|
||
|
if (this.doctype || this.sawRoot) {
|
||
|
this.fail("inappropriately located doctype declaration.");
|
||
|
}
|
||
|
this.openWakaBang = "";
|
||
|
break;
|
||
|
default:
|
||
|
// 7 happens to be the maximum length of the string that can possibly
|
||
|
// match one of the cases above.
|
||
|
if (this.openWakaBang.length >= 7) {
|
||
|
this.fail("incorrect syntax.");
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sDoctype() {
|
||
|
const c = this.captureTo(DOCTYPE_TERMINATOR, "doctype");
|
||
|
if (c === GREATER) {
|
||
|
this.state = S_TEXT;
|
||
|
if (this.text.length !== 0) {
|
||
|
this.closeText();
|
||
|
}
|
||
|
this.ondoctype(this.doctype);
|
||
|
this.doctype = true; // just remember that we saw it.
|
||
|
}
|
||
|
else if (c) {
|
||
|
this.doctype += String.fromCodePoint(c);
|
||
|
if (c === OPEN_BRACKET) {
|
||
|
this.state = S_DTD;
|
||
|
}
|
||
|
else if (isQuote(c)) {
|
||
|
this.state = S_DOCTYPE_QUOTE;
|
||
|
this.q = c;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sDoctypeQuote() {
|
||
|
const { q } = this;
|
||
|
if (this.captureToChar(q, "doctype")) {
|
||
|
this.doctype += String.fromCodePoint(q);
|
||
|
this.q = null;
|
||
|
this.state = S_DOCTYPE;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sDTD() {
|
||
|
const c = this.captureTo(DTD_TERMINATOR, "doctype");
|
||
|
if (!c) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
this.doctype += String.fromCodePoint(c);
|
||
|
if (c === CLOSE_BRACKET) {
|
||
|
this.state = S_DOCTYPE;
|
||
|
}
|
||
|
else if (c === LESS) {
|
||
|
this.state = S_DTD_OPEN_WAKA;
|
||
|
}
|
||
|
else if (isQuote(c)) {
|
||
|
this.state = S_DTD_QUOTED;
|
||
|
this.q = c;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sDTDQuoted() {
|
||
|
const { q } = this;
|
||
|
if (this.captureToChar(q, "doctype")) {
|
||
|
this.doctype += String.fromCodePoint(q);
|
||
|
this.state = S_DTD;
|
||
|
this.q = null;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sDTDOpenWaka() {
|
||
|
const c = this.getCode();
|
||
|
this.doctype += String.fromCodePoint(c);
|
||
|
switch (c) {
|
||
|
case BANG:
|
||
|
this.state = S_DTD_OPEN_WAKA_BANG;
|
||
|
this.openWakaBang = "";
|
||
|
break;
|
||
|
case QUESTION:
|
||
|
this.state = S_DTD_PI;
|
||
|
break;
|
||
|
default:
|
||
|
this.state = S_DTD;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sDTDOpenWakaBang() {
|
||
|
const char = String.fromCodePoint(this.getCode());
|
||
|
const owb = this.openWakaBang += char;
|
||
|
this.doctype += char;
|
||
|
if (owb !== "-") {
|
||
|
this.state = owb === "--" ? S_DTD_COMMENT : S_DTD;
|
||
|
this.openWakaBang = "";
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sDTDComment() {
|
||
|
if (this.captureToChar(MINUS, "doctype")) {
|
||
|
this.doctype += "-";
|
||
|
this.state = S_DTD_COMMENT_ENDING;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sDTDCommentEnding() {
|
||
|
const c = this.getCode();
|
||
|
this.doctype += String.fromCodePoint(c);
|
||
|
this.state = c === MINUS ? S_DTD_COMMENT_ENDED : S_DTD_COMMENT;
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sDTDCommentEnded() {
|
||
|
const c = this.getCode();
|
||
|
this.doctype += String.fromCodePoint(c);
|
||
|
if (c === GREATER) {
|
||
|
this.state = S_DTD;
|
||
|
}
|
||
|
else {
|
||
|
this.fail("malformed comment.");
|
||
|
// <!-- blah -- bloo --> will be recorded as
|
||
|
// a comment of " blah -- bloo "
|
||
|
this.state = S_DTD_COMMENT;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sDTDPI() {
|
||
|
if (this.captureToChar(QUESTION, "doctype")) {
|
||
|
this.doctype += "?";
|
||
|
this.state = S_DTD_PI_ENDING;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sDTDPIEnding() {
|
||
|
const c = this.getCode();
|
||
|
this.doctype += String.fromCodePoint(c);
|
||
|
if (c === GREATER) {
|
||
|
this.state = S_DTD;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sComment() {
|
||
|
if (this.captureToChar(MINUS, "comment")) {
|
||
|
this.state = S_COMMENT_ENDING;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sCommentEnding() {
|
||
|
const c = this.getCode();
|
||
|
if (c === MINUS) {
|
||
|
this.state = S_COMMENT_ENDED;
|
||
|
if (this.text.length !== 0) {
|
||
|
this.closeText();
|
||
|
}
|
||
|
this.oncomment(this.comment);
|
||
|
this.comment = "";
|
||
|
}
|
||
|
else {
|
||
|
this.comment += `-${String.fromCodePoint(c)}`;
|
||
|
this.state = S_COMMENT;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sCommentEnded() {
|
||
|
const c = this.getCode();
|
||
|
if (c !== GREATER) {
|
||
|
this.fail("malformed comment.");
|
||
|
// <!-- blah -- bloo --> will be recorded as
|
||
|
// a comment of " blah -- bloo "
|
||
|
this.comment += `--${String.fromCodePoint(c)}`;
|
||
|
this.state = S_COMMENT;
|
||
|
}
|
||
|
else {
|
||
|
this.state = S_TEXT;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sCData() {
|
||
|
if (this.captureToChar(CLOSE_BRACKET, "cdata")) {
|
||
|
this.state = S_CDATA_ENDING;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sCDataEnding() {
|
||
|
const c = this.getCode();
|
||
|
if (c === CLOSE_BRACKET) {
|
||
|
this.state = S_CDATA_ENDING_2;
|
||
|
}
|
||
|
else {
|
||
|
this.cdata += `]${String.fromCodePoint(c)}`;
|
||
|
this.state = S_CDATA;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sCDataEnding2() {
|
||
|
const c = this.getCode();
|
||
|
switch (c) {
|
||
|
case GREATER:
|
||
|
if (this.text.length !== 0) {
|
||
|
this.closeText();
|
||
|
}
|
||
|
this.oncdata(this.cdata);
|
||
|
this.cdata = "";
|
||
|
this.state = S_TEXT;
|
||
|
break;
|
||
|
case CLOSE_BRACKET:
|
||
|
this.cdata += "]";
|
||
|
break;
|
||
|
default:
|
||
|
this.cdata += `]]${String.fromCodePoint(c)}`;
|
||
|
this.state = S_CDATA;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sPIFirstChar() {
|
||
|
const c = this.getCode();
|
||
|
if (this.nameStartCheck(c)) {
|
||
|
this.piTarget += String.fromCodePoint(c);
|
||
|
this.state = S_PI_REST;
|
||
|
}
|
||
|
else if (c === QUESTION || isS(c)) {
|
||
|
this.fail("processing instruction without a target.");
|
||
|
this.state = c === QUESTION ? S_PI_ENDING : S_PI_BODY;
|
||
|
}
|
||
|
else {
|
||
|
this.fail("disallowed character in processing instruction name.");
|
||
|
this.piTarget += String.fromCodePoint(c);
|
||
|
this.state = S_PI_REST;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sPIRest() {
|
||
|
const c = this.captureWhileNameCheck("piTarget");
|
||
|
if ((c === QUESTION || isS(c))) {
|
||
|
this.piIsXMLDecl = this.piTarget === "xml";
|
||
|
if (this.piIsXMLDecl && !this.xmlDeclPossible) {
|
||
|
this.fail("an XML declaration must be at the start of the document.");
|
||
|
}
|
||
|
this.state = c === QUESTION ? S_PI_ENDING : S_PI_BODY;
|
||
|
}
|
||
|
else if (c) {
|
||
|
this.fail("disallowed character in processing instruction name.");
|
||
|
this.piTarget += String.fromCodePoint(c);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sPIBody() {
|
||
|
let c;
|
||
|
if (this.piIsXMLDecl) {
|
||
|
switch (this.xmlDeclState) {
|
||
|
case S_XML_DECL_NAME_START: {
|
||
|
c = this.getCode();
|
||
|
if (isS(c)) {
|
||
|
c = this.skipSpaces();
|
||
|
}
|
||
|
else if (this.requiredSeparator && c !== QUESTION) {
|
||
|
this.fail("whitespace required.");
|
||
|
}
|
||
|
this.requiredSeparator = false;
|
||
|
|
||
|
// The question mark character is not valid inside any of the XML
|
||
|
// declaration name/value pairs.
|
||
|
if (c === QUESTION) {
|
||
|
this.state = S_PI_ENDING;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if (c) {
|
||
|
this.xmlDeclState = S_XML_DECL_NAME;
|
||
|
this.xmlDeclName = String.fromCodePoint(c);
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
case S_XML_DECL_NAME:
|
||
|
c = this.captureTo(XML_DECL_NAME_TERMINATOR, "xmlDeclName");
|
||
|
// The question mark character is not valid inside any of the XML
|
||
|
// declaration name/value pairs.
|
||
|
if (c === QUESTION) {
|
||
|
this.state = S_PI_ENDING;
|
||
|
return;
|
||
|
}
|
||
|
if (isS(c) || c === EQUAL) {
|
||
|
if (!this.xmlDeclExpects.includes(this.xmlDeclName)) {
|
||
|
switch (this.xmlDeclName.length) {
|
||
|
case 0:
|
||
|
this.fail("did not expect any more name/value pairs.");
|
||
|
break;
|
||
|
case 1:
|
||
|
this.fail(`expected the name ${this.xmlDeclExpects[0]}.`);
|
||
|
break;
|
||
|
default:
|
||
|
this.fail(`expected one of ${this.xmlDeclExpects.join(", ")}`);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
this.xmlDeclState = (c === EQUAL) ? S_XML_DECL_VALUE_START :
|
||
|
S_XML_DECL_EQ;
|
||
|
}
|
||
|
break;
|
||
|
case S_XML_DECL_EQ:
|
||
|
c = this.getCode();
|
||
|
// The question mark character is not valid inside any of the XML
|
||
|
// declaration name/value pairs.
|
||
|
if (c === QUESTION) {
|
||
|
this.state = S_PI_ENDING;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if (!isS(c)) {
|
||
|
if (c !== EQUAL) {
|
||
|
this.fail("value required.");
|
||
|
}
|
||
|
this.xmlDeclState = S_XML_DECL_VALUE_START;
|
||
|
}
|
||
|
break;
|
||
|
case S_XML_DECL_VALUE_START:
|
||
|
c = this.getCode();
|
||
|
// The question mark character is not valid inside any of the XML
|
||
|
// declaration name/value pairs.
|
||
|
if (c === QUESTION) {
|
||
|
this.state = S_PI_ENDING;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if (!isS(c)) {
|
||
|
if (!isQuote(c)) {
|
||
|
this.fail("value must be quoted.");
|
||
|
this.q = SPACE;
|
||
|
}
|
||
|
else {
|
||
|
this.q = c;
|
||
|
}
|
||
|
this.xmlDeclState = S_XML_DECL_VALUE;
|
||
|
}
|
||
|
break;
|
||
|
case S_XML_DECL_VALUE:
|
||
|
c = this.captureTo([this.q, QUESTION], "xmlDeclValue");
|
||
|
|
||
|
// The question mark character is not valid inside any of the XML
|
||
|
// declaration name/value pairs.
|
||
|
if (c === QUESTION) {
|
||
|
this.state = S_PI_ENDING;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if (c) {
|
||
|
switch (this.xmlDeclName) {
|
||
|
case "version":
|
||
|
if (!/^1\.[0-9]+$/.test(this.xmlDeclValue)) {
|
||
|
this.fail("version number must match /^1\\.[0-9]+$/.");
|
||
|
}
|
||
|
this.xmlDeclExpects = ["encoding", "standalone"];
|
||
|
this.xmlDecl.version = this.xmlDeclValue;
|
||
|
break;
|
||
|
case "encoding":
|
||
|
if (!/^[A-Za-z][A-Za-z0-9._-]*$/.test(this.xmlDeclValue)) {
|
||
|
this.fail("encoding value must match \
|
||
|
/^[A-Za-z0-9][A-Za-z0-9._-]*$/.");
|
||
|
}
|
||
|
this.xmlDeclExpects = ["standalone"];
|
||
|
this.xmlDecl.encoding = this.xmlDeclValue;
|
||
|
break;
|
||
|
case "standalone":
|
||
|
if (this.xmlDeclValue !== "yes" && this.xmlDeclValue !== "no") {
|
||
|
this.fail("standalone value must match \"yes\" or \"no\".");
|
||
|
}
|
||
|
this.xmlDeclExpects = [];
|
||
|
this.xmlDecl.standalone = this.xmlDeclValue;
|
||
|
break;
|
||
|
default:
|
||
|
// We don't need to raise an error here since we've already
|
||
|
// raised one when checking what name was expected.
|
||
|
}
|
||
|
this.xmlDeclName = this.xmlDeclValue = "";
|
||
|
this.xmlDeclState = S_XML_DECL_NAME_START;
|
||
|
this.requiredSeparator = true;
|
||
|
}
|
||
|
break;
|
||
|
default:
|
||
|
throw new Error(this,
|
||
|
`Unknown XML declaration state: ${this.xmlDeclState}`);
|
||
|
}
|
||
|
}
|
||
|
else if (this.piBody.length === 0) {
|
||
|
c = this.getCode();
|
||
|
if (c === QUESTION) {
|
||
|
this.state = S_PI_ENDING;
|
||
|
}
|
||
|
else if (!isS(c)) {
|
||
|
this.piBody = String.fromCodePoint(c);
|
||
|
}
|
||
|
}
|
||
|
// The question mark character is not valid inside any of the XML
|
||
|
// declaration name/value pairs.
|
||
|
else if (this.captureToChar(QUESTION, "piBody")) {
|
||
|
this.state = S_PI_ENDING;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sPIEnding() {
|
||
|
const c = this.getCode();
|
||
|
if (this.piIsXMLDecl) {
|
||
|
if (c === GREATER) {
|
||
|
if (this.piTarget !== "xml") {
|
||
|
this.fail("processing instructions are not allowed before root.");
|
||
|
}
|
||
|
else if (this.xmlDeclState !== S_XML_DECL_NAME_START) {
|
||
|
this.fail("XML declaration is incomplete.");
|
||
|
}
|
||
|
else if (this.xmlDeclExpects.includes("version")) {
|
||
|
this.fail("XML declaration must contain a version.");
|
||
|
}
|
||
|
this.xmlDeclName = this.xmlDeclValue = "";
|
||
|
this.requiredSeparator = false;
|
||
|
this.piTarget = this.piBody = "";
|
||
|
this.state = S_TEXT;
|
||
|
}
|
||
|
else {
|
||
|
// We got here because the previous character was a ?, but the
|
||
|
// question mark character is not valid inside any of the XML
|
||
|
// declaration name/value pairs.
|
||
|
this.fail(
|
||
|
"The character ? is disallowed anywhere in XML declarations.");
|
||
|
}
|
||
|
}
|
||
|
else if (c === GREATER) {
|
||
|
if (this.piTarget.trim().toLowerCase() === "xml") {
|
||
|
this.fail("the XML declaration must appear at the start of the document.");
|
||
|
}
|
||
|
if (this.text.length !== 0) {
|
||
|
this.closeText();
|
||
|
}
|
||
|
this.onprocessinginstruction({
|
||
|
target: this.piTarget,
|
||
|
body: this.piBody,
|
||
|
});
|
||
|
this.piTarget = this.piBody = "";
|
||
|
this.state = S_TEXT;
|
||
|
}
|
||
|
else if (c === QUESTION) {
|
||
|
// We ran into ?? as part of a processing instruction. We initially
|
||
|
// took the first ? as a sign that the PI was ending, but it is
|
||
|
// not. So we have to add it to the body but we take the new ? as a
|
||
|
// sign that the PI is ending.
|
||
|
this.piBody += "?";
|
||
|
}
|
||
|
else {
|
||
|
this.piBody += `?${String.fromCodePoint(c)}`;
|
||
|
this.state = S_PI_BODY;
|
||
|
}
|
||
|
this.xmlDeclPossible = false;
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sOpenTag() {
|
||
|
const c = this.captureNameChars();
|
||
|
if (!c) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
const tag = this.tag = {
|
||
|
name: this.name,
|
||
|
attributes: Object.create(null),
|
||
|
};
|
||
|
|
||
|
if (this.xmlnsOpt) {
|
||
|
tag.ns = Object.create(null);
|
||
|
}
|
||
|
|
||
|
if (this.text.length !== 0) {
|
||
|
this.closeText();
|
||
|
}
|
||
|
this.onopentagstart(tag);
|
||
|
this.sawRoot = true;
|
||
|
if (!this.fragmentOpt && this.closedRoot) {
|
||
|
this.fail("documents may contain only one root.");
|
||
|
}
|
||
|
|
||
|
switch (c) {
|
||
|
case GREATER:
|
||
|
this.openTag();
|
||
|
break;
|
||
|
case FORWARD_SLASH:
|
||
|
this.state = S_OPEN_TAG_SLASH;
|
||
|
break;
|
||
|
default:
|
||
|
if (!isS(c)) {
|
||
|
this.fail("disallowed character in tag name.");
|
||
|
}
|
||
|
this.state = S_ATTRIB;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sOpenTagSlash() {
|
||
|
const c = this.getCode();
|
||
|
if (c === GREATER) {
|
||
|
this.openSelfClosingTag();
|
||
|
}
|
||
|
else {
|
||
|
this.fail("forward-slash in opening tag not followed by >.");
|
||
|
this.state = S_ATTRIB;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sAttrib() {
|
||
|
const c = this.skipSpaces();
|
||
|
if (!c) {
|
||
|
return;
|
||
|
}
|
||
|
if (isNameStartChar(c)) {
|
||
|
this.name = String.fromCodePoint(c);
|
||
|
this.state = S_ATTRIB_NAME;
|
||
|
}
|
||
|
else if (c === GREATER) {
|
||
|
this.openTag();
|
||
|
}
|
||
|
else if (c === FORWARD_SLASH) {
|
||
|
this.state = S_OPEN_TAG_SLASH;
|
||
|
}
|
||
|
else {
|
||
|
this.fail("disallowed character in attribute name.");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
pushAttribNS(name, value) {
|
||
|
const { prefix, local } = this.qname(name);
|
||
|
this.attribList.push({ name, prefix, local, value, uri: undefined });
|
||
|
if (prefix === "xmlns") {
|
||
|
const trimmed = value.trim();
|
||
|
this.tag.ns[local] = trimmed;
|
||
|
nsPairCheck(this, local, trimmed);
|
||
|
}
|
||
|
else if (name === "xmlns") {
|
||
|
const trimmed = value.trim();
|
||
|
this.tag.ns[""] = trimmed;
|
||
|
nsPairCheck(this, "", trimmed);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
pushAttribPlain(name, value) {
|
||
|
this.attribList.push({ name, value });
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sAttribName() {
|
||
|
const c = this.captureNameChars();
|
||
|
if (c === EQUAL) {
|
||
|
this.state = S_ATTRIB_VALUE;
|
||
|
}
|
||
|
else if (isS(c)) {
|
||
|
this.state = S_ATTRIB_NAME_SAW_WHITE;
|
||
|
}
|
||
|
else if (c === GREATER) {
|
||
|
this.fail("attribute without value.");
|
||
|
this.pushAttrib(this.name, this.name);
|
||
|
this.name = this.text = "";
|
||
|
this.openTag();
|
||
|
}
|
||
|
else if (c) {
|
||
|
this.fail("disallowed character in attribute name.");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sAttribNameSawWhite() {
|
||
|
const c = this.skipSpaces();
|
||
|
if (!c) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if (c === EQUAL) {
|
||
|
this.state = S_ATTRIB_VALUE;
|
||
|
}
|
||
|
else {
|
||
|
this.fail("attribute without value.");
|
||
|
this.tag.attributes[this.name] = "";
|
||
|
this.text = "";
|
||
|
this.name = "";
|
||
|
if (c === GREATER) {
|
||
|
this.openTag();
|
||
|
}
|
||
|
else if (isNameStartChar(c)) {
|
||
|
this.name = String.fromCodePoint(c);
|
||
|
this.state = S_ATTRIB_NAME;
|
||
|
}
|
||
|
else {
|
||
|
this.fail("disallowed character in attribute name.");
|
||
|
this.state = S_ATTRIB;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sAttribValue() {
|
||
|
const c = this.getCode();
|
||
|
if (isQuote(c)) {
|
||
|
this.q = c;
|
||
|
this.state = S_ATTRIB_VALUE_QUOTED;
|
||
|
}
|
||
|
else if (!isS(c)) {
|
||
|
this.fail("unquoted attribute value.");
|
||
|
this.state = S_ATTRIB_VALUE_UNQUOTED;
|
||
|
this.text = String.fromCodePoint(c);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sAttribValueQuoted() {
|
||
|
// We deliberately do not use captureTo here. The specialized code we use
|
||
|
// here is faster than using captureTo.
|
||
|
const { q } = this;
|
||
|
const { chunk, limit, i: start } = this;
|
||
|
// eslint-disable-next-line no-constant-condition
|
||
|
while (true) {
|
||
|
if (this.i >= limit) {
|
||
|
// This is faster than adding codepoints one by one.
|
||
|
this.text += chunk.substring(start);
|
||
|
return;
|
||
|
}
|
||
|
const code = this.getCode();
|
||
|
if (code === q || code === AMP || code === LESS) {
|
||
|
// This is faster than adding codepoints one by one.
|
||
|
const slice = chunk.substring(start,
|
||
|
this.i - (code <= 0xFFFF ? 1 : 2));
|
||
|
switch (code) {
|
||
|
case q:
|
||
|
this.pushAttrib(this.name, this.text + slice);
|
||
|
this.name = this.text = "";
|
||
|
this.q = null;
|
||
|
this.state = S_ATTRIB_VALUE_CLOSED;
|
||
|
return;
|
||
|
case AMP:
|
||
|
this.text += slice;
|
||
|
this.state = S_ENTITY;
|
||
|
this.entityReturnState = S_ATTRIB_VALUE_QUOTED;
|
||
|
return;
|
||
|
default:
|
||
|
this.text += slice;
|
||
|
this.fail("disallowed character.");
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sAttribValueClosed() {
|
||
|
const c = this.getCode();
|
||
|
if (isS(c)) {
|
||
|
this.state = S_ATTRIB;
|
||
|
}
|
||
|
else if (c === GREATER) {
|
||
|
this.openTag();
|
||
|
}
|
||
|
else if (c === FORWARD_SLASH) {
|
||
|
this.state = S_OPEN_TAG_SLASH;
|
||
|
}
|
||
|
else if (isNameStartChar(c)) {
|
||
|
this.fail("no whitespace between attributes.");
|
||
|
this.name = String.fromCodePoint(c);
|
||
|
this.state = S_ATTRIB_NAME;
|
||
|
}
|
||
|
else {
|
||
|
this.fail("disallowed character in attribute name.");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sAttribValueUnquoted() {
|
||
|
const c = this.captureTo(ATTRIB_VALUE_UNQUOTED_TERMINATOR, "text");
|
||
|
if (c === AMP) {
|
||
|
this.state = S_ENTITY;
|
||
|
this.entityReturnState = S_ATTRIB_VALUE_UNQUOTED;
|
||
|
}
|
||
|
else if (c === LESS) {
|
||
|
this.fail("disallowed character.");
|
||
|
}
|
||
|
else if (c) {
|
||
|
if (this.text.includes("]]>")) {
|
||
|
this.fail("the string \"]]>\" is disallowed in char data.");
|
||
|
}
|
||
|
this.pushAttrib(this.name, this.text);
|
||
|
this.name = this.text = "";
|
||
|
if (c === GREATER) {
|
||
|
this.openTag();
|
||
|
}
|
||
|
else {
|
||
|
this.state = S_ATTRIB;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sCloseTag() {
|
||
|
const c = this.captureNameChars();
|
||
|
if (c === GREATER) {
|
||
|
this.closeTag();
|
||
|
}
|
||
|
else if (isS(c)) {
|
||
|
this.state = S_CLOSE_TAG_SAW_WHITE;
|
||
|
}
|
||
|
else if (c) {
|
||
|
this.fail("disallowed character in closing tag.");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sCloseTagSawWhite() {
|
||
|
const c = this.skipSpaces();
|
||
|
if (c === GREATER) {
|
||
|
this.closeTag();
|
||
|
}
|
||
|
else if (c) {
|
||
|
this.fail("disallowed character in closing tag.");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
sEntity() {
|
||
|
if (this.captureToChar(SEMICOLON, "entity")) {
|
||
|
this.state = this.entityReturnState;
|
||
|
if (this.entity === "") {
|
||
|
this.fail("empty entity name.");
|
||
|
this.text += "&;";
|
||
|
return;
|
||
|
}
|
||
|
this.text += this.parseEntity(this.entity);
|
||
|
this.entity = "";
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// END OF STATE HANDLERS
|
||
|
|
||
|
/**
|
||
|
* End parsing. This performs final well-formedness checks and resets the
|
||
|
* parser to a clean state.
|
||
|
*
|
||
|
* @private
|
||
|
*
|
||
|
* @returns this
|
||
|
*/
|
||
|
end() {
|
||
|
if (!this.sawRoot) {
|
||
|
this.fail("document must contain a root element.");
|
||
|
}
|
||
|
const { tags } = this;
|
||
|
while (tags.length > 0) {
|
||
|
const tag = tags.pop();
|
||
|
this.fail(`unclosed tag: ${tag.name}`);
|
||
|
}
|
||
|
if ((this.state !== S_INITIAL) &&
|
||
|
(this.state !== S_TEXT)) {
|
||
|
this.fail("unexpected end.");
|
||
|
}
|
||
|
if (this.text.length !== 0) {
|
||
|
this.closeText();
|
||
|
}
|
||
|
this.closed = true;
|
||
|
this.onend();
|
||
|
this._init(this.opt);
|
||
|
return this;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* If there's text to emit ``ontext``, emit it.
|
||
|
*
|
||
|
* @private
|
||
|
*/
|
||
|
closeText() {
|
||
|
this.ontext(this.text);
|
||
|
this.text = "";
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Resolve a namespace prefix.
|
||
|
*
|
||
|
* @param {string} prefix The prefix to resolve.
|
||
|
*
|
||
|
* @returns {string|undefined} The namespace URI or ``undefined`` if the
|
||
|
* prefix is not defined.
|
||
|
*/
|
||
|
resolve(prefix) {
|
||
|
let uri = this.tag.ns[prefix];
|
||
|
if (uri !== undefined) {
|
||
|
return uri;
|
||
|
}
|
||
|
|
||
|
const { tags } = this;
|
||
|
for (let index = tags.length - 1; index >= 0; index--) {
|
||
|
uri = tags[index].ns[prefix];
|
||
|
if (uri !== undefined) {
|
||
|
return uri;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
uri = this.ns[prefix];
|
||
|
if (uri) {
|
||
|
return uri;
|
||
|
}
|
||
|
|
||
|
const { resolvePrefix } = this.opt;
|
||
|
return resolvePrefix ? resolvePrefix(prefix) : undefined;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Parse a qname into its prefix and local name parts.
|
||
|
*
|
||
|
* @private
|
||
|
*
|
||
|
* @param {string} name The name to parse
|
||
|
*
|
||
|
* @returns {{prefix: string, local: string}}
|
||
|
*/
|
||
|
qname(name) {
|
||
|
const colon = name.indexOf(":");
|
||
|
if (colon === -1) {
|
||
|
return { prefix: "", local: name };
|
||
|
}
|
||
|
|
||
|
const local = name.substring(colon + 1);
|
||
|
const prefix = name.substring(0, colon);
|
||
|
if (prefix === "" || local === "" || local.includes(":")) {
|
||
|
this.fail(`malformed name: ${name}.`);
|
||
|
}
|
||
|
|
||
|
return { prefix, local };
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
processAttribsNS() {
|
||
|
const { tag, attribList } = this;
|
||
|
const { name: tagName, attributes } = tag;
|
||
|
|
||
|
{
|
||
|
// add namespace info to tag
|
||
|
const { prefix, local } = this.qname(tagName);
|
||
|
tag.prefix = prefix;
|
||
|
tag.local = local;
|
||
|
const uri = tag.uri = this.resolve(prefix) || "";
|
||
|
|
||
|
if (prefix) {
|
||
|
if (prefix === "xmlns") {
|
||
|
this.fail("tags may not have \"xmlns\" as prefix.");
|
||
|
}
|
||
|
|
||
|
if (!uri) {
|
||
|
this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`);
|
||
|
tag.uri = prefix;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (attribList.length === 0) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
const seen = new Set();
|
||
|
// Note: do not apply default ns to attributes:
|
||
|
// http://www.w3.org/TR/REC-xml-names/#defaulting
|
||
|
for (const attr of attribList) {
|
||
|
const { name, prefix, local } = attr;
|
||
|
let uri;
|
||
|
let eqname;
|
||
|
if (prefix === "") {
|
||
|
uri = (name === "xmlns") ? XMLNS_NAMESPACE : "";
|
||
|
eqname = name;
|
||
|
}
|
||
|
else {
|
||
|
uri = this.resolve(prefix);
|
||
|
// if there's any attributes with an undefined namespace,
|
||
|
// then fail on them now.
|
||
|
if (!uri) {
|
||
|
this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`);
|
||
|
uri = prefix;
|
||
|
}
|
||
|
eqname = `{${uri}}${local}`;
|
||
|
}
|
||
|
|
||
|
if (seen.has(eqname)) {
|
||
|
this.fail(`duplicate attribute: ${eqname}.`);
|
||
|
}
|
||
|
seen.add(eqname);
|
||
|
|
||
|
attr.uri = uri;
|
||
|
attributes[name] = attr;
|
||
|
}
|
||
|
|
||
|
this.attribList = [];
|
||
|
}
|
||
|
|
||
|
/** @private */
|
||
|
processAttribsPlain() {
|
||
|
const { attribList, tag: { attributes } } = this;
|
||
|
for (const { name, value } of attribList) {
|
||
|
if (attributes[name]) {
|
||
|
this.fail(`duplicate attribute: ${name}.`);
|
||
|
}
|
||
|
attributes[name] = value;
|
||
|
}
|
||
|
|
||
|
this.attribList = [];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Handle a complete open tag. This parser code calls this once it has seen
|
||
|
* the whole tag. This method checks for well-formeness and then emits
|
||
|
* ``onopentag``.
|
||
|
*
|
||
|
* @private
|
||
|
*/
|
||
|
openTag() {
|
||
|
this.processAttribs();
|
||
|
|
||
|
const { tag, tags } = this;
|
||
|
tag.isSelfClosing = false;
|
||
|
|
||
|
// There cannot be any pending text here due to the onopentagstart that was
|
||
|
// necessarily emitted before we get here. So we do not check text.
|
||
|
this.onopentag(tag);
|
||
|
tags.push(tag);
|
||
|
this.state = S_TEXT;
|
||
|
this.name = "";
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Handle a complete self-closing tag. This parser code calls this once it has
|
||
|
* seen the whole tag. This method checks for well-formeness and then emits
|
||
|
* ``onopentag`` and ``onclosetag``.
|
||
|
*
|
||
|
* @private
|
||
|
*/
|
||
|
openSelfClosingTag() {
|
||
|
this.processAttribs();
|
||
|
|
||
|
const { tag, tags } = this;
|
||
|
tag.isSelfClosing = true;
|
||
|
|
||
|
// There cannot be any pending text here due to the onopentagstart that was
|
||
|
// necessarily emitted before we get here. So we do not check text.
|
||
|
this.onopentag(tag);
|
||
|
this.onclosetag(tag);
|
||
|
const top = this.tag = tags[tags.length - 1];
|
||
|
if (!top) {
|
||
|
this.closedRoot = true;
|
||
|
}
|
||
|
this.state = S_TEXT;
|
||
|
this.name = "";
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Handle a complete close tag. This parser code calls this once it has seen
|
||
|
* the whole tag. This method checks for well-formeness and then emits
|
||
|
* ``onclosetag``.
|
||
|
*
|
||
|
* @private
|
||
|
*/
|
||
|
closeTag() {
|
||
|
const { tags, name } = this;
|
||
|
|
||
|
// Our state after this will be S_TEXT, no matter what, and we can clear
|
||
|
// tagName now.
|
||
|
this.state = S_TEXT;
|
||
|
this.name = "";
|
||
|
|
||
|
if (!name) {
|
||
|
this.fail("weird empty close tag.");
|
||
|
this.text += "</>";
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
let l = tags.length;
|
||
|
while (l-- > 0) {
|
||
|
const tag = this.tag = tags.pop();
|
||
|
if (this.text.length !== 0) {
|
||
|
this.closeText();
|
||
|
}
|
||
|
this.onclosetag(tag);
|
||
|
if (tag.name === name) {
|
||
|
break;
|
||
|
}
|
||
|
this.fail("unexpected close tag.");
|
||
|
}
|
||
|
|
||
|
if (l === 0) {
|
||
|
this.closedRoot = true;
|
||
|
}
|
||
|
else if (l < 0) {
|
||
|
this.fail(`unmatched closing tag: ${name}.`);
|
||
|
this.text += `</${name}>`;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Resolves an entity. Makes any necessary well-formedness checks.
|
||
|
*
|
||
|
* @private
|
||
|
*
|
||
|
* @param {string} entity The entity to resolve.
|
||
|
*
|
||
|
* @returns {string} The parsed entity.
|
||
|
*/
|
||
|
parseEntity(entity) {
|
||
|
if (entity[0] !== "#") {
|
||
|
const defined = this.ENTITIES[entity];
|
||
|
if (defined) {
|
||
|
return defined;
|
||
|
}
|
||
|
|
||
|
this.fail(this.isName(entity) ? "undefined entity." :
|
||
|
"disallowed character in entity name.");
|
||
|
return `&${entity};`;
|
||
|
}
|
||
|
|
||
|
let num = NaN;
|
||
|
if (entity[1] === "x" && /^#x[0-9a-f]+$/i.test(entity)) {
|
||
|
num = parseInt(entity.slice(2), 16);
|
||
|
}
|
||
|
else if (/^#[0-9]+$/.test(entity)) {
|
||
|
num = parseInt(entity.slice(1), 10);
|
||
|
}
|
||
|
|
||
|
// The character reference is required to match the CHAR production.
|
||
|
if (!isChar(num)) {
|
||
|
this.fail("malformed character entity.");
|
||
|
return `&${entity};`;
|
||
|
}
|
||
|
|
||
|
return String.fromCodePoint(num);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
exports.SaxesParser = SaxesParser;
|