import { splitAndCapture } from "../utils";
import { CommentNode } from "./comment-node";
import { ElementNode } from "./element-node";
import { EntityNode } from "./entity-node";
import { TextNode } from "./text-node";
/**
 * @private
 * @property {RegExp} htmlRegex
 *
 * The regular expression used to pull out HTML tags from a string. Handles namespaced HTML tags and
 * attribute names, as specified by http://www.w3.org/TR/html-markup/syntax.html.
 *
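 * Capturing groups:
 *
 * 1. The "!DOCTYPE" tag name, if a tag is a &lt;!DOCTYPE&gt; tag.
 * 2. Internal: an attribute name inside a &lt;!DOCTYPE&gt; tag. Only exists to
 *    emulate an atomic group via a lookahead + backreference.
 * 3. If it is an end tag, this group will have the '/'.
 * 4. If it is a comment tag, this group will hold the comment text (i.e.
 *    the text inside the `&lt;!--` and `--&gt;`).
 * 5. The tag name for a tag without attributes (other than the &lt;!DOCTYPE&gt; tag)
 * 6. The tag name for a tag with attributes (other than the &lt;!DOCTYPE&gt; tag)
 * 7. Internal: an attribute name inside a tag with attributes. Only exists to
 *    emulate an atomic group via a lookahead + backreference.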
 */
var htmlRegex = (function () {
    // Sources of the building-block patterns.
    var commentSource = /!--([\s\S]+?)--/.source; // comment body is captured
    var tagNameSource = /[0-9a-zA-Z][0-9a-zA-Z:]*/.source; // ':' allows namespaced tags
    var attrNameSource = /[^\s"'>\/=\x00-\x1F\x7F]+/.source; // excludes control chars and the delete char
    var attrValueSource = /(?:"[^"]*?"|'[^']*?'|[^'"=<>`\s]+)/.source; // double quoted, single quoted, or unquoted
    var optionalValueSource = '(?:\\s*?=\\s*?' + attrValueSource + ')?'; // optional '=[value]'

    // Builds the `name[=value]` pattern. The attribute name is captured inside
    // a lookahead and then consumed through a backreference to capturing group
    // `groupNumber`, which emulates an atomic group.
    function nameEqualsValue(groupNumber) {
        return '(?=(' + attrNameSource + '))\\' + groupNumber + optionalValueSource;
    }

    // <!DOCTYPE ...> tag.
    // Ex: <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
    // Group 1 is the "!DOCTYPE" name. Each following item is either
    // attr="value" (group 2 is the pseudo-atomic attribute name) or a bare
    // "value".
    var doctypeTagSource = '(?:' +
        '<(!DOCTYPE)' +
        '(?:\\s+(?:' + nameEqualsValue(2) + '|' + attrValueSource + '))*' +
        '>' +
        ')';

    // A tag without attributes (group 5 is the tag name). Kept separate from
    // the with-attributes form to fix a regex time complexity issue seen with
    // the example in https://github.com/gregjacobs/Autolinker.js/issues/172
    var bareTagSource = '(?:' +
        '(' + tagNameSource + ')' +
        '\\s*/?' +
        ')';

    // A tag with attributes (group 6 is the tag name, group 7 is the
    // pseudo-atomic attribute name).
    var tagWithAttrsSource = '(?:' +
        '(' + tagNameSource + ')' +
        '\\s+' +
        '(?:(?:\\s+|\\b)' + nameEqualsValue(7) + ')*' +
        '\\s*/?' +
        ')';

    // Every tag that is not <!DOCTYPE>. Group 3 is the '/' of an end tag,
    // group 4 is the comment text.
    var otherTagSource = '(?:' +
        '<(/)?' +
        '(?:' + commentSource + '|' + bareTagSource + '|' + tagWithAttrsSource + ')' +
        '>' +
        ')';

    return new RegExp(doctypeTagSource + '|' + otherTagSource, 'gi');
})();
/**
 * @private
 * @property {RegExp} htmlCharacterEntitiesRegex
 *
 * The regular expression that matches common HTML character entities.
 *
 * Ignoring &amp; as it could be part of a query string -- handling it separately.
 */
var htmlCharacterEntitiesRegex = new RegExp('(' + [
    '&nbsp;', '&#160;', // non-breaking space (named and numeric forms)
    '&lt;', '&#60;', // less-than sign
    '&gt;', '&#62;', // greater-than sign
    '&quot;', '&#34;', // double quote
    '&#39;' // single quote (numeric form only)
].join('|') + ')', 'gi'); // the single capturing group wraps the whole alternation
/**
 * @class Autolinker.htmlParser.HtmlParser
 * @extends Object
 *
 * An HTML parser implementation which simply walks an HTML string and returns an array of
 * {@link Autolinker.htmlParser.HtmlNode HtmlNodes} that represent the basic HTML structure of the input string.
 *
 * Autolinker uses this to only link URLs/emails/mentions within text nodes, effectively ignoring / "walking
 * around" HTML tags.
 */
var HtmlParser = /** @class */ (function () {
    function HtmlParser() {
    }
    /**
     * Appends every element of `items` to `target`, one element at a time.
     *
     * Deliberately avoids `target.push.apply( target, items )`: spreading a
     * very large array into call arguments was causing a "Maximum Call Stack
     * Size Exceeded" error on inputs with a large number of html entities.
     *
     * @private
     * @param {Array} target The array to append to (mutated in place).
     * @param {Array} items The elements to append.
     */
    function appendAll(target, items) {
        for (var i = 0, len = items.length; i < len; i++) {
            target.push(items[i]);
        }
    }
    /**
     * Parses an HTML string and returns a simple array of {@link Autolinker.htmlParser.HtmlNode HtmlNodes}
     * to represent the HTML structure of the input string.
     *
     * @param {String} html The HTML to parse.
     * @return {Autolinker.htmlParser.HtmlNode[]}
     */
    HtmlParser.prototype.parse = function (html) {
        var currentResult, lastIndex = 0, nodes = []; // `nodes` will be the result of the method
        // `htmlRegex` is a shared, global-flagged regex, so `exec()` resumes
        // from its `lastIndex`. Reset it so that a previous parse which was
        // interrupted (e.g. by an exception) cannot make this one skip tags.
        htmlRegex.lastIndex = 0;
        while ((currentResult = htmlRegex.exec(html)) !== null) {
            var tagText = currentResult[0], commentText = currentResult[4], // if we've matched a comment
            tagName = currentResult[1] || currentResult[5] || currentResult[6], // The <!DOCTYPE> tag (ex: "!DOCTYPE"), or another tag (ex: "a" or "img")
            isClosingTag = !!currentResult[3], offset = currentResult.index, inBetweenTagsText = html.substring(lastIndex, offset);
            // Push TextNodes and EntityNodes for any text found between tags
            if (inBetweenTagsText) {
                appendAll(nodes, this.parseTextAndEntityNodes(lastIndex, inBetweenTagsText));
            }
            // Push the CommentNode or ElementNode
            if (commentText) {
                nodes.push(this.createCommentNode(offset, tagText, commentText));
            }
            else {
                nodes.push(this.createElementNode(offset, tagText, tagName, isClosingTag));
            }
            lastIndex = offset + tagText.length;
        }
        // Process any remaining text after the last HTML element. Will process all of the text if there were no HTML elements.
        if (lastIndex < html.length) {
            appendAll(nodes, this.parseTextAndEntityNodes(lastIndex, html.substring(lastIndex)));
        }
        return nodes;
    };
    /**
     * Parses text and HTML entity nodes from a given string. The input string
     * should not have any HTML tags (elements) within it.
     *
     * @private
     * @param {Number} offset The offset of the text node match within the
     *   original HTML string.
     * @param {String} text The string of text to parse. This is from an HTML
     *   text node.
     * @return {Autolinker.htmlParser.HtmlNode[]} An array of HtmlNodes to
     *   represent the {@link Autolinker.htmlParser.TextNode TextNodes} and
     *   {@link Autolinker.htmlParser.EntityNode EntityNodes} found.
     */
    HtmlParser.prototype.parseTextAndEntityNodes = function (offset, text) {
        var nodes = [], textAndEntityTokens = splitAndCapture(text, htmlCharacterEntitiesRegex); // split at HTML entities, but include the HTML entities in the results array
        // Every even numbered token is a TextNode, and every odd numbered token is an EntityNode
        // For example: an input `text` of "Test &quot;this&quot; today" would turn into the
        //   `textAndEntityTokens`: [ 'Test ', '&quot;', 'this', '&quot;', ' today' ]
        for (var i = 0, len = textAndEntityTokens.length; i < len; i += 2) {
            var textToken = textAndEntityTokens[i], entityToken = textAndEntityTokens[i + 1];
            // Empty (or missing) tokens produce no node; `offset` advances
            // only by the length of the tokens that were actually emitted.
            if (textToken) {
                nodes.push(this.createTextNode(offset, textToken));
                offset += textToken.length;
            }
            if (entityToken) {
                nodes.push(this.createEntityNode(offset, entityToken));
                offset += entityToken.length;
            }
        }
        return nodes;
    };
    /**
     * Factory method to create an {@link Autolinker.htmlParser.CommentNode CommentNode}.
     *
     * @private
     * @param {Number} offset The offset of the match within the original HTML
     *   string.
     * @param {String} tagText The full text of the tag (comment) that was
     *   matched, including its &lt;!-- and --&gt;.
     * @param {String} commentText The full text of the comment that was matched.
     *   Surrounding whitespace is trimmed before it is stored on the node.
     * @return {Autolinker.htmlParser.CommentNode}
     */
    HtmlParser.prototype.createCommentNode = function (offset, tagText, commentText) {
        return new CommentNode({
            offset: offset,
            text: tagText,
            comment: commentText.trim()
        });
    };
    /**
     * Factory method to create an {@link Autolinker.htmlParser.ElementNode ElementNode}.
     *
     * @private
     * @param {Number} offset The offset of the match within the original HTML
     *   string.
     * @param {String} tagText The full text of the tag (element) that was
     *   matched, including its attributes.
     * @param {String} tagName The name of the tag. Ex: An &lt;img&gt; tag would
     *   be passed to this method as "img". It is lower-cased before being
     *   stored on the node.
     * @param {Boolean} isClosingTag `true` if it's a closing tag, false
     *   otherwise.
     * @return {Autolinker.htmlParser.ElementNode}
     */
    HtmlParser.prototype.createElementNode = function (offset, tagText, tagName, isClosingTag) {
        return new ElementNode({
            offset: offset,
            text: tagText,
            tagName: tagName.toLowerCase(),
            closing: isClosingTag
        });
    };
    /**
     * Factory method to create a {@link Autolinker.htmlParser.EntityNode EntityNode}.
     *
     * @private
     * @param {Number} offset The offset of the match within the original HTML
     *   string.
     * @param {String} text The text that was matched for the HTML entity (such
     *   as '&amp;nbsp;').
     * @return {Autolinker.htmlParser.EntityNode}
     */
    HtmlParser.prototype.createEntityNode = function (offset, text) {
        return new EntityNode({ offset: offset, text: text });
    };
    /**
     * Factory method to create a {@link Autolinker.htmlParser.TextNode TextNode}.
     *
     * @private
     * @param {Number} offset The offset of the match within the original HTML
     *   string.
     * @param {String} text The text that was matched.
     * @return {Autolinker.htmlParser.TextNode}
     */
    HtmlParser.prototype.createTextNode = function (offset, text) {
        return new TextNode({ offset: offset, text: text });
    };
    return HtmlParser;
}());
export { HtmlParser };

//# sourceMappingURL=html-parser.js.map
