const { getUnclosedTagException, getUnopenedTagException, getDuplicateOpenTagException, getDuplicateCloseTagException, throwMalformedXml, throwXmlInvalid, } = require("./errors"); const { concatArrays, isTextStart, isTextEnd } = require("./doc-utils"); const NONE = -2; const EQUAL = 0; const START = -1; const END = 1; function inRange(range, match) { return range[0] <= match.offset && match.offset < range[1]; } function updateInTextTag(part, inTextTag) { if (isTextStart(part)) { if (inTextTag) { throwMalformedXml(part); } return true; } if (isTextEnd(part)) { if (!inTextTag) { throwMalformedXml(part); } return false; } return inTextTag; } function getTag(tag) { let position = ""; let start = 1; let end = tag.indexOf(" "); if (tag[tag.length - 2] === "/") { position = "selfclosing"; if (end === -1) { end = tag.length - 2; } } else if (tag[1] === "/") { start = 2; position = "end"; if (end === -1) { end = tag.length - 1; } } else { position = "start"; if (end === -1) { end = tag.length - 1; } } return { tag: tag.slice(start, end), position, }; } function tagMatcher(content, textMatchArray, othersMatchArray) { let cursor = 0; const contentLength = content.length; const allMatches = concatArrays([ textMatchArray.map(function (tag) { return { tag, text: true }; }), othersMatchArray.map(function (tag) { return { tag, text: false }; }), ]).reduce(function (allMatches, t) { allMatches[t.tag] = t.text; return allMatches; }, {}); const totalMatches = []; while (cursor < contentLength) { cursor = content.indexOf("<", cursor); if (cursor === -1) { break; } const offset = cursor; const nextOpening = content.indexOf("<", cursor + 1); cursor = content.indexOf(">", cursor); if (cursor === -1 || (nextOpening !== -1 && cursor > nextOpening)) { throwXmlInvalid(content, offset); } const tagText = content.slice(offset, cursor + 1); const { tag, position } = getTag(tagText); const text = allMatches[tag]; if (text == null) { continue; } totalMatches.push({ type: "tag", position, text, offset, value: tagText, tag, }); } return totalMatches; } function getDelimiterErrors(delimiterMatches, fullText, ranges) { if (delimiterMatches.length === 0) { return []; } const errors = []; let inDelimiter = false; let lastDelimiterMatch = { offset: 0 }; let xtag; let rangeIndex = 0; delimiterMatches.forEach(function (delimiterMatch) { while (ranges[rangeIndex + 1]) { if (ranges[rangeIndex + 1].offset > delimiterMatch.offset) { break; } rangeIndex++; } xtag = fullText.substr( lastDelimiterMatch.offset, delimiterMatch.offset - lastDelimiterMatch.offset ); if ( (delimiterMatch.position === "start" && inDelimiter) || (delimiterMatch.position === "end" && !inDelimiter) ) { if (delimiterMatch.position === "start") { if ( lastDelimiterMatch.offset + lastDelimiterMatch.length === delimiterMatch.offset ) { xtag = fullText.substr( lastDelimiterMatch.offset, delimiterMatch.offset - lastDelimiterMatch.offset + lastDelimiterMatch.length + 4 ); errors.push( getDuplicateOpenTagException({ xtag, offset: lastDelimiterMatch.offset, }) ); } else { errors.push( getUnclosedTagException({ xtag, offset: lastDelimiterMatch.offset }) ); } delimiterMatch.error = true; } else { if ( lastDelimiterMatch.offset + lastDelimiterMatch.length === delimiterMatch.offset ) { xtag = fullText.substr( lastDelimiterMatch.offset - 4, delimiterMatch.offset - lastDelimiterMatch.offset + 4 + lastDelimiterMatch.length ); errors.push( getDuplicateCloseTagException({ xtag, offset: lastDelimiterMatch.offset, }) ); } else { errors.push( getUnopenedTagException({ xtag, offset: delimiterMatch.offset }) ); } delimiterMatch.error = true; } } else { inDelimiter = !inDelimiter; } lastDelimiterMatch = delimiterMatch; }); const delimiterMatch = { offset: fullText.length }; xtag = fullText.substr( lastDelimiterMatch.offset, delimiterMatch.offset - lastDelimiterMatch.offset ); if (inDelimiter) { errors.push( getUnclosedTagException({ xtag, offset: lastDelimiterMatch.offset }) ); delimiterMatch.error = true; } return errors; } function compareOffsets(startOffset, endOffset) { if (startOffset === -1 && endOffset === -1) { return NONE; } if (startOffset === endOffset) { return EQUAL; } if (startOffset === -1 || endOffset === -1) { return endOffset < startOffset ? START : END; } return startOffset < endOffset ? START : END; } function splitDelimiters(inside) { const newDelimiters = inside.split(" "); if (newDelimiters.length !== 2) { throw new Error("New Delimiters cannot be parsed"); } const [start, end] = newDelimiters; if (start.length === 0 || end.length === 0) { throw new Error("New Delimiters cannot be parsed"); } return [start, end]; } function getAllIndexes(fullText, delimiters) { const indexes = []; let { start, end } = delimiters; let offset = -1; let insideTag = false; while (true) { const startOffset = fullText.indexOf(start, offset + 1); const endOffset = fullText.indexOf(end, offset + 1); let position = null; let len; let compareResult = compareOffsets(startOffset, endOffset); if (compareResult === NONE) { return indexes; } if (compareResult === EQUAL) { if (!insideTag) { compareResult = START; } else { compareResult = END; } } if (compareResult === END) { insideTag = false; offset = endOffset; position = "end"; len = end.length; } if (compareResult === START) { insideTag = true; offset = startOffset; position = "start"; len = start.length; } if (position === "start" && fullText[offset + start.length] === "=") { indexes.push({ offset: startOffset, position: "start", length: start.length, changedelimiter: true, }); const nextEqual = fullText.indexOf("=", offset + start.length + 1); const endOffset = fullText.indexOf(end, nextEqual + 1); indexes.push({ offset: endOffset, position: "end", length: end.length, changedelimiter: true, }); const insideTag = fullText.substr( offset + start.length + 1, nextEqual - offset - start.length - 1 ); [start, end] = splitDelimiters(insideTag); offset = endOffset; continue; } indexes.push({ offset, position, length: len }); } } function parseDelimiters(innerContentParts, delimiters) { const full = innerContentParts.map((p) => p.value).join(""); const delimiterMatches = getAllIndexes(full, delimiters); let offset = 0; const ranges = innerContentParts.map(function (part) { offset += part.value.length; return { offset: offset - part.value.length, lIndex: part.lIndex }; }); const errors = getDelimiterErrors(delimiterMatches, full, ranges); let cutNext = 0; let delimiterIndex = 0; const parsed = ranges.map(function (p, i) { const { offset } = p; const range = [offset, offset + innerContentParts[i].value.length]; const partContent = innerContentParts[i].value; const delimitersInOffset = []; while ( delimiterIndex < delimiterMatches.length && inRange(range, delimiterMatches[delimiterIndex]) ) { delimitersInOffset.push(delimiterMatches[delimiterIndex]); delimiterIndex++; } const parts = []; let cursor = 0; if (cutNext > 0) { cursor = cutNext; cutNext = 0; } let insideDelimiterChange; delimitersInOffset.forEach(function (delimiterInOffset) { const value = partContent.substr( cursor, delimiterInOffset.offset - offset - cursor ); if (value.length > 0) { if (insideDelimiterChange) { if (delimiterInOffset.changedelimiter) { cursor = delimiterInOffset.offset - offset + delimiterInOffset.length; insideDelimiterChange = delimiterInOffset.position === "start"; } return; } parts.push({ type: "content", value, offset: cursor + offset }); cursor += value.length; } const delimiterPart = { type: "delimiter", position: delimiterInOffset.position, offset: cursor + offset, }; if (delimiterInOffset.error) { delimiterPart.error = delimiterInOffset.error; } if (delimiterInOffset.changedelimiter) { insideDelimiterChange = delimiterInOffset.position === "start"; cursor = delimiterInOffset.offset - offset + delimiterInOffset.length; return; } parts.push(delimiterPart); cursor = delimiterInOffset.offset - offset + delimiterInOffset.length; }); cutNext = cursor - partContent.length; const value = partContent.substr(cursor); if (value.length > 0) { parts.push({ type: "content", value, offset }); } return parts; }, this); return { parsed, errors }; } function getContentParts(xmlparsed) { let inTextTag = false; const innerContentParts = []; xmlparsed.forEach(function (part) { inTextTag = updateInTextTag(part, inTextTag); if (inTextTag && part.type === "content") { innerContentParts.push(part); } }); return innerContentParts; } module.exports = { parseDelimiters, parse(xmlparsed, delimiters) { let inTextTag = false; const { parsed: delimiterParsed, errors } = parseDelimiters( getContentParts(xmlparsed), delimiters ); let lexed = []; let index = 0; xmlparsed.forEach(function (part) { inTextTag = updateInTextTag(part, inTextTag); if (part.type === "content") { part.position = inTextTag ? "insidetag" : "outsidetag"; } if (inTextTag && part.type === "content") { Array.prototype.push.apply( lexed, delimiterParsed[index].map(function (p) { if (p.type === "content") { p.position = "insidetag"; } return p; }) ); index++; } else { lexed.push(part); } }); lexed = lexed.map(function (p, i) { p.lIndex = i; return p; }); return { errors, lexed }; }, xmlparse(content, xmltags) { const matches = tagMatcher(content, xmltags.text, xmltags.other); let cursor = 0; const parsed = matches.reduce(function (parsed, match) { const value = content.substr(cursor, match.offset - cursor); if (value.length > 0) { parsed.push({ type: "content", value }); } cursor = match.offset + match.value.length; delete match.offset; if (match.value.length > 0) { parsed.push(match); } return parsed; }, []); const value = content.substr(cursor); if (value.length > 0) { parsed.push({ type: "content", value }); } return parsed; }, };