"use strict"; const { DOMParser, XMLSerializer } = require("xmldom"); const { throwXmlTagNotFound } = require("./errors"); const { last, first } = require("./utils"); function parser(tag) { return { get(scope) { if (tag === ".") { return scope; } return scope[tag]; }, }; } function getNearestLeftIndex(parsed, elements, index) { for (let i = index; i >= 0; i--) { const part = parsed[i]; for (let j = 0, len = elements.length; j < len; j++) { const element = elements[j]; if (isStarting(part.value, element)) { return j; } } } return null; } function getNearestRightIndex(parsed, elements, index) { for (let i = index, l = parsed.length; i < l; i++) { const part = parsed[i]; for (let j = 0, len = elements.length; j < len; j++) { const element = elements[j]; if (isEnding(part.value, element)) { return j; } } } return -1; } function getNearestLeft(parsed, elements, index) { const found = getNearestLeftIndex(parsed, elements, index); if (found !== -1) { return elements[found]; } return null; } function getNearestRight(parsed, elements, index) { const found = getNearestRightIndex(parsed, elements, index); if (found !== -1) { return elements[found]; } return null; } function buildNearestCache(postparsed, tags) { return postparsed.reduce(function (cached, part, i) { if (part.type === "tag" && tags.indexOf(part.tag) !== -1) { cached.push({ i, part }); } return cached; }, []); } function getNearestLeftIndexWithCache(index, cache) { if (cache.length === 0) { return -1; } for (let i = 0, len = cache.length; i < len; i++) { const current = cache[i]; const next = cache[i + 1]; if ( current.i < index && (!next || index < next.i) && current.part.position === "start" ) { return i; } } return -1; } function getNearestLeftWithCache(index, cache) { const found = getNearestLeftIndexWithCache(index, cache); if (found !== -1) { return cache[found].part.tag; } return null; } function getNearestRightIndexWithCache(index, cache) { if (cache.length === 0) { return -1; } for (let i = 0, len = cache.length; i < len; i++) { const current = cache[i]; const last = cache[i - 1]; if ( index < current.i && (!last || last.i < index) && current.part.position === "end" ) { return i; } } return -1; } function getNearestRightWithCache(index, cache) { const found = getNearestRightIndexWithCache(index, cache); if (found !== -1) { return cache[found].part.tag; } return null; } function endsWith(str, suffix) { return str.indexOf(suffix, str.length - suffix.length) !== -1; } function startsWith(str, prefix) { return str.substring(0, prefix.length) === prefix; } function unique(arr) { const hash = {}, result = []; for (let i = 0, l = arr.length; i < l; ++i) { if (!hash.hasOwnProperty(arr[i])) { hash[arr[i]] = true; result.push(arr[i]); } } return result; } function chunkBy(parsed, f) { return parsed .reduce( function (chunks, p) { const currentChunk = last(chunks); const res = f(p); if (currentChunk.length === 0) { currentChunk.push(p); return chunks; } if (res === "start") { chunks.push([p]); } else if (res === "end") { currentChunk.push(p); chunks.push([]); } else { currentChunk.push(p); } return chunks; }, [[]] ) .filter(function (p) { return p.length > 0; }); } const defaults = { paragraphLoop: false, nullGetter(part) { if (!part.module) { return "undefined"; } if (part.module === "rawxml") { return ""; } return ""; }, xmlFileNames: [], parser, linebreaks: false, fileTypeConfig: null, delimiters: { start: "{", end: "}", }, }; function mergeObjects() { const resObj = {}; let obj, keys; for (let i = 0; i < arguments.length; i += 1) { obj = arguments[i]; keys = Object.keys(obj); for (let j = 0; j < keys.length; j += 1) { resObj[keys[j]] = obj[keys[j]]; } } return resObj; } function xml2str(xmlNode) { const a = new XMLSerializer(); return a.serializeToString(xmlNode).replace(/xmlns(:[a-z0-9]+)?="" ?/g, ""); } function str2xml(str) { if (str.charCodeAt(0) === 65279) { // BOM sequence str = str.substr(1); } const parser = new DOMParser(); return parser.parseFromString(str, "text/xml"); } const charMap = [ ["&", "&"], ["<", "<"], [">", ">"], ['"', """], ["'", "'"], ]; function escapeRegExp(str) { // to be able to use a string as a regex return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } const charMapRegexes = charMap.map(function ([endChar, startChar]) { return { rstart: new RegExp(escapeRegExp(startChar), "g"), rend: new RegExp(escapeRegExp(endChar), "g"), start: startChar, end: endChar, }; }); function wordToUtf8(string) { let r; for (let i = charMapRegexes.length - 1; i >= 0; i--) { r = charMapRegexes[i]; string = string.replace(r.rstart, r.end); } return string; } function utf8ToWord(string) { if (typeof string !== "string") { string = string.toString(); } let r; for (let i = 0, l = charMapRegexes.length; i < l; i++) { r = charMapRegexes[i]; string = string.replace(r.rend, r.start); } return string; } // This function is written with for loops for performance function concatArrays(arrays) { const result = []; for (let i = 0; i < arrays.length; i++) { const array = arrays[i]; for (let j = 0, len = array.length; j < len; j++) { result.push(array[j]); } } return result; } const spaceRegexp = new RegExp(String.fromCharCode(160), "g"); function convertSpaces(s) { return s.replace(spaceRegexp, " "); } function pregMatchAll(regex, content) { /* regex is a string, content is the content. It returns an array of all matches with their offset, for example: regex=la content=lolalolilala returns: [{array: {0: 'la'},offset: 2},{array: {0: 'la'},offset: 8},{array: {0: 'la'} ,offset: 10}] */ const matchArray = []; let match; while ((match = regex.exec(content)) != null) { matchArray.push({ array: match, offset: match.index }); } return matchArray; } function isEnding(value, element) { return value === ""; } function isStarting(value, element) { return ( value.indexOf("<" + element) === 0 && [">", " "].indexOf(value[element.length + 1]) !== -1 ); } function getRight(parsed, element, index) { const val = getRightOrNull(parsed, element, index); if (val !== null) { return val; } throwXmlTagNotFound({ position: "right", element, parsed, index }); } function getRightOrNull(parsed, elements, index) { if (typeof elements === "string") { elements = [elements]; } let level = 1; for (let i = index, l = parsed.length; i < l; i++) { const part = parsed[i]; for (let j = 0, len = elements.length; j < len; j++) { const element = elements[j]; if (isEnding(part.value, element)) { level--; } if (isStarting(part.value, element)) { level++; } if (level === 0) { return i; } } } return null; } function getLeft(parsed, element, index) { const val = getLeftOrNull(parsed, element, index); if (val !== null) { return val; } throwXmlTagNotFound({ position: "left", element, parsed, index }); } function getLeftOrNull(parsed, elements, index) { if (typeof elements === "string") { elements = [elements]; } let level = 1; for (let i = index; i >= 0; i--) { const part = parsed[i]; for (let j = 0, len = elements.length; j < len; j++) { const element = elements[j]; if (isStarting(part.value, element)) { level--; } if (isEnding(part.value, element)) { level++; } if (level === 0) { return i; } } } return null; } function isTagStart(tagType, { type, tag, position }) { return type === "tag" && tag === tagType && position === "start"; } function isTagEnd(tagType, { type, tag, position }) { return type === "tag" && tag === tagType && position === "end"; } function isParagraphStart(options) { return isTagStart("w:p", options) || isTagStart("a:p", options); } function isParagraphEnd(options) { return isTagEnd("w:p", options) || isTagEnd("a:p", options); } function isTextStart(part) { return part.type === "tag" && part.position === "start" && part.text; } function isTextEnd(part) { return part.type === "tag" && part.position === "end" && part.text; } function isContent(p) { return ( p.type === "placeholder" || (p.type === "content" && p.position === "insidetag") ); } const corruptCharacters = /[\x00-\x08\x0B\x0C\x0E-\x1F]/; // 00 NUL '\0' (null character) // 01 SOH (start of heading) // 02 STX (start of text) // 03 ETX (end of text) // 04 EOT (end of transmission) // 05 ENQ (enquiry) // 06 ACK (acknowledge) // 07 BEL '\a' (bell) // 08 BS '\b' (backspace) // 0B VT '\v' (vertical tab) // 0C FF '\f' (form feed) // 0E SO (shift out) // 0F SI (shift in) // 10 DLE (data link escape) // 11 DC1 (device control 1) // 12 DC2 (device control 2) // 13 DC3 (device control 3) // 14 DC4 (device control 4) // 15 NAK (negative ack.) // 16 SYN (synchronous idle) // 17 ETB (end of trans. blk) // 18 CAN (cancel) // 19 EM (end of medium) // 1A SUB (substitute) // 1B ESC (escape) // 1C FS (file separator) // 1D GS (group separator) // 1E RS (record separator) // 1F US (unit separator) function hasCorruptCharacters(string) { return corruptCharacters.test(string); } function invertMap(map) { return Object.keys(map).reduce(function (invertedMap, key) { const value = map[key]; invertedMap[value] = invertedMap[value] || []; invertedMap[value].push(key); return invertedMap; }, {}); } module.exports = { endsWith, startsWith, getNearestLeft, getNearestRight, getNearestLeftWithCache, getNearestRightWithCache, getNearestLeftIndex, getNearestRightIndex, getNearestLeftIndexWithCache, getNearestRightIndexWithCache, buildNearestCache, isContent, isParagraphStart, isParagraphEnd, isTagStart, isTagEnd, isTextStart, isTextEnd, unique, chunkBy, last, first, mergeObjects, xml2str, str2xml, getRightOrNull, getRight, getLeftOrNull, getLeft, pregMatchAll, convertSpaces, escapeRegExp, charMapRegexes, hasCorruptCharacters, defaults, wordToUtf8, utf8ToWord, concatArrays, invertMap, charMap, };