You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

encode.js 3.0KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. "use strict";
// CommonJS / ES-module interop helper. Reuses `this.__importDefault` when one
// is already present; otherwise returns modules flagged `__esModule` as-is and
// wraps anything else as `{ default: mod }`, so `.default` can always be read.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
// Flag this module's exports object as ES-module shaped.
Object.defineProperty(exports, "__esModule", { value: true });
// Public API. Both functions are declared further down in this file; function
// declarations are hoisted, so they can already be referenced here.
exports.encodeHTML = encodeHTML;
exports.encodeNonAsciiHTML = encodeNonAsciiHTML;
// Entity lookup table; it is read through `.default` and queried with
// `.get(codeUnit)` by `encodeHTMLTrieRe`.
const encode_html_js_1 = __importDefault(require("./generated/encode-html.js"));
// Supplies `xmlReplacer` and `getCodePoint`, both used below.
const escape_js_1 = require("./escape.js");
// Characters that `encodeHTML` encodes: tab, line feed, form feed, the ASCII
// ranges `!`..`,`  `:`..`@`  `[`..backtick  `{`..`}`, the characters `.` and
// `/`, and every UTF-16 code unit from U+0080 through U+FFFF.
// The `g` flag makes `exec` advance `lastIndex`, which the scanning loop in
// `encodeHTMLTrieRe` relies on to walk through the input.
const htmlReplacer = /[\t\n\f!-,./:-@[-`{-}\u0080-\uFFFF]/g;
  11. /**
  12. * Encodes all characters in the input using HTML entities. This includes
  13. * characters that are valid ASCII characters in HTML documents, such as `#`.
  14. *
  15. * To get a more compact output, consider using the `encodeNonAsciiHTML`
  16. * function, which will only encode characters that are not valid in HTML
  17. * documents, as well as non-ASCII characters.
  18. *
  19. * If a character has no equivalent entity, a numeric hexadecimal reference
  20. * (eg. `ü`) will be used.
  21. */
  22. function encodeHTML(input) {
  23. return encodeHTMLTrieRe(htmlReplacer, input);
  24. }
  25. /**
  26. * Encodes all non-ASCII characters, as well as characters not valid in HTML
  27. * documents using HTML entities. This function will not encode characters that
  28. * are valid in HTML documents, such as `#`.
  29. *
  30. * If a character has no equivalent entity, a numeric hexadecimal reference
  31. * (eg. `ü`) will be used.
  32. */
  33. function encodeNonAsciiHTML(input) {
  34. return encodeHTMLTrieRe(escape_js_1.xmlReplacer, input);
  35. }
  36. function encodeHTMLTrieRe(regExp, input) {
  37. let returnValue = "";
  38. let lastIndex = 0;
  39. let match;
  40. while ((match = regExp.exec(input)) !== null) {
  41. const { index } = match;
  42. returnValue += input.substring(lastIndex, index);
  43. const char = input.charCodeAt(index);
  44. let next = encode_html_js_1.default.get(char);
  45. if (typeof next === "object") {
  46. // We are in a branch. Try to match the next char.
  47. if (index + 1 < input.length) {
  48. const nextChar = input.charCodeAt(index + 1);
  49. const value = typeof next.n === "number"
  50. ? next.n === nextChar
  51. ? next.o
  52. : undefined
  53. : next.n.get(nextChar);
  54. if (value !== undefined) {
  55. returnValue += value;
  56. lastIndex = regExp.lastIndex += 1;
  57. continue;
  58. }
  59. }
  60. next = next.v;
  61. }
  62. // We might have a tree node without a value; skip and use a numeric entity.
  63. if (next === undefined) {
  64. const cp = (0, escape_js_1.getCodePoint)(input, index);
  65. returnValue += `&#x${cp.toString(16)};`;
  66. // Increase by 1 if we have a surrogate pair
  67. lastIndex = regExp.lastIndex += Number(cp !== char);
  68. }
  69. else {
  70. returnValue += next;
  71. lastIndex = index + 1;
  72. }
  73. }
  74. return returnValue + input.substr(lastIndex);
  75. }
  76. //# sourceMappingURL=encode.js.map