shim.js 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. /* eslint no-bitwise: "off", max-statements: "off", max-lines: "off" */
  2. // Taken from: https://github.com/walling/unorm/blob/master/lib/unorm.js
  3. /*
  4. * UnicodeNormalizer 1.0.0
  5. * Copyright (c) 2008 Matsuza
  6. * Dual licensed under the MIT (MIT-LICENSE.txt) and
  7. * GPL (GPL-LICENSE.txt) licenses.
  8. * $Date: 2008-06-05 16:44:17 +0200 (Thu, 05 Jun 2008) $
  9. * $Rev: 13309 $
  10. */
  11. "use strict";
  12. var primitiveSet = require("../../../object/primitive-set")
  13. , validValue = require("../../../object/valid-value")
  14. , data = require("./_data");
  // Shorthand for the only Math helper used below.
  var floor = Math.floor;
  // Set of accepted normalization form names (validated by the exported function).
  var forms = primitiveSet("NFC", "NFD", "NFKC", "NFKD");
  // Feature record used for code points without data. The accessors below read
  // index 0 as the decomposition, index 1 as the flags/canonical-class word and
  // index 2 as the composition map.
  var DEFAULT_FEATURE = [null, 0, {}];
  // A 256-code-point bucket must be hit more than this many times before
  // fromCache starts memoizing its characters.
  var CACHE_THRESHOLD = 10;

  // Constants for the arithmetic Hangul handling in fromRuleBasedJamo.
  var SBase = 0xac00;
  var LBase = 0x1100;
  var VBase = 0x1161;
  var TBase = 0x11a7;
  var LCount = 19;
  var VCount = 21;
  var TCount = 28;
  var NCount = VCount * TCount;
  var SCount = LCount * NCount;

  // Character wrapper, assigned further down.
  var UChar;
  // Memoized UChar instances keyed by code point, plus per-bucket hit counters.
  var cache = {};
  var cacheCounter = [];

  // Lookup strategies and the chain built from them (assigned further down).
  var fromCache;
  var fromData;
  var fromCpOnly;
  var fromRuleBasedJamo;
  var fromCpFilter;
  var strategies;

  // Iterator stages and entry points (assigned further down).
  var UCharIterator;
  var RecursDecompIterator;
  var DecompIterator;
  var CompIterator;
  var createIterator;
  var normalize;
  // Character wrapper: pairs a code point with its feature record.
  //   cp      - numeric code point
  //   feature - feature record array, or a falsy value when it has not been
  //             loaded yet (prepFeature fills it in on demand)
  UChar = function (cp, feature) {
    this.codepoint = cp;
    this.feature = feature;
  };
  // Strategies
  // Start every one of the 256 bucket counters used by fromCache at zero.
  (function () {
    var bucket = 0;
    while (bucket <= 0xff) {
      cacheCounter[bucket] = 0;
      ++bucket;
    }
  }());
  // Strategy: memoize UChar instances per code point.
  // A result is stored only when it carries a feature record and the counter of
  // its bucket ((cp >> 8) & 0xff) has exceeded CACHE_THRESHOLD; results without a
  // feature record are never counted or cached.
  fromCache = function (nextStep, cp, needFeature) {
    var hit = cache[cp], bucket;
    if (hit) return hit;
    hit = nextStep(cp, needFeature);
    if (!hit.feature) return hit;
    bucket = (cp >> 8) & 0xff;
    cacheCounter[bucket] += 1;
    if (cacheCounter[bucket] > CACHE_THRESHOLD) cache[cp] = hit;
    return hit;
  };
  // Strategy (end of the chain): look the code point up in UChar.udata.
  // The table is indexed first by (cp & 0xff00), then by the code point itself.
  // Code points without an entry get DEFAULT_FEATURE. `next` is unused.
  fromData = function (next, cp) {
    var page = UChar.udata[cp & 0xff00];
    var record = page ? page[cp] : undefined;
    return new UChar(cp, record || DEFAULT_FEATURE);
  };
  // Strategy: when the caller does not need the feature record, skip the rest of
  // the chain and return a bare UChar whose feature is null (loaded lazily later).
  fromCpOnly = function (next, cp, needFeature) {
    if (!needFeature) return new UChar(cp, null);
    return next(cp, needFeature);
  };
  // Strategy: build feature records for Hangul arithmetically instead of reading
  // them from the data table.
  //   - Leading consonants [LBase, LBase + LCount): no decomposition; the
  //     composition map pairs every vowel with the resulting LV syllable.
  //   - Syllables [SBase, SBase + SCount): an LV syllable decomposes into L + V
  //     and composes with each trailing consonant into LVT; an LVT syllable
  //     decomposes into LV + T and has no composition map.
  // Every other code point is passed on to `next`.
  fromRuleBasedJamo = function (next, cp, needFeature) {
    var composites, base, i, SIndex, TIndex, j;
    // Both ranges are half-open. SBase + SCount is the first code point AFTER the
    // last syllable, so it must be delegated as well (hence `<=`, not `<`).
    if (cp < LBase || (LBase + LCount <= cp && cp < SBase) || SBase + SCount <= cp) {
      return next(cp, needFeature);
    }
    if (cp < LBase + LCount) {
      composites = {};
      base = (cp - LBase) * VCount;
      for (i = 0; i < VCount; ++i) {
        composites[VBase + i] = SBase + TCount * (i + base);
      }
      return new UChar(cp, [null, 0, composites]);
    }
    SIndex = cp - SBase;
    TIndex = SIndex % TCount;
    if (TIndex !== 0) {
      // LVT syllable: split off the trailing consonant.
      return new UChar(cp, [[SBase + SIndex - TIndex, TBase + TIndex]]);
    }
    // LV syllable: the following TCount - 1 code points are its LVT forms.
    composites = {};
    for (j = 1; j < TCount; ++j) {
      composites[TBase + j] = cp + j;
    }
    return new UChar(cp, [
      [LBase + floor(SIndex / NCount), VBase + floor((SIndex % NCount) / TCount)],
      0,
      composites
    ]);
  };
  // Strategy (head of the chain): fast path for code points that are given
  // DEFAULT_FEATURE without any lookup, namely everything below 60 and everything
  // strictly between 13311 (0x33ff) and 42607 (0xa66f).
  fromCpFilter = function (next, cp, needFeature) {
    var isPlain = cp < 60 || (cp > 13311 && cp < 42607);
    if (isPlain) return new UChar(cp, DEFAULT_FEATURE);
    return next(cp, needFeature);
  };
  // Order matters: each strategy receives the one to its right as `next`.
  strategies = [fromCpFilter, fromCache, fromCpOnly, fromRuleBasedJamo, fromData];
  // Fold the list from the right into a single (cp, needFeature) lookup function.
  // The last strategy is handed `null` as its `next`.
  UChar.fromCharCode = (function () {
    var link = function (strategy, next) {
      return function (cp, needFeature) {
        return strategy(next, cp, needFeature);
      };
    };
    var chain = null, index;
    for (index = strategies.length - 1; index >= 0; --index) {
      chain = link(strategies[index], chain);
    }
    return chain;
  }());
  // True when cp is a UTF-16 high (leading) surrogate code unit, 0xd800-0xdbff.
  UChar.isHighSurrogate = function (cp) {
    return 0xd800 <= cp && cp <= 0xdbff;
  };
  // True when cp is a UTF-16 low (trailing) surrogate code unit, 0xdc00-0xdfff.
  UChar.isLowSurrogate = function (cp) {
    return 0xdc00 <= cp && cp <= 0xdfff;
  };
  // Load the feature record on demand (instances created without one get it from
  // a full UChar.fromCharCode lookup).
  UChar.prototype.prepFeature = function () {
    if (this.feature) return;
    this.feature = UChar.fromCharCode(this.codepoint, true).feature;
  };
  // Encode the code point as a JS string, using a surrogate pair above 0xffff.
  UChar.prototype.toString = function () {
    var cp = this.codepoint, offset;
    if (cp < 0x10000) return String.fromCharCode(cp);
    offset = cp - 0x10000;
    return String.fromCharCode(floor(offset / 0x400) + 0xd800, (offset % 0x400) + 0xdc00);
  };
  // Decomposition (array of code points) or null when there is none.
  UChar.prototype.getDecomp = function () {
    this.prepFeature();
    return this.feature[0] || null;
  };
  // Truthy when bit 8 of the flags word is set. Returns the masked number
  // (256 or 0), or false when there are no flags at all.
  UChar.prototype.isCompatibility = function () {
    var flags;
    this.prepFeature();
    flags = this.feature[1];
    return flags ? flags & (1 << 8) : false;
  };
  // Truthy when bit 9 of the flags word is set. Returns the masked number
  // (512 or 0), or false when there are no flags at all.
  UChar.prototype.isExclude = function () {
    var flags;
    this.prepFeature();
    flags = this.feature[1];
    return flags ? flags & (1 << 9) : false;
  };
  // Canonical combining class: the low 8 bits of the flags word, 0 by default.
  UChar.prototype.getCanonicalClass = function () {
    var flags;
    this.prepFeature();
    flags = this.feature[1];
    return flags ? flags & 0xff : 0;
  };
  // UChar produced by composing this character with `following`, or null when
  // the composition map has no entry for it.
  UChar.prototype.getComposite = function (following) {
    var table, composed;
    this.prepFeature();
    table = this.feature[2];
    if (!table) return null;
    composed = table[following.codepoint];
    return composed ? UChar.fromCharCode(composed) : null;
  };
  // First pipeline stage: walks a string and yields one UChar per code point.
  UCharIterator = function (str) {
    this.str = str;
    this.cursor = 0;
  };
  // Returns the next UChar, or null once the string is exhausted (the string
  // reference is dropped at that point). A high surrogate immediately followed by
  // a low surrogate is merged into one supplementary code point; unpaired
  // surrogates are yielded as they are.
  UCharIterator.prototype.next = function () {
    var text = this.str, lead, trail;
    if (!(Boolean(text) && this.cursor < text.length)) {
      this.str = null;
      return null;
    }
    lead = text.charCodeAt(this.cursor++);
    if (UChar.isHighSurrogate(lead) && this.cursor < text.length) {
      trail = text.charCodeAt(this.cursor);
      if (UChar.isLowSurrogate(trail)) {
        ++this.cursor;
        lead = (lead - 0xd800) * 0x400 + (trail - 0xdc00) + 0x10000;
      }
    }
    return UChar.fromCharCode(lead);
  };
  // Second pipeline stage: replaces every character by its full recursive
  // decomposition. With `cano` truthy, characters flagged as compatibility
  // decompositions are left untouched.
  RecursDecompIterator = function (it, cano) {
    this.it = it;
    this.canonical = cano;
    this.resBuf = [];
  };
  // Returns the next decomposed UChar, or null when the source is exhausted.
  RecursDecompIterator.prototype.next = function () {
    var source;
    var expand = function (canonicalOnly, ch) {
      var mapping = ch.getDecomp(), pieces, index;
      if (!mapping || (canonicalOnly && ch.isCompatibility())) return [ch];
      pieces = [];
      for (index = 0; index < mapping.length; ++index) {
        // Append in place. Array#concat would return a new array and leave
        // `pieces` unchanged.
        Array.prototype.push.apply(
          pieces, expand(canonicalOnly, UChar.fromCharCode(mapping[index]))
        );
      }
      return pieces;
    };
    if (this.resBuf.length === 0) {
      source = this.it.next();
      if (!source) return null;
      this.resBuf = expand(this.canonical, source);
    }
    return this.resBuf.shift();
  };
  // Third pipeline stage: reorders characters by canonical combining class.
  DecompIterator = function (it) {
    this.it = it;
    this.resBuf = [];
  };
  // When the buffer is empty, pulls characters from the source up to and including
  // the next class-0 character. Each non-zero-class character is inserted after
  // the last buffered character whose class is less than or equal to its own, so
  // equal classes keep their input order. Returns the next buffered UChar, or
  // undefined when nothing is left.
  DecompIterator.prototype.next = function () {
    var pending = this.resBuf, current, currentClass, slot;
    if (pending.length === 0) {
      for (;;) {
        current = this.it.next();
        if (!current) break;
        currentClass = current.getCanonicalClass();
        slot = pending.length;
        if (currentClass !== 0) {
          while (slot > 0 && pending[slot - 1].getCanonicalClass() > currentClass) {
            --slot;
          }
        }
        pending.splice(slot, 0, current);
        if (currentClass === 0) break;
      }
    }
    return pending.shift();
  };
  // Final pipeline stage for the composing forms: merges characters into the
  // first character of the pending run wherever a composite exists.
  //   procBuf   - run currently being built (index 0 is the composition target)
  //   resBuf    - finished characters waiting to be handed out
  //   lastClass - canonical class of the character most recently appended to procBuf
  CompIterator = function (it) {
    this.it = it;
    this.procBuf = [];
    this.resBuf = [];
    this.lastClass = null;
  };
  // Returns the next composed UChar, or undefined when nothing is left.
  CompIterator.prototype.next = function () {
    var incoming, merged, incomingClass;
    while (this.resBuf.length === 0) {
      incoming = this.it.next();
      if (!incoming) {
        // Source exhausted: release whatever is still pending.
        this.resBuf = this.procBuf;
        this.procBuf = [];
        break;
      }
      if (this.procBuf.length === 0) {
        this.lastClass = incoming.getCanonicalClass();
        this.procBuf.push(incoming);
        continue;
      }
      merged = this.procBuf[0].getComposite(incoming);
      incomingClass = incoming.getCanonicalClass();
      // Compose only when the previously appended character does not block it:
      // its class must be lower than the incoming one, or zero.
      if (Boolean(merged) && (this.lastClass < incomingClass || this.lastClass === 0)) {
        this.procBuf[0] = merged;
        continue;
      }
      if (incomingClass === 0) {
        // A class-0 character that did not compose closes the current run.
        this.resBuf = this.procBuf;
        this.procBuf = [];
      }
      this.lastClass = incomingClass;
      this.procBuf.push(incoming);
    }
    return this.resBuf.shift();
  };
  // Assemble the iterator pipeline for a normalization form:
  //   NFD / NFKD -> DecompIterator(RecursDecompIterator(UCharIterator(str), canonical))
  //   NFC / NFKC -> the same pipeline wrapped in a CompIterator
  // `canonical` is true for NFC and NFD, false for NFKC and NFKD.
  // Throws an Error for any other mode.
  createIterator = function (mode, str) {
    var composes = mode === "NFC" || mode === "NFKC";
    var canonical = mode === "NFC" || mode === "NFD";
    var decomposed;
    if (!composes && !canonical && mode !== "NFKD") {
      throw new Error(mode + " is invalid");
    }
    decomposed = new DecompIterator(
      new RecursDecompIterator(new UCharIterator(str), canonical)
    );
    return composes ? new CompIterator(decomposed) : decomposed;
  };
  // Run `str` through the iterator pipeline for `mode` and concatenate the string
  // form of every character it yields. Iteration stops at the first falsy value.
  normalize = function (mode, str) {
    var pipeline = createIterator(mode, str), result = "", current;
    for (current = pipeline.next(); current; current = pipeline.next()) {
      result += current.toString();
    }
    return result;
  };
  280. /* Unicode data */
  281. UChar.udata = data;
  282. module.exports = function (/* Form*/) {
  283. var str = String(validValue(this)), form = arguments[0];
  284. if (form === undefined) form = "NFC";
  285. else form = String(form);
  286. if (!forms[form]) throw new RangeError("Invalid normalization form: " + form);
  287. return normalize(form, str);
  288. };