util.js 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. var types = require('./types');
  2. var sets = require('./sets');
  3. // All of these are private and only used by randexp.
  4. // It's assumed that they will always be called with the correct input.
  5. var CTRL = '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^ ?';
  6. var SLSH = { '0': 0, 't': 9, 'n': 10, 'v': 11, 'f': 12, 'r': 13 };
  7. /**
  8. * Finds character representations in str and convert all to
  9. * their respective characters
  10. *
  11. * @param {String} str
  12. * @return {String}
  13. */
  14. exports.strToChars = function(str) {
  15. /* jshint maxlen: false */
  16. var chars_regex = /(\[\\b\])|(\\)?\\(?:u([A-F0-9]{4})|x([A-F0-9]{2})|(0?[0-7]{2})|c([@A-Z\[\\\]\^?])|([0tnvfr]))/g;
  17. str = str.replace(chars_regex, function(s, b, lbs, a16, b16, c8, dctrl, eslsh) {
  18. if (lbs) {
  19. return s;
  20. }
  21. var code = b ? 8 :
  22. a16 ? parseInt(a16, 16) :
  23. b16 ? parseInt(b16, 16) :
  24. c8 ? parseInt(c8, 8) :
  25. dctrl ? CTRL.indexOf(dctrl) :
  26. SLSH[eslsh];
  27. var c = String.fromCharCode(code);
  28. // Escape special regex characters.
  29. if (/[\[\]{}\^$.|?*+()]/.test(c)) {
  30. c = '\\' + c;
  31. }
  32. return c;
  33. });
  34. return str;
  35. };
  36. /**
  37. * turns class into tokens
  38. * reads str until it encounters a ] not preceeded by a \
  39. *
  40. * @param {String} str
  41. * @param {String} regexpStr
  42. * @return {Array.<Array.<Object>, Number>}
  43. */
  44. exports.tokenizeClass = function(str, regexpStr) {
  45. /* jshint maxlen: false */
  46. var tokens = [];
  47. var regexp = /\\(?:(w)|(d)|(s)|(W)|(D)|(S))|((?:(?:\\)(.)|([^\]\\]))-(?:\\)?([^\]]))|(\])|(?:\\)?(.)/g;
  48. var rs, c;
  49. while ((rs = regexp.exec(str)) != null) {
  50. if (rs[1]) {
  51. tokens.push(sets.words());
  52. } else if (rs[2]) {
  53. tokens.push(sets.ints());
  54. } else if (rs[3]) {
  55. tokens.push(sets.whitespace());
  56. } else if (rs[4]) {
  57. tokens.push(sets.notWords());
  58. } else if (rs[5]) {
  59. tokens.push(sets.notInts());
  60. } else if (rs[6]) {
  61. tokens.push(sets.notWhitespace());
  62. } else if (rs[7]) {
  63. tokens.push({
  64. type: types.RANGE,
  65. from: (rs[8] || rs[9]).charCodeAt(0),
  66. to: rs[10].charCodeAt(0),
  67. });
  68. } else if (c = rs[12]) {
  69. tokens.push({
  70. type: types.CHAR,
  71. value: c.charCodeAt(0),
  72. });
  73. } else {
  74. return [tokens, regexp.lastIndex];
  75. }
  76. }
  77. exports.error(regexpStr, 'Unterminated character class');
  78. };
  79. /**
  80. * Shortcut to throw errors.
  81. *
  82. * @param {String} regexp
  83. * @param {String} msg
  84. */
  85. exports.error = function(regexp, msg) {
  86. throw new SyntaxError('Invalid regular expression: /' + regexp + '/: ' + msg);
  87. };