tokenizer.js 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. module.exports = function(css) {
  2. var TokenType = require('../token-types');
  // Tokens collected so far, in the order they were read.
  var tokens = [];
  // Set once a `url` identifier has been read; cleared at the next `)`.
  var urlMode = false;
  // Number of `{` seen minus number of `}` seen so far.
  var blockMode = 0;
  // Index into the CSS string of the character being examined.
  var pos = 0;
  // Sequence number given to the next token.
  var tn = 0;
  // Line number of the current position (starts at 1).
  var ln = 1;
  // Column number of the current position (starts at 1).
  var col = 1;
  // Length of the CSS string; assigned when tokenizing starts.
  var cssLength = 0;

  /**
   * Lookup table from a single punctuation or whitespace character to the
   * token type emitted for it.
   */
  var Punctuation = {
    // Whitespace
    ' ': TokenType.Space,
    '\n': TokenType.Newline,
    '\r': TokenType.Newline,
    '\t': TokenType.Tab,

    // `!` through `/`
    '!': TokenType.ExclamationMark,
    '"': TokenType.QuotationMark,
    '#': TokenType.NumberSign,
    '$': TokenType.DollarSign,
    '%': TokenType.PercentSign,
    '&': TokenType.Ampersand,
    "'": TokenType.Apostrophe,
    '(': TokenType.LeftParenthesis,
    ')': TokenType.RightParenthesis,
    '*': TokenType.Asterisk,
    '+': TokenType.PlusSign,
    ',': TokenType.Comma,
    '-': TokenType.HyphenMinus,
    '.': TokenType.FullStop,
    '/': TokenType.Solidus,

    // `:` through `@`
    ':': TokenType.Colon,
    ';': TokenType.Semicolon,
    '<': TokenType.LessThanSign,
    '=': TokenType.EqualsSign,
    '>': TokenType.GreaterThanSign,
    '?': TokenType.QuestionMark,
    '@': TokenType.CommercialAt,

    // Brackets and the remaining marks
    '[': TokenType.LeftSquareBracket,
    ']': TokenType.RightSquareBracket,
    '^': TokenType.CircumflexAccent,
    '_': TokenType.LowLine,
    '{': TokenType.LeftCurlyBracket,
    '|': TokenType.VerticalLine,
    '}': TokenType.RightCurlyBracket,
    '~': TokenType.Tilde
  };
  /**
   * Append a token to the token list.
   *
   * The token records the current sequence number (which is then advanced)
   * and the current line number alongside the supplied fields.
   * @param {string} type Token type
   * @param {string} value Source text covered by the token
   * @param {number} column Column at which the token starts
   */
  function pushToken(type, value, column) {
    var token = {
      tn: tn,
      ln: ln,
      col: column,
      type: type,
      value: value
    };

    tn += 1;
    tokens.push(token);
  }
  /**
   * Check whether a value is exactly one ASCII decimal digit (`0`-`9`).
   *
   * The empty string and multi-character strings are rejected. A plain
   * `'0123456789'.indexOf(c) >= 0` lookup would accept `''` (an empty search
   * string is found at index 0) and digit runs such as `'12'`.
   * @param {string} c Character
   * @returns {boolean} `true` only for a single character in `0`-`9`
   */
  function isDecimalDigit(c) {
    // Coerce like `indexOf` did, so a numeric digit such as `5` still passes.
    var ch = String(c);
    return ch.length === 1 && ch >= '0' && ch <= '9';
  }
  /**
   * Read a run of consecutive space characters and emit it as one Space
   * token.
   *
   * On return `pos` rests on the last space of the run, so the caller's loop
   * increment moves on to the first character after it.
   * @param {string} css CSS string being tokenized (read from `pos` onward)
   */
  function parseSpaces(css) {
    var first = pos;

    // Advance to the first character that is not a space (or to the end):
    while (pos < cssLength && css.charAt(pos) === ' ') {
      pos++;
    }

    pushToken(TokenType.Space, css.substring(first, pos), col);

    // Step back onto the last space; the caller's loop advances past it.
    pos--;
    col += pos - first;
  }
  /**
   * Parse a string within quotes and emit it as one StringDQ / StringSQ token.
   *
   * Reading starts at the opening quote under `pos` and stops at the matching
   * closing quote (or at the end of input when the string is unterminated).
   * On return `pos` rests on the last character consumed.
   *
   * A string may contain line breaks (for example an escaped newline), so the
   * line and column counters are updated the same way as for multiline
   * comments; otherwise every token after such a string would carry a stale
   * line number.
   * @param {string} css CSS string being tokenized (read from `pos` onward)
   * @param {string} q Quote (either `'` or `"`)
   */
  function parseString(css, q) {
    var start = pos;

    // Read the string until we meet a matching quote:
    for (pos++; pos < cssLength; pos++) {
      // A backslash escapes the next character, so skip over it:
      if (css.charAt(pos) === '\\') pos++;
      else if (css.charAt(pos) === q) break;
    }

    // Add the string (including quotes) to tokens:
    var str = css.substring(start, pos + 1);
    pushToken(q === '"' ? TokenType.StringDQ : TokenType.StringSQ, str, col);

    var lines = str.split('\n');
    if (lines.length > 1) {
      // The string spans several lines: move to its last line. The caller's
      // loop adds 1 to `col`, giving the column right after the string.
      ln += lines.length - 1;
      col = lines[lines.length - 1].length;
    } else {
      col += (pos - start);
    }
  }
  /**
   * Read a run of consecutive decimal digits and emit it as one
   * DecimalNumber token.
   *
   * On return `pos` rests on the last digit of the run, so the caller's loop
   * increment moves on to the first character after it.
   * @param {string} css CSS string being tokenized (read from `pos` onward)
   */
  function parseDecimalNumber(css) {
    var first = pos;

    // Advance to the first character that is not a digit (or to the end):
    while (pos < cssLength && isDecimalDigit(css.charAt(pos))) {
      pos++;
    }

    pushToken(TokenType.DecimalNumber, css.substring(first, pos), col);

    // Step back onto the last digit; the caller's loop advances past it.
    pos--;
    col += pos - first;
  }
  /**
   * Read an identifier and emit it as one Identifier token.
   *
   * Any leading `/` characters are taken as part of the identifier. After
   * that, characters are consumed up to (not including) the first
   * punctuation mark; a backslash and the character following it are always
   * consumed together. Reading the identifier `url` switches url mode on.
   *
   * On return `pos` rests on the last character of the identifier.
   * @param {string} css CSS string being tokenized (read from `pos` onward)
   */
  function parseIdentifier(css) {
    var first = pos;
    var ch;

    // Leading slashes belong to the identifier:
    for (; css.charAt(pos) === '/'; pos++) {}

    while (pos < cssLength) {
      ch = css.charAt(pos);
      if (ch === '\\') {
        // Consume the backslash together with the character it escapes.
        pos += 2;
        continue;
      }
      if (Punctuation[ch]) break;
      pos++;
    }

    var ident = css.substring(first, pos);
    // Step back onto the last character; the caller's loop advances past it.
    pos--;

    // Url mode, once on, stays on here; otherwise `url` turns it on.
    if (!urlMode) {
      urlMode = ident === 'url';
    }

    pushToken(TokenType.Identifier, ident, col);
    col += pos - first;
  }
  /**
   * Read a multiline comment and emit it as one CommentML token.
   *
   * The token covers the text from the opening slash-asterisk through the
   * closing asterisk-slash, or through the end of input when the comment is
   * never closed. Line feeds inside the comment advance the line counter and
   * reset the column to the length of the comment's last line.
   *
   * On return `pos` rests on the last character consumed.
   * @param {string} css CSS string being tokenized (read from `pos` onward)
   */
  function parseMLComment(css) {
    var first = pos;

    // The two opening characters are already known, so skip them:
    pos += 2;

    while (pos < cssLength) {
      if (css.charAt(pos) === '*' && css.charAt(pos + 1) === '/') {
        // Land on the closing slash and stop.
        pos++;
        break;
      }
      pos++;
    }

    var comment = css.substring(first, pos + 1);
    pushToken(TokenType.CommentML, comment, col);

    var lastBreak = comment.lastIndexOf('\n');
    if (lastBreak === -1) {
      // Single-line comment: just move the column along.
      col += pos - first;
      return;
    }

    // One extra line per line feed; column is the length of the final line.
    ln += comment.split('\n').length - 1;
    col = comment.length - lastBreak - 1;
  }
  /**
   * Read a single-line comment and emit it as one CommentSL token.
   *
   * The token starts at the `//` under `pos` and runs up to, but does not
   * include, the next `\n` or `\r` (or the end of input).
   *
   * On return `pos` rests on the last character of the comment, so the
   * caller's loop increment moves on to the line break.
   * @param {string} css CSS string being tokenized (read from `pos` onward)
   */
  function parseSLComment(css) {
    var first = pos;
    var ch;

    // The leading `//` is already known, so skip it:
    pos += 2;

    while (pos < cssLength) {
      ch = css.charAt(pos);
      if (ch === '\n' || ch === '\r') break;
      pos++;
    }

    pushToken(TokenType.CommentSL, css.substring(first, pos), col);

    // Step back onto the last comment character; the line break is left
    // for the caller to tokenize.
    pos--;
    col += pos - first;
  }
  /**
   * Convert a CSS string to a list of tokens.
   *
   * Walks the input character by character and dispatches to the matching
   * parse helper. Each helper leaves `pos` on the last character it consumed,
   * so the loop's own `pos++` / `col++` step past it.
   *
   * A `\r\n` pair still produces two Newline tokens, but advances the line
   * counter only once (on the `\n`); counting both characters would double
   * every line number on CRLF input. A lone `\r` or `\n` counts as one line.
   * @param {string} css CSS string
   * @returns {Array} List of tokens
   * @private
   */
  function getTokens(css) {
    var c; // current character
    var cn; // next character

    cssLength = css.length;

    // Parse string, character by character:
    for (pos = 0; pos < cssLength; col++, pos++) {
      c = css.charAt(pos);
      cn = css.charAt(pos + 1);

      // If we meet `/*`, it's a start of a multiline comment.
      // Parse following characters as a multiline comment:
      if (c === '/' && cn === '*') {
        parseMLComment(css);
      }

      // If we meet `//` and it is not a part of url:
      else if (!urlMode && c === '/' && cn === '/') {
        // If we're currently inside a block, treat `//` as a start
        // of identifier. Else treat `//` as a start of a single-line
        // comment:
        if (blockMode > 0) parseIdentifier(css);
        else parseSLComment(css);
      }

      // If current character is a double or single quote, it's a start
      // of a string:
      else if (c === '"' || c === "'") {
        parseString(css, c);
      }

      // If current character is a space:
      else if (c === ' ') {
        parseSpaces(css);
      }

      // If current character is a punctuation mark:
      else if (Punctuation[c]) {
        // Add it to the list of tokens:
        pushToken(Punctuation[c], c, col);

        if (c === '\n' || (c === '\r' && cn !== '\n')) {
          // Go to next line. The `\r` of a `\r\n` pair is skipped here so
          // the pair counts as a single line break.
          ln++;
          col = 0;
        }
        else if (c === ')') urlMode = false; // exit url mode
        else if (c === '{') blockMode++; // enter a block
        else if (c === '}') blockMode--; // exit a block
      }

      // If current character is a decimal digit:
      else if (isDecimalDigit(c)) {
        parseDecimalNumber(css);
      }

      // If current character is anything else:
      else {
        parseIdentifier(css);
      }
    }

    return tokens;
  }
  234. return getTokens(css);
  235. };