tokenizer.js 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. module.exports = function(css) {
  2. var TokenType = require('../token-types');
  // --- Tokenizer state, shared by every helper in this closure ---
  // Tokens emitted so far, in source order:
  var tokens = [];
  // Set once an identifier equal to `url` is parsed; cleared again at `)`.
  // While set, `//` is not treated as the start of a single-line comment:
  var urlMode = false;
  // Incremented at `{` and decremented at `}`:
  var blockMode = 0;
  // Current character:
  var c;
  // Next character:
  var cn;
  // Index of the current character in the input string:
  var pos = 0;
  // Sequential number given to the next token:
  var tn = 0;
  // Current line number (starts at 1):
  var ln = 1;
  // Current column number (starts at 1):
  var col = 1;

  // Lookup table from punctuation text to its token type.
  // The tokenizer tests membership with single characters; the two-character
  // operators `==` and `!=` are listed here as well.
  var Punctuation = {
    // Whitespace
    ' ': TokenType.Space,
    '\n': TokenType.Newline,
    '\r': TokenType.Newline,
    '\t': TokenType.Tab,

    // `!` through `/`
    '!': TokenType.ExclamationMark,
    '"': TokenType.QuotationMark,
    '#': TokenType.NumberSign,
    '$': TokenType.DollarSign,
    '%': TokenType.PercentSign,
    '&': TokenType.Ampersand,
    "'": TokenType.Apostrophe,
    '(': TokenType.LeftParenthesis,
    ')': TokenType.RightParenthesis,
    '*': TokenType.Asterisk,
    '+': TokenType.PlusSign,
    ',': TokenType.Comma,
    '-': TokenType.HyphenMinus,
    '.': TokenType.FullStop,
    '/': TokenType.Solidus,

    // `:` through `@`, plus the two-character comparison operators
    ':': TokenType.Colon,
    ';': TokenType.Semicolon,
    '<': TokenType.LessThanSign,
    '=': TokenType.EqualsSign,
    '==': TokenType.EqualitySign,
    '!=': TokenType.InequalitySign,
    '>': TokenType.GreaterThanSign,
    '?': TokenType.QuestionMark,
    '@': TokenType.CommercialAt,

    // `[` through `_`
    '[': TokenType.LeftSquareBracket,
    ']': TokenType.RightSquareBracket,
    '^': TokenType.CircumflexAccent,
    '_': TokenType.LowLine,

    // `{` through `~`
    '{': TokenType.LeftCurlyBracket,
    '|': TokenType.VerticalLine,
    '}': TokenType.RightCurlyBracket,
    '~': TokenType.Tilde
  };
  /**
   * Append a token record to the shared `tokens` list.
   * The record is stamped with the next sequential token number (`tn`) and
   * the line number (`ln`) current at the time of the call.
   * @param {string} type Token type
   * @param {string} value Source text covered by the token
   * @param {number} column Column to record for the token
   */
  function pushToken(type, value, column) {
    var token = {
      tn: tn,
      ln: ln,
      col: column,
      type: type,
      value: value
    };
    // Reserve the number just used so the next token gets a fresh one:
    tn += 1;
    tokens.push(token);
  }
  /**
   * Check if a character is a decimal digit (`0` to `9`)
   * @param {string} c Character (non-string values are converted to a string first)
   * @returns {boolean} `true` only when `c` is exactly one character in `0`..`9`
   */
  function isDecimalDigit(c) {
    // A substring search is not a safe membership test here:
    // `'0123456789'.indexOf('')` is 0, so the empty string (which is what
    // `charAt` yields past the end of the input) would count as a digit,
    // and so would multi-character runs such as `'12'`.
    var ch = String(c);
    return ch.length === 1 && ch >= '0' && ch <= '9';
  }
  /**
   * Consume a run of consecutive space characters starting at `pos` and emit
   * it as a single `Space` token.
   * @param {string} css CSS string being tokenized
   */
  function parseSpaces(css) {
    var begin = pos;
    // Advance past every space in the run:
    while (pos < css.length && css.charAt(pos) === ' ') {
      pos += 1;
    }
    var spaces = css.substring(begin, pos);
    // Step back onto the last space of the run; the caller's loop increment
    // then moves on to the first unread character:
    pos -= 1;
    pushToken(TokenType.Space, spaces, col);
    col += pos - begin;
  }
  /**
   * Consume a quoted string starting at `pos` and emit it, quotes included,
   * as a `StringDQ` or `StringSQ` token.
   * @param {string} css CSS string being tokenized
   * @param {string} q Opening quote (either `'` or `"`)
   */
  function parseString(css, q) {
    var begin = pos;
    var tokenType = q === '"' ? TokenType.StringDQ : TokenType.StringSQ;
    var ch;
    // Start scanning right after the opening quote:
    pos += 1;
    while (pos < css.length) {
      ch = css.charAt(pos);
      if (ch === '\\') {
        // A backslash escapes the following character, so jump over it:
        pos += 1;
      } else if (ch === q) {
        // Matching closing quote found; stay on it:
        break;
      }
      pos += 1;
    }
    // `pos + 1` so that the closing quote is part of the token:
    pushToken(tokenType, css.substring(begin, pos + 1), col);
    col += pos - begin;
  }
  /**
   * Consume a run of decimal digits starting at `pos` and emit it as a
   * single `DecimalNumber` token.
   * @param {string} css CSS string being tokenized
   */
  function parseDecimalNumber(css) {
    var begin = pos;
    // Advance while the current character is still a digit:
    while (pos < css.length && isDecimalDigit(css.charAt(pos))) {
      pos += 1;
    }
    var digits = css.substring(begin, pos);
    // Step back onto the last digit; the caller's loop increment then moves
    // on to the first unread character:
    pos -= 1;
    pushToken(TokenType.DecimalNumber, digits, col);
    col += pos - begin;
  }
  /**
   * Consume an identifier starting at `pos` and emit it as an `Identifier`
   * token. Switches url mode on when the identifier is exactly `url`.
   * @param {string} css CSS string being tokenized
   */
  function parseIdentifier(css) {
    var begin = pos;
    var ch;
    // Any leading slashes belong to the identifier:
    while (css.charAt(pos) === '/') {
      pos += 1;
    }
    // Read up to (not including) the next punctuation mark:
    while (pos < css.length) {
      ch = css.charAt(pos);
      if (ch === '\\') {
        // A backslash escapes the following character, so jump over it:
        pos += 1;
      } else if (ch in Punctuation) {
        break;
      }
      pos += 1;
    }
    var ident = css.substring(begin, pos);
    // Step back onto the last character of the identifier; the caller's loop
    // increment then moves on to the first unread character:
    pos -= 1;
    // Url mode, once entered, stays on until something else clears it:
    if (!urlMode && ident === 'url') {
      urlMode = true;
    }
    pushToken(TokenType.Identifier, ident, col);
    col += pos - begin;
  }
  /**
   * Emit an `EqualitySign` token for the two-character operator `==`.
   * @param {string} css CSS string being tokenized (not read by this function)
   */
  function parseEquality(css) {
    // The token is recorded at the column of its first character:
    var startColumn = col;
    // Move onto the second `=`; the caller's loop increment steps past it:
    pos += 1;
    col += 1;
    pushToken(TokenType.EqualitySign, '==', startColumn);
  }
  /**
   * Emit an `InequalitySign` token for the two-character operator `!=`.
   * @param {string} css CSS string being tokenized (not read by this function)
   */
  function parseInequality(css) {
    // The token is recorded at the column of its first character:
    var startColumn = col;
    // Move onto the `=`; the caller's loop increment steps past it:
    pos += 1;
    col += 1;
    pushToken(TokenType.InequalitySign, '!=', startColumn);
  }
  /**
   * Consume a multiline comment starting at `pos` and emit it, delimiters
   * included, as a `CommentML` token. Updates `ln` and `col` for any `\n`
   * characters inside the comment.
   * @param {string} css CSS string being tokenized
   */
  function parseMLComment(css) {
    var begin = pos;
    // The opening `/*` is already known, so look for `*/` from `begin + 2`:
    var closeAt = css.indexOf('*/', begin + 2);
    // Stop on the `/` of the terminator, or at the end of the input when the
    // comment is never closed:
    pos = closeAt === -1 ? css.length : closeAt + 1;

    var text = css.substring(begin, pos + 1);
    pushToken(TokenType.CommentML, text, col);

    var lastBreak = text.lastIndexOf('\n');
    if (lastBreak === -1) {
      // Comment stays on one line: just move the column forward.
      col += pos - begin;
      return;
    }
    // Comment spans several lines: advance the line counter by the number of
    // `\n` characters and restart the column from the length of the text
    // that follows the last one.
    ln += text.split('\n').length - 1;
    col = text.length - lastBreak - 1;
  }
  /**
   * Consume a single-line comment starting at `pos` and emit it as a
   * `CommentSL` token. The token includes the leading `//` but not the line
   * break that ends the comment.
   * @param {string} css CSS string being tokenized
   */
  function parseSLComment(css) {
    var begin = pos;
    var ch;
    // The opening `//` is already known, so start scanning after it:
    pos += 2;
    while (pos < css.length) {
      ch = css.charAt(pos);
      if (ch === '\n' || ch === '\r') {
        break;
      }
      pos += 1;
    }
    var text = css.substring(begin, pos);
    // Step back onto the last character of the comment so the line break is
    // left for the caller's loop to tokenize:
    pos -= 1;
    pushToken(TokenType.CommentSL, text, col);
    col += pos - begin;
  }
  /**
   * Convert a CSS string to a list of tokens
   *
   * Walks the input one character at a time and dispatches to the `parse*`
   * helpers, which append to the shared `tokens` list. Each helper leaves
   * `pos` on the last character it consumed; the loop increment then moves
   * on to the next unread character and column.
   * @param {string} css CSS string
   * @returns {Array} List of tokens
   * @private
   */
  function getTokens(css) {
    // Parse string, character by character:
    for (pos = 0; pos < css.length; col++, pos++) {
      c = css.charAt(pos);
      cn = css.charAt(pos + 1);

      if (c === '/' && cn === '*') {
        // `/*` starts a multiline comment:
        parseMLComment(css);
      } else if (!urlMode && c === '/' && cn === '/') {
        // `//` starts a single-line comment, unless we are in url mode
        // (where it falls through to the punctuation branch below):
        parseSLComment(css);
      } else if (c === '"' || c === "'") {
        // A double or single quote starts a string:
        parseString(css, c);
      } else if (c === ' ') {
        parseSpaces(css);
      } else if (c === '=' && cn === '=') {
        // `==` is a single equality token:
        parseEquality(css);
      } else if (c === '!' && cn === '=') {
        // `!=` is a single inequality token:
        parseInequality(css);
      } else if (c in Punctuation) {
        // Any other punctuation mark is a one-character token:
        pushToken(Punctuation[c], c, col);
        // Go to next line. A `\r\n` pair is one line break, so it is counted
        // on the `\n` only; counting both characters would advance the line
        // number twice for every CRLF line ending.
        if (c === '\n' || (c === '\r' && cn !== '\n')) {
          ln++;
          col = 0; // the loop increment brings this back to column 1
        }
        if (c === ')') urlMode = false; // exit url mode
        if (c === '{') blockMode++; // enter a block
        if (c === '}') blockMode--; // exit a block
      } else if (isDecimalDigit(c)) {
        parseDecimalNumber(css);
      } else {
        // Anything else starts an identifier:
        parseIdentifier(css);
      }
    }
    return tokens;
  }
  263. return getTokens(css);
  264. };