Lexer.php 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\ExpressionLanguage;
  11. /**
  12. * Lexes an expression.
  13. *
  14. * @author Fabien Potencier <fabien@symfony.com>
  15. */
  16. class Lexer
  17. {
  18. /**
  19. * Tokenizes an expression.
  20. *
  21. * @param string $expression The expression to tokenize
  22. *
  23. * @return TokenStream A token stream instance
  24. *
  25. * @throws SyntaxError
  26. */
  27. public function tokenize($expression)
  28. {
  29. $expression = str_replace(["\r", "\n", "\t", "\v", "\f"], ' ', $expression);
  30. $cursor = 0;
  31. $tokens = [];
  32. $brackets = [];
  33. $end = \strlen($expression);
  34. while ($cursor < $end) {
  35. if (' ' == $expression[$cursor]) {
  36. ++$cursor;
  37. continue;
  38. }
  39. if (preg_match('/[0-9]+(?:\.[0-9]+)?/A', $expression, $match, 0, $cursor)) {
  40. // numbers
  41. $number = (float) $match[0]; // floats
  42. if (preg_match('/^[0-9]+$/', $match[0]) && $number <= PHP_INT_MAX) {
  43. $number = (int) $match[0]; // integers lower than the maximum
  44. }
  45. $tokens[] = new Token(Token::NUMBER_TYPE, $number, $cursor + 1);
  46. $cursor += \strlen($match[0]);
  47. } elseif (false !== strpos('([{', $expression[$cursor])) {
  48. // opening bracket
  49. $brackets[] = [$expression[$cursor], $cursor];
  50. $tokens[] = new Token(Token::PUNCTUATION_TYPE, $expression[$cursor], $cursor + 1);
  51. ++$cursor;
  52. } elseif (false !== strpos(')]}', $expression[$cursor])) {
  53. // closing bracket
  54. if (empty($brackets)) {
  55. throw new SyntaxError(sprintf('Unexpected "%s".', $expression[$cursor]), $cursor, $expression);
  56. }
  57. list($expect, $cur) = array_pop($brackets);
  58. if ($expression[$cursor] != strtr($expect, '([{', ')]}')) {
  59. throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $cur, $expression);
  60. }
  61. $tokens[] = new Token(Token::PUNCTUATION_TYPE, $expression[$cursor], $cursor + 1);
  62. ++$cursor;
  63. } elseif (preg_match('/"([^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As', $expression, $match, 0, $cursor)) {
  64. // strings
  65. $tokens[] = new Token(Token::STRING_TYPE, stripcslashes(substr($match[0], 1, -1)), $cursor + 1);
  66. $cursor += \strlen($match[0]);
  67. } elseif (preg_match('/(?<=^|[\s(])not in(?=[\s(])|\!\=\=|(?<=^|[\s(])not(?=[\s(])|(?<=^|[\s(])and(?=[\s(])|\=\=\=|\>\=|(?<=^|[\s(])or(?=[\s(])|\<\=|\*\*|\.\.|(?<=^|[\s(])in(?=[\s(])|&&|\|\||(?<=^|[\s(])matches|\=\=|\!\=|\*|~|%|\/|\>|\||\!|\^|&|\+|\<|\-/A', $expression, $match, 0, $cursor)) {
  68. // operators
  69. $tokens[] = new Token(Token::OPERATOR_TYPE, $match[0], $cursor + 1);
  70. $cursor += \strlen($match[0]);
  71. } elseif (false !== strpos('.,?:', $expression[$cursor])) {
  72. // punctuation
  73. $tokens[] = new Token(Token::PUNCTUATION_TYPE, $expression[$cursor], $cursor + 1);
  74. ++$cursor;
  75. } elseif (preg_match('/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A', $expression, $match, 0, $cursor)) {
  76. // names
  77. $tokens[] = new Token(Token::NAME_TYPE, $match[0], $cursor + 1);
  78. $cursor += \strlen($match[0]);
  79. } else {
  80. // unlexable
  81. throw new SyntaxError(sprintf('Unexpected character "%s".', $expression[$cursor]), $cursor, $expression);
  82. }
  83. }
  84. $tokens[] = new Token(Token::EOF_TYPE, null, $cursor + 1);
  85. if (!empty($brackets)) {
  86. list($expect, $cur) = array_pop($brackets);
  87. throw new SyntaxError(sprintf('Unclosed "%s".', $expect), $cur, $expression);
  88. }
  89. return new TokenStream($tokens, $expression);
  90. }
  91. }