Parser.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\CssSelector\Parser;
  11. use Symfony\Component\CssSelector\Exception\SyntaxErrorException;
  12. use Symfony\Component\CssSelector\Node;
  13. use Symfony\Component\CssSelector\Parser\Tokenizer\Tokenizer;
  14. /**
  15. * CSS selector parser.
  16. *
  17. * This component is a port of the Python cssselect library,
  18. * which is copyright Ian Bicking, @see https://github.com/SimonSapin/cssselect.
  19. *
  20. * @author Jean-François Simon <jeanfrancois.simon@sensiolabs.com>
  21. *
  22. * @internal
  23. */
  24. class Parser implements ParserInterface
  25. {
  26. /**
  27. * @var Tokenizer
  28. */
  29. private $tokenizer;
  /**
   * Constructor.
   *
   * Falls back to a freshly created Tokenizer when none is supplied.
   *
   * @param null|Tokenizer $tokenizer
   */
  public function __construct(Tokenizer $tokenizer = null)
  {
      if (null === $tokenizer) {
          $tokenizer = new Tokenizer();
      }

      $this->tokenizer = $tokenizer;
  }
  /**
   * {@inheritdoc}
   *
   * Wraps the source in a Reader, tokenizes it with the configured
   * tokenizer and parses the resulting stream as a selector list.
   */
  public function parse($source)
  {
      return $this->parseSelectorList(
          $this->tokenizer->tokenize(new Reader($source))
      );
  }
  /**
   * Parses the arguments for ":nth-child()" and friends.
   *
   * The token values are concatenated and read as an "an+b" expression.
   * The keywords "odd" and "even" map to array(2, 1) and array(2, 0), a bare
   * "n" maps to array(1, 0) and an expression without "n" maps to
   * array(0, b).
   *
   * @param Token[] $tokens
   *
   * @throws SyntaxErrorException when a string token is passed or when a
   *                              part of the expression is not an integer
   *
   * @return array array(a, b), both integers
   */
  public static function parseSeries(array $tokens)
  {
      foreach ($tokens as $token) {
          if ($token->isString()) {
              throw SyntaxErrorException::stringAsFunctionArgument();
          }
      }

      $joined = trim(implode('', array_map(function (Token $token) {
          return $token->getValue();
      }, $tokens)));

      // Only optionally signed integers are valid here; is_numeric() would
      // also let through values such as "1.5" or "1e3" and truncate them.
      $int = function ($string) {
          $string = trim($string);
          if (1 !== preg_match('/^[+-]?[0-9]+$/D', $string)) {
              throw SyntaxErrorException::stringAsFunctionArgument();
          }

          return (int) $string;
      };

      if ('odd' === $joined) {
          return array(2, 1);
      }
      if ('even' === $joined) {
          return array(2, 0);
      }
      if ('n' === $joined) {
          return array(1, 0);
      }
      if (false === strpos($joined, 'n')) {
          return array(0, $int($joined));
      }

      // Split on the first "n" only, so that anything after a second "n"
      // ends up in the offset part and is rejected instead of being dropped.
      list($step, $offset) = explode('n', $joined, 2);

      // Compare against the empty string explicitly: "0" is falsy in PHP, so
      // a truthiness test would turn "0n+5" into step 1 instead of step 0.
      if ('' === $step) {
          $a = 1;
      } elseif ('-' === $step || '+' === $step) {
          $a = $int($step.'1');
      } else {
          $a = $int($step);
      }

      $b = '' === $offset ? 0 : $int($offset);

      return array($a, $b);
  }
  /**
   * Parses a comma-separated list of selectors.
   *
   * Leading whitespace is skipped, then selectors are read one after the
   * other for as long as each of them is followed by a "," delimiter.
   *
   * @param TokenStream $stream
   *
   * @return array the parsed selector nodes, in source order
   */
  private function parseSelectorList(TokenStream $stream)
  {
      $stream->skipWhitespace();
      $selectors = array();

      do {
          $selectors[] = $this->parserSelectorNode($stream);

          $hasMore = $stream->getPeek()->isDelimiter(array(','));
          if ($hasMore) {
              // Consume the comma and any whitespace before the next selector.
              $stream->getNext();
              $stream->skipWhitespace();
          }
      } while ($hasMore);

      return $selectors;
  }
  /**
   * Parses one selector: a simple selector optionally combined with further
   * simple selectors through "+", ">", "~" or the descendant combinator " ".
   *
   * @param TokenStream $stream
   *
   * @throws SyntaxErrorException when a pseudo-element is followed by more selector parts
   *
   * @return Node\SelectorNode
   */
  private function parserSelectorNode(TokenStream $stream)
  {
      list($tree, $pseudoElement) = $this->parseSimpleSelector($stream);

      for (;;) {
          $stream->skipWhitespace();
          $upcoming = $stream->getPeek();

          // End of input or a comma terminates the current selector.
          if ($upcoming->isFileEnd() || $upcoming->isDelimiter(array(','))) {
              break;
          }

          if (null !== $pseudoElement) {
              throw SyntaxErrorException::pseudoElementFound($pseudoElement, 'not at the end of a selector');
          }

          // No explicit combinator token means the descendant combinator.
          $combinator = ' ';
          if ($upcoming->isDelimiter(array('+', '>', '~'))) {
              $combinator = $stream->getNext()->getValue();
              $stream->skipWhitespace();
          }

          list($rightHand, $pseudoElement) = $this->parseSimpleSelector($stream);
          $tree = new Node\CombinedSelectorNode($tree, $combinator, $rightHand);
      }

      return new Node\SelectorNode($tree, $pseudoElement);
  }
  /**
   * Parses next simple node (hash, class, attribute, pseudo, function, negation).
   *
   * An element node is read first, then parts are appended to it until the
   * upcoming token is whitespace, the end of input, one of "," "+" ">" "~",
   * or, when parsing the argument of a negation, ")".
   *
   * @param TokenStream $stream
   * @param bool        $insideNegation true while parsing the argument of ":not()"
   *
   * @throws SyntaxErrorException on an unexpected token, on a nested negation, when
   *                              something follows a pseudo-element, or when no token
   *                              was used at all
   *
   * @return array array(node, pseudo-element identifier or null)
   */
  private function parseSimpleSelector(TokenStream $stream, $insideNegation = false)
  {
      $stream->skipWhitespace();

      $usedBefore = count($stream->getUsed());
      $node = $this->parseElementNode($stream);
      $pseudoElement = null;

      for (;;) {
          $token = $stream->getPeek();

          $atBoundary = $token->isWhitespace()
              || $token->isFileEnd()
              || $token->isDelimiter(array(',', '+', '>', '~'))
              || ($insideNegation && $token->isDelimiter(array(')')));
          if ($atBoundary) {
              break;
          }

          // Nothing may follow a pseudo-element within the same simple selector.
          if (null !== $pseudoElement) {
              throw SyntaxErrorException::pseudoElementFound($pseudoElement, 'not at the end of a selector');
          }

          if ($token->isHash()) {
              $node = new Node\HashNode($node, $stream->getNext()->getValue());
              continue;
          }

          if ($token->isDelimiter(array('.'))) {
              $stream->getNext();
              $node = new Node\ClassNode($node, $stream->getNextIdentifier());
              continue;
          }

          if ($token->isDelimiter(array('['))) {
              $stream->getNext();
              $node = $this->parseAttributeNode($node, $stream);
              continue;
          }

          if (!$token->isDelimiter(array(':'))) {
              throw SyntaxErrorException::unexpectedToken('selector', $token);
          }

          // From here on the part starts with ":".
          $stream->getNext();

          if ($stream->getPeek()->isDelimiter(array(':'))) {
              // "::name" is a pseudo-element.
              $stream->getNext();
              $pseudoElement = $stream->getNextIdentifier();
              continue;
          }

          $identifier = $stream->getNextIdentifier();
          $lowered = strtolower($identifier);

          if (in_array($lowered, array('first-line', 'first-letter', 'before', 'after'))) {
              // Special case: CSS 2.1 pseudo-elements can have a single ':'.
              // Any new pseudo-element must have two.
              $pseudoElement = $identifier;
              continue;
          }

          if (!$stream->getPeek()->isDelimiter(array('('))) {
              // Plain pseudo-class without arguments.
              $node = new Node\PseudoNode($node, $identifier);
              continue;
          }

          // Consume "(" and any whitespace before the first argument.
          $stream->getNext();
          $stream->skipWhitespace();

          if ('not' === $lowered) {
              if ($insideNegation) {
                  throw SyntaxErrorException::nestedNot();
              }

              list($argument, $argumentPseudoElement) = $this->parseSimpleSelector($stream, true);
              $closing = $stream->getNext();

              if (null !== $argumentPseudoElement) {
                  throw SyntaxErrorException::pseudoElementFound($argumentPseudoElement, 'inside ::not()');
              }
              if (!$closing->isDelimiter(array(')'))) {
                  throw SyntaxErrorException::unexpectedToken('")"', $closing);
              }

              $node = new Node\NegationNode($node, $argument);
              continue;
          }

          // Any other function: collect its argument tokens up to ")".
          $arguments = array();
          for (;;) {
              $stream->skipWhitespace();
              $next = $stream->getNext();

              $isArgument = $next->isIdentifier()
                  || $next->isString()
                  || $next->isNumber()
                  || $next->isDelimiter(array('+', '-'));
              if ($isArgument) {
                  $arguments[] = $next;
                  continue;
              }

              if ($next->isDelimiter(array(')'))) {
                  break;
              }

              throw SyntaxErrorException::unexpectedToken('an argument', $next);
          }

          if (!$arguments) {
              throw SyntaxErrorException::unexpectedToken('at least one argument', $next);
          }

          $node = new Node\FunctionNode($node, $identifier, $arguments);
      }

      // An unchanged used-token count means no selector was read at all.
      if ($usedBefore === count($stream->getUsed())) {
          throw SyntaxErrorException::unexpectedToken('selector', $stream->getPeek());
      }

      return array($node, $pseudoElement);
  }
  /**
   * Parses next element node.
   *
   * Accepts "name", "*", "namespace|name" and "*|name" forms. When the
   * upcoming token is neither an identifier nor "*", nothing is consumed and
   * an element node with null namespace and null element is returned.
   *
   * @param TokenStream $stream
   *
   * @return Node\ElementNode
   */
  private function parseElementNode(TokenStream $stream)
  {
      $lead = $stream->getPeek();
      $isName = $lead->isIdentifier();

      if (!$isName && !$lead->isDelimiter(array('*'))) {
          return new Node\ElementNode(null, null);
      }

      // "*" is represented by null, an identifier by its value.
      $consumed = $stream->getNext();
      $first = $isName ? $consumed->getValue() : null;

      if (!$stream->getPeek()->isDelimiter(array('|'))) {
          // No namespace separator: what was read is the element itself.
          return new Node\ElementNode(null, $first);
      }

      $stream->getNext();
      $element = $stream->getNextIdentifierOrStar();

      return new Node\ElementNode($first, $element);
  }
  /**
   * Parses next attribute node.
   *
   * Called once the opening "[" has been consumed. Reads an optional
   * namespace, the attribute name, an optional operator with its value and
   * the closing "]".
   *
   * @param Node\NodeInterface $selector the node the attribute test applies to
   * @param TokenStream        $stream
   *
   * @throws SyntaxErrorException on a missing or unexpected operator, value or closing bracket
   *
   * @return Node\AttributeNode
   */
  private function parseAttributeNode(Node\NodeInterface $selector, TokenStream $stream)
  {
      $stream->skipWhitespace();
      $attribute = $stream->getNextIdentifierOrStar();
      $pipeFollows = $stream->getPeek()->isDelimiter(array('|'));

      // A null name (star) is only acceptable as a namespace, i.e. before "|".
      if (null === $attribute && !$pipeFollows) {
          throw SyntaxErrorException::unexpectedToken('"|"', $stream->getPeek());
      }

      $namespace = null;
      $operator = null;

      if ($pipeFollows) {
          $stream->getNext();

          if ($stream->getPeek()->isDelimiter(array('='))) {
              // "|" directly followed by "=" is the "|=" operator, not a namespace.
              $stream->getNext();
              $operator = '|=';
          } else {
              $namespace = $attribute;
              $attribute = $stream->getNextIdentifier();
          }
      }

      if (null === $operator) {
          $stream->skipWhitespace();
          $token = $stream->getNext();

          if ($token->isDelimiter(array(']'))) {
              return new Node\AttributeNode($selector, $namespace, $attribute, 'exists', null);
          }

          if ($token->isDelimiter(array('='))) {
              $operator = '=';
          } elseif ($token->isDelimiter(array('^', '$', '*', '~', '|', '!'))
              && $stream->getPeek()->isDelimiter(array('='))
          ) {
              $operator = $token->getValue().'=';
              $stream->getNext();
          } else {
              throw SyntaxErrorException::unexpectedToken('operator', $token);
          }
      }

      $stream->skipWhitespace();
      $value = $stream->getNext();

      if ($value->isNumber()) {
          // A numeric value is re-wrapped as a string token.
          $value = new Token(Token::TYPE_STRING, (string) $value->getValue(), $value->getPosition());
      }

      if (!$value->isIdentifier() && !$value->isString()) {
          throw SyntaxErrorException::unexpectedToken('string or identifier', $value);
      }

      $stream->skipWhitespace();
      $closing = $stream->getNext();

      if (!$closing->isDelimiter(array(']'))) {
          throw SyntaxErrorException::unexpectedToken('"]"', $closing);
      }

      return new Node\AttributeNode($selector, $namespace, $attribute, $operator, $value->getValue());
  }
  337. }