selectors = $parser->parseSelectorString($selector); } /** * Returns the selectors that where found in __construct * * @return array */ public function getSelectors() { return $this->selectors; } /** * Attempts to find the selectors starting from the given * node object. * * @param AbstractNode $node * @return Collection */ public function find(AbstractNode $node): Collection { $results = new Collection; foreach ($this->selectors as $selector) { $nodes = [$node]; if (count($selector) == 0) { continue; } $options = []; foreach ($selector as $rule) { if ($rule['alterNext']) { $options[] = $this->alterNext($rule); continue; } $nodes = $this->seek($nodes, $rule, $options); // clear the options $options = []; } // this is the final set of nodes foreach ($nodes as $result) { $results[] = $result; } } return $results; } /** * Attempts to find all children that match the rule * given. * * @param array $nodes * @param array $rule * @param array $options * @return array * @recursive */ protected function seek(array $nodes, array $rule, array $options): array { // XPath index if (array_key_exists('tag', $rule) && array_key_exists('key', $rule) && is_numeric($rule['key']) ) { $count = 0; /** @var AbstractNode $node */ foreach ($nodes as $node) { if ($rule['tag'] == '*' || $rule['tag'] == $node->getTag()->name() ) { ++$count; if ($count == $rule['key']) { // found the node we wanted return [$node]; } } } return []; } $options = $this->flattenOptions($options); $return = []; /** @var InnerNode $node */ foreach ($nodes as $node) { // check if we are a leaf if ($node instanceof LeafNode || ! $node->hasChildren() ) { continue; } $children = []; $child = $node->firstChild(); while ( ! is_null($child)) { // wild card, grab all if ($rule['tag'] == '*' && is_null($rule['key'])) { $return[] = $child; $child = $this->getNextChild($node, $child); continue; } $pass = $this->checkTag($rule, $child); if ($pass && ! is_null($rule['key'])) { $pass = $this->checkKey($rule, $child); } if ($pass && ! is_null($rule['key']) && ! is_null($rule['value']) && $rule['value'] != '*' ) { $pass = $this->checkComparison($rule, $child); } if ($pass) { // it passed all checks $return[] = $child; } else { // this child failed to be matched if ($child instanceof InnerNode && $child->hasChildren() ) { // we still want to check its children $children[] = $child; } } $child = $this->getNextChild($node, $child); } if (( ! isset($options['checkGrandChildren']) || $options['checkGrandChildren']) && count($children) > 0 ) { // we have children that failed but are not leaves. $matches = $this->seek($children, $rule, $options); foreach ($matches as $match) { $return[] = $match; } } } return $return; } /** * Attempts to match the given arguments with the given operator. * * @param string $operator * @param string $pattern * @param string $value * @return bool */ protected function match(string $operator, string $pattern, string $value): bool { $value = strtolower($value); $pattern = strtolower($pattern); switch ($operator) { case '=': return $value === $pattern; case '!=': return $value !== $pattern; case '^=': return preg_match('/^'.preg_quote($pattern, '/').'/', $value) == 1; case '$=': return preg_match('/'.preg_quote($pattern, '/').'$/', $value) == 1; case '*=': if ($pattern[0] == '/') { return preg_match($pattern, $value) == 1; } return preg_match("/".$pattern."/i", $value) == 1; } return false; } /** * Attempts to figure out what the alteration will be for * the next element. * * @param array $rule * @return array */ protected function alterNext(array $rule): array { $options = []; if ($rule['tag'] == '>') { $options['checkGrandChildren'] = false; } return $options; } /** * Flattens the option array. * * @param array $optionsArray * @return array */ protected function flattenOptions(array $optionsArray) { $options = []; foreach ($optionsArray as $optionArray) { foreach ($optionArray as $key => $option) { $options[$key] = $option; } } return $options; } /** * Returns the next child or null if no more children. * * @param AbstractNode $node * @param AbstractNode $currentChild * @return AbstractNode|null */ protected function getNextChild(AbstractNode $node, AbstractNode $currentChild) { try { // get next child $child = $node->nextChild($currentChild->id()); } catch (ChildNotFoundException $e) { // no more children $child = null; } return $child; } /** * Checks tag condition from rules against node. * * @param array $rule * @param AbstractNode $node * @return bool */ protected function checkTag(array $rule, AbstractNode $node): bool { if ( ! empty($rule['tag']) && $rule['tag'] != $node->getTag()->name() && $rule['tag'] != '*' ) { return false; } return true; } /** * Checks key condition from rules against node. * * @param array $rule * @param AbstractNode $node * @return bool */ protected function checkKey(array $rule, AbstractNode $node): bool { if ($rule['noKey']) { if ( ! is_null($node->getAttribute($rule['key']))) { return false; } } else { if ($rule['key'] != 'plaintext' && !$node->hasAttribute($rule['key'])) { return false; } } return true; } /** * Checks comparison condition from rules against node. * * @param array $rule * @param AbstractNode $node * @return bool */ public function checkComparison(array $rule, AbstractNode $node): bool { if ($rule['key'] == 'plaintext') { // plaintext search $nodeValue = $node->text(); } else { // normal search $nodeValue = $node->getAttribute($rule['key']); } $check = $this->match($rule['operator'], $rule['value'], $nodeValue); // handle multiple classes if ( ! $check && $rule['key'] == 'class') { $nodeClasses = explode(' ', $node->getAttribute('class')); foreach ($nodeClasses as $class) { if ( ! empty($class)) { $check = $this->match($rule['operator'], $rule['value'], $class); } if ($check) { break; } } } return $check; } }