at the end (html5 style)
     *
     * @var array
     */
    protected $noSlash = [];

    /**
     * Returns the inner html of the root node.
     *
     * @return string
     */
    public function __toString(): string
    {
        return $this->root->innerHtml();
    }

    /**
     * A simple wrapper around the root node: property reads that reach this
     * magic method are forwarded to the root node under the same name.
     *
     * @param string $name Name of the property to read from the root node.
     * @return mixed
     */
    public function __get($name)
    {
        return $this->root->$name;
    }

    /**
     * Attempts to load the dom from any resource, string, file, or URL.
     *
     * AbstractNode::resetCount() is called first. The input is then
     * dispatched in this order:
     *  - a string without a newline that names an existing file is
     *    handed to loadFromFile();
     *  - a string starting with http:// or https:// (case-insensitive)
     *    is handed to loadFromUrl();
     *  - anything else is parsed as markup by loadStr().
     *
     * @param string $str     File path, URL, or markup.
     * @param array  $options Per-call options passed through to the loader.
     * @return Dom
     * @chainable
     */
    public function load(string $str, array $options = []): Dom
    {
        AbstractNode::resetCount();

        // check if it's a file (markup containing a newline is never treated as a path)
        if (strpos($str, "\n") === false && is_file($str)) {
            return $this->loadFromFile($str, $options);
        }

        // check if it's a url
        if (preg_match("/^https?:\/\//i", $str)) {
            return $this->loadFromUrl($str, $options);
        }

        return $this->loadStr($str, $options);
    }

    /**
     * Loads the dom from a document file/url
     *
     * NOTE(review): file_get_contents() returns false when the read fails
     * and that value is passed to loadStr() unchecked - confirm whether
     * callers rely on this before adding an explicit failure path.
     *
     * @param string $file    Path (or stream URL) readable by file_get_contents().
     * @param array  $options Per-call options passed through to loadStr().
     * @return Dom
     * @chainable
     */
    public function loadFromFile(string $file, array $options = []): Dom
    {
        return $this->loadStr(file_get_contents($file), $options);
    }

    /**
     * Use a curl interface implementation to attempt to load
     * the content from a url.
     *
     * @param string             $url     Address to fetch.
     * @param array              $options Per-call options passed through to loadStr().
     * @param CurlInterface|null $curl    Transport to use; a new Curl instance
     *                                    is created when omitted or null.
     * @return Dom
     * @chainable
     */
    public function loadFromUrl(string $url, array $options = [], ?CurlInterface $curl = null): Dom
    {
        // use the default curl interface when none was injected
        $curl = $curl ?? new Curl();

        $content = $curl->get($url);

        return $this->loadStr($content, $options);
    }

    /**
     * Parses the html of the given string. Used for load(), loadFromFile(),
     * and loadFromUrl().
*
     * @param string $str    Markup to parse.
     * @param array  $option Per-call options, applied after the global options.
     * @return Dom
     * @chainable
     */
    public function loadStr(string $str, array $option = []): Dom
    {
        // global options are applied first, then the per-call ones
        $this->options = new Options;
        $this->options->setOptions($this->globalOptions)
                      ->setOptions($option);

        // remember the untouched input and its length
        $this->rawSize = strlen($str);
        $this->raw     = $str;

        $html = $this->clean($str);

        // measure the cleaned markup; measuring $str here would make
        // size always identical to rawSize
        $this->size    = strlen($html);
        $this->content = new Content($html);

        $this->parse();
        $this->detectCharset();

        return $this;
    }

    /**
     * Sets a global options array to be used by all load calls.
     *
     * The array replaces any previously stored global options.
     *
     * @param array $options
     * @return Dom
     * @chainable
     */
    public function setOptions(array $options): Dom
    {
        $this->globalOptions = $options;

        return $this;
    }

    /**
     * Find elements by css selector on the root node.
     *
     * @param string   $selector Selector passed to the root node's find().
     * @param int|null $nth      Optional index passed through to the root node's find().
     * @return mixed
     * @throws NotLoadedException When no content has been loaded yet.
     */
    public function find(string $selector, ?int $nth = null)
    {
        $this->isLoaded();

        return $this->root->find($selector, $nth);
    }

    /**
     * Find element by Id on the root node
     *
     * @param int $id
     * @return mixed
     * @throws NotLoadedException When no content has been loaded yet.
     */
    public function findById(int $id)
    {
        $this->isLoaded();

        return $this->root->findById($id);
    }

    /**
     * Adds the tag (or tags in an array) to the list of tags that will always
     * be self closing.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function addSelfClosingTag($tag): Dom
    {
        // accept a single tag as well as a list of tags
        if ( ! is_array($tag)) {
            $tag = [$tag];
        }
        foreach ($tag as $value) {
            $this->selfClosing[] = $value;
        }

        return $this;
    }

    /**
     * Removes the tag (or tags in an array) from the list of tags that will
     * always be self closing.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function removeSelfClosingTag($tag): Dom
    {
        // accept a single tag as well as a list of tags
        if ( ! is_array($tag)) {
            $tag = [$tag];
        }
        // array_diff() keeps the original keys of the remaining entries
        $this->selfClosing = array_diff($this->selfClosing, $tag);

        return $this;
    }

    /**
     * Sets the list of self closing tags to empty.
*
     * @return Dom
     * @chainable
     */
    public function clearSelfClosingTags(): Dom
    {
        $this->selfClosing = [];

        return $this;
    }

    /**
     * Registers one tag, or every tag in an array, as a self closing tag
     * that should not have a trailing slash.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function addNoSlashTag($tag): Dom
    {
        $tags = is_array($tag) ? $tag : [$tag];

        foreach ($tags as $noSlashTag) {
            $this->noSlash[] = $noSlashTag;
        }

        return $this;
    }

    /**
     * Drops one tag, or every tag in an array, from the list of
     * no-slash tags.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function removeNoSlashTag($tag): Dom
    {
        $tags = is_array($tag) ? $tag : [$tag];

        $this->noSlash = array_diff($this->noSlash, $tags);

        return $this;
    }

    /**
     * Resets the list of no-slash tags to an empty list.
     *
     * @return Dom
     * @chainable
     */
    public function clearNoSlashTags(): Dom
    {
        $this->noSlash = [];

        return $this;
    }

    /**
     * Returns the first child of the root node.
     *
     * @return \PHPHtmlParser\Dom\AbstractNode
     * @throws NotLoadedException When no content has been loaded yet.
     */
    public function firstChild(): \PHPHtmlParser\Dom\AbstractNode
    {
        $this->isLoaded();

        $first = $this->root->firstChild();

        return $first;
    }

    /**
     * Returns the last child of the root node.
     *
     * @return \PHPHtmlParser\Dom\AbstractNode
     * @throws NotLoadedException When no content has been loaded yet.
     */
    public function lastChild(): \PHPHtmlParser\Dom\AbstractNode
    {
        $this->isLoaded();

        $last = $this->root->lastChild();

        return $last;
    }

    /**
     * Returns the number of children reported by the root node.
     *
     * @return int
     * @throws NotLoadedException When no content has been loaded yet.
     */
    public function countChildren(): int
    {
        $this->isLoaded();

        $total = $this->root->countChildren();

        return $total;
    }

    /**
     * Returns the children of the root node as an array.
     *
     * @return array
     * @throws NotLoadedException When no content has been loaded yet.
     */
    public function getChildren(): array
    {
        $this->isLoaded();

        $children = $this->root->getChildren();

        return $children;
    }

    /**
     * Tells whether the root node has any child nodes.
     *
     * @return bool
     * @throws NotLoadedException When no content has been loaded yet.
     */
    public function hasChildren(): bool
    {
        $this->isLoaded();

        $hasAny = $this->root->hasChildren();

        return $hasAny;
    }

    /**
     * Simple wrapper function that returns an element by the
     * id.
*
     * @param string $id
     * @return \PHPHtmlParser\Dom\AbstractNode|null
     */
    public function getElementById($id)
    {
        $this->isLoaded();

        // build the "#id" selector and ask for the first match only
        $selector = '#'.$id;

        return $this->find($selector, 0);
    }

    /**
     * Returns every element matching the given tag name by using
     * the name itself as the selector.
     *
     * @param string $name
     * @return mixed
     */
    public function getElementsByTag(string $name)
    {
        $this->isLoaded();

        $matches = $this->find($name);

        return $matches;
    }

    /**
     * Returns every element matching the given class name by using
     * ".class" as the selector.
     *
     * @param string $class
     * @return mixed
     */
    public function getElementsByClass(string $class)
    {
        $this->isLoaded();

        $selector = '.'.$class;

        return $this->find($selector);
    }

    /**
     * Guards the public accessors: fails when no load method has
     * populated the content yet.
     *
     * @throws NotLoadedException
     */
    protected function isLoaded(): void
    {
        if ($this->content !== null) {
            return;
        }

        throw new NotLoadedException('Content is not loaded!');
    }

    /**
     * Cleans the html of any none-html information.
     *
     * @param string $str
     * @return string
     */
    protected function clean(string $str): string
    {
        if ($this->options->get('cleanupInput') != true) {
            // skip entire cleanup step
            return $str;
        }

        // remove white space before closing tags
        $str = mb_eregi_replace("'\s+>", "'>", $str);
        $str = mb_eregi_replace('"\s+>', '">', $str);

        // clean out the \n\r
        $replace = ' ';
        if ($this->options->get('preserveLineBreaks')) {
            $replace = ' ';
        }
        $str = str_replace(["\r\n", "\r", "\n"], $replace, $str);

        // strip the doctype
        $str = mb_eregi_replace("", '', $str);

        // strip out comments
        $str = mb_eregi_replace("", '', $str);

        // strip out cdata
        $str = mb_eregi_replace("", '', $str);

        // strip out