at the end (html5 style)
*
* @var array
*/
protected $noSlash = [];
/**
 * Renders the document as a string.
 *
 * Delegates to the root node and returns whatever its innerHtml() yields.
 *
 * @return string
 */
public function __toString(): string
{
    $markup = $this->root->innerHtml();

    return $markup;
}
/**
 * Proxies reads of undefined properties to the root node.
 *
 * @param string $name name of the property to read from the root node
 * @return mixed value of that property on the root node
 */
public function __get($name)
{
    $root = $this->root;

    return $root->{$name};
}
/**
 * Loads the dom from whatever the given string turns out to be: a path to
 * an existing file, an http(s) url, or the markup itself.
 *
 * The node counter is reset before dispatching to the matching loader.
 *
 * @param string $str file path, url, or raw markup
 * @param array $options per-call options handed to the chosen loader
 * @return Dom
 * @chainable
 */
public function load(string $str, array $options = []): Dom
{
    AbstractNode::resetCount();

    // a string without line breaks that names an existing file is read from disk
    $isSingleLine = strpos($str, "\n") === false;
    if ($isSingleLine && is_file($str)) {
        return $this->loadFromFile($str, $options);
    }

    // a string starting with http:// or https:// is treated as a url
    $looksLikeUrl = preg_match("/^https?:\/\//i", $str);
    if ($looksLikeUrl) {
        return $this->loadFromUrl($str, $options);
    }

    // everything else is parsed as markup
    return $this->loadStr($str, $options);
}
/**
 * Loads the dom from a document file/url.
 *
 * The target is read in full with file_get_contents() and the result is
 * handed to loadStr().
 *
 * @param string $file path of the document to read
 * @param array $options per-call options passed through to loadStr()
 * @return Dom
 * @throws \RuntimeException if the contents could not be read
 * @chainable
 */
public function loadFromFile(string $file, array $options = []): Dom
{
    $content = file_get_contents($file);
    if ($content === false) {
        // file_get_contents() reports failure by returning false; forwarding
        // that to loadStr() would raise a TypeError or parse an empty document.
        throw new \RuntimeException("Unable to read the contents of '$file'.");
    }

    return $this->loadStr($content, $options);
}
/**
 * Use a curl interface implementation to attempt to load
 * the content from a url.
 *
 * When no implementation is supplied a new Curl instance is used. The
 * value returned by its get() call is handed to loadStr().
 *
 * @param string $url
 * @param array $options per-call options passed through to loadStr()
 * @param CurlInterface|null $curl implementation used to fetch the url
 * @return Dom
 * @chainable
 */
public function loadFromUrl(string $url, array $options = [], ?CurlInterface $curl = null): Dom
{
    // fall back to the default curl interface when none was injected
    $curl = $curl ?? new Curl;

    $content = $curl->get($url);

    return $this->loadStr($content, $options);
}
/**
 * Parses the html of the given string. Used for load(), loadFromFile(),
 * and loadFromUrl().
 *
 * A fresh Options object is built from the global options overlaid with
 * the per-call options, the input is cleaned, wrapped in a Content object
 * and parsed, and finally the charset is detected.
 *
 * @param string $str raw markup
 * @param array $option per-call options applied on top of the global ones
 * @return Dom
 * @chainable
 */
public function loadStr(string $str, array $option = []): Dom
{
    // global options first, then the per-call options on top of them
    $this->options = new Options;
    $this->options->setOptions($this->globalOptions)
                  ->setOptions($option);

    // keep the untouched input and its byte length
    $this->rawSize = strlen($str);
    $this->raw = $str;

    $html = $this->clean($str);

    // byte length of the cleaned markup; measuring $str here would only
    // duplicate rawSize
    $this->size = strlen($html);
    $this->content = new Content($html);

    $this->parse();
    $this->detectCharset();

    return $this;
}
/**
 * Stores the options array that loadStr() applies on every load, before
 * any per-call options.
 *
 * The previously stored global options are replaced, not merged.
 *
 * @param array $options
 * @return Dom
 * @chainable
 */
public function setOptions(array $options): Dom
{
    $this->globalOptions = $options;

    return $this;
}
/**
 * Find elements by css selector on the root node.
 *
 * Both arguments are forwarded unchanged to the root node's find().
 *
 * @param string $selector
 * @param int|null $nth forwarded as-is; null when omitted
 * @return mixed
 * @throws NotLoadedException if no content has been loaded yet
 */
public function find(string $selector, ?int $nth = null)
{
    $this->isLoaded();

    return $this->root->find($selector, $nth);
}
/**
 * Looks up an element by id through the root node's findById().
 *
 * @param int $id
 * @return mixed
 * @throws NotLoadedException if no content has been loaded yet
 */
public function findById(int $id)
{
    $this->isLoaded();
    $root = $this->root;

    return $root->findById($id);
}
/**
 * Appends one tag, or every tag of an array, to the list of tags that
 * will always be self closing.
 *
 * @param string|array $tag
 * @return Dom
 * @chainable
 */
public function addSelfClosingTag($tag): Dom
{
    // normalise a single tag into a one-element list
    $tags = is_array($tag) ? $tag : [$tag];

    foreach ($tags as $entry) {
        $this->selfClosing[] = $entry;
    }

    return $this;
}
/**
 * Drops one tag, or every tag of an array, from the list of tags that
 * will always be self closing.
 *
 * @param string|array $tag
 * @return Dom
 * @chainable
 */
public function removeSelfClosingTag($tag): Dom
{
    // normalise a single tag into a one-element list
    $tags = is_array($tag) ? $tag : [$tag];

    $remaining = array_diff($this->selfClosing, $tags);
    $this->selfClosing = $remaining;

    return $this;
}
/**
 * Resets the list of self closing tags to an empty array.
 *
 * @return Dom
 * @chainable
 */
public function clearSelfClosingTags(): Dom
{
    $this->selfClosing = [];

    return $this;
}
/**
 * Appends one tag, or every tag of an array, to the list of self closing
 * tags that should not have a trailing slash.
 *
 * @param string|array $tag
 * @return Dom
 * @chainable
 */
public function addNoSlashTag($tag): Dom
{
    // normalise a single tag into a one-element list
    $tags = is_array($tag) ? $tag : [$tag];

    foreach ($tags as $entry) {
        $this->noSlash[] = $entry;
    }

    return $this;
}
/**
 * Drops one tag, or every tag of an array, from the list of no-slash tags.
 *
 * @param string|array $tag
 * @return Dom
 * @chainable
 */
public function removeNoSlashTag($tag): Dom
{
    // normalise a single tag into a one-element list
    $tags = is_array($tag) ? $tag : [$tag];

    $remaining = array_diff($this->noSlash, $tags);
    $this->noSlash = $remaining;

    return $this;
}
/**
 * Resets the list of no-slash tags to an empty array.
 *
 * @return Dom
 * @chainable
 */
public function clearNoSlashTags(): Dom
{
    $this->noSlash = [];

    return $this;
}
/**
 * Returns the root node's first child.
 *
 * @return \PHPHtmlParser\Dom\AbstractNode
 * @throws NotLoadedException if no content has been loaded yet
 */
public function firstChild(): \PHPHtmlParser\Dom\AbstractNode
{
    $this->isLoaded();
    $root = $this->root;

    return $root->firstChild();
}
/**
 * Returns the root node's last child.
 *
 * @return \PHPHtmlParser\Dom\AbstractNode
 * @throws NotLoadedException if no content has been loaded yet
 */
public function lastChild(): \PHPHtmlParser\Dom\AbstractNode
{
    $this->isLoaded();
    $root = $this->root;

    return $root->lastChild();
}
/**
 * Returns the number of children reported by the root node.
 *
 * @return int
 * @throws NotLoadedException if no content has been loaded yet
 */
public function countChildren(): int
{
    $this->isLoaded();
    $total = $this->root->countChildren();

    return $total;
}
/**
 * Returns the root node's children as an array.
 *
 * @return array
 * @throws NotLoadedException if no content has been loaded yet
 */
public function getChildren(): array
{
    $this->isLoaded();
    $children = $this->root->getChildren();

    return $children;
}
/**
 * Tells whether the root node has any child nodes.
 *
 * @return bool
 * @throws NotLoadedException if no content has been loaded yet
 */
public function hasChildren(): bool
{
    $this->isLoaded();
    $hasAny = $this->root->hasChildren();

    return $hasAny;
}
/**
 * Looks up an element by its id attribute.
 *
 * Builds the selector "#<id>" and asks find() for the match at index 0.
 *
 * @param string $id
 * @return \PHPHtmlParser\Dom\AbstractNode|null
 * @throws NotLoadedException if no content has been loaded yet
 */
public function getElementById($id)
{
    $this->isLoaded();
    $selector = '#'.$id;

    return $this->find($selector, 0);
}
/**
 * Returns all elements matching the given tag name by using the name
 * itself as the selector for find().
 *
 * @param string $name
 * @return mixed
 * @throws NotLoadedException if no content has been loaded yet
 */
public function getElementsByTag(string $name)
{
    $this->isLoaded();
    $matches = $this->find($name);

    return $matches;
}
/**
 * Returns all elements carrying the given class name.
 *
 * Builds the selector ".<class>" and passes it to find().
 *
 * @param string $class
 * @return mixed
 * @throws NotLoadedException if no content has been loaded yet
 */
public function getElementsByClass(string $class)
{
    $this->isLoaded();
    $selector = '.'.$class;

    return $this->find($selector);
}
/**
 * Guards the accessors: fails unless content has been set by a load call.
 *
 * @throws NotLoadedException when the content property is still null
 */
protected function isLoaded(): void
{
    if ($this->content !== null) {
        return;
    }

    throw new NotLoadedException('Content is not loaded!');
}
/**
* Cleans the html of any non-html information.
*
* @param string $str
* @return string
*/
protected function clean(string $str): string
{
if ($this->options->get('cleanupInput') != true) {
// skip entire cleanup step
return $str;
}
// remove white space before closing tags
$str = mb_eregi_replace("'\s+>", "'>", $str);
$str = mb_eregi_replace('"\s+>', '">', $str);
// clean out the \n\r
$replace = ' ';
if ($this->options->get('preserveLineBreaks')) {
$replace = '
';
}
$str = str_replace(["\r\n", "\r", "\n"], $replace, $str);
// strip the doctype
$str = mb_eregi_replace("", '', $str);
// strip out comments
$str = mb_eregi_replace("", '', $str);
// strip out cdata
$str = mb_eregi_replace("", '', $str);
// strip out