getElementsByTagName('div')->item(0); $container = $container->parentNode->removeChild($container); // Iterate over words. $words = new DOMWordsIterator($container); $truncated = false; foreach ($words as $word) { // If we have exceeded the limit, we delete the remainder of the content. if ($words->key() >= $limit) { // Grab current position. $currentWordPosition = $words->currentWordPosition(); $curNode = $currentWordPosition[0]; $offset = $currentWordPosition[1]; $words = $currentWordPosition[2]; $curNode->nodeValue = substr( $curNode->nodeValue, 0, $words[$offset][1] + strlen($words[$offset][0]) ); self::removeProceedingNodes($curNode, $container); if (!empty($ellipsis)) { self::insertEllipsis($curNode, $ellipsis); } $truncated = true; break; } } // Return original HTML if not truncated. if ($truncated) { $html = self::getCleanedHtml($doc, $container); } return $html; } /** * Safely truncates HTML by a given number of letters. * @param string $html Input HTML. * @param int $limit Limit to how many letters we preserve. * @param string $ellipsis String to use as ellipsis (if any). * @return string Safe truncated HTML. */ public static function truncateLetters($html, $limit = 0, $ellipsis = '') { if ($limit <= 0) { return $html; } $doc = self::htmlToDomDocument($html); $container = $doc->getElementsByTagName('div')->item(0); $container = $container->parentNode->removeChild($container); // Iterate over letters. $letters = new DOMLettersIterator($container); $truncated = false; foreach ($letters as $letter) { // If we have exceeded the limit, we want to delete the remainder of this document. if ($letters->key() >= $limit) { $currentText = $letters->currentTextPosition(); $currentText[0]->nodeValue = mb_substr($currentText[0]->nodeValue, 0, $currentText[1] + 1); self::removeProceedingNodes($currentText[0], $container); if (!empty($ellipsis)) { self::insertEllipsis($currentText[0], $ellipsis); } $truncated = true; break; } } // Return original HTML if not truncated. if ($truncated) { $html = self::getCleanedHtml($doc, $container); } return $html; } /** * Builds a DOMDocument object from a string containing HTML. * @param string $html HTML to load * @returns DOMDocument Returns a DOMDocument object. */ public static function htmlToDomDocument($html) { if (!$html) { $html = ''; } // Transform multibyte entities which otherwise display incorrectly. $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'); // Internal errors enabled as HTML5 not fully supported. libxml_use_internal_errors(true); // Instantiate new DOMDocument object, and then load in UTF-8 HTML. $dom = new DOMDocument(); $dom->encoding = 'UTF-8'; $dom->loadHTML("
$html
"); return $dom; } /** * Removes all nodes after the current node. * @param DOMNode|DOMElement $domNode * @param DOMNode|DOMElement $topNode * @return void */ private static function removeProceedingNodes($domNode, $topNode) { $nextNode = $domNode->nextSibling; if ($nextNode !== null) { self::removeProceedingNodes($nextNode, $topNode); $domNode->parentNode->removeChild($nextNode); } else { //scan upwards till we find a sibling $curNode = $domNode->parentNode; while ($curNode !== $topNode) { if ($curNode->nextSibling !== null) { $curNode = $curNode->nextSibling; self::removeProceedingNodes($curNode, $topNode); $curNode->parentNode->removeChild($curNode); break; } $curNode = $curNode->parentNode; } } } /** * Clean extra code * * @param DOMDocument $doc * @param $container * @return string */ private static function getCleanedHTML(DOMDocument $doc, $container) { while ($doc->firstChild) { $doc->removeChild($doc->firstChild); } while ($container->firstChild ) { $doc->appendChild($container->firstChild); } $html = trim($doc->saveHTML()); return $html; } /** * Inserts an ellipsis * @param DOMNode|DOMElement $domNode Element to insert after. * @param string $ellipsis Text used to suffix our document. * @return void */ private static function insertEllipsis($domNode, $ellipsis) { $avoid = array('a', 'strong', 'em', 'h1', 'h2', 'h3', 'h4', 'h5'); //html tags to avoid appending the ellipsis to if ($domNode->parentNode->parentNode !== null && in_array($domNode->parentNode->nodeName, $avoid, true)) { // Append as text node to parent instead $textNode = new DOMText($ellipsis); if ($domNode->parentNode->parentNode->nextSibling) { $domNode->parentNode->parentNode->insertBefore($textNode, $domNode->parentNode->parentNode->nextSibling); } else { $domNode->parentNode->parentNode->appendChild($textNode); } } else { // Append to current node $domNode->nodeValue = rtrim($domNode->nodeValue) . $ellipsis; } } /** * */ public function truncate( $text, $length = 100, $ending = '...', $exact = false, $considerHtml = true ) { if ($considerHtml) { // if the plain text is shorter than the maximum length, return the whole text if (strlen(preg_replace('/<.*?>/', '', $text)) <= $length) { return $text; } // splits all html-tags to scanable lines preg_match_all('/(<.+?>)?([^<>]*)/s', $text, $lines, PREG_SET_ORDER); $total_length = strlen($ending); $open_tags = array(); $truncate = ''; foreach ($lines as $line_matchings) { // if there is any html-tag in this line, handle it and add it (uncounted) to the output if (!empty($line_matchings[1])) { // if it's an "empty element" with or without xhtml-conform closing slash if (preg_match('/^<(\s*.+?\/\s*|\s*(img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)(\s.+?)?)>$/is', $line_matchings[1])) { // do nothing // if tag is a closing tag } else if (preg_match('/^<\s*\/([^\s]+?)\s*>$/s', $line_matchings[1], $tag_matchings)) { // delete tag from $open_tags list $pos = array_search($tag_matchings[1], $open_tags); if ($pos !== false) { unset($open_tags[$pos]); } // if tag is an opening tag } else if (preg_match('/^<\s*([^\s>!]+).*?>$/s', $line_matchings[1], $tag_matchings)) { // add tag to the beginning of $open_tags list array_unshift($open_tags, strtolower($tag_matchings[1])); } // add html-tag to $truncate'd text $truncate .= $line_matchings[1]; } // calculate the length of the plain text part of the line; handle entities as one character $content_length = strlen(preg_replace('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|[0-9a-f]{1,6};/i', ' ', $line_matchings[2])); if ($total_length+$content_length> $length) { // the number of characters which are left $left = $length - $total_length; $entities_length = 0; // search for html entities if (preg_match_all('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|[0-9a-f]{1,6};/i', $line_matchings[2], $entities, PREG_OFFSET_CAPTURE)) { // calculate the real length of all entities in the legal range foreach ($entities[0] as $entity) { if ($entity[1]+1-$entities_length <= $left) { $left--; $entities_length += strlen($entity[0]); } else { // no more characters left break; } } } $truncate .= substr($line_matchings[2], 0, $left+$entities_length); // maximum lenght is reached, so get off the loop break; } else { $truncate .= $line_matchings[2]; $total_length += $content_length; } // if the maximum length is reached, get off the loop if($total_length>= $length) { break; } } } else { if (strlen($text) <= $length) { return $text; } else { $truncate = substr($text, 0, $length - strlen($ending)); } } // if the words shouldn't be cut in the middle... if (!$exact) { // ...search the last occurance of a space... $spacepos = strrpos($truncate, ' '); if (isset($spacepos)) { // ...and cut the text in this position $truncate = substr($truncate, 0, $spacepos); } } // add the defined ending to the text $truncate .= $ending; if($considerHtml) { // close all unclosed html-tags foreach ($open_tags as $tag) { $truncate .= ''; } } return $truncate; } }