123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398 |
- <?php
// Keys of the cache buckets held by each Emogrifier instance; they are the
// values accepted by Emogrifier::clearCache().
const CACHE_CSS = 0;
const CACHE_SELECTOR = 1;
const CACHE_XPATH = 2;
/**
 * Inlines CSS rules into the style attributes of an HTML document.
 *
 * The CSS comes from the string passed to the constructor / setCSS() plus the
 * content of every <style> element found in the HTML (those elements are
 * removed from the output). Rules are applied in order of ascending selector
 * precedence, and styles that were already inline in the HTML always win.
 *
 * Relies on the global constants CACHE_CSS, CACHE_SELECTOR and CACHE_XPATH as
 * cache bucket keys.
 */
class Emogrifier {

    /** Array key under which parseNth() stores the offset ("b" in "an+b"). */
    const INDEX = 0;

    /** Array key under which parseNth() stores the step ("a" in "an+b"). */
    const MULTIPLIER = 1;

    /** @var string HTML to process. */
    private $html = '';

    /** @var string CSS to inline, in addition to <style> blocks in the HTML. */
    private $css = '';

    /** @var array Tag names stripped from the HTML (content kept) before parsing. */
    private $unprocessableHTMLTags = array('wbr');

    /** @var array Cache buckets keyed by CACHE_CSS, CACHE_SELECTOR and CACHE_XPATH. */
    private $caches = array();

    /**
     * @var bool When true, emogrify() converts its output from HTML entities
     *           back to the encoding detected on the input.
     */
    public $preserveEncoding = false;

    /**
     * @param string $html HTML to process.
     * @param string $css  CSS to inline.
     */
    public function __construct($html = '', $css = '') {
        $this->html = $html;
        $this->css = $css;
        $this->clearCache();
    }

    /**
     * Replaces the HTML to process.
     *
     * @param string $html
     */
    public function setHTML($html = '') {
        $this->html = $html;
    }

    /**
     * Replaces the CSS to inline and drops the parsed-CSS cache.
     *
     * @param string $css
     */
    public function setCSS($css = '') {
        $this->css = $css;
        $this->clearCache(CACHE_CSS);
    }

    /**
     * Empties one cache bucket, or (re)initialises all of them.
     *
     * @param int|null $key One of CACHE_CSS, CACHE_SELECTOR, CACHE_XPATH, or
     *                      null for all. A key with no existing bucket is ignored.
     */
    public function clearCache($key = null) {
        if (is_null($key)) {
            $this->caches = array(
                CACHE_CSS => array(),
                CACHE_SELECTOR => array(),
                CACHE_XPATH => array(),
            );
            return;
        }
        if (isset($this->caches[$key])) {
            $this->caches[$key] = array();
        }
    }

    /**
     * Registers a tag name to be stripped from the HTML before parsing.
     *
     * @param string $tag Tag name without angle brackets.
     */
    public function addUnprocessableHTMLTag($tag) {
        $this->unprocessableHTMLTags[] = $tag;
    }

    /**
     * Removes a tag name from the list of tags stripped before parsing.
     *
     * @param string $tag Tag name without angle brackets.
     */
    public function removeUnprocessableHTMLTag($tag) {
        // Strict search: a loose one lets e.g. 0 match any non-numeric tag name.
        $key = array_search($tag, $this->unprocessableHTMLTags, true);
        if ($key !== false) {
            unset($this->unprocessableHTMLTags[$key]);
        }
    }

    /**
     * Applies the CSS to the HTML and returns the resulting document.
     *
     * Elements whose final style contains "display:none" are removed.
     *
     * @return string HTML as serialised by DOMDocument::saveHTML().
     */
    public function emogrify() {
        $body = $this->html;

        // Remove the registered tags (opening and closing forms); their content stays.
        if (count($this->unprocessableHTMLTags)) {
            $quotedTags = array();
            foreach ($this->unprocessableHTMLTags as $tag) {
                $quotedTags[] = preg_quote($tag, '/');
            }
            $tagPattern = implode('|', $quotedTags);
            $body = preg_replace("/<\/?($tagPattern)[^>]*>/i", '', $body);
        }

        $encoding = mb_detect_encoding($body);
        if ($encoding === false) {
            // Detection failed; fall back to a valid source encoding instead of passing false on.
            $encoding = mb_internal_encoding();
        }
        // NOTE: the 'HTML-ENTITIES' pseudo-encoding is deprecated as of PHP 8.2.
        $body = mb_convert_encoding($body, 'HTML-ENTITIES', $encoding);

        $xmldoc = new DOMDocument;
        $xmldoc->encoding = $encoding;
        $xmldoc->strictErrorChecking = false;
        $xmldoc->formatOutput = true;
        $xmldoc->loadHTML($body);
        $xmldoc->normalizeDocument();
        $xpath = new DOMXPath($xmldoc);

        // Remember the styles that are already inline so they can be re-applied
        // on top of everything coming from the stylesheet.
        $visitedNodes = array();
        $visitedNodeStyles = array();
        foreach ($xpath->query('//*[@style]') as $node) {
            // Lower-case whatever precedes a colon (the property names).
            $normalizedOrigStyle = preg_replace_callback(
                '/[A-Za-z\-]+(?=\:)/S',
                array($this, 'lowercaseMatch'),
                $node->getAttribute('style')
            );

            $nodeKey = md5($node->getNodePath());
            if (!isset($visitedNodeStyles[$nodeKey])) {
                $visitedNodeStyles[$nodeKey] = $this->cssStyleDefinitionToArray($normalizedOrigStyle);
                $visitedNodes[$nodeKey] = $node;
            }
            $node->setAttribute('style', $normalizedOrigStyle);
        }

        // Append the content of every <style> element to the CSS and drop the element.
        $css = $this->css;
        foreach ($xpath->query('//style') as $styleNode) {
            $css .= "\n\n{$styleNode->nodeValue}";
            $styleNode->parentNode->removeChild($styleNode);
        }

        $search = array(
            '/\/\*.*\*\//sU',                       // comments
            '/^\s*@import\s[^;]+;/misU',            // @import directives
            '/^\s*@media\s[^{]+{\s*}/misU',         // empty @media blocks
            '/^\s*@media\s+((aural|braille|embossed|handheld|print|projection|speech|tty|tv)\s*,*\s*)+{.*}\s*}/misU', // @media blocks for the listed media types
            '/^\s*@media\s[^{]+{(.*})\s*}/misU',    // remaining @media blocks: keep their rules
        );
        $replace = array(
            '',
            '',
            '',
            '',
            '\\1',
        );
        $css = preg_replace($search, $replace, $css);

        $csskey = md5($css);
        if (!isset($this->caches[CACHE_CSS][$csskey])) {
            $this->caches[CACHE_CSS][$csskey] = $this->parseCssRules($css);
        }

        foreach ($this->caches[CACHE_CSS][$csskey] as $rule) {
            // An untranslatable selector yields an invalid XPath expression, for
            // which query() warns and returns false; such rules are skipped.
            $nodes = @$xpath->query($this->translateCSStoXpath(trim($rule['selector'])));
            if ($nodes === false) {
                continue;
            }
            foreach ($nodes as $node) {
                if ($node->hasAttribute('style')) {
                    // Declarations of the current rule override those set by earlier rules.
                    $oldStyleArr = $this->cssStyleDefinitionToArray($node->getAttribute('style'));
                    $newStyleArr = $this->cssStyleDefinitionToArray($rule['attributes']);
                    $style = $this->styleArrayToString(array_merge($oldStyleArr, $newStyleArr));
                } else {
                    $style = trim($rule['attributes']);
                }
                $node->setAttribute('style', $style);
            }
        }

        // Re-apply the original inline styles so that they take priority.
        foreach ($visitedNodeStyles as $nodeKey => $origStyleArr) {
            $node = $visitedNodes[$nodeKey];
            $currStyleArr = $this->cssStyleDefinitionToArray($node->getAttribute('style'));
            $node->setAttribute('style', $this->styleArrayToString(array_merge($currStyleArr, $origStyleArr)));
        }

        // Drop every element that ended up with display:none (spaces ignored,
        // the letters n, o and e matched case-insensitively).
        $nodes = $xpath->query('//*[contains(translate(translate(@style," ",""),"NOE","noe"),"display:none")]');
        if ($nodes->length > 0) {
            foreach ($nodes as $node) {
                if ($node->parentNode) {
                    $node->parentNode->removeChild($node);
                }
            }
        }

        if ($this->preserveEncoding) {
            return mb_convert_encoding($xmldoc->saveHTML(), $encoding, 'HTML-ENTITIES');
        }
        return $xmldoc->saveHTML();
    }

    /**
     * preg_replace_callback() callback: lower-cases the whole match.
     *
     * @param array $match
     * @return string
     */
    private function lowercaseMatch($match) {
        return strtolower($match[0]);
    }

    /**
     * Splits cleaned-up CSS into one entry per selector, sorted by ascending precedence.
     *
     * @param string $css CSS without comments, @import and @media wrappers.
     * @return array List of arrays with keys 'selector', 'attributes' and
     *               'line' (index of the rule the selector came from).
     */
    private function parseCssRules($css) {
        preg_match_all('/(^|[^{}])\s*([^{]+){([^}]*)}/mis', $css, $matches, PREG_SET_ORDER);

        $rules = array();
        foreach ($matches as $key => $selectorString) {
            $attributes = trim($selectorString[3]);
            if (!strlen($attributes)) {
                continue; // rule without declarations
            }

            foreach (explode(',', $selectorString[2]) as $selector) {
                // Only the pseudo-classes translateCSStoXpath() knows about can be
                // inlined; every other selector containing ':' is skipped.
                if (strpos($selector, ':') !== false
                    && !preg_match('/:(?:first-child|last-child|nth-child\(|nth-of-type\()/i', $selector)) {
                    continue;
                }
                $rules[] = array(
                    'selector' => trim($selector),
                    'attributes' => $attributes,
                    'line' => $key,
                );
            }
        }

        usort($rules, array($this, 'sortBySelectorPrecedence'));
        return $rules;
    }

    /**
     * Serialises a property => value map into a style attribute value.
     *
     * @param array $styles
     * @return string "name:value;" pairs with lower-cased names.
     */
    private function styleArrayToString($styles) {
        $style = '';
        foreach ($styles as $property => $value) {
            $style .= strtolower($property) . ':' . $value . ';';
        }
        return $style;
    }

    /**
     * usort() comparator: lower precedence first, ties broken by rule order.
     *
     * @param array $a Entry produced by parseCssRules().
     * @param array $b Entry produced by parseCssRules().
     * @return int
     */
    private function sortBySelectorPrecedence($a, $b) {
        $precedenceA = $this->getCSSSelectorPrecedence($a['selector']);
        $precedenceB = $this->getCSSSelectorPrecedence($b['selector']);

        if ($precedenceA != $precedenceB) {
            return $precedenceA < $precedenceB ? -1 : 1;
        }
        if ($a['line'] == $b['line']) {
            return 0; // same rule, same precedence: the declarations are identical
        }
        return $a['line'] < $b['line'] ? -1 : 1;
    }

    /**
     * Scores a selector: 100 per "#name", 10 per ".name", 1 per remaining word.
     *
     * @param string $selector
     * @return int|float
     */
    private function getCSSSelectorPrecedence($selector) {
        $selectorkey = md5($selector);
        if (!isset($this->caches[CACHE_SELECTOR][$selectorkey])) {
            $precedence = 0;
            $value = 100;
            // Regex prefixes for ids, classes and, last, any remaining word.
            $search = array('\#', '\.', '');
            foreach ($search as $s) {
                if (trim($selector) === '') {
                    break;
                }
                $num = 0;
                // Each counted token is removed so the next pass cannot count it again.
                $selector = preg_replace('/' . $s . '\w+/', '', $selector, -1, $num);
                $precedence += ($value * $num);
                $value /= 10;
            }
            $this->caches[CACHE_SELECTOR][$selectorkey] = $precedence;
        }
        return $this->caches[CACHE_SELECTOR][$selectorkey];
    }

    /**
     * Translates a CSS selector into an XPath expression.
     *
     * Handles descendant, child (" > ") and adjacent-sibling (" + ")
     * combinators, [attr] and [attr=value], #id, .class, :first-child,
     * :last-child, :nth-child() and :nth-of-type(). Whitespace is turned into a
     * combinator before the :nth-*() arguments are read, so those arguments
     * must not contain spaces.
     *
     * @param string $css_selector
     * @return string
     */
    private function translateCSStoXpath($css_selector) {
        $css_selector = trim($css_selector);
        $xpathkey = md5($css_selector);
        if (!isset($this->caches[CACHE_XPATH][$xpathkey])) {
            $search = array(
                '/\s+>\s+/',                              // child combinator
                '/\s+\+\s+/',                             // adjacent sibling combinator
                '/\s+/',                                  // descendant combinator
                '/([^\/]+):first-child/i',
                '/([^\/]+):last-child/i',
                '/(\w)\[(\w+)\]/',                        // attribute presence
                '/(\w)\[(\w+)\=[\'"]?(\w+)[\'"]?\]/',     // attribute equality
            );
            $replace = array(
                '/',
                '/following-sibling::*[1]/self::',
                '//',
                '*[1]/self::\\1',
                '*[last()]/self::\\1',
                '\\1[@\\2]',
                '\\1[@\\2="\\3"]',
            );
            $xpath = preg_replace($search, $replace, $css_selector);

            // Ids, then classes, applied in that order after the replacements above.
            $xpath = preg_replace_callback('/(\w+)?\#([\w\-]+)/', array($this, 'translateIdSelector'), $xpath);
            $xpath = preg_replace_callback('/(\w+|[\*\]])?((\.[\w\-]+)+)/', array($this, 'translateClassSelector'), $xpath);
            $xpath = '//' . $xpath;

            $nthArgument = '\(\s*(odd|even|[+\-]?\d+|[+\-]?\d*n(\s*[+\-]\s*\d+)?)\s*\)';
            $xpath = preg_replace_callback('/([^\/]+):nth-child' . $nthArgument . '/i', array($this, 'translateNthChild'), $xpath);
            $xpath = preg_replace_callback('/([^\/]+):nth-of-type' . $nthArgument . '/i', array($this, 'translateNthOfType'), $xpath);

            $this->caches[CACHE_XPATH][$xpathkey] = $xpath;
        }
        return $this->caches[CACHE_XPATH][$xpathkey];
    }

    /**
     * preg_replace_callback() callback turning "elem#id" / "#id" into an @id test.
     *
     * @param array $match [1] optional element name, [2] id.
     * @return string
     */
    private function translateIdSelector($match) {
        $element = (isset($match[1]) && strlen($match[1])) ? $match[1] : '*';
        return $element . '[@id="' . $match[2] . '"]';
    }

    /**
     * preg_replace_callback() callback turning ".a.b" into one @class test per class.
     *
     * @param array $match [1] optional element name, "*" or a closing "]" of a
     *                     preceding predicate; [2] the ".a.b" chain.
     * @return string
     */
    private function translateClassSelector($match) {
        $element = (isset($match[1]) && strlen($match[1])) ? $match[1] : '*';
        $predicates = '';
        foreach (explode('.', substr($match[2], 1)) as $className) {
            // Space padding makes contains() match whole class names only.
            $predicates .= '[contains(concat(" ",@class," "),concat(" ","' . $className . '"," "))]';
        }
        return $element . $predicates;
    }

    /**
     * preg_replace_callback() callback for "elem:nth-child(...)".
     *
     * @param array $match [1] element expression, [2] nth argument.
     * @return string
     */
    private function translateNthChild($match) {
        $predicate = $this->buildNthPredicate($this->parseNth($match));
        return sprintf('*[%s]/self::%s', $predicate, $match[1]);
    }

    /**
     * preg_replace_callback() callback for "elem:nth-of-type(...)".
     *
     * @param array $match [1] element expression, [2] nth argument.
     * @return string
     */
    private function translateNthOfType($match) {
        $predicate = $this->buildNthPredicate($this->parseNth($match));
        return sprintf('%s[%s]', $match[1], $predicate);
    }

    /**
     * Builds the XPath predicate selecting the positions described by "an+b".
     *
     * @param array $nth Result of parseNth().
     * @return string Predicate body without the surrounding brackets.
     */
    private function buildNthPredicate($nth) {
        $index = $nth[self::INDEX];
        if (!isset($nth[self::MULTIPLIER])) {
            return sprintf('position() = %d', $index);
        }

        $multiplier = $nth[self::MULTIPLIER];
        $step = abs($multiplier);
        // Non-negative remainder shared by every position of the sequence.
        $remainder = (($index % $step) + $step) % $step;
        $periodic = sprintf('position() mod %d = %d', $step, $remainder);

        if ($multiplier < 0) {
            // Counting down from b: only positions up to b qualify.
            return sprintf('position() <= %d and %s', $index, $periodic);
        }
        if ($index > $step) {
            // Counting up from b: positions before b must be excluded.
            return sprintf('%s and position() >= %d', $periodic, $index);
        }
        return $periodic;
    }

    /**
     * Parses the argument of :nth-child() / :nth-of-type().
     *
     * @param array $match Regex match whose element [2] is "odd", "even", an
     *                     integer, or an "an+b" expression.
     * @return array self::INDEX => b, plus self::MULTIPLIER => a when a is non-zero.
     */
    private function parseNth($match) {
        $expression = strtolower(preg_replace('/\s+/', '', $match[2]));

        if ($expression === 'odd') {
            return array(self::MULTIPLIER => 2, self::INDEX => 1);
        }
        if ($expression === 'even') {
            return array(self::MULTIPLIER => 2, self::INDEX => 0);
        }
        if (strpos($expression, 'n') === false) {
            return array(self::INDEX => intval($expression));
        }

        list($stepPart, $offsetPart) = explode('n', $expression, 2);
        if ($stepPart === '' || $stepPart === '+') {
            $multiplier = 1;   // "n" / "+n"
        } elseif ($stepPart === '-') {
            $multiplier = -1;  // "-n"
        } else {
            $multiplier = intval($stepPart);
        }
        $index = ($offsetPart === '') ? 0 : intval($offsetPart);

        if ($multiplier === 0) {
            return array(self::INDEX => $index); // "0n+b" is just position b
        }
        return array(self::MULTIPLIER => $multiplier, self::INDEX => $index);
    }

    /**
     * Parses a style attribute value into a property => value map.
     *
     * Declarations without a colon, with an empty name or with an empty value
     * are dropped; a later duplicate overrides an earlier one.
     *
     * @param string $style
     * @return array
     */
    private function cssStyleDefinitionToArray($style) {
        $retArr = array();
        foreach (explode(';', $style) as $def) {
            if (empty($def) || strpos($def, ':') === false) {
                continue;
            }
            list($key, $value) = explode(':', $def, 2);
            if (empty($key) || strlen(trim($value)) === 0) {
                continue;
            }
            $retArr[trim($key)] = trim($value);
        }
        return $retArr;
    }
}
|