Tag.php 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. <?php
  2. namespace PicoFeed\Filter;
  3. use DOMXPath;
  4. use PicoFeed\Base;
  5. use PicoFeed\Parser\XmlParser;
  /**
   * Tag filter.
   *
   * Decides which HTML tags may be kept when filtering content, renders
   * opening/closing tags, and offers a few markup clean-up helpers
   * (blacklisted tag removal, empty tag removal, <br/> collapsing).
   *
   * @author Frederic Guillot
   */
  class Tag extends Base
  {
      /**
       * Tags blacklist (XPath expressions).
       *
       * Every node matched by one of these expressions is removed by
       * removeBlacklistedTags().
       *
       * @var array
       */
      private $tag_blacklist = array(
          '//script',
          '//style',
      );

      /**
       * Tags whitelist (flat list of tag names).
       *
       * @var array
       */
      private $tag_whitelist = array(
          'audio',
          'video',
          'source',
          'dt',
          'dd',
          'dl',
          'table',
          'caption',
          'tr',
          'th',
          'td',
          'tbody',
          'thead',
          'h1',
          'h2',
          'h3',
          'h4',
          'h5',
          'h6',
          'strong',
          'em',
          'code',
          'pre',
          'blockquote',
          'p',
          'ul',
          'li',
          'ol',
          'br',
          'del',
          'a',
          'img',
          'figure',
          'figcaption',
          'cite',
          'time',
          'abbr',
          'iframe',
          'q',
          'sup',
          'sub',
      );

      /**
       * Check if the tag is allowed and is not a pixel tracker.
       *
       * @param string $tag        Tag name
       * @param array  $attributes Attributes dictionary
       * @return bool
       */
      public function isAllowed($tag, array $attributes)
      {
          return $this->isAllowedTag($tag) && !$this->isPixelTracker($tag, $attributes);
      }

      /**
       * Return the HTML opening tag.
       *
       * Self-closing tags (see isSelfClosingTag()) are rendered as "<tag ... />",
       * every other tag as "<tag ...>".
       *
       * @param string $tag        Tag name
       * @param string $attributes Attributes already converted to HTML
       * @return string
       */
      public function openHtmlTag($tag, $attributes = '')
      {
          $html = '<'.$tag;

          if (!empty($attributes)) {
              $html .= ' '.$attributes;
          }

          return $html.($this->isSelfClosingTag($tag) ? '/>' : '>');
      }

      /**
       * Return the HTML closing tag.
       *
       * Self-closing tags have no closing tag, so an empty string is returned
       * for them.
       *
       * @param string $tag Tag name
       * @return string
       */
      public function closeHtmlTag($tag)
      {
          return $this->isSelfClosingTag($tag) ? '' : '</'.$tag.'>';
      }

      /**
       * Return true if the tag is self-closing.
       *
       * Only "br" and "img" are treated as self-closing.
       *
       * @param string $tag Tag name
       * @return bool
       */
      public function isSelfClosingTag($tag)
      {
          return $tag === 'br' || $tag === 'img';
      }

      /**
       * Check if a tag is on the whitelist.
       *
       * A tag is allowed when it is either in the internal whitelist or is a
       * key of the array returned by the config's getFilterWhitelistedTags().
       * $this->config is not declared in this class; it is presumably provided
       * by Base.
       *
       * @param string $tag Tag name
       * @return bool
       */
      public function isAllowedTag($tag)
      {
          // Strict comparison: tag names are strings, and loose comparison has
          // type-juggling false positives (e.g. an integer 0 entry matching any
          // non-numeric tag name on PHP < 8).
          if (in_array($tag, $this->tag_whitelist, true)) {
              return true;
          }

          $configured_tags = array_keys($this->config->getFilterWhitelistedTags(array()));

          return in_array($tag, $configured_tags, true);
      }

      /**
       * Detect if an image tag is a pixel tracker.
       *
       * An "img" tag is considered a tracker when both its height and width
       * attributes are present and loosely equal to 1 (so the string "1"
       * matches as well as the integer 1).
       *
       * @param string $tag        Tag name
       * @param array  $attributes Tag attributes
       * @return bool
       */
      public function isPixelTracker($tag, array $attributes)
      {
          return $tag === 'img' &&
              isset($attributes['height']) && isset($attributes['width']) &&
              $attributes['height'] == 1 && $attributes['width'] == 1;
      }

      /**
       * Remove blacklisted tags (script and style nodes).
       *
       * The input is loaded with XmlParser::getDomDocument(), every node matched
       * by the blacklist XPath expressions is detached, and the document is
       * serialized back with saveXML().
       *
       * @param string $data Input data
       * @return string Filtered markup, or an empty string when the input cannot be parsed
       */
      public function removeBlacklistedTags($data)
      {
          $dom = XmlParser::getDomDocument($data);

          if ($dom === false) {
              return '';
          }

          $xpath = new DOMXPath($dom);
          $nodes = $xpath->query(implode(' | ', $this->tag_blacklist));

          // DOMXPath::query() returns false when the expression is malformed
          // (for instance if the blacklist were empty); there is nothing to
          // remove in that case.
          if ($nodes !== false) {
              foreach ($nodes as $node) {
                  if ($node->parentNode !== null) {
                      $node->parentNode->removeChild($node);
                  }
              }
          }

          return $dom->saveXML();
      }

      /**
       * Remove empty tags.
       *
       * Removes elements whose content is only whitespace, including elements
       * that only contain other empty elements (the pattern is recursive).
       *
       * @param string $data Input data
       * @return string
       */
      public function removeEmptyTags($data)
      {
          $cleaned = preg_replace('/<([^<\/>]*)>([\s]*?|(?R))<\/\1>/imsU', '', $data);

          // preg_replace() returns null on a PCRE error (e.g. backtrack or
          // recursion limit reached on large input); keep the input untouched
          // rather than losing the whole content.
          return $cleaned === null ? $data : $cleaned;
      }

      /**
       * Replace a run of consecutive <br/> tags by a single one.
       *
       * Every run of one or more break tags (optionally separated or followed
       * by whitespace) is replaced by exactly one "<br/>".
       *
       * @param string $data Input data
       * @return string
       */
      public function removeMultipleBreakTags($data)
      {
          $collapsed = preg_replace("/(<br\s*\/?>\s*)+/", '<br/>', $data);

          // preg_replace() returns null on a PCRE error; fall back to the input.
          return $collapsed === null ? $data : $collapsed;
      }

      /**
       * Set whitelisted tags.
       *
       * Accepts either a flat list of tag names (['video', 'img']) or a map of
       * tag name to allowed attributes (['video' => ['src', 'cover'], 'img' => ['src']]).
       * Only the tag names are kept here. An empty array leaves the current
       * whitelist unchanged.
       *
       * @param array $values List of tags, or map of tag => attributes
       * @return Tag
       */
      public function setWhitelistedTags(array $values)
      {
          if (empty($values)) {
              return $this;
          }

          $tags = array();

          foreach ($values as $key => $value) {
              if (is_string($key)) {
                  // Map entry: 'tag' => array of attributes.
                  $tags[] = $key;
              } elseif (is_string($value)) {
                  // List entry: index => 'tag'.
                  $tags[] = $value;
              }
          }

          $this->tag_whitelist = $tags;

          return $this;
      }
  }