123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700 |
- <?php
- namespace PicoFeed\Filter;
- use PicoFeed\Client\Url;
- /**
- * Attribute Filter class.
- *
- * @author Frederic Guillot
- */
- class Attribute
- {
- /**
- * Image proxy url.
- *
- * @var string
- */
- private $image_proxy_url = '';
- /**
- * Image proxy callback.
- *
- * @var \Closure|null
- */
- private $image_proxy_callback = null;
- /**
- * limits the image proxy usage to this protocol.
- *
- * @var string
- */
- private $image_proxy_limit_protocol = '';
- /**
- * Tags and attribute whitelist.
- *
- * @var array
- */
- private $attribute_whitelist = array(
- 'audio' => array('controls', 'src'),
- 'video' => array('poster', 'controls', 'height', 'width', 'src'),
- 'source' => array('src', 'type'),
- 'dt' => array(),
- 'dd' => array(),
- 'dl' => array(),
- 'table' => array(),
- 'caption' => array(),
- 'tr' => array(),
- 'th' => array(),
- 'td' => array(),
- 'tbody' => array(),
- 'thead' => array(),
- 'h1' => array(),
- 'h2' => array(),
- 'h3' => array(),
- 'h4' => array(),
- 'h5' => array(),
- 'h6' => array(),
- 'strong' => array(),
- 'em' => array(),
- 'code' => array(),
- 'pre' => array(),
- 'blockquote' => array(),
- 'p' => array(),
- 'ul' => array(),
- 'li' => array(),
- 'ol' => array(),
- 'br' => array(),
- 'del' => array(),
- 'a' => array('href'),
- 'img' => array('src', 'title', 'alt'),
- 'figure' => array(),
- 'figcaption' => array(),
- 'cite' => array(),
- 'time' => array('datetime'),
- 'abbr' => array('title'),
- 'iframe' => array('width', 'height', 'frameborder', 'src', 'allowfullscreen'),
- 'q' => array('cite'),
- );
- /**
- * Scheme whitelist.
- *
- * For a complete list go to http://en.wikipedia.org/wiki/URI_scheme
- *
- * @var array
- */
- private $scheme_whitelist = array(
- 'bitcoin:',
- 'callto:',
- 'ed2k://',
- 'facetime://',
- 'feed:',
- 'ftp://',
- 'geo:',
- 'git://',
- 'http://',
- 'https://',
- 'irc://',
- 'irc6://',
- 'ircs://',
- 'jabber:',
- 'magnet:',
- 'mailto:',
- 'nntp://',
- 'rtmp://',
- 'sftp://',
- 'sip:',
- 'sips:',
- 'skype:',
- 'smb://',
- 'sms:',
- 'spotify:',
- 'ssh:',
- 'steam:',
- 'svn://',
- 'tel:',
- );
- /**
- * Iframe source whitelist, everything else is ignored.
- *
- * @var array
- */
- private $iframe_whitelist = array(
- 'http://www.youtube.com',
- 'https://www.youtube.com',
- 'http://player.vimeo.com',
- 'https://player.vimeo.com',
- 'http://www.dailymotion.com',
- 'https://www.dailymotion.com',
- 'http://vk.com',
- 'https://vk.com',
- );
- /**
- * Blacklisted resources.
- *
- * @var array
- */
- private $media_blacklist = array(
- 'api.flattr.com',
- 'feeds.feedburner.com',
- 'share.feedsportal.com',
- 'da.feedsportal.com',
- 'rc.feedsportal.com',
- 'rss.feedsportal.com',
- 'res.feedsportal.com',
- 'res1.feedsportal.com',
- 'res2.feedsportal.com',
- 'res3.feedsportal.com',
- 'pi.feedsportal.com',
- 'rss.nytimes.com',
- 'feeds.wordpress.com',
- 'stats.wordpress.com',
- 'rss.cnn.com',
- 'twitter.com/home?status=',
- 'twitter.com/share',
- 'twitter_icon_large.png',
- 'www.facebook.com/sharer.php',
- 'facebook_icon_large.png',
- 'plus.google.com/share',
- 'www.gstatic.com/images/icons/gplus-16.png',
- 'www.gstatic.com/images/icons/gplus-32.png',
- 'www.gstatic.com/images/icons/gplus-64.png',
- );
- /**
- * Attributes used for external resources.
- *
- * @var array
- */
- private $media_attributes = array(
- 'src',
- 'href',
- 'poster',
- );
- /**
- * Attributes that must be integer.
- *
- * @var array
- */
- private $integer_attributes = array(
- 'width',
- 'height',
- 'frameborder',
- );
- /**
- * Mandatory attributes for specified tags.
- *
- * @var array
- */
- private $required_attributes = array(
- 'a' => array('href'),
- 'img' => array('src'),
- 'iframe' => array('src'),
- 'audio' => array('src'),
- 'source' => array('src'),
- );
- /**
- * Add attributes to specified tags.
- *
- * @var array
- */
- private $add_attributes = array(
- 'a' => array('rel' => 'noreferrer', 'target' => '_blank'),
- 'video' => array('controls' => 'true'),
- );
- /**
- * List of filters to apply.
- *
- * @var array
- */
- private $filters = array(
- 'filterAllowedAttribute',
- 'filterIntegerAttribute',
- 'rewriteAbsoluteUrl',
- 'filterIframeAttribute',
- 'filterBlacklistResourceAttribute',
- 'filterProtocolUrlAttribute',
- 'rewriteImageProxyUrl',
- 'secureIframeSrc',
- 'removeYouTubeAutoplay',
- );
- /**
- * Add attributes to specified tags.
- *
- * @var \PicoFeed\Client\Url
- */
- private $website;
- /**
- * Constructor.
- *
- * @param \PicoFeed\Client\Url $website Website url instance
- */
- public function __construct(Url $website)
- {
- $this->website = $website;
- }
- /**
- * Apply filters to the attributes list.
- *
- * @param string $tag Tag name
- * @param array $attributes Attributes dictionary
- *
- * @return array Filtered attributes
- */
- public function filter($tag, array $attributes)
- {
- foreach ($attributes as $attribute => &$value) {
- foreach ($this->filters as $filter) {
- if (!$this->$filter($tag, $attribute, $value)) {
- unset($attributes[$attribute]);
- break;
- }
- }
- }
- return $attributes;
- }
- /**
- * Return true if the value is allowed (remove not allowed attributes).
- *
- * @param string $tag Tag name
- * @param string $attribute Attribute name
- * @param string $value Attribute value
- *
- * @return bool
- */
- public function filterAllowedAttribute($tag, $attribute, $value)
- {
- return isset($this->attribute_whitelist[$tag]) && in_array($attribute, $this->attribute_whitelist[$tag]);
- }
- /**
- * Return true if the value is not integer (remove attributes that should have an integer value).
- *
- * @param string $tag Tag name
- * @param string $attribute Attribute name
- * @param string $value Attribute value
- *
- * @return bool
- */
- public function filterIntegerAttribute($tag, $attribute, $value)
- {
- if (in_array($attribute, $this->integer_attributes)) {
- return ctype_digit($value);
- }
- return true;
- }
- /**
- * Return true if the iframe source is allowed (remove not allowed iframe).
- *
- * @param string $tag Tag name
- * @param string $attribute Attribute name
- * @param string $value Attribute value
- *
- * @return bool
- */
- public function filterIframeAttribute($tag, $attribute, $value)
- {
- if ($tag === 'iframe' && $attribute === 'src') {
- foreach ($this->iframe_whitelist as $url) {
- if (strpos($value, $url) === 0) {
- return true;
- }
- }
- return false;
- }
- return true;
- }
- /**
- * Return true if the resource is not blacklisted (remove blacklisted resource attributes).
- *
- * @param string $tag Tag name
- * @param string $attribute Attribute name
- * @param string $value Attribute value
- *
- * @return bool
- */
- public function filterBlacklistResourceAttribute($tag, $attribute, $value)
- {
- if ($this->isResource($attribute) && $this->isBlacklistedMedia($value)) {
- return false;
- }
- return true;
- }
- /**
- * Convert all relative links to absolute url.
- *
- * @param string $tag Tag name
- * @param string $attribute Attribute name
- * @param string $value Attribute value
- *
- * @return bool
- */
- public function rewriteAbsoluteUrl($tag, $attribute, &$value)
- {
- if ($this->isResource($attribute)) {
- $value = Url::resolve($value, $this->website);
- }
- return true;
- }
- /**
- * Turns iframes' src attribute from http to https to prevent
- * mixed active content.
- *
- * @param string $tag Tag name
- * @param array $attribute Atttributes name
- * @param string $value Attribute value
- *
- * @return bool
- */
- public function secureIframeSrc($tag, $attribute, &$value)
- {
- if ($tag === 'iframe' && $attribute === 'src' && strpos($value, 'http://') === 0) {
- $value = substr_replace($value, 's', 4, 0);
- }
- return true;
- }
- /**
- * Removes YouTube autoplay from iframes.
- *
- * @param string $tag Tag name
- * @param array $attribute Atttributes name
- * @param string $value Attribute value
- *
- * @return bool
- */
- public function removeYouTubeAutoplay($tag, $attribute, &$value)
- {
- $regex = '%^(https://(?:www\.)?youtube.com/.*\?.*autoplay=)(1)(.*)%i';
- if ($tag === 'iframe' && $attribute === 'src' && preg_match($regex, $value)) {
- $value = preg_replace($regex, '${1}0$3', $value);
- }
- return true;
- }
- /**
- * Rewrite image url to use with a proxy.
- *
- * @param string $tag Tag name
- * @param string $attribute Attribute name
- * @param string $value Attribute value
- *
- * @return bool
- */
- public function rewriteImageProxyUrl($tag, $attribute, &$value)
- {
- if ($tag === 'img' && $attribute === 'src'
- && !($this->image_proxy_limit_protocol !== '' && stripos($value, $this->image_proxy_limit_protocol.':') !== 0)) {
- if ($this->image_proxy_url) {
- $value = sprintf($this->image_proxy_url, rawurlencode($value));
- } elseif (is_callable($this->image_proxy_callback)) {
- $value = call_user_func($this->image_proxy_callback, $value);
- }
- }
- return true;
- }
- /**
- * Return true if the scheme is authorized.
- *
- * @param string $tag Tag name
- * @param string $attribute Attribute name
- * @param string $value Attribute value
- *
- * @return bool
- */
- public function filterProtocolUrlAttribute($tag, $attribute, $value)
- {
- if ($this->isResource($attribute) && !$this->isAllowedProtocol($value)) {
- return false;
- }
- return true;
- }
- /**
- * Automatically add/override some attributes for specific tags.
- *
- * @param string $tag Tag name
- * @param array $attributes Attributes list
- *
- * @return array
- */
- public function addAttributes($tag, array $attributes)
- {
- if (isset($this->add_attributes[$tag])) {
- $attributes += $this->add_attributes[$tag];
- }
- return $attributes;
- }
- /**
- * Return true if all required attributes are present.
- *
- * @param string $tag Tag name
- * @param array $attributes Attributes list
- *
- * @return bool
- */
- public function hasRequiredAttributes($tag, array $attributes)
- {
- if (isset($this->required_attributes[$tag])) {
- foreach ($this->required_attributes[$tag] as $attribute) {
- if (!isset($attributes[$attribute])) {
- return false;
- }
- }
- }
- return true;
- }
- /**
- * Check if an attribute name is an external resource.
- *
- * @param string $attribute Attribute name
- *
- * @return bool
- */
- public function isResource($attribute)
- {
- return in_array($attribute, $this->media_attributes);
- }
- /**
- * Detect if the protocol is allowed or not.
- *
- * @param string $value Attribute value
- *
- * @return bool
- */
- public function isAllowedProtocol($value)
- {
- foreach ($this->scheme_whitelist as $protocol) {
- if (strpos($value, $protocol) === 0) {
- return true;
- }
- }
- return false;
- }
- /**
- * Detect if an url is blacklisted.
- *
- * @param string $resource Attribute value (URL)
- *
- * @return bool
- */
- public function isBlacklistedMedia($resource)
- {
- foreach ($this->media_blacklist as $name) {
- if (strpos($resource, $name) !== false) {
- return true;
- }
- }
- return false;
- }
- /**
- * Convert the attribute list to html.
- *
- * @param array $attributes Attributes
- *
- * @return string
- */
- public function toHtml(array $attributes)
- {
- $html = array();
- foreach ($attributes as $attribute => $value) {
- $html[] = sprintf('%s="%s"', $attribute, Filter::escape($value));
- }
- return implode(' ', $html);
- }
- /**
- * Set whitelisted tags and attributes for each tag.
- *
- * @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']]
- *
- * @return Attribute
- */
- public function setWhitelistedAttributes(array $values)
- {
- $this->attribute_whitelist = $values ?: $this->attribute_whitelist;
- return $this;
- }
- /**
- * Set scheme whitelist.
- *
- * @param array $values List of scheme: ['http://', 'ftp://']
- *
- * @return Attribute
- */
- public function setSchemeWhitelist(array $values)
- {
- $this->scheme_whitelist = $values ?: $this->scheme_whitelist;
- return $this;
- }
- /**
- * Set media attributes (used to load external resources).
- *
- * @param array $values List of values: ['src', 'href']
- *
- * @return Attribute
- */
- public function setMediaAttributes(array $values)
- {
- $this->media_attributes = $values ?: $this->media_attributes;
- return $this;
- }
- /**
- * Set blacklisted external resources.
- *
- * @param array $values List of tags: ['http://google.com/', '...']
- *
- * @return Attribute
- */
- public function setMediaBlacklist(array $values)
- {
- $this->media_blacklist = $values ?: $this->media_blacklist;
- return $this;
- }
- /**
- * Set mandatory attributes for whitelisted tags.
- *
- * @param array $values List of tags: ['img' => 'src']
- *
- * @return Attribute
- */
- public function setRequiredAttributes(array $values)
- {
- $this->required_attributes = $values ?: $this->required_attributes;
- return $this;
- }
- /**
- * Set attributes to automatically to specific tags.
- *
- * @param array $values List of tags: ['a' => 'target="_blank"']
- *
- * @return Attribute
- */
- public function setAttributeOverrides(array $values)
- {
- $this->add_attributes = $values ?: $this->add_attributes;
- return $this;
- }
- /**
- * Set attributes that must be an integer.
- *
- * @param array $values List of tags: ['width', 'height']
- *
- * @return Attribute
- */
- public function setIntegerAttributes(array $values)
- {
- $this->integer_attributes = $values ?: $this->integer_attributes;
- return $this;
- }
- /**
- * Set allowed iframe resources.
- *
- * @param array $values List of tags: ['http://www.youtube.com']
- *
- * @return Attribute
- */
- public function setIframeWhitelist(array $values)
- {
- $this->iframe_whitelist = $values ?: $this->iframe_whitelist;
- return $this;
- }
- /**
- * Set image proxy URL.
- *
- * The original image url will be urlencoded
- *
- * @param string $url Proxy URL
- *
- * @return Attribute
- */
- public function setImageProxyUrl($url)
- {
- $this->image_proxy_url = $url ?: $this->image_proxy_url;
- return $this;
- }
- /**
- * Set image proxy callback.
- *
- * @param \Closure $callback
- *
- * @return Attribute
- */
- public function setImageProxyCallback($callback)
- {
- $this->image_proxy_callback = $callback ?: $this->image_proxy_callback;
- return $this;
- }
- /**
- * Set image proxy protocol restriction.
- *
- * @param string $value
- *
- * @return Attribute
- */
- public function setImageProxyProtocol($value)
- {
- $this->image_proxy_limit_protocol = $value ?: $this->image_proxy_limit_protocol;
- return $this;
- }
- }
|