123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189 |
- <?php
- namespace PicoFeed\Reader;
- use DOMXPath;
- use PicoFeed\Base;
- use PicoFeed\Client\Client;
- use PicoFeed\Client\Url;
- use PicoFeed\Logging\Logger;
- use PicoFeed\Parser\XmlParser;
/**
 * Reader class.
 *
 * Downloads feeds, discovers feed links inside HTML pages, detects the feed
 * format and instantiates the matching parser.
 *
 * @author Frederic Guillot
 */
class Reader extends Base
{
    /**
     * Feed formats for detection.
     *
     * Maps a parser class name (relative to \PicoFeed\Parser) to the XPath
     * query that identifies the format. Entries are tested in declaration
     * order by detectFormat().
     *
     * @var array
     */
    private $formats = array(
        'Atom' => '//feed',
        'Rss20' => '//rss[@version="2.0"]',
        'Rss92' => '//rss[@version="0.92"]',
        'Rss91' => '//rss[@version="0.91"]',
        'Rss10' => '//rdf',
    );

    /**
     * Download a feed (no discovery).
     *
     * The url is first normalized with prependScheme(), then fetched through
     * the HTTP client configured with the reader config, the cache validators
     * and the credentials given here.
     *
     * @param string $url           Feed url
     * @param string $last_modified Last modified HTTP header
     * @param string $etag          Etag HTTP header
     * @param string $username      HTTP basic auth username
     * @param string $password      HTTP basic auth password
     *
     * @return \PicoFeed\Client\Client
     */
    public function download($url, $last_modified = '', $etag = '', $username = '', $password = '')
    {
        $url = $this->prependScheme($url);

        return Client::getInstance()
            ->setConfig($this->config)
            ->setLastModified($last_modified)
            ->setEtag($etag)
            ->setUsername($username)
            ->setPassword($password)
            ->execute($url);
    }

    /**
     * Discover and download a feed.
     *
     * Downloads $url; when the response is unmodified or is itself a feed it
     * is returned as is. Otherwise the content is scanned for feed links and
     * the first one found is downloaded with the same validators/credentials.
     *
     * @param string $url           Feed or website url
     * @param string $last_modified Last modified HTTP header
     * @param string $etag          Etag HTTP header
     * @param string $username      HTTP basic auth username
     * @param string $password      HTTP basic auth password
     *
     * @return Client
     *
     * @throws SubscriptionNotFoundException When no feed link is found in the document
     */
    public function discover($url, $last_modified = '', $etag = '', $username = '', $password = '')
    {
        $client = $this->download($url, $last_modified, $etag, $username, $password);

        // It's already a feed or the feed was not modified
        if (!$client->isModified() || $this->detectFormat($client->getContent())) {
            return $client;
        }

        // Try to find a subscription
        $links = $this->find($client->getUrl(), $client->getContent());

        if (empty($links)) {
            throw new SubscriptionNotFoundException('Unable to find a subscription');
        }

        // Only the first discovered link is used
        return $this->download($links[0], $last_modified, $etag, $username, $password);
    }

    /**
     * Find feed urls inside a HTML document.
     *
     * Collects the href of every <link> element whose type is exactly
     * "application/rss+xml" or "application/atom+xml" (RSS links first, then
     * Atom links). Relative hrefs are resolved against the base url of $url.
     *
     * @param string $url  Website url
     * @param string $html HTML content
     *
     * @return array List of feed links
     */
    public function find($url, $html)
    {
        Logger::setMessage(get_called_class().': Try to discover subscriptions');

        $dom = XmlParser::getHtmlDocument($html);
        $xpath = new DOMXPath($dom);
        $links = array();

        $queries = array(
            '//link[@type="application/rss+xml"]',
            '//link[@type="application/atom+xml"]',
        );

        foreach ($queries as $query) {
            $nodes = $xpath->query($query);

            foreach ($nodes as $node) {
                $link = $node->getAttribute('href');

                // Skip <link> elements without a usable href
                if (!empty($link)) {
                    $feedUrl = new Url($link);
                    $siteUrl = new Url($url);

                    // The site base url is only needed to resolve relative links
                    $links[] = $feedUrl->getAbsoluteUrl($feedUrl->isRelativeUrl() ? $siteUrl->getBaseUrl() : '');
                }
            }
        }

        Logger::setMessage(get_called_class().': '.implode(', ', $links));

        return $links;
    }

    /**
     * Get a parser instance.
     *
     * The parser class is chosen from the detected feed format and is
     * configured with the hash algorithm and config of this reader.
     *
     * @param string $url      Site url
     * @param string $content  Feed content
     * @param string $encoding HTTP encoding
     *
     * @return \PicoFeed\Parser\Parser
     *
     * @throws UnsupportedFeedFormatException When the format cannot be detected
     */
    public function getParser($url, $content, $encoding)
    {
        $format = $this->detectFormat($content);

        if (empty($format)) {
            throw new UnsupportedFeedFormatException('Unable to detect feed format');
        }

        // The format name doubles as the parser class name
        $className = '\PicoFeed\Parser\\'.$format;

        $parser = new $className($content, $encoding, $url);
        $parser->setHashAlgo($this->config->getParserHashAlgo());
        $parser->setConfig($this->config);

        return $parser;
    }

    /**
     * Detect the feed format.
     *
     * Runs each query of $formats against the document and returns the name
     * of the first format whose query matches exactly one node.
     *
     * @param string $content Feed content
     *
     * @return string Parser name, or an empty string when no format matches
     */
    public function detectFormat($content)
    {
        // NOTE(review): the content is loaded with the HTML document loader,
        // presumably for its tolerance of malformed markup - confirm.
        $dom = XmlParser::getHtmlDocument($content);
        $xpath = new DOMXPath($dom);

        foreach ($this->formats as $parser_name => $query) {
            $nodes = $xpath->query($query);

            if ($nodes->length === 1) {
                return $parser_name;
            }
        }

        return '';
    }

    /**
     * Add the prefix "http://" if the end-user just entered a domain name.
     *
     * The scheme check is case-insensitive because URL schemes are
     * case-insensitive: "HTTP://example.org" already has a scheme and must
     * not become "http://HTTP://example.org". An existing scheme is kept
     * exactly as the user typed it.
     *
     * @param string $url Url
     *
     * @return string Url starting with an http or https scheme
     */
    public function prependScheme($url)
    {
        if (!preg_match('%^https?://%i', $url)) {
            $url = 'http://'.$url;
        }

        return $url;
    }
}
|